A computer-assisted exploration of visual themes in Taylor Swift's music videos

Eamonn Bell, Columbia University <[email protected]>


For academic use only. WORK IN PROGRESS.


Summary

In this notebook I show how to use the Clarifai neural-network-driven image tagging API on music videos released by Taylor Swift in order to demonstrate the utility of this technology to the study of music videos.

Abstract

TBD

In [1]:
from clarifai.client import ClarifaiApi
import glob
import os
import pandas 
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
In [2]:
clarifai_api = ClarifaiApi()
In [3]:
TS_WE_ARE_NEVER = 'WA4iX5D9Z64'
TS_22 = 'AgFeZr5ptV8'
video_ids = [TS_WE_ARE_NEVER, TS_22]
In [20]:
for video_id in video_ids:
    os.system(r"youtube-dl {} -o '%(id)s'".format(video_id))
0
0
In [28]:
for video_id in video_ids:
    os.system(r"ffmpeg -i {0}.mp4 -r 1 {0}-%3d.jpeg".format(video_id))
0
0
In [15]:
results = {}

# http://stackoverflow.com/a/434328
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))

def tag_images_queue(queue):
    results = []
    
    for frames in queue:
        
        open_files = [open(frame) for frame in frames]
   
       
        api_response = clarifai_api.tag_images(open_files)
        
        closed = [f.close() for f in open_files]
        
        results.extend(api_response['results'])
        
    return results    

def tag_batch(frames):
    file_count = len(frames)
    
    q = []
    
    if file_count >= 127:
        for chunk in chunker(frames, 127):
            q.append(chunk)
    else:
        q.append(frames)
    
    return tag_images_queue(q)
    
for video_id in video_ids:
    frames = sorted(glob.glob('{}-*.jpeg'.format(video_id)))
    api_response_results = tag_batch(frames)
    results[video_id] = api_response_results
In [19]:
len(results['AgFeZr5ptV8'])
Out[19]:
245
In [20]:
def parse_video_results(video_results):
    classes_seen = []
    tags_dicts = []
    
    for frame_result in video_results:
        classes_seen.extend(frame_result['result']['tag']['classes'])
        
    for frame_result in video_results:
        classes = frame_result['result']['tag']['classes']
        probs = frame_result['result']['tag']['probs']
        
        tags_dict = dict(zip(classes, probs))
        
        unseen_classes = (class_ for class_ in classes_seen if class_ not in classes)
        
        for unseen_class in unseen_classes:
            tags_dict[unseen_class] = np.nan
        
        tags_dicts.append(tags_dict)
           
    return tags_dicts            
In [21]:
def parse_results_dict(results_dict):
    _ = []
    
    for video_id in results_dict.keys():
        video_results = results_dict[video_id]
        
        parsed_video = parse_video_results(video_results)
        video_df = pandas.DataFrame(parsed_video)
        video_df['_video_id'] = video_id
        video_df.index.names = ['frame']
        _.append(video_df)
        
    
    return pandas.concat(_)
In [22]:
data = parse_results_dict(results)
In [23]:
data.head()
Out[23]:
_video_id abstract action actor actress adult affection ailment animal anticipation ... winter woman women wood woodland wool writing young youth zombie
frame
0 WA4iX5D9Z64 0.987164 NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 WA4iX5D9Z64 0.978400 NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 WA4iX5D9Z64 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 WA4iX5D9Z64 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 WA4iX5D9Z64 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 535 columns

In [26]:
data.to_pickle('video_data.pkl')

Recurring imagery

In [103]:
def most_common_images(video_id, limit=100):
    video_data = data[data._video_id == video_id]
    return video_data.count().order(ascending=False).head(limit)
In [104]:
most_common_images(TS_WE_ARE_NEVER)
Out[104]:
_video_id      217
people         192
adult          183
women          172
portrait       163
clothing       141
one            123
politics       122
men            117
two            103
music           98
group           91
recreation      91
indoors         89
festival        82
room            72
musician        67
child           59
protest         55
performance     51
religion        48
singer          43
light           40
night           39
education       38
conflict        37
fashion         36
furniture       36
family          31
dark            31
              ... 
backstage       15
stage           14
shape           14
nature          14
dress           14
health care     14
youth           14
graphic         14
action          14
museum          13
war             13
election        12
industry        12
retro           12
funeral         12
three           12
school          12
hospital        11
violence        11
building        11
love            10
seat            10
city            10
ceremony        10
colour          10
side view       10
street          10
monochrome      10
modern          10
sepia           10
dtype: int64

Narratology: male/female dynamics

In [68]:
data[data._video_id == TS_WE_ARE_NEVER][['women', 'men']].plot()
plt.title('Taylor Swift - We Are Never Getting Back Together')
plt.xlabel('Video location (s)')
plt.ylabel('Tag class confidence (proprietary)')
Out[68]:
<matplotlib.text.Text at 0x10a7b3450>
In [67]:
data[data._video_id == TS_22][['women', 'men']].plot()
plt.title('Taylor Swift - 22')
plt.xlabel('Video location (s)')
plt.ylabel('Tag class confidence (proprietary)')
Out[67]:
<matplotlib.text.Text at 0x109c80ed0>
In [111]:
d = data[data._video_id == TS_22][['women', 'men']].interpolate()

woman_on_screen = pandas.rolling_mean(d['women'], 2) > 0.90
to_plot = woman_on_screen.apply(lambda x: 0.85 if x else 0.35)
to_plot.plot(color='red')

men_on_screen = pandas.rolling_mean(d['men'], 2) > 0.90
to_plot = men_on_screen.apply(lambda x: 0.75 if x else 0.25)
to_plot.plot(color='blue')
plt.title('Gendered Screentime : Taylor Swift - 22')
Out[111]:
<matplotlib.text.Text at 0x10cea9290>
In [108]:
d = data[data._video_id == TS_WE_ARE_NEVER][['women', 'men']].interpolate()

woman_on_screen = pandas.rolling_mean(d['women'], 2) > 0.90
to_plot = woman_on_screen.apply(lambda x: 0.85 if x else 0.35)
to_plot.plot(color='red')

men_on_screen = pandas.rolling_mean(d['men'], 2) > 0.90
to_plot = men_on_screen.apply(lambda x: 0.75 if x else 0.25)
to_plot.plot(color='blue')
plt.title('Gendered Screentime : We Are Never Getting Back Together')
Out[108]:
<matplotlib.text.Text at 0x10cb35f10>
In [ ]: