Eamonn Bell, Columbia University <epb2125@columbia.edu>
For academic use only. WORK IN PROGRESS.
from clarifai.client import ClarifaiApi
import glob
import os
import pandas
import numpy as np
from matplotlib import pyplot as plt
# IPython notebook magic: render matplotlib figures inline (not valid Python
# outside IPython/Jupyter).
%matplotlib inline
# Client for the Clarifai image-tagging API; presumably reads credentials
# from the environment — TODO confirm configuration.
clarifai_api = ClarifaiApi()
# YouTube video ids for the two Taylor Swift videos under study.
TS_WE_ARE_NEVER = 'WA4iX5D9Z64'  # "We Are Never Ever Getting Back Together"
TS_22 = 'AgFeZr5ptV8'            # "22"
video_ids = [TS_WE_ARE_NEVER, TS_22]
# Download each video into the working directory, named by its id.
# NOTE(review): os.system with an interpolated shell string is safe here only
# because the ids are hard-coded constants; prefer subprocess.run([...],
# shell=False) if the ids ever come from user input. The '%(id)s' output
# template presumably yields "<id>.mp4" as consumed below — TODO confirm.
for video_id in video_ids:
    os.system(r"youtube-dl {} -o '%(id)s'".format(video_id))
0 0
# Extract one frame per second of video (ffmpeg -r 1 = 1 fps output) as
# JPEGs named "<id>-001.jpeg", "<id>-002.jpeg", ...
for video_id in video_ids:
    os.system(r"ffmpeg -i {0}.mp4 -r 1 {0}-%3d.jpeg".format(video_id))
0 0
# Per-video Clarifai responses, keyed by YouTube video id.
results = {}


# http://stackoverflow.com/a/434328
def chunker(seq, size):
    """Yield successive slices of *seq* of length *size* (last may be shorter).

    Returns a generator; an empty *seq* yields nothing.
    """
    # range() instead of the Python-2-only xrange(): identical output on
    # Python 2 and 3 (the index list built on Python 2 is negligible here).
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
def tag_images_queue(queue):
    """Send each batch of frame paths in *queue* to the Clarifai bulk
    tagging endpoint and return the concatenated per-frame results.

    queue -- iterable of lists of image file paths, each list no larger
             than one API batch.

    Returns the flat list of per-image result dicts from the API responses.
    """
    results = []
    for frames in queue:
        # Open in binary mode (these are JPEGs, not text) and guarantee the
        # handles are closed even if the API call raises — the original
        # leaked every handle in the batch on an exception.
        open_files = [open(frame, 'rb') for frame in frames]
        try:
            api_response = clarifai_api.tag_images(open_files)
        finally:
            for f in open_files:
                f.close()
        results.extend(api_response['results'])
    return results
def tag_batch(frames):
    """Tag a whole video's frames, splitting into API-sized batches.

    The Clarifai bulk endpoint accepts a limited number of images per call,
    so lists of 127 or more paths are chunked before submission; smaller
    lists go out as a single batch.
    """
    if len(frames) < 127:
        batches = [frames]
    else:
        batches = list(chunker(frames, 127))
    return tag_images_queue(batches)
# Tag every extracted frame of every video. sorted() keeps the frames in
# temporal order (ffmpeg's zero-padded %3d names sort lexicographically),
# so row N of the results corresponds to second N of the video.
for video_id in video_ids:
    frames = sorted(glob.glob('{}-*.jpeg'.format(video_id)))
    api_response_results = tag_batch(frames)
    results[video_id] = api_response_results
# Notebook echo: number of tagged frames for "22" (printed 245 below).
len(results['AgFeZr5ptV8'])
245
def parse_video_results(video_results):
    """Convert one video's Clarifai results into per-frame {class: prob} dicts.

    video_results -- list of per-frame API result dicts, each shaped
                     {'result': {'tag': {'classes': [...], 'probs': [...]}}}.

    Returns one dict per frame. Every tag class seen anywhere in the video
    appears in every frame's dict; classes absent from a given frame are
    filled with np.nan so a DataFrame built from the list has aligned columns.
    """
    # A set both deduplicates the class vocabulary and makes the fill-in
    # pass O(1) per membership test — the original list held duplicates and
    # scanned it per class, which was quadratic.
    classes_seen = set()
    for frame_result in video_results:
        classes_seen.update(frame_result['result']['tag']['classes'])
    tags_dicts = []
    for frame_result in video_results:
        classes = frame_result['result']['tag']['classes']
        probs = frame_result['result']['tag']['probs']
        tags_dict = dict(zip(classes, probs))
        # Pad this frame's dict with NaN for every class it did not report.
        for unseen_class in classes_seen.difference(classes):
            tags_dict[unseen_class] = np.nan
        tags_dicts.append(tags_dict)
    return tags_dicts
def parse_results_dict(results_dict):
    """Build one long DataFrame of per-frame tag probabilities.

    results_dict -- mapping of video_id -> raw Clarifai results for that
                    video (as stored in the module-level `results`).

    Each video contributes a DataFrame (rows = frames, columns = tag
    classes) with a '_video_id' column identifying its source; the pieces
    are concatenated along the row axis.
    """
    # `_` is conventionally a throwaway name in Python; use a real name for
    # the accumulator, and iterate key/value pairs directly.
    per_video_frames = []
    for video_id, video_results in results_dict.items():
        video_df = pandas.DataFrame(parse_video_results(video_results))
        video_df['_video_id'] = video_id
        # Name the integer index: row N is the frame at second N.
        video_df.index.names = ['frame']
        per_video_frames.append(video_df)
    return pandas.concat(per_video_frames)
# Flatten the raw API responses into one tidy frame-by-tag-class table.
data = parse_results_dict(results)
data.head()
_video_id | abstract | action | actor | actress | adult | affection | ailment | animal | anticipation | ... | winter | woman | women | wood | woodland | wool | writing | young | youth | zombie | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
frame | |||||||||||||||||||||
0 | WA4iX5D9Z64 | 0.987164 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | WA4iX5D9Z64 | 0.978400 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | WA4iX5D9Z64 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | WA4iX5D9Z64 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | WA4iX5D9Z64 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 535 columns
# Persist the tag table so the slow (and billable) API calls need not rerun.
data.to_pickle('video_data.pkl')
def most_common_images(video_id, limit=100, frame_data=None):
    """Return the *limit* tag classes seen in the most frames of *video_id*,
    most frequent first.

    video_id   -- YouTube id selecting rows by the '_video_id' column.
    limit      -- number of top classes to return (default 100).
    frame_data -- DataFrame of per-frame tag probabilities; defaults to the
                  module-level `data` table (backward compatible).

    count() tallies non-NaN cells per column, i.e. the number of frames in
    which each tag class appeared.
    """
    if frame_data is None:
        frame_data = data
    video_data = frame_data[frame_data._video_id == video_id]
    # Series.order() was removed in pandas 0.20; sort_values is the
    # long-supported equivalent.
    return video_data.count().sort_values(ascending=False).head(limit)
# Notebook echo: top tag counts for "We Are Never Ever Getting Back Together".
most_common_images(TS_WE_ARE_NEVER)
_video_id 217 people 192 adult 183 women 172 portrait 163 clothing 141 one 123 politics 122 men 117 two 103 music 98 group 91 recreation 91 indoors 89 festival 82 room 72 musician 67 child 59 protest 55 performance 51 religion 48 singer 43 light 40 night 39 education 38 conflict 37 fashion 36 furniture 36 family 31 dark 31 ... backstage 15 stage 14 shape 14 nature 14 dress 14 health care 14 youth 14 graphic 14 action 14 museum 13 war 13 election 12 industry 12 retro 12 funeral 12 three 12 school 12 hospital 11 violence 11 building 11 love 10 seat 10 city 10 ceremony 10 colour 10 side view 10 street 10 monochrome 10 modern 10 sepia 10 dtype: int64
# Raw per-second confidence traces for the 'women' and 'men' tag classes.
data[data._video_id == TS_WE_ARE_NEVER][['women', 'men']].plot()
plt.title('Taylor Swift - We Are Never Getting Back Together')
plt.xlabel('Video location (s)')
plt.ylabel('Tag class confidence (proprietary)')
<matplotlib.text.Text at 0x10a7b3450>
# Same raw 'women'/'men' confidence plot for the "22" video.
data[data._video_id == TS_22][['women', 'men']].plot()
plt.title('Taylor Swift - 22')
plt.xlabel('Video location (s)')
plt.ylabel('Tag class confidence (proprietary)')
<matplotlib.text.Text at 0x109c80ed0>
# Crude "who is on screen" signal for "22": interpolate the NaN gaps,
# smooth with a 2-sample rolling mean, and threshold at 0.90 confidence.
d = data[data._video_id == TS_22][['women', 'men']].interpolate()
# NOTE(review): pandas.rolling_mean was removed in pandas 0.20 — the modern
# equivalent is d['women'].rolling(2).mean(); update before rerunning.
woman_on_screen = pandas.rolling_mean(d['women'], 2) > 0.90
# Map booleans onto two horizontal levels so on/off reads as a square wave;
# the women (0.85/0.35) and men (0.75/0.25) bands are offset to avoid overlap.
to_plot = woman_on_screen.apply(lambda x: 0.85 if x else 0.35)
to_plot.plot(color='red')
men_on_screen = pandas.rolling_mean(d['men'], 2) > 0.90
to_plot = men_on_screen.apply(lambda x: 0.75 if x else 0.25)
to_plot.plot(color='blue')
plt.title('Gendered Screentime : Taylor Swift - 22')
<matplotlib.text.Text at 0x10cea9290>
# Same thresholded screentime square-wave plot for "We Are Never Ever
# Getting Back Together" (see the "22" cell: interpolate, 2-sample rolling
# mean, 0.90 threshold).
d = data[data._video_id == TS_WE_ARE_NEVER][['women', 'men']].interpolate()
# NOTE(review): pandas.rolling_mean was removed in pandas 0.20 — the modern
# equivalent is d['women'].rolling(2).mean(); update before rerunning.
woman_on_screen = pandas.rolling_mean(d['women'], 2) > 0.90
to_plot = woman_on_screen.apply(lambda x: 0.85 if x else 0.35)
to_plot.plot(color='red')
men_on_screen = pandas.rolling_mean(d['men'], 2) > 0.90
to_plot = men_on_screen.apply(lambda x: 0.75 if x else 0.25)
to_plot.plot(color='blue')
plt.title('Gendered Screentime : We Are Never Getting Back Together')
<matplotlib.text.Text at 0x10cb35f10>