"""Write disagreement reports and QID -> answer maps from the results CSVs.

For every ``results*`` CSV under ``turkfolder`` the rows whose ``Agreement``
column is ``'No'`` are written to ``<file>.disagreements.csv``.  For each
parameter name a JSON map from bare QID to ``Answer`` is then built from the
agreed rows plus the rows of the matching disagreements file.
"""
import glob
import json
import os

import pandas as pd

turkfolder = 'helpers/aggregation_maps/mechanical_turk/'

# Prefix stripped from ``qid`` URLs to obtain the bare identifier.
_WIKIDATA_PREFIX = 'http://wikidata.org/wiki/'

# Suffix appended to a results file name for its disagreement report.
_DISAGREEMENTS_SUFFIX = '.disagreements.csv'


def _qid_from_url(url):
    """Return the part of *url* that follows the wikidata URL prefix.

    Raises IndexError when *url* does not contain the prefix.
    """
    return url.split(_WIKIDATA_PREFIX)[1]


def make_disagreement_files(filename):
    """Write the disagreeing rows of *filename* to a sibling CSV.

    Rows whose ``Agreement`` column equals ``'No'`` are selected and their
    ``qid``, ``en_label``, ``Answer1``, ``Answer2`` and ``Answer`` columns
    are written (without the index) to ``filename + '.disagreements.csv'``.
    The count and the fraction of disagreeing rows are printed.

    Args:
        filename: path of a results CSV; its first column is used as the
            index, as the removed ``DataFrame.from_csv`` did by default.

    Returns:
        The DataFrame of disagreeing rows (all columns).
    """
    print(filename)
    # ``DataFrame.from_csv`` no longer exists in current pandas; read_csv
    # with index_col=0 keeps the same column layout.
    adf = pd.read_csv(filename, index_col=0)
    disagreements = adf[adf['Agreement'] == 'No']

    total = len(adf)
    # An empty results file would otherwise divide by zero.
    ratio = len(disagreements) / float(total) if total else 0.0
    # Single-string prints behave the same on Python 2 and 3; the text
    # matches what the original print statements emitted.
    print('%d  disgareements' % len(disagreements))
    print('%s  as a percentage' % ratio)

    columns = ['qid', 'en_label', 'Answer1', 'Answer2', 'Answer']
    # NOTE(review): this overwrites any existing disagreements file --
    # confirm that is acceptable if those files are ever edited by hand.
    disagreements[columns].to_csv(filename + _DISAGREEMENTS_SUFFIX,
                                  index=False)
    return disagreements


def make_cutlure_map(param, folder=None):
    """Build and save the QID -> answer map for *param*.

    Reads ``results_<param>.csv`` and ``results_<param>.csv.disagreements.csv``
    from *folder*, takes ``Answer`` for rows with ``Agreement == 'Yes'`` from
    the first and ``Answer`` for every row of the second, and writes the
    merged mapping to ``<param>_map.json`` in the same folder.  On a key
    clash the disagreements file wins, as in the original merge order.

    The original body referenced the undefined names ``ethnic_df`` and
    ``ethnic_disagreements_df`` and never used the paths it computed; the
    frames are now loaded from those paths.  Both are indexed by ``qid``
    because the mapping keys are split as wikidata URLs.

    Args:
        param: name fragment of the results file, e.g. ``'ethnic_groups'``.
        folder: directory holding the files; defaults to ``turkfolder``.

    Returns:
        The dict that was written to the JSON file.
    """
    if folder is None:
        folder = turkfolder
    agreements_path = os.path.join(folder, 'results_%s.csv' % param)
    disagreements_path = os.path.join(
        folder, 'results_%s.csv%s' % (param, _DISAGREEMENTS_SUFFIX))

    results_df = pd.read_csv(agreements_path).set_index('qid')
    resolved_df = pd.read_csv(disagreements_path).set_index('qid')

    agree = results_df[results_df['Agreement'] == 'Yes']['Answer'].to_dict()
    disagree = resolved_df['Answer'].to_dict()

    cultures_map = dict(agree)
    cultures_map.update(disagree)

    qid_map = {}
    for url, culture in cultures_map.items():
        qid_map[_qid_from_url(url)] = culture

    # Context manager so the handle is flushed and closed deterministically.
    with open(os.path.join(folder, '%s_map.json' % param), 'w') as handle:
        json.dump(qid_map, handle)
    return qid_map


if __name__ == '__main__':
    # Replaces the IPython-only ``!ls $turkfolder/results*``.  Reports
    # written by a previous run also match ``results*`` but lack the
    # ``Agreement`` column, so they are excluded.
    results_files = sorted(
        path for path in glob.glob(os.path.join(turkfolder, 'results*'))
        if not path.endswith(_DISAGREEMENTS_SUFFIX)
    )

    all_disagreements = []
    for results_file in results_files:
        all_disagreements.append(make_disagreement_files(results_file))

    country_map = pd.read_csv('helpers/aggregation_maps/country_maps.csv')
    # Build the lookup once instead of a fresh list per tested QID.
    country_qids = set(country_map['qid'])

    # The original read a ``disagreements`` name that only existed inside
    # make_disagreement_files; use the frames that function returns.
    for disagreements in all_disagreements:
        dq = disagreements['qid'].apply(_qid_from_url)
        for d in dq:
            if d in country_qids:
                print(d)

    for param in ['ethnic_groups', 'citizenship']:
        make_cutlure_map(param)