import glob
import json

import pandas as pd
# Folder holding the Mechanical Turk result CSVs and the files derived from them.
turkfolder = 'helpers/aggregation_maps/mechanical_turk/'

# Raw result files only, in a stable (sorted) order. The '.disagreements.csv'
# files written next to them by make_disagreement_files also match 'results*',
# so they are filtered out; otherwise a second run would try to process them
# and fail on the missing 'Agreement' column.
results_files = sorted(
    path for path in glob.glob(turkfolder + 'results*')
    if not path.endswith('.disagreements.csv')
)
def make_disagreement_files(filename):
    """Write the rows of a results CSV on which the annotators disagreed.

    Reads ``filename``, keeps the rows whose ``Agreement`` column equals
    ``'No'``, prints how many there are and what share of all rows they make
    up, and saves their ``qid``, ``en_label``, ``Answer1``, ``Answer2`` and
    ``Answer`` columns to ``filename + '.disagreements.csv'`` (no index).

    Args:
        filename: Path of the results CSV to read.

    Returns:
        The DataFrame of disagreeing rows, with all of the input's columns.
    """
    print(filename)
    # The first CSV column becomes the index, as DataFrame.from_csv used to
    # do; the index is not written to the output file.
    adf = pd.read_csv(filename, index_col=0)
    disagreements = adf[adf['Agreement'] == 'No']
    print('%d disagreements' % len(disagreements))
    # An empty results file would otherwise raise ZeroDivisionError.
    share = len(disagreements) / float(len(adf)) if len(adf) else 0.0
    print('%.2f%% of all rows' % (100.0 * share))
    disagreements[['qid', 'en_label', 'Answer1', 'Answer2', 'Answer']].to_csv(
        filename + '.disagreements.csv', index=False)
    return disagreements
# Produce a '.disagreements.csv' companion file for every results file found.
for results_file in results_files:
    make_disagreement_files(results_file)
# Recorded output:
# helpers/aggregation_maps/for mechanical turk//results_citizenships.csv
# 229 disgareements
# 0.3357771261 as a percentage
# helpers/aggregation_maps/for mechanical turk//results_ethnic_groups.csv
# 284 disgareements
# 0.387978142077 as a percentage
# Print the Q-ids of disagreeing rows that also appear in the country map.
country_map = pd.read_csv('helpers/aggregation_maps/country_maps.csv')
# NOTE(review): `disagreements` is not assigned at module level anywhere in
# the visible code; presumably it was left over from the interactive session.
# Confirm which results file it is meant to come from.
dq = disagreements['qid'].apply(lambda x: x.split('http://wikidata.org/wiki/')[1])
# Build the lookup once instead of rebuilding a list for every Q-id tested.
country_qids = set(country_map['qid'])
for d in dq:
    if d in country_qids:
        print(d)
# Recorded output: Q191 Q33 Q224 Q262 Q1183 Q37 Q902
def make_cutlure_map(param):
    """Build and save the Q-id -> answer map for one results category.

    Reads ``results_<param>.csv`` and ``results_<param>.csv.disagreements.csv``
    from ``turkfolder``. The ``Answer`` of every row whose ``Agreement`` is
    ``'Yes'`` is combined with the ``Answer`` of every row of the
    disagreements file; on a duplicate key the disagreements file wins. The
    ``http://wikidata.org/wiki/`` prefix is stripped from each key and the
    result is written to ``<param>_map.json`` in ``turkfolder``.

    Args:
        param: Category name used to build the input and output file names.

    Raises:
        IndexError: If a ``qid`` value does not contain the
            ``http://wikidata.org/wiki/`` prefix.
    """
    agreements_path = turkfolder + 'results_%s.csv' % param
    disagreements_path = turkfolder + 'results_%s.csv.disagreements.csv' % param
    # Index both tables by 'qid' so that to_dict() is keyed by the entity URL.
    # NOTE(review): assumes both files carry a 'qid' column, as the
    # disagreement writer in this file selects one - confirm.
    agreements_df = pd.read_csv(agreements_path, index_col='qid')
    disagreements_df = pd.read_csv(disagreements_path, index_col='qid')
    agree = agreements_df[agreements_df['Agreement'] == 'Yes']['Answer'].to_dict()
    disagree = disagreements_df['Answer'].to_dict()
    # Later entries override earlier ones, so the disagreements file wins.
    cultures_map = dict(agree)
    cultures_map.update(disagree)
    qid_map = {
        url.split('http://wikidata.org/wiki/')[1]: culture
        for url, culture in cultures_map.items()
    }
    with open(turkfolder + '%s_map.json' % param, 'w') as map_file:
        json.dump(qid_map, map_file)
# Write one '<param>_map.json' file per category.
# NOTE(review): the recorded output earlier in this file lists
# 'results_citizenships.csv' (plural), while 'citizenship' is used here -
# confirm the name matches the results file on disk.
for param in ('ethnic_groups', 'citizenship'):
    make_cutlure_map(param)