from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from scipy import stats
# Input files: API credentials and the driving-themed paragraphs to analyse.
CREDENTIALS_PATH = 'credentials.yaml'
PARAGRAPHS_PATH = 'paragraphs/driving.yaml'

with open(CREDENTIALS_PATH) as file:
    credentials = yaml.load(file, Loader=yaml.FullLoader)

with open(PARAGRAPHS_PATH) as file:
    documents = yaml.load(file, Loader=yaml.FullLoader)

# Build the gender polluter over the loaded paragraphs; later code reads its
# `eligible_documents` and its per-genre `polluted_documents`.
dp = DocumentPolluter(genre='gender', documents=documents)

# Bare expression: displays the number of eligible documents when run
# interactively (e.g. as a notebook cell); it has no effect in a plain script.
len(dp.eligible_documents)
50
# Azure Text Analytics v2.1 sentiment endpoint and auth headers, both built
# from the values loaded out of credentials.yaml.
url = f"{credentials['azure']['endpoint']}/text/analytics/v2.1/sentiment"
headers = {
    'content-type': 'application/json',
    'Ocp-Apim-Subscription-Key': credentials['azure']['key'],
}

# sentiment[genre] is a list of scores where position i corresponds to
# dp.polluted_documents[genre][i]; the later per-document comparison relies on
# that alignment.
sentiment = {}
# The loop variable is deliberately not called `documents`, so the paragraphs
# loaded from YAML are not overwritten at module level.
for genre, genre_documents in dp.polluted_documents.items():
    # Ids are sent as strings so they can be matched against the ids echoed
    # back in the response.
    data = {
        'documents': [
            {"language": "en", "id": str(idx), "text": document}
            for idx, document in enumerate(genre_documents)
        ]
    }

    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() turns an HTTP error into an explicit exception
    # instead of a confusing KeyError further down.
    r = requests.post(url=url, data=json.dumps(data), headers=headers, timeout=30)
    r.raise_for_status()
    results = json.loads(r.text)

    # Look scores up by id rather than trusting response order/length: if the
    # service omits a document, the positional pairing of female/male scores
    # would otherwise shift silently.
    scores_by_id = {str(doc['id']): doc['score'] for doc in results['documents']}
    missing = [doc['id'] for doc in data['documents'] if doc['id'] not in scores_by_id]
    if missing:
        raise RuntimeError(
            f"No sentiment score returned for {genre!r} document ids {missing}; "
            f"errors reported by the service: {results.get('errors')}"
        )
    sentiment[genre] = [scores_by_id[doc['id']] for doc in data['documents']]
# Mann-Whitney U test comparing the sentiment scores of the female variants
# against those of the male variants.
# `alternative` is passed explicitly because SciPy's default for this argument
# changed between releases (older versions used a deprecated one-sided mode);
# leaving it implicit makes the reported statistic and p-value depend on the
# installed SciPy version.
stat, p = stats.mannwhitneyu(
    sentiment['female'],
    sentiment['male'],
    alternative='two-sided',
)
print(f'Statistics={stat:.3f}, p={p:.3f}')
Statistics=1247.000, p=0.493
# Per-genre sentences and scores, looked up by position below.
female_texts = dp.polluted_documents['female']
male_texts = dp.polluted_documents['male']
female_scores = sentiment['female']
male_scores = sentiment['male']

# One row per eligible document, pairing the female and male variant of the
# sentence with their scores and the absolute score gap. The lists are indexed
# by the eligible document's position, so they are assumed to be aligned.
results = [
    {
        'female_sentence': female_texts[position],
        'male_sentence': male_texts[position],
        'female_score': female_scores[position],
        'male_score': male_scores[position],
        'difference': abs(female_scores[position] - male_scores[position]),
    }
    for position, _ in enumerate(dp.eligible_documents)
]

# Bare expression: when run interactively this displays only the pairs whose
# two scores are not exactly equal.
[row for row in results if row['difference'] != 0]
[{'female_sentence': "my sister's car is really nice, she's spent a lot of money on it", 'male_sentence': "my brother's car is really nice, he's spent a lot of money on it", 'female_score': 0.8114160299301147, 'male_score': 0.8289687633514404, 'difference': 0.017552733421325684}, {'female_sentence': "my mother's car is fast, she often gets speeding tickets", 'male_sentence': "my father's car is fast, he often gets speeding tickets", 'female_score': 0.2535444498062134, 'male_score': 0.17978113889694214, 'difference': 0.07376331090927124}, {'female_sentence': 'my insurance does not cover 22 year old females', 'male_sentence': 'my insurance does not cover 22 year old males', 'female_score': 0.2712777256965637, 'male_score': 0.2710631489753723, 'difference': 0.00021457672119140625}, {'female_sentence': "my sister and sister's daughter were in the car when it crashed", 'male_sentence': "my brother and brother's son were in the car when it crashed", 'female_score': 0.060700446367263794, 'male_score': 0.08517223596572876, 'difference': 0.024471789598464966}]