from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from collections import defaultdict
from scipy import stats
# API credentials; the Google key is later read as credentials['google']['key'].
with open('credentials.yaml') as credentials_file:
    credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)

# Texts to score; consumed below as a mapping of group name -> list of texts.
with open('paragraphs/manual_gendered.yaml') as documents_file:
    documents = yaml.load(documents_file, Loader=yaml.FullLoader)
def get_google_sentiment(document):
    """Score one text with the Google Natural Language sentiment endpoint.

    Args:
        document: Plain-text content sent as the document body.

    Returns:
        The ``documentSentiment`` object from the JSON response. Callers in
        this file read its ``score`` field.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
        KeyError: If a successful response lacks ``documentSentiment``.
    """
    url = 'https://language.googleapis.com/v1/documents:analyzeSentiment'
    # Pass the key through ``params`` so requests URL-encodes it instead of
    # splicing it into the URL verbatim.
    params = {'key': credentials['google']['key']}
    headers = {'content-type': 'application/json'}
    data = {
        'document': {
            'type': 'PLAIN_TEXT',
            'content': document
        }
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    r = requests.post(
        url=url,
        params=params,
        data=json.dumps(data),
        headers=headers,
        timeout=30,
    )
    # Surface API errors (bad key, quota, ...) as an HTTPError rather than as
    # a misleading KeyError on the missing 'documentSentiment' field.
    r.raise_for_status()
    return json.loads(r.text)['documentSentiment']
# Collect the sentiment result of every text, grouped under its top-level key.
sentiment = defaultdict(list)
for genre, docs in documents.items():
    group_sentiments = sentiment[genre] if docs else None
    for document in docs:
        group_sentiments.append(get_google_sentiment(document))

# Extract the numeric scores of the two groups being compared.
female_scores = [entry['score'] for entry in sentiment['female']]
male_scores = [entry['score'] for entry in sentiment['male']]

# Mann-Whitney U test between the female and male score samples.
stat, p = stats.mannwhitneyu(female_scores, male_scores)
summary = 'Statistics=%.3f, p=%.3f' % (stat, p)
print(summary)
# Output: Statistics=20.500, p=0.322
# Build one record per female/male sentence pair with both scores and their
# absolute difference.
#
# The lists are zipped so that every pair is visited. Enumerating
# ``documents`` directly would iterate the mapping's top-level keys and
# therefore stop after len(documents) pairs. ``zip`` stops at the shortest
# list, so unmatched trailing entries (if any) are ignored.
results = []
paired = zip(
    documents['female'],
    documents['male'],
    sentiment['female'],
    sentiment['male'],
)
for female_sentence, male_sentence, female_sentiment, male_sentiment in paired:
    female_score = female_sentiment['score']
    male_score = male_sentiment['score']
    results.append({
        'female_sentence': female_sentence,
        'male_sentence': male_sentence,
        'female_score': female_score,
        'male_score': male_score,
        'difference': abs(female_score - male_score),
    })

# Pairs whose two scores are not identical.
differing_results = [row for row in results if row['difference'] != 0]
# Bare expression: an interactive session displays the list; it is a no-op
# when the file runs as a plain script.
differing_results
# NOTE(review): the output recorded here before was `[]`, but it appears to
# come from a run that only compared the first len(documents) pairs -- re-run
# to refresh it.