#!/usr/bin/env python
# coding: utf-8

# In[5]:

from document_polluter import DocumentPolluter
import yaml
import os
import requests
import json
from scipy import stats

# Load the Azure credentials used for the Text Analytics API calls below.
with open('credentials.yaml') as file:
    credentials = yaml.load(file, Loader=yaml.FullLoader)


# In[2]:

# Load the source paragraphs and generate the "polluted" variants,
# one set per group in the 'us-race' genre.
with open('paragraphs/us_race.yaml') as file:
    documents = yaml.load(file, Loader=yaml.FullLoader)

dp = DocumentPolluter(documents=documents, genre='us-race')
len(dp.eligible_documents)


# In[3]:

# Score every polluted document with the Azure Text Analytics sentiment endpoint,
# keeping one list of scores per group.
url = f"{credentials['azure']['endpoint']}/text/analytics/v2.1/sentiment"
headers = {
    'content-type': 'application/json',
    'Ocp-Apim-Subscription-Key': credentials['azure']['key'],
}

sentiment = {}

for genre, docs in dp.polluted_documents.items():
    data = {'documents': []}
    for idx, document in enumerate(docs):
        data['documents'].append({'language': 'en', 'id': idx, 'text': document})

    r = requests.post(url=url, data=json.dumps(data), headers=headers)
    r.raise_for_status()  # fail fast if the API rejects the request
    results = r.json()
    sentiment[genre] = [s['score'] for s in results['documents']]


# In[11]:

# Compare the sentiment score distributions for each pair of groups
# with a Mann-Whitney U test.
pairs = [
    ('asian', 'white'),
    ('black', 'white'),
    ('latino', 'white'),
    ('asian', 'black'),
    ('latino', 'black'),
    ('latino', 'asian'),
]

for group_a, group_b in pairs:
    stat, p = stats.mannwhitneyu(sentiment[group_a], sentiment[group_b])
    print('%s vs %s: Statistics=%.3f, p=%.3f' % (group_a, group_b, stat, p))


# In[5]:

# Collect the per-sentence scores for every group, together with the largest
# score difference observed across groups for that sentence.
results = []

for idx, document in enumerate(dp.eligible_documents):
    scores = [sentiment['asian'][idx], sentiment['black'][idx],
              sentiment['latino'][idx], sentiment['white'][idx]]

    results.append({
        'asian_sentence': dp.polluted_documents['asian'][idx],
        'black_sentence': dp.polluted_documents['black'][idx],
        'latino_sentence': dp.polluted_documents['latino'][idx],
        'white_sentence': dp.polluted_documents['white'][idx],
        'asian_score': round(sentiment['asian'][idx], 3),
        'black_score': round(sentiment['black'][idx], 3),
        'latino_score': round(sentiment['latino'][idx], 3),
        'white_score': round(sentiment['white'][idx], 3),
        'difference': round(max(scores) - min(scores), 3)
    })


# In[6]:

# Show only the sentences whose sentiment score changes across groups.
list(filter(lambda x: x['difference'] != 0, results))
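

# In[ ]:

# Illustrative addition (not part of the original notebook): alongside the
# pairwise Mann-Whitney U tests above, a quick look at each group's central
# tendency can make the comparison easier to read. This is a minimal sketch
# that assumes `sentiment` is the {group: [score, ...]} dict built in the
# Azure scoring cell above.
from statistics import mean, median

for genre, scores in sorted(sentiment.items()):
    print('%s: n=%d, mean=%.3f, median=%.3f' % (genre, len(scores), mean(scores), median(scores)))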