from scipy.cluster import hierarchy
import numpy as np
import matplotlib.pyplot as plt
import random
import math
import os
import re
# Load every ".txt" file in `filedir`: the file name without its extension
# becomes the document name, and the text is lower-cased, has newlines turned
# into spaces and has every character that is neither a word character nor
# whitespace removed.
filedir = "WOTclean"
documents = []
filenames = []
# sorted() makes the document order independent of the order os.listdir
# happens to return.
for f in sorted(os.listdir(filedir)):
    if f.endswith(".txt"):
        filenames.append(f[:-4])
        # The context manager closes each file as soon as it has been read.
        with open(os.path.join(filedir, f)) as fopen:
            documents.append(re.sub(r'[^\w\s]', '', fopen.read().lower().replace("\n", " ")))
def term_freqs(text, names):
    """Count how often each word occurs in each document.

    Args:
        text: sequence of document strings.
        names: sequence of document names; ``names[i]`` labels ``text[i]``.

    Returns:
        A dict mapping each document name to a dict of ``word -> count``.
        Words are the whitespace-separated tokens of the lower-cased text.

    Raises:
        IndexError: if ``names`` has fewer entries than ``text``.
    """
    tf = {}
    for i, doc in enumerate(text):
        counts = {}
        for w in doc.lower().split():
            counts[w] = counts.get(w, 0) + 1
        tf[names[i]] = counts
    return tf
tf = term_freqs(documents, filenames)

# http://xpo6.com/download-stop-word-list/
# Force the count of every stop word to 0 in every document. The word is
# inserted with count 0 even when the document never contained it.
with open("stop-word-list.txt") as stopf:
    for line in stopf:
        w = line.strip()
        for d in tf:
            tf[d][w] = 0

# Do the same for every word listed in english2.txt, except "mat", whose
# counts are left untouched (presumably because it is also a character name
# in this corpus -- confirm).
english = {}
with open("english2.txt") as dopen:
    for line in dopen:
        w = line.strip()
        if w != "mat":
            for d in tf:
                tf[d][w] = 0
# https://stackoverflow.com/questions/3121979/how-to-sort-list-tuple-of-lists-tuples
# For each document keep its 16 highest-count (word, count) pairs, ordered
# from most to least frequent; ties keep their dictionary insertion order.
allfreqs = {}
for book in tf:
    ranked = sorted(tf[book].items(), key=lambda pair: pair[1], reverse=True)
    allfreqs[book] = ranked[:16]
allfreqs
{'CrossroadsOfTwilight': [('sedai', 498), ('aes', 491), ('egwene', 372), ('elayne', 343), ('perrin', 322), ('mat', 281), ('seanchan', 233), ('faile', 183), ('aviendha', 164), ('aiel', 147), ('elaida', 144), ('siuan', 139), ('tuon', 131), ('birgitte', 129), ('shaido', 120), ('ajah', 118)], 'CrownOfSwords': [('rand', 749), ('sedai', 730), ('aes', 722), ('elayne', 520), ('nynaeve', 512), ('mat', 407), ('perrin', 358), ('egwene', 311), ('aiel', 250), ('min', 242), ('siuan', 187), ('elaida', 166), ('ashaman', 158), ('althor', 151), ('amyrlin', 147), ('cadsuane', 147)], 'DragonReborn': [('egwene', 797), ('perrin', 616), ('sedai', 615), ('nynaeve', 597), ('aes', 584), ('mat', 574), ('elayne', 404), ('moiraine', 378), ('rand', 328), ('amyrlin', 262), ('thom', 226), ('aiel', 207), ('lan', 193), ('verin', 169), ('valon', 158), ('loial', 152)], 'EyeOfTheWorld': [('rand', 1929), ('mat', 988), ('moiraine', 767), ('perrin', 618), ('sedai', 599), ('aes', 579), ('egwene', 542), ('lan', 483), ('trollocs', 421), ('nynaeve', 331), ('thom', 317), ('loial', 216), ('elyas', 214), ('caemlyn', 212), ('rands', 190), ('emonds', 171)], 'FiresOfHeaven': [('nynaeve', 1017), ('rand', 956), ('elayne', 794), ('aiel', 551), ('egwene', 478), ('sedai', 448), ('aes', 442), ('aviendha', 337), ('siuan', 330), ('moiraine', 329), ('birgitte', 271), ('mat', 260), ('thom', 249), ('moghedien', 244), ('min', 197), ('asmodean', 168)], 'GatheringStorm': [('egwene', 1257), ('rand', 1193), ('mat', 652), ('sedai', 591), ('aes', 552), ('nynaeve', 481), ('gawyn', 402), ('siuan', 389), ('hed', 361), ('cadsuane', 336), ('elaida', 322), ('bryne', 302), ('seanchan', 295), ('min', 257), ('amyrlin', 252), ('aiel', 245)], 'GreatHunt': [('rand', 1600), ('sedai', 645), ('aes', 588), ('egwene', 562), ('nynaeve', 498), ('loial', 446), ('ingtar', 404), ('hurin', 379), ('mat', 376), ('moiraine', 364), ('amyrlin', 295), ('perrin', 268), ('verin', 257), ('min', 231), ('seanchan', 215), ('elayne', 207)], 'KnifeOfDreams': [('aes', 
480), ('sedai', 480), ('elayne', 392), ('mat', 382), ('tuon', 297), ('seanchan', 296), ('perrin', 279), ('rand', 250), ('egwene', 241), ('faile', 240), ('birgitte', 195), ('elaida', 161), ('shaido', 137), ('selucia', 136), ('youre', 106), ('ive', 106)], 'LordOfChaos': [('rand', 1449), ('sedai', 1266), ('aes', 1261), ('nynaeve', 858), ('elayne', 851), ('egwene', 648), ('aiel', 602), ('mat', 485), ('perrin', 424), ('siuan', 315), ('althor', 276), ('min', 259), ('sheriam', 234), ('lews', 223), ('salidar', 215), ('taim', 212)], 'MemoryOfLight': [('rand', 1252), ('mat', 983), ('perrin', 816), ('trollocs', 802), ('egwene', 774), ('elayne', 680), ('lan', 470), ('androl', 469), ('sedai', 430), ('hed', 401), ('aes', 394), ('aviendha', 380), ('gawyn', 292), ('demandred', 283), ('seanchan', 271), ('aiel', 266)], 'PathOfDaggers': [('rand', 574), ('elayne', 446), ('sedai', 389), ('aes', 388), ('perrin', 323), ('nynaeve', 298), ('egwene', 254), ('aviendha', 218), ('siuan', 175), ('faile', 153), ('seanchan', 149), ('min', 106), ('aiel', 97), ('cadsuane', 93), ('ashaman', 89), ('amyrlin', 84)], 'ShadowRising': [('rand', 1033), ('perrin', 981), ('aes', 645), ('aiel', 645), ('sedai', 642), ('egwene', 629), ('elayne', 614), ('nynaeve', 598), ('mat', 514), ('moiraine', 474), ('faile', 412), ('trollocs', 359), ('aviendha', 243), ('rhuarc', 240), ('min', 198), ('loial', 190)], 'TowersOfMidnight': [('perrin', 1550), ('mat', 1090), ('elayne', 788), ('egwene', 669), ('rand', 482), ('faile', 469), ('galad', 447), ('hed', 404), ('sedai', 385), ('aes', 370), ('gawyn', 330), ('nynaeve', 325), ('ituralde', 221), ('trollocs', 217), ('morgase', 213), ('aiel', 209)], 'WintersHeart': [('elayne', 505), ('rand', 356), ('sedai', 334), ('nynaeve', 332), ('aes', 325), ('mat', 299), ('cadsuane', 248), ('seanchan', 229), ('aviendha', 204), ('min', 182), ('birgitte', 178), ('perrin', 139), ('faile', 136), ('aiel', 121), ('suldam', 117), ('damane', 111)]}
# Every word that made the top list of at least one document.
names = {entry[0] for book in allfreqs for entry in allfreqs[book]}
names
{'aes', 'aiel', 'ajah', 'althor', 'amyrlin', 'androl', 'ashaman', 'asmodean', 'aviendha', 'birgitte', 'bryne', 'cadsuane', 'caemlyn', 'damane', 'demandred', 'egwene', 'elaida', 'elayne', 'elyas', 'emonds', 'faile', 'galad', 'gawyn', 'hed', 'hurin', 'ingtar', 'ituralde', 'ive', 'lan', 'lews', 'loial', 'mat', 'min', 'moghedien', 'moiraine', 'morgase', 'nynaeve', 'perrin', 'rand', 'rands', 'rhuarc', 'salidar', 'seanchan', 'sedai', 'selucia', 'shaido', 'sheriam', 'siuan', 'suldam', 'taim', 'thom', 'trollocs', 'tuon', 'valon', 'verin', 'youre'}
# Drop tokens that are not wanted as names (they look like contractions left
# over after punctuation stripping: "he'd", "I've", "you're").
# difference_update mutates the set in place and, unlike remove(), does not
# raise KeyError when one of the tokens is absent from the corpus.
names.difference_update({"hed", "ive", "youre"})
# For each document, map every word in its top list to its 0-based position
# in that list (0 = most frequent).
ranks = {}
for book, top_words in allfreqs.items():
    ranks[book] = {entry[0]: position for position, entry in enumerate(top_words)}
# Document names in the order they are laid out along the x axis of the plots.
bookorder = ["EyeOfTheWorld", "GreatHunt", "DragonReborn", "ShadowRising", "FiresOfHeaven", "LordOfChaos",
             "CrownOfSwords", "PathOfDaggers", "WintersHeart", "CrossroadsOfTwilight", "KnifeOfDreams",
             "GatheringStorm", "TowersOfMidnight", "MemoryOfLight"]

# For each name build its rank in every book, in bookorder. Ranks are 1-based;
# 0 means the name is not in that book's top list (1 + the -1 default).
ranklists = {}
for n in names:
    t = [1 + ranks[b].get(n, -1) for b in bookorder]
    # Keep only names ranked in at least two books. The threshold is derived
    # from bookorder instead of being hard-coded for a 14-book list.
    if t.count(0) < len(bookorder) - 1:
        ranklists[n] = t
[x.capitalize() for x in ranklists]
['Gawyn', 'Sedai', 'Elaida', 'Mat', 'Moiraine', 'Elayne', 'Aes', 'Loial', 'Rand', 'Perrin', 'Nynaeve', 'Cadsuane', 'Tuon', 'Aviendha', 'Verin', 'Thom', 'Aiel', 'Althor', 'Seanchan', 'Shaido', 'Faile', 'Siuan', 'Min', 'Egwene', 'Lan', 'Ashaman', 'Amyrlin', 'Birgitte', 'Trollocs']
def splitlist(t):
    """Split a rank list into runs of consecutive positive entries.

    Args:
        t: sequence of numbers; entries that are not greater than 0 act as
            gaps between runs.

    Returns:
        A tuple ``(xs, ys)`` of equally long lists. ``xs[k]`` holds the
        indices of the k-th run and ``ys[k]`` the corresponding values of
        ``t``. Gaps never produce empty runs, so an input with no positive
        entries (or an empty input) yields ``([], [])``.
    """
    xs = []
    ys = []
    curx = []
    cury = []
    # Iterate over the input itself so any length is accepted.
    for i, value in enumerate(t):
        if value > 0:
            curx.append(i)
            cury.append(value)
        elif curx:
            # A gap closes the current run; leading or repeated gaps have no
            # open run and are skipped.
            xs.append(curx)
            ys.append(cury)
            curx = []
            cury = []
    # Flush the run that reaches the end of the list.
    if curx:
        xs.append(curx)
        ys.append(cury)
    return xs, ys
# Notebook-style spot check: evaluates splitlist on the rank list stored under
# "mat"; the result is not assigned, so this has no effect when run as a script.
splitlist(ranklists["mat"])
([[0, 1, 2, 3, 4, 5, 6], [8, 9, 10, 11, 12, 13]], [[2, 9, 6, 9, 12, 8, 6], [6, 6, 4, 3, 2, 2]])
def ranking_plot(who, bookorder, ranklists, title, maxrank=16):
    """Plot the per-book frequency rank of several names in one figure.

    Each name gets one colour (colours repeat when there are more names than
    colours). Its rank list is split with ``splitlist`` so that books where
    the name has no rank show up as breaks in the line instead of points.

    Args:
        who: sequence of keys of ``ranklists`` to draw.
        bookorder: x-axis tick labels, one per position in a rank list.
        ranklists: dict mapping a name to its list of ranks.
        title: figure title.
        maxrank: largest rank shown; the y axis runs from ``maxrank + 1``
            at the bottom to 0 at the top.

    Raises:
        KeyError: if an entry of ``who`` is not a key of ``ranklists``.
    """
    #https://matplotlib.org/gallery/subplots_axes_and_figures/invert_axes.html
    #https://stackoverflow.com/questions/12050393/how-to-force-the-y-axis-to-only-use-integers-in-matplotlib
    #https://matplotlib.org/gallery/ticks_and_spines/ticklabels_rotation.html#sphx-glr-gallery-ticks-and-spines-ticklabels-rotation-py
    #https://matplotlib.org/examples/color/named_colors.html
    colors = ["b", "y", "g", "r", "m", "c", "darkorange", "silver", "indigo"]
    nbooks = len(bookorder)
    plt.figure(figsize=(10, 6))
    for i, n in enumerate(who):
        color = colors[i % len(colors)]
        xs, ys = splitlist(ranklists[n])
        for j, (segx, segy) in enumerate(zip(xs, ys)):
            plt.scatter(segx, segy, color=color)
            if j == 0:
                # Label only the first segment so each name appears once in
                # the legend.
                plt.plot(segx, segy, color=color, label=n.capitalize())
            else:
                plt.plot(segx, segy, color=color)
    plt.title(title)
    plt.ylabel("frequency rank")
    # Upper limit first inverts the axis: rank 1 is drawn at the top.
    plt.ylim(maxrank + 1, 0)
    yint = range(1, maxrank, 2)
    plt.yticks(yint)
    # Tick positions and x limits follow the number of books instead of a
    # hard-coded 14.
    plt.xticks(range(nbooks), bookorder, rotation='vertical')
    plt.xlim(-1, nbooks)
    plt.legend()
    plt.show()
# One figure per group of names, drawn in the order listed here.
for who, title in [
    (["rand", "mat", "perrin"],
     "Male Trio Ranking in The Wheel Of Time"),
    (["egwene", "elayne", "nynaeve"],
     "Female Trio Ranking in The Wheel Of Time"),
    (["moiraine", "siuan", "cadsuane", "elaida"],
     "Elder Aes Sedai Ranking in The Wheel Of Time"),
    (["trollocs", "aiel", "seanchan", "shaido", "ashaman"],
     "Outsider Ranking in The Wheel Of Time"),
    (["aviendha", "birgitte", "faile", "gawyn", "lan", "loial", "min", "thom", "verin"],
     "Other Supporting Character Ranking in The Wheel Of Time"),
]:
    ranking_plot(who, bookorder, ranklists, title)