Computing Patterns in The Wheel of Time: Character Ranking

Mark Goadrich

In [1]:
from scipy.cluster import hierarchy
import numpy as np
import matplotlib.pyplot as plt
import random
import math
import os
import re

Loading up the files

In [2]:
# Read every ".txt" file in the corpus directory into memory, one string per book.
filedir = "WOTclean"
documents = []  # cleaned, lower-cased text of each book
filenames = []  # file name without the ".txt" suffix, parallel to `documents`
for f in os.listdir(filedir):
    if f.endswith(".txt"):
        filenames.append(f[:-4])
        # `with` closes the handle as soon as the text is read instead of
        # leaving one open file per book for the garbage collector.
        with open(os.path.join(filedir, f)) as fopen:
            raw = fopen.read()
        # Lower-case, turn newlines into spaces, then delete every character
        # that is neither a word character nor whitespace.
        documents.append(re.sub(r'[^\w\s]', '', raw.lower().replace("\n", " ")))

Determining word frequencies and removing stop words and English words

In [3]:
def term_freqs(text, names):
    """Count how often each whitespace-separated token occurs in each document.

    Args:
        text: iterable of document strings.
        names: indexable collection of labels; ``names[i]`` labels the
            ``i``-th document in ``text``.

    Returns:
        dict mapping each label to a ``{token: count}`` dict. Tokens are
        lower-cased before counting. If a label repeats, the counts of the
        later document replace those of the earlier one.

    Raises:
        IndexError: if ``names`` is a sequence with fewer entries than ``text``.
    """
    tf = {}
    for i, doc in enumerate(text):
        label = names[i]
        counts = {}
        for w in doc.lower().split():
            counts[w] = counts.get(w, 0) + 1
        tf[label] = counts
    return tf
In [4]:
# Per-book token counts: tf[book_name][token] -> number of occurrences.
tf = term_freqs(documents, filenames)
In [5]:
# http://xpo6.com/download-stop-word-list/
# Force the count of every stop word to 0 in every book so it can never
# reach the top of a frequency ranking.
with open("stop-word-list.txt") as stopf:
    for line in stopf:
        w = line.strip()
        for d in tf:
            tf[d][w] = 0

# Do the same for every entry of the English word list.
# "mat" is exempt so its count survives -- presumably because it is also a
# character's name.
with open("english2.txt") as dopen:
    for line in dopen:
        w = line.strip()
        if w == "mat":
            continue
        for d in tf:
            tf[d][w] = 0

Sorting the words per book and saving the top 16

In [6]:
# https://stackoverflow.com/questions/3121979/how-to-sort-list-tuple-of-lists-tuples
# For each book keep its 16 highest-count (word, count) pairs, largest first.
# sorted() is stable, so words with equal counts keep their dict order.
allfreqs = {
    book: sorted(counts.items(), key=lambda pair: pair[1], reverse=True)[:16]
    for book, counts in tf.items()
}
allfreqs
Out[6]:
{'CrossroadsOfTwilight': [('sedai', 498),
  ('aes', 491),
  ('egwene', 372),
  ('elayne', 343),
  ('perrin', 322),
  ('mat', 281),
  ('seanchan', 233),
  ('faile', 183),
  ('aviendha', 164),
  ('aiel', 147),
  ('elaida', 144),
  ('siuan', 139),
  ('tuon', 131),
  ('birgitte', 129),
  ('shaido', 120),
  ('ajah', 118)],
 'CrownOfSwords': [('rand', 749),
  ('sedai', 730),
  ('aes', 722),
  ('elayne', 520),
  ('nynaeve', 512),
  ('mat', 407),
  ('perrin', 358),
  ('egwene', 311),
  ('aiel', 250),
  ('min', 242),
  ('siuan', 187),
  ('elaida', 166),
  ('ashaman', 158),
  ('althor', 151),
  ('amyrlin', 147),
  ('cadsuane', 147)],
 'DragonReborn': [('egwene', 797),
  ('perrin', 616),
  ('sedai', 615),
  ('nynaeve', 597),
  ('aes', 584),
  ('mat', 574),
  ('elayne', 404),
  ('moiraine', 378),
  ('rand', 328),
  ('amyrlin', 262),
  ('thom', 226),
  ('aiel', 207),
  ('lan', 193),
  ('verin', 169),
  ('valon', 158),
  ('loial', 152)],
 'EyeOfTheWorld': [('rand', 1929),
  ('mat', 988),
  ('moiraine', 767),
  ('perrin', 618),
  ('sedai', 599),
  ('aes', 579),
  ('egwene', 542),
  ('lan', 483),
  ('trollocs', 421),
  ('nynaeve', 331),
  ('thom', 317),
  ('loial', 216),
  ('elyas', 214),
  ('caemlyn', 212),
  ('rands', 190),
  ('emonds', 171)],
 'FiresOfHeaven': [('nynaeve', 1017),
  ('rand', 956),
  ('elayne', 794),
  ('aiel', 551),
  ('egwene', 478),
  ('sedai', 448),
  ('aes', 442),
  ('aviendha', 337),
  ('siuan', 330),
  ('moiraine', 329),
  ('birgitte', 271),
  ('mat', 260),
  ('thom', 249),
  ('moghedien', 244),
  ('min', 197),
  ('asmodean', 168)],
 'GatheringStorm': [('egwene', 1257),
  ('rand', 1193),
  ('mat', 652),
  ('sedai', 591),
  ('aes', 552),
  ('nynaeve', 481),
  ('gawyn', 402),
  ('siuan', 389),
  ('hed', 361),
  ('cadsuane', 336),
  ('elaida', 322),
  ('bryne', 302),
  ('seanchan', 295),
  ('min', 257),
  ('amyrlin', 252),
  ('aiel', 245)],
 'GreatHunt': [('rand', 1600),
  ('sedai', 645),
  ('aes', 588),
  ('egwene', 562),
  ('nynaeve', 498),
  ('loial', 446),
  ('ingtar', 404),
  ('hurin', 379),
  ('mat', 376),
  ('moiraine', 364),
  ('amyrlin', 295),
  ('perrin', 268),
  ('verin', 257),
  ('min', 231),
  ('seanchan', 215),
  ('elayne', 207)],
 'KnifeOfDreams': [('aes', 480),
  ('sedai', 480),
  ('elayne', 392),
  ('mat', 382),
  ('tuon', 297),
  ('seanchan', 296),
  ('perrin', 279),
  ('rand', 250),
  ('egwene', 241),
  ('faile', 240),
  ('birgitte', 195),
  ('elaida', 161),
  ('shaido', 137),
  ('selucia', 136),
  ('youre', 106),
  ('ive', 106)],
 'LordOfChaos': [('rand', 1449),
  ('sedai', 1266),
  ('aes', 1261),
  ('nynaeve', 858),
  ('elayne', 851),
  ('egwene', 648),
  ('aiel', 602),
  ('mat', 485),
  ('perrin', 424),
  ('siuan', 315),
  ('althor', 276),
  ('min', 259),
  ('sheriam', 234),
  ('lews', 223),
  ('salidar', 215),
  ('taim', 212)],
 'MemoryOfLight': [('rand', 1252),
  ('mat', 983),
  ('perrin', 816),
  ('trollocs', 802),
  ('egwene', 774),
  ('elayne', 680),
  ('lan', 470),
  ('androl', 469),
  ('sedai', 430),
  ('hed', 401),
  ('aes', 394),
  ('aviendha', 380),
  ('gawyn', 292),
  ('demandred', 283),
  ('seanchan', 271),
  ('aiel', 266)],
 'PathOfDaggers': [('rand', 574),
  ('elayne', 446),
  ('sedai', 389),
  ('aes', 388),
  ('perrin', 323),
  ('nynaeve', 298),
  ('egwene', 254),
  ('aviendha', 218),
  ('siuan', 175),
  ('faile', 153),
  ('seanchan', 149),
  ('min', 106),
  ('aiel', 97),
  ('cadsuane', 93),
  ('ashaman', 89),
  ('amyrlin', 84)],
 'ShadowRising': [('rand', 1033),
  ('perrin', 981),
  ('aes', 645),
  ('aiel', 645),
  ('sedai', 642),
  ('egwene', 629),
  ('elayne', 614),
  ('nynaeve', 598),
  ('mat', 514),
  ('moiraine', 474),
  ('faile', 412),
  ('trollocs', 359),
  ('aviendha', 243),
  ('rhuarc', 240),
  ('min', 198),
  ('loial', 190)],
 'TowersOfMidnight': [('perrin', 1550),
  ('mat', 1090),
  ('elayne', 788),
  ('egwene', 669),
  ('rand', 482),
  ('faile', 469),
  ('galad', 447),
  ('hed', 404),
  ('sedai', 385),
  ('aes', 370),
  ('gawyn', 330),
  ('nynaeve', 325),
  ('ituralde', 221),
  ('trollocs', 217),
  ('morgase', 213),
  ('aiel', 209)],
 'WintersHeart': [('elayne', 505),
  ('rand', 356),
  ('sedai', 334),
  ('nynaeve', 332),
  ('aes', 325),
  ('mat', 299),
  ('cadsuane', 248),
  ('seanchan', 229),
  ('aviendha', 204),
  ('min', 182),
  ('birgitte', 178),
  ('perrin', 139),
  ('faile', 136),
  ('aiel', 121),
  ('suldam', 117),
  ('damane', 111)]}

Who are the top characters?

In [7]:
# Pool the words from every book's top list into a single set.
names = {word for top in allfreqs.values() for word, _count in top}
In [8]:
# Show the pooled set of top-ranked words.
names
Out[8]:
{'aes',
 'aiel',
 'ajah',
 'althor',
 'amyrlin',
 'androl',
 'ashaman',
 'asmodean',
 'aviendha',
 'birgitte',
 'bryne',
 'cadsuane',
 'caemlyn',
 'damane',
 'demandred',
 'egwene',
 'elaida',
 'elayne',
 'elyas',
 'emonds',
 'faile',
 'galad',
 'gawyn',
 'hed',
 'hurin',
 'ingtar',
 'ituralde',
 'ive',
 'lan',
 'lews',
 'loial',
 'mat',
 'min',
 'moghedien',
 'moiraine',
 'morgase',
 'nynaeve',
 'perrin',
 'rand',
 'rands',
 'rhuarc',
 'salidar',
 'seanchan',
 'sedai',
 'selucia',
 'shaido',
 'sheriam',
 'siuan',
 'suldam',
 'taim',
 'thom',
 'trollocs',
 'tuon',
 'valon',
 'verin',
 'youre'}
In [9]:
# "hed", "ive" and "youre" are not names; they look like contractions whose
# apostrophe was deleted when punctuation was stripped from the text.
# difference_update (unlike remove) does not raise when an entry is already
# gone, so this cell can be re-run safely.
names.difference_update({"hed", "ive", "youre"})
In [10]:
# ranks[book][word] is the zero-based position of `word` in that book's top list.
ranks = {
    book: {entry[0]: position for position, entry in enumerate(top)}
    for book, top in allfreqs.items()
}

Finding the characters highly ranked in more than one book, and saving their ranks as a list

In [11]:
# Order in which the books are laid out along the x-axis of the plots.
bookorder = ["EyeOfTheWorld", "GreatHunt", "DragonReborn", "ShadowRising", "FiresOfHeaven", "LordOfChaos",
            "CrownOfSwords", "PathOfDaggers", "WintersHeart", "CrossroadsOfTwilight", "KnifeOfDreams",
            "GatheringStorm", "TowersOfMidnight", "MemoryOfLight"]
# ranklists[word][j] is the 1-based rank of `word` in bookorder[j],
# or 0 when the word is not in that book's top list.
ranklists = {}
for n in names:
    t = [1 + ranks[b].get(n, -1) for b in bookorder]
    # Keep only words ranked in at least two books. Counting the non-zero
    # entries avoids a magic number tied to the length of bookorder.
    if len(t) - t.count(0) >= 2:
        ranklists[n] = t
In [12]:
# Capitalised list of the words that were kept.
[name.capitalize() for name in ranklists]
Out[12]:
['Gawyn',
 'Sedai',
 'Elaida',
 'Mat',
 'Moiraine',
 'Elayne',
 'Aes',
 'Loial',
 'Rand',
 'Perrin',
 'Nynaeve',
 'Cadsuane',
 'Tuon',
 'Aviendha',
 'Verin',
 'Thom',
 'Aiel',
 'Althor',
 'Seanchan',
 'Shaido',
 'Faile',
 'Siuan',
 'Min',
 'Egwene',
 'Lan',
 'Ashaman',
 'Amyrlin',
 'Birgitte',
 'Trollocs']

Making the graphs look better by plotting a character only for the books where it is in the top 16

In [13]:
def splitlist(t):
    """Split a rank list into runs of consecutive positive entries.

    Args:
        t: sequence of ranks, where a value <= 0 means "not ranked" at that
            position.

    Returns:
        ``(xs, ys)``: two parallel lists of lists. ``xs[k]`` holds the
        indices of the k-th run of positive entries and ``ys[k]`` the
        matching rank values. Runs are never empty, and both lists are
        empty when ``t`` has no positive entry.
    """
    xs = []
    ys = []
    curx = []
    cury = []
    # Iterate over whatever length t has rather than assuming 14 entries.
    for i, rank in enumerate(t):
        if rank > 0:
            curx.append(i)
            cury.append(rank)
        elif curx:
            # A gap ends the current run; the `elif curx` guard stops leading
            # or back-to-back gaps from emitting empty runs.
            xs.append(curx)
            ys.append(cury)
            curx = []
            cury = []
    if curx:
        xs.append(curx)
        ys.append(cury)
    return xs, ys
In [14]:
# Example: split the rank list for "mat" into its runs of ranked books.
splitlist(ranklists["mat"])
Out[14]:
([[0, 1, 2, 3, 4, 5, 6], [8, 9, 10, 11, 12, 13]],
 [[2, 9, 6, 9, 12, 8, 6], [6, 6, 4, 3, 2, 2]])

Final plotting function

In [15]:
def ranking_plot(who, bookorder, ranklists, title, maxrank=16):
    """Plot the per-book frequency rank of several words on one figure.

    Args:
        who: sequence of keys into ``ranklists``; each gets its own colour
            (colours repeat after nine entries).
        bookorder: book labels for the x-axis, one per position of each
            rank list.
        ranklists: dict mapping a word to its list of ranks, where a value
            <= 0 means "not ranked" in that book.
        title: figure title.
        maxrank: largest rank shown; the y-axis runs from 0 at the top to
            ``maxrank + 1`` at the bottom.

    Raises:
        KeyError: if an entry of ``who`` is not a key of ``ranklists``.
    """
    #https://matplotlib.org/gallery/subplots_axes_and_figures/invert_axes.html
    #https://stackoverflow.com/questions/12050393/how-to-force-the-y-axis-to-only-use-integers-in-matplotlib
    #https://matplotlib.org/gallery/ticks_and_spines/ticklabels_rotation.html#sphx-glr-gallery-ticks-and-spines-ticklabels-rotation-py
    #https://matplotlib.org/examples/color/named_colors.html
    colors = ["b", "y", "g", "r", "m", "c", "darkorange", "silver", "indigo"]
    # Derive the axis extent from the labels instead of hard-coding 14 books.
    nbooks = len(bookorder)

    plt.figure(figsize=(10, 6))
    for i, n in enumerate(who):
        color = colors[i % len(colors)]
        xs, ys = splitlist(ranklists[n])
        # Each run of ranked books is drawn separately so that no line is
        # drawn across books where the word is unranked.
        for j in range(len(xs)):
            plt.scatter(xs[j], ys[j], color=color)
            if j == 0:
                # Label only the first run so the word appears once in the legend.
                plt.plot(xs[j], ys[j], color=color, label=n.capitalize())
            else:
                plt.plot(xs[j], ys[j], color=color)
    plt.title(title)
    plt.ylabel("frequency rank")
    # Limits are given high-to-low, which inverts the axis: rank 1 is at the top.
    plt.ylim(maxrank + 1, 0)
    plt.yticks(range(1, maxrank, 2))
    plt.xticks(range(nbooks), bookorder, rotation='vertical')
    plt.xlim(-1, nbooks)
    plt.legend()
    plt.show()
In [16]:
# Rank trajectories of Rand, Mat and Perrin.
ranking_plot(["rand", "mat", "perrin"], bookorder, ranklists, 
             "Male Trio Ranking in The Wheel Of Time")
In [17]:
# Rank trajectories of Egwene, Elayne and Nynaeve.
ranking_plot(["egwene", "elayne", "nynaeve"], bookorder, ranklists, 
             "Female Trio Ranking in The Wheel Of Time")
In [18]:
# Rank trajectories of Moiraine, Siuan, Cadsuane and Elaida.
ranking_plot(["moiraine", "siuan", "cadsuane", "elaida"], bookorder, ranklists, 
             "Elder Aes Sedai Ranking in The Wheel Of Time")
In [19]:
# Rank trajectories of group names rather than individual characters.
ranking_plot(["trollocs", "aiel", "seanchan", "shaido", "ashaman"], bookorder, ranklists, 
             "Outsider Ranking in The Wheel Of Time")
In [20]:
# Nine series on one figure: this uses every entry of the colour list in ranking_plot.
ranking_plot(["aviendha", "birgitte", "faile", "gawyn", "lan", "loial", "min", "thom", "verin"], bookorder, ranklists,
            "Other Supporting Character Ranking in The Wheel Of Time")
In [ ]:
 
In [ ]: