In [80]:
# %pylab populates the interactive namespace from numpy and matplotlib (see the
# message printed below); the bare `rcParams`, `np` and `plt` names used by the
# later cells have no other visible import, so they presumably come from here.
%pylab inline
# Default resolution for saved figures when savefig() is called without dpi=.
rcParams['savefig.dpi'] = 150
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['title']
`%matplotlib` prevents importing * from pylab and numpy
In [16]:
import seaborn as sns
import pandas as pd
# Plain white seaborn theme, overridden with a thin mid-grey axes frame.
sns.set_style(
    'white',
    rc={
        'axes.edgecolor': '0.5',
        'axes.linewidth': '0.5',
    },
)

Importing the data into pandas

In [3]:
# Load the three CSV exports, using the first column of each file as the index.
# pd.read_csv(..., index_col=0, parse_dates=...) is the documented replacement
# for DataFrame.from_csv, which was deprecated and later removed from pandas.
chapter_df = pd.read_csv("chapters_with_ratings.csv", index_col=0, parse_dates=True)
character_df = pd.read_csv("characters.csv", index_col=0, parse_dates=False)
event_df = pd.read_csv("events.csv", index_col=0, parse_dates=True)

# One chapter sub-frame per book, selected by bookID 0..4 in series order.
books = [chapter_df[chapter_df.bookID == book_id] for book_id in range(5)]
AGOT, ACOK, ASOS, AFFC, ADWD = books

Chapter ratings - all books together

In [54]:
# One bar per chapter across the whole series, coloured by the book it belongs to.
palette = sns.color_palette()
bar_colours = [palette[n] for n in chapter_df.bookID]

# align='edge' pins every bar to start at its index value, so the bars sit inside
# the xlim set below whatever matplotlib's default alignment is (newer releases
# centre bars on x, which would push the first bar partly off the axes).
plt.bar(chapter_df.index, chapter_df.ChapterRating, color=bar_colours,
        alpha=1.0, lw=0, align='edge')

plt.xlim(0, len(chapter_df))
plt.ylim(0, 10)
# Put each book's label at the mean index of its chapters. The positions are
# built as a real list: on Python 3 map() returns a lazy iterator, which
# matplotlib cannot use as tick locations.
plt.xticks([np.average(book_df.index) for book_df in [AGOT, ACOK, ASOS, AFFC, ADWD]],
           ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
plt.title('Ratings for each chapter in the Series')
#plt.xlabel('Chapter')
plt.ylabel('Chapter Rating')

plt.gcf().set_size_inches(15, 5)
plt.savefig("ASOIAF_-_all_books_-_chapter_ratings.png", dpi=148)

Chapter ratings - by book

In [125]:
# One figure per book: a bar per chapter, with the chapter title written
# vertically inside the bar just below its top.
for book, title, colour in zip(books, ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"], palette):

    plt.figure()
    # align='edge' makes each bar start at its chapterInBook value, so the +.5
    # label offset below lands inside the bar regardless of matplotlib's default
    # alignment (newer releases centre bars on x).
    plt.bar(book['chapterInBook'], book.ChapterRating, color=colour, alpha=0.5, align='edge')
    # Walk the three needed columns in parallel instead of building a Series per
    # row with iterrows().
    for chapter_x, rating, chapter_title in zip(book['chapterInBook'],
                                                book['ChapterRating'],
                                                book['title']):
        plt.text(chapter_x + .5, rating - 0.1, chapter_title,
                 size='x-small', va='top', ha='center', rotation='vertical')

    plt.xlim(0, len(book))
    plt.ylim(0, 10)
    plt.title('Chapter ratings for all chapters in {}'.format(title))
    plt.xlabel("Chapters")
    plt.ylabel("Chapter Rating")
    # Chapter titles are drawn inside the bars, so numeric ticks are hidden.
    plt.xticks([])

    plt.gcf().set_size_inches(15, 5)
    #plt.gcf().savefig("ASOIAF_-_{}_-_chapter_ratings.png".format(title), dpi=150)
# Displays the bounding box of the last axes as the cell output.
plt.gca().get_position()
Out[125]:
Bbox([[0.125, 0.125], [0.9, 0.9]])

Chapter rating distribution - by book

In [83]:
# Distribution of chapter ratings per book, drawn once as a violin plot and once
# as a box plot. Both figures share everything except the seaborn plotter, its
# extra options, the title suffix and the output file name.
for draw, extra_kwargs, plot_name, file_suffix in [
        (sns.violinplot, {'inner': 'points'}, 'Violin plot', 'violin'),
        (sns.boxplot, {}, 'Box plot', 'box'),
]:
    plt.figure()
    # x/y are passed by keyword: current seaborn releases no longer accept the
    # data vectors positionally.
    draw(x=chapter_df['bookID'], y=chapter_df['ChapterRating'], **extra_kwargs)
    # Replace the numeric bookID ticks with the book abbreviations.
    plt.xticks(range(5), ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
    plt.title('Chapter rating distributions by book - {}'.format(plot_name))
    plt.xlabel('')
    plt.ylim(5, 10)
    plt.gcf().set_size_inches(15, 5)
    plt.gcf().savefig('Chapter_rating_distributions_by_book_-_{}.png'.format(file_suffix), dpi=150)
In [151]:
# Per-book mean and standard deviation of the chapter ratings.
chapter_df.groupby('bookID')['ChapterRating'].agg(['mean', 'std'])
Out[151]:
mean std
bookID
0 8.212603 0.562781
1 7.750429 0.702376
2 7.994756 0.634635
3 7.545435 0.522484
4 8.029589 0.692492

Chapter rating distribution - by POV Character

In [140]:
# POV characters ordered from highest to lowest mean chapter rating. The rating
# column is selected before aggregating so that mean() is never applied to the
# frame's non-numeric columns (which raises on current pandas).
order = (chapter_df.groupby('povID')['ChapterRating']
         .mean()
         .sort_values(ascending=False)
         .reset_index()
         .povID
         .map(character_df['Name']))

# Character name for every chapter, looked up from its povID.
pov_names = chapter_df['povID'].map(character_df['Name'])

# Same figure drawn twice: first as a violin plot, then as a box plot.
for draw, extra_kwargs, file_suffix in [
        (sns.violinplot, {'inner': 'points'}, 'violin'),
        (sns.boxplot, {}, 'box'),
]:
    plt.figure()
    # x/y are passed by keyword: current seaborn releases no longer accept the
    # data vectors positionally.
    draw(x=pov_names, y=chapter_df['ChapterRating'], order=order, **extra_kwargs)
    plt.xticks(rotation='vertical')
    plt.xlabel("POV character")
    plt.gcf().set_size_inches(15, 6.5)
    # Raise the bottom edge of the axes to leave room for the vertical name labels.
    plt.gca().set_position([.125, .3, .775, .6])
    plt.gcf().savefig("Chapter_rating_distributions_by_POV_character_-_{}.png".format(file_suffix), dpi=150)