# Populate the interactive namespace from numpy and matplotlib (this is what
# provides the bare names `rcParams`, `plt` and `np` used below) with the
# inline figure backend.
%pylab inline
# Default dots-per-inch for saved figures; individual savefig() calls below
# pass their own dpi explicitly.
rcParams['savefig.dpi'] = 150
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['title'] `%matplotlib` prevents importing * from pylab and numpy
import seaborn as sns
import pandas as pd
# Use seaborn's 'white' style, overriding the axes edge colour and line width.
sns.set_style(
    style='white',
    rc={
        'axes.edgecolor': '0.5',
        'axes.linewidth': '0.5',
    },
)
Importing the data into pandas
# Load the three source tables.
# `DataFrame.from_csv` is deprecated and was removed in pandas 1.0; `pd.read_csv`
# with `index_col=0, parse_dates=True` reproduces its defaults (first column as
# the index, date parsing attempted on that index).
chapter_df = pd.read_csv("chapters_with_ratings.csv", index_col=0, parse_dates=True)
# Date parsing of the index is explicitly switched off for the character table.
character_df = pd.read_csv("characters.csv", index_col=0, parse_dates=False)
event_df = pd.read_csv("events.csv", index_col=0, parse_dates=True)
# Split the chapter table into one sub-frame per book: bookID 0..4 correspond to
# AGOT, ACOK, ASOS, AFFC and ADWD respectively.
books = [chapter_df[chapter_df.bookID == book_id] for book_id in range(5)]
AGOT, ACOK, ASOS, AFFC, ADWD = books
# Overview figure: one bar per chapter across the whole series, coloured by book.
palette = sns.color_palette()
bar_colours = [palette[n] for n in chapter_df.bookID]
# align='edge' pins each bar to start at its chapter index. Matplotlib >= 2.0
# centres bars by default, which would shift every bar half a width to the left
# relative to the x-limits set below.
plt.bar(chapter_df.index, chapter_df.ChapterRating, color=bar_colours, alpha=1.0, lw=0, align='edge')
plt.xlim(0, len(chapter_df))
plt.ylim(0, 10)
# One tick per book, placed at the mean chapter index of that book. A list is
# built explicitly because map() returns a lazy iterator on Python 3, which
# xticks() cannot use as tick positions.
plt.xticks([np.average(book_df.index) for book_df in [AGOT, ACOK, ASOS, AFFC, ADWD]],
           ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
plt.title('Ratings for each chapter in the Series')
#plt.xlabel('Chapter')
plt.ylabel('Chapter Rating')
plt.gcf().set_size_inches(15, 5)
plt.savefig("ASOIAF_-_all_books_-_chapter_ratings.png", dpi=148)
# One figure per book: a bar per chapter with the chapter title written
# vertically inside the bar.
# The loop variable is called `book_title` rather than `title` so that it does
# not overwrite the `title` function that %pylab places in the namespace.
for book, book_title, colour in zip(books, ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"], palette):
    plt.figure()
    # align='edge' keeps each bar starting at its chapter number (the default
    # before matplotlib 2.0), which the +.5 text offset below is positioned against.
    plt.bar(book['chapterInBook'], book.ChapterRating, color=colour, alpha=0.5, align='edge')
    for chapter_index, chapter in book.iterrows():
        # Label sits just under the top of the bar and runs downwards.
        plt.text(chapter['chapterInBook']+.5, chapter['ChapterRating']-0.1, chapter['title'],
                 size='x-small', va='top', ha='center', rotation='vertical')
    plt.xlim(0, len(book))
    plt.ylim(0, 10)
    plt.title('Chapter ratings for all chapters in {}'.format(book_title))
    plt.xlabel("Chapters")
    plt.ylabel("Chapter Rating")
    plt.xticks([])
    plt.gcf().set_size_inches(15, 5)
    #plt.gcf().savefig("ASOIAF_-_{}_-_chapter_ratings.png".format(book_title), dpi=150)
# Show the axes bounding box (in figure coordinates) of the last figure.
plt.gca().get_position()
Bbox([[0.125, 0.125], [0.9, 0.9]])
# Violin plot of the chapter-rating distribution for each book.
plt.figure()
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally (the first positional argument is now `data`).
sns.violinplot(x=chapter_df['bookID'], y=chapter_df['ChapterRating'], inner='points')
# Replace the numeric bookID tick labels with the book abbreviations.
plt.xticks(range(5), ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
plt.title('Chapter rating distributions by book - Violin plot')
plt.xlabel('')
# Restrict the rating axis to the 5-10 range.
plt.ylim(5, 10)
plt.gcf().set_size_inches(15, 5)
plt.gcf().savefig('Chapter_rating_distributions_by_book_-_violin.png', dpi=150)
# Box plot of the chapter-rating distribution for each book.
plt.figure()
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally (the first positional argument is now `data`).
sns.boxplot(x=chapter_df['bookID'], y=chapter_df['ChapterRating'])
# Replace the numeric bookID tick labels with the book abbreviations.
plt.xticks(range(5), ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
plt.title('Chapter rating distributions by book - Box plot')
plt.xlabel('')
# Restrict the rating axis to the 5-10 range.
plt.ylim(5, 10)
plt.gcf().set_size_inches(15, 5)
plt.gcf().savefig('Chapter_rating_distributions_by_book_-_box.png', dpi=150)
# Per-book mean and standard deviation of the chapter ratings
chapter_df.groupby('bookID')['ChapterRating'].agg(['mean', 'std'])
bookID | mean | std |
---|---|---|
0 | 8.212603 | 0.562781 |
1 | 7.750429 | 0.702376 |
2 | 7.994756 | 0.634635 |
3 | 7.545435 | 0.522484 |
4 | 8.029589 | 0.692492 |
# POV character names ordered from highest to lowest mean chapter rating.
# ChapterRating is selected before averaging so that only that column is
# aggregated; calling mean() on the whole grouped frame raises on text columns
# (such as 'title') in pandas >= 2.0 and averages every column needlessly before that.
order = (
    chapter_df.groupby('povID')['ChapterRating']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
    .povID
    .map(character_df.Name)
)
# Violin plot of the chapter-rating distribution for each POV character,
# with characters arranged according to `order`.
plt.figure()
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally (the first positional argument is now `data`).
sns.violinplot(x=chapter_df['povID'].map(character_df['Name']), y=chapter_df['ChapterRating'],
               inner='points', order=order)
plt.xticks(rotation='vertical')
plt.xlabel("POV character")
plt.gcf().set_size_inches(15, 6.5)
# Raise the bottom edge of the axes to leave room for the vertical name labels.
plt.gca().set_position([.125, .3, .775, .6])
plt.gcf().savefig("Chapter_rating_distributions_by_POV_character_-_violin.png", dpi=150)
# Box plot of the chapter-rating distribution for each POV character,
# with characters arranged according to `order`.
plt.figure()
# x and y are passed by keyword: recent seaborn releases no longer accept them
# positionally (the first positional argument is now `data`).
sns.boxplot(x=chapter_df['povID'].map(character_df['Name']), y=chapter_df['ChapterRating'],
            order=order)
plt.xticks(rotation='vertical')
plt.xlabel("POV character")
plt.gcf().set_size_inches(15, 6.5)
# Raise the bottom edge of the axes to leave room for the vertical name labels.
plt.gca().set_position([.125, .3, .775, .6])
plt.gcf().savefig("Chapter_rating_distributions_by_POV_character_-_box.png", dpi=150)
# POV character IDs ordered from highest to lowest mean chapter rating.
# ChapterRating is selected before averaging so that only that column is
# aggregated; mean() over the whole grouped frame raises on text columns in pandas >= 2.0.
order = chapter_df.groupby('povID')['ChapterRating'].mean().sort_values(ascending=False).reset_index().povID
# Red -> white -> blue colour map. (A previous `plt.get_cmap('RdBu')` assignment
# was immediately overwritten by this one and has been dropped.)
cmap = sns.blend_palette([sns.desaturate("red", 1), "white", sns.desaturate("blue", 1)], as_cmap=True)
# The colour scale spans the overall mean rating +/- two standard deviations.
vmin = chapter_df.ChapterRating.mean()-2*chapter_df.ChapterRating.std()
vmax = chapter_df.ChapterRating.mean()+2*chapter_df.ChapterRating.std()
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
# Attach an empty data array through the public setter instead of writing the
# private `_A` attribute; presumably needed so that colorbar() accepts a
# mappable that is not tied to any plotted artist.
sm.set_array([])
# Grid of chapter ratings: one row per POV character (row 0, at the bottom, is the
# first entry of `order`) and one column per chapter, over book-coloured bands.
plt.figure()
# Row count and right-hand chapter edge are derived from the data instead of
# being hard-coded (previously 31 rows / y-extent 32 / x-extent 500).
n_characters = len(order)
last_chapter_edge = books[-1].index[-1]+1
for n, characterID in enumerate(order):
    df = chapter_df[chapter_df['povID'] == characterID]
    ratings = df['ChapterRating']
    chapter_numbers = df.index
    bar_heights = (ratings-5)/6.25
    # Bar height proportional to chapter rating, centred vertically on row n and
    # coloured by rating. align='edge' starts each bar at its chapter index so it
    # lines up with the book bands below (matplotlib >= 2.0 would centre it).
    plt.bar(chapter_numbers, bar_heights,
            bottom=n-(bar_heights/2), color=sm.to_rgba(ratings), zorder=3, alpha=1, align='edge')
    #a = plt.bar(chapter_numbers, [0.8]*len(bar_heights),
    #            bottom=n-0.4, color=cmap(bar_heights)) # Fixed bar height
# Background: one translucent vertical band per book, spanning that book's chapters.
for color, bookdf in zip(palette, books):
    plt.fill_between(bookdf.index.tolist()+[bookdf.index.tolist()[-1]+1], -1, n_characters+1,
                     color=color, alpha=0.5, zorder=1)
# A translucent white stripe per character row, drawn above the book bands and below the bars.
for n in range(n_characters):
    plt.fill_betweenx([n-.4, n+.4], 0, last_chapter_edge, color="white", alpha=0.5, zorder=2)
plt.xlim(0, last_chapter_edge)
# One tick per book at the mean chapter index of that book. A list is built because
# map() returns a lazy iterator on Python 3, which xticks() cannot use.
plt.xticks([np.average(book_df.index) for book_df in [AGOT, ACOK, ASOS, AFFC, ADWD]],
           ["AGOT", "ACOK", "ASOS", "AFFC", "ADWD"])
plt.ylim(-0.5, n_characters-0.5)
plt.yticks(range(n_characters), character_df.loc[order, 'Name'].tolist(), size='small')
plt.grid(False)
# `ax` is given explicitly: recent matplotlib cannot infer which axes to attach a
# colorbar to when the mappable is not associated with any axes.
cb = plt.colorbar(sm, ax=plt.gca())
cb.set_label("Chapter Rating")
plt.gcf().set_size_inches(15, 5)
# Manually place the main axes and the colorbar axes (figure coordinates).
plt.gca().set_position([.125, .125, .75, .775])
cb.ax.set_position([.885, .125, .55, .775])
plt.gcf().savefig("Chapter_ratings_by_POV_character_-_all_books.png", dpi=300)