Table of Contents
sqlite3
(database small enough to be held in memory)
import os
from os.path import dirname, join
import numpy as np
import pandas.io.sql as psql
import sqlite3 as sql
from bokeh.sampledata.movies_data import movie_path
# Point __file__ at the original app script so the data files below
# (query.sql, razzies-clean.csv, ...) can be resolved relative to it.
# NOTE(review): presumably needed because this runs outside the script's own
# directory (e.g. in a notebook) -- confirm the path on your machine.
__file__ = os.path.expanduser('~/git_local/bokeh_original/examples/app/movies/main.py')
conn = sql.connect(movie_path)
# Read the SQL text through a context manager so the file handle is closed
# as soon as the text has been read, instead of being left to the GC.
with open(join(dirname(__file__), 'query.sql')) as query_file:
    query = query_file.read()
movies = psql.read_sql(query, conn)
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(query)
SELECT omdb.ID, imdbID, Title, Year, omdb.Rating as mpaaRating, Runtime, Genre, Released, Director, Writer, omdb.Cast, imdbRating, imdbVotes, Language, Country, Oscars, tomatoes.Rating as numericRating, Meter, Reviews, Fresh, Rotten, userMeter, userRating, userReviews, BoxOffice, Production FROM omdb, tomatoes WHERE omdb.ID = tomatoes.ID AND Reviews >= 10
# Preview the first three rows returned by the query.
movies.head(n=3)
ID | imdbID | Title | Year | mpaaRating | Runtime | Genre | Released | Director | Writer | Cast | imdbRating | imdbVotes | Language | Country | Oscars | numericRating | Meter | Reviews | Fresh | Rotten | userMeter | userRating | userReviews | BoxOffice | Production | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4972 | tt0004972 | The Birth of a Nation | 1915 | Not Rated | 165.0 | Drama, History, Romance | 1915-03-03 | D.W. Griffith | Thomas F. Dixon Jr. (adapted from his novel: "... | Lillian Gish, Mae Marsh, Henry B. Walthall, Mi... | 6.9 | 13793.0 | None | USA | 0 | 8.0 | 100 | 38 | 38 | 0 | 58.0 | 3.3 | 4034.0 | NaN | Gravitas |
1 | 6206 | tt0006206 | Les vampires | 1915 | Not Rated | 399.0 | Action, Adventure, Crime | 1915-11-13 | Louis Feuillade | Louis Feuillade | Musidora, Édouard Mathé, Marcel Lévesque, Jean... | 6.6 | 2502.0 | French | France | 0 | 8.8 | 100 | 13 | 13 | 0 | 85.0 | 3.8 | 2075.0 | NaN | None |
2 | 6864 | tt0006864 | Intolerance: Love's Struggle Throughout the Ages | 1916 | Not Rated | 197.0 | Drama, History | 1916-09-05 | D.W. Griffith | D.W. Griffith (scenario), Anita Loos (titles) | Lillian Gish, Spottiswoode Aitken, Mary Alden,... | 8.0 | 8673.0 | None | USA | 0 | 8.0 | 96 | 28 | 27 | 1 | 78.0 | 3.8 | 4604.0 | NaN | Cohen Media Group |
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
movies.describe()
ID | Year | Runtime | imdbRating | imdbVotes | Oscars | numericRating | Meter | Reviews | Fresh | Rotten | userMeter | userRating | userReviews | BoxOffice | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1.2569e+04 | 12569.0000 | 12569.0000 | 12549.0000 | 1.2548e+04 | 12569.0000 | 12569.0000 | 12569.0000 | 12569.0000 | 12569.0000 | 12569.0000 | 12478.0000 | 12358.0000 | 1.2557e+04 | 4.1890e+03 |
mean | 5.8837e+05 | 1996.3287 | 102.7464 | 6.6102 | 2.7486e+04 | 0.1071 | 6.0808 | 62.6467 | 52.4545 | 32.4200 | 20.0345 | 64.2593 | 3.4025 | 1.8156e+05 | 3.0246e+07 |
std | 6.5278e+05 | 17.6909 | 23.5299 | 1.0107 | 6.5074e+04 | 0.6050 | 1.4497 | 27.5983 | 52.6809 | 38.3948 | 27.1021 | 19.8652 | 0.4614 | 1.9692e+06 | 5.7222e+07 |
min | 4.9720e+03 | 1902.0000 | 0.0000 | 1.1000 | 5.0000e+00 | 0.0000 | 1.1000 | 0.0000 | 10.0000 | 0.0000 | 0.0000 | 0.0000 | 0.5000 | 2.0000e+00 | 3.8200e+02 |
25% | 1.0386e+05 | 1991.0000 | 91.0000 | 6.0000 | 1.5518e+03 | 0.0000 | 5.1000 | 41.0000 | 16.0000 | 9.0000 | 4.0000 | 50.0000 | 3.1000 | 1.4560e+03 | 2.0000e+05 |
50% | 3.0021e+05 | 2002.0000 | 100.0000 | 6.7000 | 5.6120e+03 | 0.0000 | 6.2000 | 69.0000 | 30.0000 | 18.0000 | 9.0000 | 67.0000 | 3.4000 | 6.7600e+03 | 5.1000e+06 |
75% | 9.9717e+05 | 2008.0000 | 112.0000 | 7.3000 | 2.2700e+04 | 0.0000 | 7.2000 | 86.0000 | 68.0000 | 40.0000 | 23.0000 | 81.0000 | 3.7000 | 4.1481e+04 | 3.6000e+07 |
max | 3.4041e+06 | 2014.0000 | 566.0000 | 9.3000 | 1.1875e+06 | 11.0000 | 10.0000 | 100.0000 | 304.0000 | 292.0000 | 192.0000 | 99.0000 | 5.0000 | 3.5778e+07 | 7.6050e+08 |
# (number of rows, number of columns) of the loaded frame.
movies.shape
(12569, 26)
# Echo the sqlite3 connection object that the query was run against.
conn
<sqlite3.Connection at 0x7f71373a5118>
# Styling columns for the scatter glyphs: movies with at least one Oscar are
# drawn in nearly opaque orange, all others in faint grey.
has_oscar = movies["Oscars"] > 0
movies["color"] = np.where(has_oscar, "orange", "grey")
movies["alpha"] = np.where(has_oscar, 0.9, 0.25)
# Echo the column index to confirm the two new columns were appended.
movies.columns
Index([u'ID', u'imdbID', u'Title', u'Year', u'mpaaRating', u'Runtime', u'Genre', u'Released', u'Director', u'Writer', u'Cast', u'imdbRating', u'imdbVotes', u'Language', u'Country', u'Oscars', u'numericRating', u'Meter', u'Reviews', u'Fresh', u'Rotten', u'userMeter', u'userRating', u'userReviews', u'BoxOffice', u'Production', u'color', u'alpha'], dtype='object')
# Transposed view of the two styling columns (one column per movie).
movies[['color','alpha']].T
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | ... 
| 12319 | 12320 | 12321 | 12322 | 12323 | 12324 | 12325 | 12326 | 12327 | 12328 | 12329 | 12330 | 12331 | 12332 | 12333 | 12334 | 12335 | 12336 | 12337 | 12338 | 12339 | 12340 | 12341 | 12342 | 12343 | 12344 | 12345 | 12346 | 12347 | 12348 | 12349 | 12350 | 12351 | 12352 | 12353 | 12354 | 12355 | 12356 | 12357 | 12358 | 12359 | 12360 | 12361 | 12362 | 12363 | 12364 | 12365 | 12366 | 12367 | 12368 | 12369 | 12370 | 12371 | 12372 | 12373 | 12374 | 12375 | 12376 | 12377 | 12378 | 12379 | 12380 | 12381 | 12382 | 12383 | 12384 | 12385 | 12386 | 12387 | 12388 | 12389 | 12390 | 12391 | 12392 | 12393 | 12394 | 12395 | 12396 | 12397 | 12398 | 12399 | 12400 | 12401 | 12402 | 12403 | 12404 | 12405 | 12406 | 12407 | 12408 | 12409 | 12410 | 12411 | 12412 | 12413 | 12414 | 12415 | 12416 | 12417 | 12418 | 12419 | 12420 | 12421 | 12422 | 12423 | 12424 | 12425 | 12426 | 12427 | 12428 | 12429 | 12430 | 12431 | 12432 | 12433 | 12434 | 12435 | 12436 | 12437 | 12438 | 12439 | 12440 | 12441 | 12442 | 12443 | 12444 | 12445 | 12446 | 12447 | 12448 | 12449 | 12450 | 12451 | 12452 | 12453 | 12454 | 12455 | 12456 | 12457 | 12458 | 12459 | 12460 | 12461 | 12462 | 12463 | 12464 | 12465 | 12466 | 12467 | 12468 | 12469 | 12470 | 12471 | 12472 | 12473 | 12474 | 12475 | 12476 | 12477 | 12478 | 12479 | 12480 | 12481 | 12482 | 12483 | 12484 | 12485 | 12486 | 12487 | 12488 | 12489 | 12490 | 12491 | 12492 | 12493 | 12494 | 12495 | 12496 | 12497 | 12498 | 12499 | 12500 | 12501 | 12502 | 12503 | 12504 | 12505 | 12506 | 12507 | 12508 | 12509 | 12510 | 12511 | 12512 | 12513 | 12514 | 12515 | 12516 | 12517 | 12518 | 12519 | 12520 | 12521 | 12522 | 12523 | 12524 | 12525 | 12526 | 12527 | 12528 | 12529 | 12530 | 12531 | 12532 | 12533 | 12534 | 12535 | 12536 | 12537 | 12538 | 12539 | 12540 | 12541 | 12542 | 12543 | 12544 | 12545 | 12546 | 12547 | 12548 | 12549 | 12550 | 12551 | 12552 | 12553 | 12554 | 12555 | 12556 | 12557 | 12558 | 12559 | 12560 | 12561 | 12562 | 12563 | 12564 | 12565 | 12566 | 12567 | 12568 
| |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
---|---|
color | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | orange | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | orange | grey | orange | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | orange | grey | grey | grey | grey | grey | grey | orange | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | orange | grey | grey | grey | grey | grey | grey | grey | orange | grey | orange | grey | grey | orange | grey | grey | orange | grey | grey | orange | grey | grey | orange | grey | orange | grey | grey | orange | grey | grey | orange | orange | grey | grey | grey | grey | grey | orange | grey | grey | orange | grey | grey | orange | grey | grey | grey | grey | orange | grey | orange | grey | grey | orange | grey | grey | grey | grey | grey | orange | orange | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | orange | orange | grey | orange | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | orange | orange | orange | grey | grey | grey | orange | grey | orange | orange | grey | orange | orange | grey | orange | grey | grey | grey | grey | grey | grey | grey | ... 
| grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | orange | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey | grey |
alpha | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.9 | 0.9 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.9 | 0.9 | 0.25 | 0.9 | 0.9 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | ... 
| 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.9 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 | 0.25 |
2 rows × 12569 columns
# Count the missing values in each column.
movies.isnull().sum()
ID 0 imdbID 0 Title 0 Year 0 mpaaRating 2216 Runtime 0 Genre 21 Released 188 Director 20 Writer 717 Cast 157 imdbRating 20 imdbVotes 21 Language 229 Country 177 Oscars 0 numericRating 0 Meter 0 Reviews 0 Fresh 0 Rotten 0 userMeter 91 userRating 211 userReviews 12 BoxOffice 8380 Production 1104 color 0 alpha 0 dtype: int64
# Fill every missing cell with 0, modifying `movies` in place. This applies to
# all columns, so text columns that had gaps (Genre, Director, Cast, ...) now
# hold the integer 0 in those cells rather than a string.
movies.fillna(0, inplace=True) # just replace missing values with zero
# Re-check: every per-column missing-value count should now be zero.
movies.isnull().sum()
ID 0 imdbID 0 Title 0 Year 0 mpaaRating 0 Runtime 0 Genre 0 Released 0 Director 0 Writer 0 Cast 0 imdbRating 0 imdbVotes 0 Language 0 Country 0 Oscars 0 numericRating 0 Meter 0 Reviews 0 Fresh 0 Rotten 0 userMeter 0 userRating 0 userReviews 0 BoxOffice 0 Production 0 color 0 alpha 0 dtype: int64
# Human-readable box-office figure for the hover tooltip: the value is
# truncated to an integer and rendered with thousands separators.
box_office = movies["BoxOffice"]
movies["revenue"] = box_office.map(lambda dollars: '{:,d}'.format(int(dollars)))
# Echo the new column.
movies['revenue']
0 0 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0 9 0 10 0 11 0 12 0 13 0 14 0 15 0 16 0 17 0 18 0 19 0 20 0 21 0 22 0 23 0 24 51,000 25 0 26 0 27 0 28 0 29 0 ... 12539 0 12540 0 12541 0 12542 99,700 12543 0 12544 300,000 12545 12,800 12546 0 12547 0 12548 0 12549 79,300 12550 0 12551 15,300 12552 0 12553 0 12554 102,000,000 12555 0 12556 500,000 12557 5,100 12558 0 12559 0 12560 0 12561 0 12562 0 12563 0 12564 55,700,000 12565 0 12566 0 12567 0 12568 600,000 Name: revenue, dtype: object
# Load the contents of razzies-clean.csv as a list of lines; each line is
# matched against the imdbID column below.
with open(join(dirname(__file__), "razzies-clean.csv")) as f:
    razzies = f.read().splitlines()
# Build the membership mask once and reuse it for both styling columns.
is_razzie = movies.imdbID.isin(razzies)
# Listed movies are drawn in opaque purple; this runs after the Oscar
# colouring, so it overrides it for any movie that appears in the file.
movies.loc[is_razzie, "color"] = "purple"
movies.loc[is_razzie, "alpha"] = 0.9
# Maps each label offered in the X/Y axis drop-downs to the `movies` column
# that is plotted when that label is chosen.
axis_map = dict([
    ("Tomato Meter", "Meter"),
    ("Numeric Rating", "numericRating"),
    ("Number of Reviews", "Reviews"),
    ("Box Office (dollars)", "BoxOffice"),
    ("Length (minutes)", "Runtime"),
    ("Year", "Year"),
])
from bokeh.plotting import figure
from bokeh.layouts import layout, widgetbox
from bokeh.models import ColumnDataSource, HoverTool, Div
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.io import curdoc
# Header block: the HTML description shipped next to the app script.
# Read through a context manager so the file handle is closed right away.
with open(join(dirname(__file__), "description.html")) as description_file:
    desc = Div(text=description_file.read(), width=800)

# Numeric filters. Each slider's value is read by select_movies().
reviews = Slider(title="Minimum number of reviews", value=80, start=10, end=300, step=10)
min_year = Slider(title="Year released", start=1940, end=2014, value=1970, step=1)
max_year = Slider(title="End Year released", start=1940, end=2014, value=2014, step=1)
oscars = Slider(title="Minimum number of Oscar wins", start=0, end=4, value=0, step=1)
# The slider is in millions of dollars; select_movies() scales it by 1e6.
boxoffice = Slider(title="Dollars at Box Office (millions)", start=0, end=800, value=0, step=1)

# Genre choices are the whitespace-separated tokens of genres.txt.
with open(join(dirname(__file__), 'genres.txt')) as genres_file:
    genre_options = genres_file.read().split()
genre = Select(title="Genre", value="All", options=genre_options)

# Free-text filters matched against the Director and Cast columns.
director = TextInput(title="Director name contains")
cast = TextInput(title="Cast names contains")

# Axis pickers: the options are the labels of axis_map, sorted alphabetically.
x_axis = Select(title="X Axis", options=sorted(axis_map.keys()), value="Tomato Meter")
y_axis = Select(title="Y Axis", options=sorted(axis_map.keys()), value="Number of Reviews")
# Data source with one empty list per column; update() replaces these with
# the columns of the currently selected movies.
empty_columns = dict(
    (column, []) for column in ("x", "y", "color", "title", "year", "revenue", "alpha")
)
source = ColumnDataSource(data=empty_columns)

# Tooltip shown on hover: each "@name" refers to a column of `source`.
tooltip_rows = [
    ("Title", "@title"),
    ("Year", "@year"),
    ("$", "@revenue"),
]
hover = HoverTool(tooltips=tooltip_rows)

# Scatter plot whose only tool is the hover tool (no toolbar is shown).
p = figure(
    plot_height=600,
    plot_width=700,
    title="",
    toolbar_location=None,
    tools=[hover],
)
# One circle per row of `source`; fill colour and opacity come from its
# "color" and "alpha" columns.
p.circle(x="x", y="y", source=source, size=7, color="color", line_color=None, fill_alpha="alpha")
<bokeh.models.renderers.GlyphRenderer at 0x7f715a10d350>
def select_movies():
    """Return the rows of ``movies`` that match the current widget values.

    A row is kept when its Reviews, BoxOffice, Year and Oscars values satisfy
    the slider thresholds. The box-office slider is expressed in millions, so
    it is scaled by 1e6 before comparing. The result is then narrowed by the
    genre drop-down (unless it is "All") and by the director and cast text
    boxes (unless they are blank after stripping whitespace).

    Text filters are case-sensitive literal substring matches.

    Returns:
        A DataFrame holding the matching subset of ``movies``.
    """
    genre_val = genre.value
    director_val = director.value.strip()
    cast_val = cast.value.strip()
    selected = movies[
        (movies.Reviews >= reviews.value) &
        (movies.BoxOffice >= (boxoffice.value * 1e6)) &
        (movies.Year >= min_year.value) &
        (movies.Year <= max_year.value) &
        (movies.Oscars >= oscars.value)
    ]
    # regex=False: the text typed into the widgets is matched literally, so
    # input such as "(" or "*" cannot raise a regex compilation error inside
    # the widget callback.
    # na=False: cells that are not strings (missing values were replaced by
    # the integer 0 earlier in the file) count as "no match" instead of
    # yielding NaN in the boolean mask.
    if genre_val != "All":
        selected = selected[selected.Genre.str.contains(genre_val, regex=False, na=False)]
    if director_val != "":
        selected = selected[selected.Director.str.contains(director_val, regex=False, na=False)]
    if cast_val != "":
        selected = selected[selected.Cast.str.contains(cast_val, regex=False, na=False)]
    return selected
def update():
    """Re-run the movie selection and push the result into the plot.

    Looks up the columns chosen in the two axis drop-downs, relabels the
    axes, writes the number of selected movies into the plot title and
    replaces the contents of ``source`` with the selected rows.
    """
    frame = select_movies()
    x_label = x_axis.value
    y_label = y_axis.value
    # Resolve both labels to column names before touching the plot.
    x_column, y_column = axis_map[x_label], axis_map[y_label]

    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label
    p.title.text = "%d movies selected" % len(frame)

    # Data-source column name -> `movies` column that feeds it.
    feeds = {
        "x": x_column,
        "y": y_column,
        "color": "color",
        "title": "Title",
        "year": "Year",
        "revenue": "revenue",
        "alpha": "alpha",
    }
    source.data = dict((target, frame[column]) for target, column in feeds.items())
# Every widget whose value feeds the selection or the axes. This is also the
# order in which they are stacked in the input column.
controls = [
    reviews, boxoffice, genre, min_year, max_year,
    oscars, director, cast, x_axis, y_axis,
]
# Any change to a widget's value triggers a full refresh; the callback
# arguments (attr, old, new) are not needed because update() re-reads
# all widgets itself.
for control in controls:
    control.on_change('value', lambda attr, old, new: update())

sizing_mode = 'fixed' # 'scale_width' also looks nice with this example

# Page layout: description on top, then the widget column beside the plot.
inputs = widgetbox(*controls, sizing_mode=sizing_mode)
l = layout(
    [
        [desc],
        [inputs, p],
    ],
    sizing_mode=sizing_mode,
)

update() # initial load of the data

curdoc().add_root(l)
curdoc().title = "Movies"