%%html
<style>
/* Cap the height of the notebook output containers. */
.output_wrapper, .output {
height:auto !important;
max-height:350px; /* your desired max-height here */
}
/* Remove the box shadow from elements with the output_scroll class. */
.output_scroll {
box-shadow:none !important;
-webkit-box-shadow:none !important; /* vendor prefix requires the leading '-' */
}
</style>
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one; later cells rely on this to show more than one table from a single cell.
InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import warnings
# Silence these three warning categories for the rest of the notebook.
warnings.simplefilter("ignore", FutureWarning)
warnings.simplefilter("ignore", RuntimeWarning)
warnings.simplefilter("ignore", UserWarning)
# Load the IMDB CSV with the default header handling: the first line of the
# file supplies the column names.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
)
# Preview the first rows of the frame.
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# header=None: no line is used as the header, so the columns get integer
# labels and the file's own header line shows up as data row 0.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    header=None,
)
df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide |
1 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96 | 20000000 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8 | 7.9 | 8 | 7.8 | 7.8 | 8.1 | 8 | 7.7 | 8.3 | 8 | $56,671,993 | 131061209 | $187,733,202 |
2 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82 | 18000000 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
3 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72 | 8000000 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
4 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8 | 8 | 8 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
# header=2: the line at 0-based position 2 becomes the column header; the
# lines before it are not part of the resulting frame.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    header=2,
)
df.head()
2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82 | 18000000 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745 | 7.6.1 | 7.6.2 | 7.9 | 7.9.1 | 7.9.2 | 7.7 | 7.8 | 7.7.1 | 7.5 | 7.5.1 | 7.5.2 | 7.3 | 7.3.1 | 7.5.3 | 7.6.3 | 7 | 7.7.2 | 7.6.4 | $18,335,230 | 42403567 | $60,738,797 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
1 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
2 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
3 | 6 | Argo (2012) | 7.7 | 486840 | Action | Biography | Drama | 86.0 | 44500000.0 | 120 min | 43875 | 89490 | 171495 | 115165 | 37332 | 12630 | 4992 | 2910 | 2020 | 6941 | 334838 | 67910 | 971 | 795 | 162 | 178794 | 146371 | 30643 | 163795 | 136391 | 24948 | 36215 | 28817 | 6752 | 740 | 70110 | 229137.0 | 7.7 | 7.9 | 8.0 | 8.0 | 7.8 | 7.8 | 7.8 | 7.9 | 7.7 | 7.6 | 7.8 | 7.7 | 7.7 | 8.0 | 8.1 | 7.2 | 8.0 | 7.6 | $136,025,503 | 96300000 | $232,325,503 |
4 | 7 | Arrival (2016) | 8.0 | 370842 | Drama | Mystery | Sci-Fi | 81.0 | 47000000.0 | 116 min | 55533 | 87850 | 109536 | 65440 | 26913 | 10556 | 5057 | 3083 | 2194 | 4734 | 237437 | 46272 | 1943 | 1544 | 376 | 126301 | 101741 | 23163 | 111985 | 95005 | 15227 | 24027 | 20118 | 3440 | 537 | 42062 | 163774.0 | 7.9 | 8.0 | 8.6 | 8.6 | 8.4 | 8.2 | 8.2 | 8.1 | 7.8 | 7.8 | 7.8 | 7.6 | 7.6 | 7.7 | 8.3 | 7.3 | 8.0 | 7.9 | $100,546,139 | 102842047 | $203,388,186 |
# index_col="Title": use the Title column as the row index instead of the
# default integer index.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    index_col="Title",
)
df.head()
X | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Title | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||
12 Years a Slave (2013) | 1 | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
127 Hours (2010) | 2 | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
50/50Â (2011) | 3 | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
About Time (2013) | 4 | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
Amour (2012) | 5 | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# Load only the Title, Genre1, Genre2 and Budget columns.
# The result must be bound to `df` so that the preview below shows the
# four-column frame rather than the frame left over from the previous cell.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    usecols=["Title", "Genre1", "Genre2", "Budget"],
)
df.head()
Title | Genre1 | Genre2 | Budget | |
---|---|---|---|---|
0 | 12 Years a Slave (2013) | Biography | Drama | 20000000.0 |
1 | 127 Hours (2010) | Adventure | Biography | 18000000.0 |
2 | 50/50Â (2011) | Comedy | Drama | 8000000.0 |
3 | About Time (2013) | Comedy | Drama | 12000000.0 |
4 | Amour (2012) | Drama | Romance | 8900000.0 |
*Missing value formats:* by default, pandas reads the following strings as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'nan'.
# Treat the string 'nan' as a missing value via na_values.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    na_values=["nan"],
)
# Show the first rows and the (rows, columns) shape of the loaded frame.
display(df.head())
print(df.shape)
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
(117, 58)
# skip_blank_lines=False: do not skip blank lines while parsing the file.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    skip_blank_lines=False,
)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# skiprows=[1, 3, 7]: drop the file lines at these 0-based positions
# (line 0 is the header, so it is still used for the column names).
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    skiprows=[1, 3, 7],
)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
1 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
2 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
3 | 6 | Argo (2012) | 7.7 | 486840 | Action | Biography | Drama | 86.0 | 44500000.0 | 120 min | 43875 | 89490 | 171495 | 115165 | 37332 | 12630 | 4992 | 2910 | 2020 | 6941 | 334838 | 67910 | 971 | 795 | 162 | 178794 | 146371 | 30643 | 163795 | 136391 | 24948 | 36215 | 28817 | 6752 | 740 | 70110 | 229137.0 | 7.7 | 7.9 | 8.0 | 8.0 | 7.8 | 7.8 | 7.8 | 7.9 | 7.7 | 7.6 | 7.8 | 7.7 | 7.7 | 8.0 | 8.1 | 7.2 | 8.0 | 7.6 | $136,025,503 | 96300000 | $232,325,503 |
4 | 9 | Before Midnight (2013) | 7.9 | 106553 | Drama | Romance | NaN | 94.0 | 3000000.0 | 109 min | 16953 | 22109 | 31439 | 19251 | 8142 | 3412 | 1649 | 1033 | 826 | 1745 | 67076 | 23823 | 208 | 138 | 66 | 43312 | 30016 | 12857 | 37072 | 28401 | 8189 | 7479 | 5891 | 1470 | 447 | 12382 | 59116.0 | 7.9 | 7.8 | 8.1 | 8.3 | 7.4 | 8.1 | 8.2 | 7.9 | 7.8 | 7.8 | 7.6 | 7.3 | 7.4 | 7.2 | 8.5 | 7.0 | 8.0 | 7.9 | $8,114,627 | 3061842 | $11,176,469 |
# Last two rows of the frame as it stands before re-reading the file.
df.tail(2)
print("After Skipping the Rows")
# skipfooter=2: ignore the last two lines of the file; this call selects the
# Python parsing engine explicitly alongside skipfooter.
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    skipfooter=2,
    engine="python",
)
# Last two rows after the footer lines were skipped.
df.tail(2)
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
112 | 117 | X-Men: First Class (2011) | 7.8 | 556713 | Action | Adventure | Sci-Fi | 65.0 | 160000000.0 | 132 min | 64428 | 96219 | 200144 | 129352 | 41945 | 12861 | 4799 | 2349 | 1448 | 3182 | 382107 | 80444 | 2075 | 1612 | 443 | 223309 | 176821 | 44428 | 185909 | 157332 | 26094 | 30217 | 25051 | 4691 | 780 | 87542 | 257681.0 | 7.7 | 7.9 | 8.0 | 7.9 | 8.3 | 7.9 | 7.9 | 8.0 | 7.7 | 7.6 | 7.8 | 7.6 | 7.5 | 7.7 | 7.6 | 7.3 | 7.8 | 7.7 | $146,408,305 | 207215819 | $353,624,124 |
113 | 118 | Zootopia (2016) | 8.1 | 309474 | Animation | Adventure | Comedy | 78.0 | 150000000.0 | 108 min | 53626 | 70912 | 102352 | 57261 | 16719 | 4539 | 1467 | 733 | 496 | 1386 | 176202 | 52345 | 2362 | 1641 | 706 | 119637 | 87499 | 30813 | 75474 | 61358 | 13034 | 12353 | 9959 | 2151 | 518 | 35975 | 122844.0 | 8.0 | 8.3 | 8.4 | 8.3 | 8.7 | 8.2 | 8.1 | 8.4 | 7.8 | 7.8 | 8.1 | 7.8 | 7.8 | 8.1 | 7.7 | 7.6 | 8.0 | 8.0 | $341,268,248 | 682515947 | $1,023,784,195 |
After Skipping the Rows
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
113 | 115 | Wreck-It Ralph (2012) | 7.7 | 295125 | Animation | Adventure | Comedy | 72.0 | 165000000.0 | NaN | 41980 | 50262 | 96477 | 67934 | 24894 | 7748 | 2724 | 1190 | 703 | 1226 | 190983 | 50202 | 1663 | 1182 | 467 | 120962 | 90759 | 29003 | 90203 | 74767 | 14148 | 13706 | 11356 | 2112 | 614 | 44962 | 129487.0 | 7.7 | 7.8 | 7.9 | 7.9 | 8.0 | 7.9 | 7.9 | 7.9 | 7.6 | 7.6 | 7.7 | 7.4 | 7.4 | 7.5 | 7.4 | 7.2 | 7.9 | 7.6 | $189,422,889 | 281800000 | $471,222,889 |
114 | 116 | X-Men: Days of Future Past (2014) | 8.0 | 560736 | Action | Adventure | Sci-Fi | 74.0 | 200000000.0 | 132 min | 91765 | 127521 | 183578 | 104658 | 33027 | 10059 | 3710 | 1903 | 1225 | 3301 | 370835 | 71008 | 3038 | 2403 | 614 | 220178 | 179039 | 39094 | 158607 | 135392 | 20927 | 26834 | 22460 | 3884 | 710 | 67889 | 229049.0 | 8.0 | 8.1 | 8.4 | 8.4 | 8.6 | 8.1 | 8.1 | 8.2 | 7.8 | 7.8 | 8.0 | 7.7 | 7.7 | 7.9 | 7.5 | 7.4 | 8.1 | 7.9 | $233,921,534 | 513941241 | $747,862,775 |
# Report the shape of the current frame, then reload only the first 100 data
# rows (nrows=100) and report the shape again for comparison.
print("Before Shape:", df.shape)
print("After Selecting 100 Rows")
df = pd.read_csv(
    "../input/datasetsdifferent-format/IMDB.csv",
    encoding="ISO-8859-1",
    nrows=100,
)
print("After Shape:", df.shape)
Before Shape: (115, 58) After Selecting 100 Rows After Shape: (100, 58)
# Load the Excel version of the dataset with read_excel's default options.
df = pd.read_excel(
    "../input/datasetsdifferent-format/IMDB.xlsx",
)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
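pandas.read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, dtype=None, true_values=None, false_values=None, engine=None, squeeze=False, **kwds) (Note: this signature comes from an older pandas release; in current pandas `sheetname` is spelled `sheet_name`, `skip_footer` is `skipfooter`, and `parse_cols` is `usecols`.)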
*Reference:* Pandas Doc
# sheet_name=0: read the workbook's first sheet, selected by 0-based position.
df = pd.read_excel(
    "../input/datasetsdifferent-format/IMDB.xlsx",
    sheet_name=0,
)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# Open the workbook as an ExcelFile object, then list the names of the
# sheets it contains.
df_excel = pd.ExcelFile(
    '../input/datasetsdifferent-format/IMDB.xlsx'
)
df_excel.sheet_names
['movies', 'by genre']
# Parse each of the two named worksheets into its own DataFrame
# (df1 <- 'movies', df2 <- 'by genre') and preview both.
df1, df2 = (df_excel.parse(sheet) for sheet in ('movies', 'by genre'))
df1.head()
df2.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# read_excel() function
# header=3 uses the sheet's 0-indexed row 3 as the column header; the rows
# above it are not returned as data.
df = pd.read_excel('../input/datasetsdifferent-format/IMDB.xlsx', sheet_name=1, header=3)
df.head()
3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72 | 8000000 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849 | 7.7.1 | 7.7.2 | 7.9 | 7.9.1 | 7.9.2 | 7.8 | 7.8.1 | 7.7.3 | 7.6 | 7.6.1 | 7.6.2 | 7.4 | 7.4.1 | 7.5 | 7.4.2 | 7 | 7.9.3 | 7.6.3 | $35,014,192 | 4173591 | $39,187,783 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
1 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
2 | 6 | Argo (2012) | 7.7 | 486840 | Action | Biography | Drama | 86.0 | 44500000.0 | 120 min | 43875 | 89490 | 171495 | 115165 | 37332 | 12630 | 4992 | 2910 | 2020 | 6941 | 334838 | 67910 | 971 | 795 | 162 | 178794 | 146371 | 30643 | 163795 | 136391 | 24948 | 36215 | 28817 | 6752 | 740 | 70110 | 229137.0 | 7.7 | 7.9 | 8.0 | 8.0 | 7.8 | 7.8 | 7.8 | 7.9 | 7.7 | 7.6 | 7.8 | 7.7 | 7.7 | 8.0 | 8.1 | 7.2 | 8.0 | 7.6 | $136,025,503 | 96300000 | $232,325,503 |
3 | 7 | Arrival (2016) | 8.0 | 370842 | Drama | Mystery | Sci-Fi | 81.0 | 47000000.0 | 116 min | 55533 | 87850 | 109536 | 65440 | 26913 | 10556 | 5057 | 3083 | 2194 | 4734 | 237437 | 46272 | 1943 | 1544 | 376 | 126301 | 101741 | 23163 | 111985 | 95005 | 15227 | 24027 | 20118 | 3440 | 537 | 42062 | 163774.0 | 7.9 | 8.0 | 8.6 | 8.6 | 8.4 | 8.2 | 8.2 | 8.1 | 7.8 | 7.8 | 7.8 | 7.6 | 7.6 | 7.7 | 8.3 | 7.3 | 8.0 | 7.9 | $100,546,139 | 102842047 | $203,388,186 |
4 | 9 | Before Midnight (2013) | 7.9 | 106553 | Drama | Romance | NaN | 94.0 | 3000000.0 | 109 min | 16953 | 22109 | 31439 | 19251 | 8142 | 3412 | 1649 | 1033 | 826 | 1745 | 67076 | 23823 | 208 | 138 | 66 | 43312 | 30016 | 12857 | 37072 | 28401 | 8189 | 7479 | 5891 | 1470 | 447 | 12382 | 59116.0 | 7.9 | 7.8 | 8.1 | 8.3 | 7.4 | 8.1 | 8.2 | 7.9 | 7.8 | 7.8 | 7.6 | 7.3 | 7.4 | 7.2 | 8.5 | 7.0 | 8.0 | 7.9 | $8,114,627 | 3061842 | $11,176,469 |
# header=None: do not treat any row as the header. Columns receive integer
# labels and the sheet's own header row comes back as the first data row.
df = pd.read_excel('../input/datasetsdifferent-format/IMDB.xlsx', sheet_name=1, header=None)
df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide |
1 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96 | 20000000 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8 | 7.9 | 8 | 7.8 | 7.8 | 8.1 | 8 | 7.7 | 8.3 | 8 | $56,671,993 | 131061209 | $187,733,202 |
2 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82 | 18000000 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
3 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72 | 8000000 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
4 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8 | 8 | 8 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
# skiprows=7 discards the first seven rows of the sheet (the original header
# row included), so the next remaining row supplies the column labels.
df = pd.read_excel(
    io='../input/datasetsdifferent-format/IMDB.xlsx',
    sheet_name=1,
    skiprows=7,
)
df.head(n=10)
7 | Arrival (2016) | 8 | 370842 | Drama | Mystery | Sci-Fi | 81 | 47000000 | 116 min | 55533 | 87850 | 109536 | 65440 | 26913 | 10556 | 5057 | 3083 | 2194 | 4734 | 237437 | 46272 | 1943 | 1544 | 376 | 126301 | 101741 | 23163 | 111985 | 95005 | 15227 | 24027 | 20118 | 3440 | 537 | 42062 | 163774 | 7.9 | 8.1 | 8.6 | 8.6.1 | 8.4 | 8.2 | 8.2.1 | 8.1 | 7.8 | 7.8.1 | 7.8.2 | 7.6 | 7.6.1 | 7.7 | 8.3 | 7.3 | 8.2 | 7.9.1 | $100,546,139 | 102842047 | $203,388,186 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 9 | Before Midnight (2013) | 7.9 | 106553 | Drama | Romance | NaN | 94.0 | 3000000.0 | 109 min | 16953 | 22109 | 31439 | 19251 | 8142 | 3412 | 1649 | 1033 | 826 | 1745 | 67076 | 23823 | 208 | 138 | 66 | 43312 | 30016 | 12857 | 37072 | 28401 | 8189 | 7479 | 5891 | 1470 | 447 | 12382 | 59116.0 | 7.9 | 7.8 | 8.1 | 8.3 | 7.4 | 8.1 | 8.2 | 7.9 | 7.8 | 7.8 | 7.6 | 7.3 | 7.4 | 7.2 | 8.5 | 7.0 | 8.0 | 7.9 | $8,114,627 | 3061842 | $11,176,469 |
1 | 10 | Big Hero 6 (2014) | 7.8 | 315485 | Animation | Action | Adventure | 74.0 | 165000000.0 | NaN | 50311 | 61304 | 103726 | 65681 | 22389 | 6830 | 2251 | 1036 | 539 | 1439 | 187383 | 58731 | 2446 | 1571 | 855 | 128237 | 91744 | 35122 | 84098 | 68040 | 14796 | 13974 | 11304 | 2400 | 525 | 36702 | 131818.0 | 7.7 | 8.2 | 8.2 | 8.0 | 8.7 | 7.9 | 7.8 | 8.3 | 7.7 | 7.6 | 8.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.2 | 7.9 | 7.7 | $222,527,828 | 435290784 | $657,818,612 |
2 | 11 | Birdman or (The Unexpected Virtue of Ignorance... | 7.8 | 448725 | Comedy | Drama | NaN | 88.0 | 18000000.0 | 119 min | 60209 | 94476 | 121637 | 80828 | 38373 | 19161 | 10116 | 6750 | 5378 | 11807 | 292808 | 63310 | 1891 | 1538 | 334 | 178850 | 142244 | 34666 | 129547 | 108049 | 19457 | 26016 | 21166 | 4329 | 656 | 52288 | 203731.0 | 7.8 | 7.5 | 8.5 | 8.6 | 7.9 | 8.0 | 8.1 | 7.7 | 7.6 | 7.6 | 7.3 | 7.2 | 7.3 | 7.0 | 7.8 | 7.1 | 7.9 | 7.7 | $42,340,598 | 60874496 | $103,215,094 |
3 | 12 | Black Swan (2010) | 8.0 | 587893 | Drama | Thriller | NaN | 79.0 | 13000000.0 | 108 min | 93798 | 136615 | 174500 | 97826 | 40319 | 16993 | 9084 | 6065 | 3981 | 8726 | 356707 | 143077 | 1112 | 583 | 516 | 244970 | 159567 | 82856 | 204465 | 156163 | 45352 | 35111 | 27022 | 7459 | 802 | 86552 | 306578.0 | 8.0 | 8.0 | 8.5 | 8.6 | 8.4 | 8.1 | 8.1 | 8.1 | 7.9 | 7.9 | 8.0 | 7.5 | 7.5 | 7.4 | 7.9 | 7.6 | 8.0 | 8.0 | $106,954,678 | 222443368 | $329,398,046 |
4 | 13 | Boyhood (2014) | 7.9 | 290327 | Drama | NaN | NaN | 100.0 | 4000000.0 | 165 min | 49673 | 62055 | 76838 | 52238 | 23789 | 10431 | 4906 | 3071 | 2248 | 5086 | 183807 | 51558 | 1393 | 995 | 381 | 123006 | 92639 | 29076 | 81594 | 65261 | 15118 | 17881 | 13995 | 3567 | 559 | 36433 | 134679.0 | 8.0 | 7.7 | 8.1 | 8.1 | 8.0 | 8.1 | 8.1 | 7.8 | 7.8 | 7.8 | 7.6 | 7.7 | 7.7 | 7.7 | 8.2 | 7.2 | 8.0 | 7.9 | $25,352,281 | 19143000 | $44,495,281 |
5 | 14 | Bridge of Spies (2015) | 7.6 | 223756 | Drama | History | Thriller | 81.0 | 40000000.0 | 142 min | 15757 | 32840 | 83322 | 63800 | 19183 | 5178 | 1657 | 735 | 419 | 878 | 152707 | 23978 | 846 | 732 | 104 | 76784 | 64810 | 11177 | 70780 | 61525 | 8196 | 18494 | 15504 | 2667 | 545 | 24273 | 105678.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.8 | 7.7 | 7.7 | 7.6 | 7.5 | 7.5 | 7.5 | 7.7 | 7.6 | 7.9 | 7.7 | 7.4 | 7.7 | 7.5 | $72,313,754 | 93164594 | $165,478,348 |
6 | 15 | Captain America: Civil War (2016) | 7.9 | 431555 | Action | Adventure | Sci-Fi | 75.0 | 250000000.0 | 147 min | 81893 | 90156 | 117188 | 79377 | 32782 | 12322 | 5095 | 2994 | 1989 | 7786 | 264239 | 43818 | 3572 | 2865 | 683 | 148991 | 124124 | 23355 | 105069 | 91345 | 12135 | 19151 | 16351 | 2459 | 593 | 48777 | 153638.0 | 7.8 | 7.9 | 8.3 | 8.3 | 8.6 | 8.0 | 8.0 | 8.0 | 7.7 | 7.7 | 7.8 | 7.6 | 7.6 | 7.9 | 7.7 | 7.5 | 8.1 | 7.7 | $408,084,349 | 745220146 | $1,153,304,495 |
7 | 16 | Captain America: The Winter Soldier (2014) | 7.8 | 552706 | Action | Adventure | Sci-Fi | 70.0 | 170000000.0 | 136 min | 84943 | 103896 | 169440 | 120197 | 44124 | 14639 | 5571 | 2735 | 1932 | 5248 | 360615 | 66751 | 3765 | 2900 | 844 | 208526 | 170111 | 36456 | 150264 | 129500 | 18637 | 28922 | 24313 | 4103 | 720 | 72120 | 213180.0 | 7.7 | 7.9 | 8.3 | 8.2 | 8.5 | 7.8 | 7.8 | 7.9 | 7.6 | 7.6 | 7.7 | 7.7 | 7.6 | 7.8 | 7.6 | 7.5 | 8.1 | 7.5 | $259,766,572 | 454497695 | $714,264,267 |
8 | 17 | Captain Fantastic (2016) | 7.9 | 115194 | Comedy | Drama | NaN | 72.0 | 5000000.0 | NaN | 16165 | 24762 | 39686 | 22429 | 7134 | 2255 | 982 | 542 | 419 | 832 | 71760 | 19138 | 447 | 329 | 112 | 40918 | 30740 | 9707 | 36357 | 29410 | 6414 | 8123 | 6521 | 1433 | 351 | 10694 | 56956.0 | 7.9 | 8.1 | 8.2 | 8.2 | 8.2 | 8.0 | 8.0 | 8.2 | 7.8 | 7.7 | 7.9 | 7.7 | 7.6 | 7.9 | 8.5 | 6.8 | 7.8 | 7.8 | $5,879,736 | n/a | $5,879,736 |
9 | 18 | Captain Phillips (2013) | 7.8 | 350818 | Biography | Drama | Thriller | 83.0 | 55000000.0 | 134 min | 37461 | 70216 | 133266 | 76657 | 21791 | 6099 | 2051 | 1062 | 707 | 1517 | 247889 | 41602 | 995 | 838 | 147 | 131052 | 110723 | 19092 | 114418 | 98191 | 14686 | 24670 | 20178 | 4053 | 633 | 43042 | 165981.0 | 7.8 | 7.9 | 8.2 | 8.2 | 8.1 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.8 | 7.7 | 7.7 | 8.0 | 8.0 | 7.4 | 7.8 | 7.8 | $107,100,855 | 111690956 | $218,791,811 |
# Drop the last 10 rows of the sheet while reading. The keyword is
# `skipfooter`; the earlier spelling `ski_footer` is not a read_excel
# parameter, so the footer rows were not being skipped (and newer pandas
# rejects the unknown keyword outright).
df = pd.read_excel('../input/datasetsdifferent-format/IMDB.xlsx', sheet_name=1, skipfooter=10)
df.tail(10)
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
107 | 109 | True Grit (2010) | 7.6 | 257670 | Adventure | Drama | Western | 80.0 | 38000000.0 | 110 min | 21094 | 40901 | 91825 | 67175 | 23055 | 7191 | 2678 | 1305 | 779 | 1672 | 197105 | 27125 | 381 | 340 | 35 | 89394 | 76864 | 11720 | 104201 | 91807 | 11163 | 25641 | 21885 | 3369 | 747 | 53749 | 137672.0 | 7.7 | 7.6 | 7.8 | 7.8 | 7.5 | 7.7 | 7.7 | 7.6 | 7.6 | 7.6 | 7.5 | 7.7 | 7.7 | 7.7 | 7.8 | 7.3 | 7.9 | 7.6 | $171,243,005 | 81033922 | $252,276,927 |
108 | 110 | Tucker and Dale vs Evil (2010) | 7.6 | 138624 | Comedy | Horror | NaN | 65.0 | 5000000.0 | NaN | 16572 | 19818 | 44460 | 35863 | 13456 | 4588 | 1684 | 855 | 479 | 848 | 106144 | 15113 | 219 | 198 | 20 | 52889 | 45169 | 7232 | 56379 | 49634 | 6156 | 8861 | 7645 | 1072 | 540 | 26213 | 73918.0 | 7.5 | 7.7 | 7.7 | 7.7 | 8.2 | 7.6 | 7.6 | 7.6 | 7.5 | 7.5 | 7.7 | 7.5 | 7.4 | 7.7 | 7.5 | 7.1 | 7.7 | 7.5 | $223,838 | 4525678 | $4,749,516 |
109 | 111 | Tyrannosaur (2011) | 7.6 | 26016 | Drama | NaN | NaN | 65.0 | 1000000.0 | NaN | 2060 | 4083 | 9078 | 6754 | 2468 | 755 | 310 | 146 | 111 | 251 | 19827 | 3649 | 6 | 6 | 7314 | 5920 | 1338 | 12497 | 10628 | 1724 | 3311 | 2784 | 480 | 4 | 2231 | 18173 | NaN | 7.6 | 7.6 | 6.0 | 6.0 | 7.6 | 7.6 | 7.6 | 7.5 | 7.5 | 7.6 | 7.5 | 7.5 | 7.4 | 5.8 | 6.5 | 7.4 | 7.6 | NaN | $22,321 | n/a | $22,321 |
110 | 112 | Warrior (2011) | 8.2 | 361049 | Action | Drama | Sport | 71.0 | 25000000.0 | 140 min | 74983 | 96953 | 106673 | 52972 | 16668 | 5727 | 2353 | 1205 | 1050 | 2479 | 270734 | 31075 | 673 | 583 | 84 | 153824 | 136536 | 16000 | 117636 | 105144 | 11019 | 15201 | 12960 | 1990 | 586 | 45342 | 176397.0 | 8.2 | 8.2 | 8.5 | 8.5 | 8.7 | 8.4 | 8.4 | 8.4 | 8.0 | 8.0 | 8.0 | 7.7 | 7.7 | 7.5 | 7.8 | 7.1 | 8.2 | 8.1 | $13,657,115 | 9400000 | $23,057,115 |
111 | 113 | What We Do in the Shadows (2014) | 7.6 | 87975 | Comedy | Horror | NaN | 76.0 | 1600000.0 | NaN | 10485 | 14507 | 28608 | 20735 | 7696 | 2802 | 1200 | 721 | 450 | 781 | 57028 | 15840 | 268 | 209 | 56 | 32406 | 23869 | 8125 | 31707 | 25592 | 5689 | 6013 | 4740 | 1138 | 389 | 12341 | 45062.0 | 7.5 | 7.8 | 7.9 | 7.9 | 8.0 | 7.7 | 7.7 | 7.9 | 7.5 | 7.4 | 7.8 | 7.3 | 7.3 | 7.7 | 7.7 | 6.8 | 7.7 | 7.5 | $3,469,224 | 2794000 | $6,263,224 |
112 | 114 | Whiplash (2014) | 8.5 | 492285 | Drama | Music | NaN | 88.0 | 3300000.0 | 107 min | 110404 | 161864 | 132656 | 56007 | 16577 | 6031 | 2937 | 1859 | 1263 | 2723 | 308900 | 71066 | 2878 | 2200 | 660 | 205839 | 161853 | 41944 | 123712 | 102839 | 19018 | 23345 | 19072 | 3812 | 590 | 49868 | 213952.0 | 8.5 | 8.4 | 9.0 | 9.1 | 8.9 | 8.6 | 8.7 | 8.5 | 8.3 | 8.3 | 8.2 | 8.1 | 8.1 | 8.2 | 8.7 | 8.0 | 8.6 | 8.4 | $13,092,000 | 35890041 | $48,982,041 |
113 | 115 | Wreck-It Ralph (2012) | 7.7 | 295125 | Animation | Adventure | Comedy | 72.0 | 165000000.0 | NaN | 41980 | 50262 | 96477 | 67934 | 24894 | 7748 | 2724 | 1190 | 703 | 1226 | 190983 | 50202 | 1663 | 1182 | 467 | 120962 | 90759 | 29003 | 90203 | 74767 | 14148 | 13706 | 11356 | 2112 | 614 | 44962 | 129487.0 | 7.7 | 7.8 | 7.9 | 7.9 | 8.0 | 7.9 | 7.9 | 7.9 | 7.6 | 7.6 | 7.7 | 7.4 | 7.4 | 7.5 | 7.4 | 7.2 | 7.9 | 7.6 | $189,422,889 | 281800000 | $471,222,889 |
114 | 116 | X-Men: Days of Future Past (2014) | 8.0 | 560736 | Action | Adventure | Sci-Fi | 74.0 | 200000000.0 | 132 min | 91765 | 127521 | 183578 | 104658 | 33027 | 10059 | 3710 | 1903 | 1225 | 3301 | 370835 | 71008 | 3038 | 2403 | 614 | 220178 | 179039 | 39094 | 158607 | 135392 | 20927 | 26834 | 22460 | 3884 | 710 | 67889 | 229049.0 | 8.0 | 8.1 | 8.4 | 8.4 | 8.6 | 8.1 | 8.1 | 8.2 | 7.8 | 7.8 | 8.0 | 7.7 | 7.7 | 7.9 | 7.5 | 7.4 | 8.1 | 7.9 | $233,921,534 | 513941241 | $747,862,775 |
115 | 117 | X-Men: First Class (2011) | 7.8 | 556713 | Action | Adventure | Sci-Fi | 65.0 | 160000000.0 | 132 min | 64428 | 96219 | 200144 | 129352 | 41945 | 12861 | 4799 | 2349 | 1448 | 3182 | 382107 | 80444 | 2075 | 1612 | 443 | 223309 | 176821 | 44428 | 185909 | 157332 | 26094 | 30217 | 25051 | 4691 | 780 | 87542 | 257681.0 | 7.7 | 7.9 | 8.0 | 7.9 | 8.3 | 7.9 | 7.9 | 8.0 | 7.7 | 7.6 | 7.8 | 7.6 | 7.5 | 7.7 | 7.6 | 7.3 | 7.8 | 7.7 | $146,408,305 | 207215819 | $353,624,124 |
116 | 118 | Zootopia (2016) | 8.1 | 309474 | Animation | Adventure | Comedy | 78.0 | 150000000.0 | 108 min | 53626 | 70912 | 102352 | 57261 | 16719 | 4539 | 1467 | 733 | 496 | 1386 | 176202 | 52345 | 2362 | 1641 | 706 | 119637 | 87499 | 30813 | 75474 | 61358 | 13034 | 12353 | 9959 | 2151 | 518 | 35975 | 122844.0 | 8.0 | 8.3 | 8.4 | 8.3 | 8.7 | 8.2 | 8.1 | 8.4 | 7.8 | 7.8 | 8.1 | 7.8 | 7.8 | 8.1 | 7.7 | 7.6 | 8.0 | 8.0 | $341,268,248 | 682515947 | $1,023,784,195 |
# Read only the first three columns (X, Title, Rating). The column positions
# are listed explicitly because passing a bare integer to `usecols` is
# deprecated and no longer accepted by current pandas releases.
df = pd.read_excel('../input/datasetsdifferent-format/IMDB.xlsx', sheet_name=0, usecols=[0, 1, 2])
df.head()
X | Title | Rating | |
---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 |
1 | 2 | 127 Hours (2010) | 7.6 |
2 | 3 | 50/50 (2011) | 7.7 |
3 | 4 | About Time (2013) | 7.8 |
4 | 5 | Amour (2012) | 7.9 |
# Read the first three columns and assign their labels explicitly via
# `names`. The column positions are listed explicitly because passing a bare
# integer to `usecols` is deprecated and no longer accepted by current
# pandas releases.
df = pd.read_excel(
    '../input/datasetsdifferent-format/IMDB.xlsx',
    sheet_name=0,
    usecols=[0, 1, 2],
    names=['X', 'Title', 'Rating'],
)
df.head()
X | Title | Rating | |
---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 |
1 | 2 | 127 Hours (2010) | 7.6 |
2 | 3 | 50/50 (2011) | 7.7 |
3 | 4 | About Time (2013) | 7.8 |
4 | 5 | Amour (2012) | 7.9 |
# Load the first sheet and use the 'Title' column as the row index instead of
# the default integer index.
df = pd.read_excel(
    io='../input/datasetsdifferent-format/IMDB.xlsx',
    sheet_name=0,
    index_col='Title',
)
df.head()
X | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Title | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||
12 Years a Slave (2013) | 1 | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
127 Hours (2010) | 2 | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
50/50 (2011) | 3 | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
About Time (2013) | 4 | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
Amour (2012) | 5 | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# Load the first sheet, additionally treating the literal string 'nan' as a
# missing value.
df = pd.read_excel(
    io='../input/datasetsdifferent-format/IMDB.xlsx',
    sheet_name=0,
    na_values=['nan'],
)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
movies_json = pd.read_json('../input/datasetsdifferent-format/IMDB.json')
movies_json.head()
Budget | CVotes01 | CVotes02 | CVotes03 | CVotes04 | CVotes05 | CVotes06 | CVotes07 | CVotes08 | CVotes09 | CVotes10 | CVotes1000 | CVotes1829 | CVotes1829F | CVotes1829M | CVotes3044 | CVotes3044F | CVotes3044M | CVotes45A | CVotes45AF | CVotes45AM | CVotesFemale | CVotesMale | CVotesU18 | CVotesU18F | CVotesU18M | CVotesUS | CVotesnUS | Domestic | Foreign | Genre1 | Genre2 | Genre3 | MetaCritic | Rating | Runtime | Title | TotalVotes | Votes1000 | Votes1829 | Votes1829F | Votes1829M | Votes3044 | Votes3044F | Votes3044M | Votes45A | Votes45AF | Votes45AM | VotesF | VotesIMDB | VotesM | VotesU18 | VotesU18F | VotesU18M | VotesUS | VotesnUS | Worldwide | X | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20000000 | 4739 | 1785 | 2420 | 4021 | 9603 | 27231 | 83070 | 161460 | 126223 | 75556 | 664 | 200910 | 45301 | 153669 | 138762 | 23895 | 112943 | 29252 | 5726 | 23072 | 82012 | 313823 | 1837 | 457 | 1363 | 53328 | 224519 | $56,671,993 | 131061209 | Biography | Drama | History | 96 | 8.1 | 134 min | 12 Years a Slave�(2013) | 496092 | 7.7 | 8.2 | 8.2 | 8.2 | 8.0 | 8.0 | 7.9 | 7.8 | 8.1 | 7.8 | 8.1 | 8.0 | 8.1 | 8.4 | 8.5 | 8.4 | 8.3 | 8 | $187,733,202 | 1 |
1 | 18000000 | 2059 | 1161 | 1930 | 3796 | 9403 | 28394 | 78451 | 98845 | 44110 | 28939 | 649 | 133336 | 26152 | 106007 | 102120 | 14304 | 86609 | 14895 | 2261 | 12400 | 44600 | 212866 | 745 | 170 | 567 | 38478 | 169745 | $18,335,230 | 42403567 | Adventure | Biography | Drama | 82 | 7.6 | 94 min | 127 Hours�(2010) | 297075 | 7.0 | 7.7 | 7.7 | 7.8 | 7.5 | 7.5 | 7.5 | 7.3 | 7.5 | 7.3 | 7.6 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.6 | $60,738,797 | 2 |
2 | 8000000 | 1202 | 634 | 1109 | 2381 | 7545 | 24252 | 71485 | 99524 | 47501 | 28304 | 555 | 132350 | 34765 | 96269 | 94745 | 18163 | 75394 | 12829 | 2681 | 9912 | 58348 | 188925 | 506 | 153 | 348 | 46947 | 147849 | $35,014,192 | 4173591 | Comedy | Drama | Romance | 72 | 7.7 | 100 min | 50/50�(2011) | 283935 | 7.0 | 7.8 | 7.7 | 7.8 | 7.6 | 7.6 | 7.6 | 7.4 | 7.5 | 7.4 | 7.7 | 7.4 | 7.7 | 7.9 | 7.9 | 7.9 | 7.9 | 7.6 | $39,187,783 | 3 |
3 | 12000000 | 1182 | 664 | 1084 | 2210 | 5673 | 16542 | 45487 | 70850 | 43170 | 38556 | 475 | 92940 | 34126 | 57778 | 67477 | 16222 | 50212 | 13973 | 3026 | 10690 | 58098 | 126718 | 654 | 321 | 325 | 20450 | 111670 | $15,322,921 | 71777528 | Comedy | Drama | Fantasy | NA | 7.8 | 123 min | About Time�(2013) | 225412 | 6.9 | 8.0 | 8.0 | 8.0 | 7.6 | 7.7 | 7.6 | 7.6 | 7.8 | 7.5 | 7.9 | 7.7 | 7.8 | 8.2 | 8.3 | 8.1 | 7.8 | 7.7 | $87,100,449 | 4 |
4 | 8900000 | 995 | 534 | 710 | 1188 | 2585 | 5945 | 14187 | 22942 | 15944 | 11093 | 391 | 28593 | 8167 | 20107 | 28691 | 6269 | 21990 | 7425 | 1490 | 5803 | 16719 | 49808 | 121 | 24 | 95 | 7959 | 46138 | $6,739,492 | 13100000 | Drama | Romance | 94 | 7.9 | 127 min | Amour�(2012) | 76121 | 7.2 | 8.0 | 7.9 | 8.0 | 7.7 | 7.9 | 7.7 | 7.9 | 8.1 | 7.8 | 7.9 | 6.6 | 7.8 | 8.6 | 8.5 | 8.7 | 7.9 | 7.8 | $19,839,492 | 5 |
df = pd.read_html('../input/datasetsdifferent-format/IMDB.html')
# df
# Load the IMDB table from its pickled form and preview the first rows.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files from a trusted source — confirm this dataset file qualifies.
df = pd.read_pickle('../input/datasetsdifferent-format/IMDB.p')
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96.0 | 20000000.0 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519.0 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8.0 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82.0 | 18000000.0 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745.0 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50Â (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72.0 | 8000000.0 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849.0 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NaN | 12000000.0 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670.0 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | NaN | 94.0 | 8900000.0 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138.0 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
import sqlite3
from contextlib import closing

# Read the whole IMDB table from the SQLite file into a DataFrame.
# closing() releases the connection even if the query raises; sqlite3's own
# `with conn:` only scopes a transaction and does not close the connection.
with closing(sqlite3.connect("../input/datasetsdifferent-format/IMDB.sqlite")) as conn:
    df = pd.read_sql_query("SELECT * FROM IMDB;", conn)
df.head()
X | Title | Rating | TotalVotes | Genre1 | Genre2 | Genre3 | MetaCritic | Budget | Runtime | CVotes10 | CVotes09 | CVotes08 | CVotes07 | CVotes06 | CVotes05 | CVotes04 | CVotes03 | CVotes02 | CVotes01 | CVotesMale | CVotesFemale | CVotesU18 | CVotesU18M | CVotesU18F | CVotes1829 | CVotes1829M | CVotes1829F | CVotes3044 | CVotes3044M | CVotes3044F | CVotes45A | CVotes45AM | CVotes45AF | CVotes1000 | CVotesUS | CVotesnUS | VotesM | VotesF | VotesU18 | VotesU18M | VotesU18F | Votes1829 | Votes1829M | Votes1829F | Votes3044 | Votes3044M | Votes3044F | Votes45A | Votes45AM | Votes45AF | VotesIMDB | Votes1000 | VotesUS | VotesnUS | Domestic | Foreign | Worldwide | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 12 Years a Slave (2013) | 8.1 | 496092 | Biography | Drama | History | 96 | 20000000 | 134 min | 75556 | 126223 | 161460 | 83070 | 27231 | 9603 | 4021 | 2420 | 1785 | 4739 | 313823 | 82012 | 1837 | 1363 | 457 | 200910 | 153669 | 45301 | 138762 | 112943 | 23895 | 29252 | 23072 | 5726 | 664 | 53328 | 224519 | 8.1 | 8.1 | 8.4 | 8.4 | 8.5 | 8.2 | 8.2 | 8.2 | 8.0 | 7.9 | 8.0 | 7.8 | 7.8 | 8.1 | 8.0 | 7.7 | 8.3 | 8 | $56,671,993 | 131061209 | $187,733,202 |
1 | 2 | 127 Hours (2010) | 7.6 | 297075 | Adventure | Biography | Drama | 82 | 18000000 | 94 min | 28939 | 44110 | 98845 | 78451 | 28394 | 9403 | 3796 | 1930 | 1161 | 2059 | 212866 | 44600 | 745 | 567 | 170 | 133336 | 106007 | 26152 | 102120 | 86609 | 14304 | 14895 | 12400 | 2261 | 649 | 38478 | 169745 | 7.6 | 7.6 | 7.9 | 7.9 | 7.9 | 7.7 | 7.8 | 7.7 | 7.5 | 7.5 | 7.5 | 7.3 | 7.3 | 7.5 | 7.6 | 7.0 | 7.7 | 7.6 | $18,335,230 | 42403567 | $60,738,797 |
2 | 3 | 50/50 (2011) | 7.7 | 283935 | Comedy | Drama | Romance | 72 | 8000000 | 100 min | 28304 | 47501 | 99524 | 71485 | 24252 | 7545 | 2381 | 1109 | 634 | 1202 | 188925 | 58348 | 506 | 348 | 153 | 132350 | 96269 | 34765 | 94745 | 75394 | 18163 | 12829 | 9912 | 2681 | 555 | 46947 | 147849 | 7.7 | 7.7 | 7.9 | 7.9 | 7.9 | 7.8 | 7.8 | 7.7 | 7.6 | 7.6 | 7.6 | 7.4 | 7.4 | 7.5 | 7.4 | 7.0 | 7.9 | 7.6 | $35,014,192 | 4173591 | $39,187,783 |
3 | 4 | About Time (2013) | 7.8 | 225412 | Comedy | Drama | Fantasy | NA | 12000000 | 123 min | 38556 | 43170 | 70850 | 45487 | 16542 | 5673 | 2210 | 1084 | 664 | 1182 | 126718 | 58098 | 654 | 325 | 321 | 92940 | 57778 | 34126 | 67477 | 50212 | 16222 | 13973 | 10690 | 3026 | 475 | 20450 | 111670 | 7.8 | 7.9 | 8.2 | 8.1 | 8.3 | 8.0 | 8.0 | 8.0 | 7.6 | 7.6 | 7.7 | 7.6 | 7.5 | 7.8 | 7.7 | 6.9 | 7.8 | 7.7 | $15,322,921 | 71777528 | $87,100,449 |
4 | 5 | Amour (2012) | 7.9 | 76121 | Drama | Romance | 94 | 8900000 | 127 min | 11093 | 15944 | 22942 | 14187 | 5945 | 2585 | 1188 | 710 | 534 | 995 | 49808 | 16719 | 121 | 95 | 24 | 28593 | 20107 | 8167 | 28691 | 21990 | 6269 | 7425 | 5803 | 1490 | 391 | 7959 | 46138 | 7.8 | 7.9 | 8.6 | 8.7 | 8.5 | 8.0 | 8.0 | 7.9 | 7.7 | 7.7 | 7.9 | 7.9 | 7.8 | 8.1 | 6.6 | 7.2 | 7.9 | 7.8 | $6,739,492 | 13100000 | $19,839,492 |
# df = pd.read_clipboard()
# # df.head()
data_zillow = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data_zillow.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data_zillow[(data_zillow['Zhvi'] > 1000000) & (data_zillow['State'] == 'NY')].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
1132 | 2017-05-31 | 18375 | Great Neck | NY | New York | Nassau | 1132 | 1235800 |
2405 | 2017-05-31 | 54333 | Scarsdale | NY | New York | Westchester | 2405 | 1468100 |
2619 | 2017-05-31 | 47495 | Rye | NY | New York | Westchester | 2619 | 1736400 |
3032 | 2017-05-31 | 25725 | Manhasset | NY | New York | Nassau | 3032 | 1483400 |
3064 | 2017-05-31 | 18955 | Larchmont | NY | New York | Westchester | 3064 | 1052200 |
data_zillow[((data_zillow['State'] == 'CA') | (data_zillow['State'] == 'NY'))].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
6 | 2017-05-31 | 54296 | San Diego | CA | San Diego | San Diego | 6 | 572100 |
8 | 2017-05-31 | 33839 | San Jose | CA | San Jose | Santa Clara | 8 | 877400 |
10 | 2017-05-31 | 20330 | San Francisco | CA | San Francisco | San Francisco | 10 | 1194300 |
zillow_filter = data_zillow['Metro'].isin(['New York','San Diego'])
data_zillow[zillow_filter].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
6 | 2017-05-31 | 54296 | San Diego | CA | San Diego | San Diego | 6 | 572100 |
63 | 2017-05-31 | 12970 | Newark | NJ | New York | Essex | 63 | 232800 |
72 | 2017-05-31 | 25320 | Jersey City | NJ | New York | Hudson | 72 | 380000 |
85 | 2017-05-31 | 51405 | Chula Vista | CA | San Diego | San Diego | 85 | 486900 |
zillow_filter1 = data_zillow.isin({'State': ['CA'], 'Metro': ['San Francisco']})
data_zillow[zillow_filter1].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | NaN | CA | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
data_zillow = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data_zillow.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data_zillow.dtypes
Date object RegionID int64 RegionName object State object Metro object County object SizeRank int64 Zhvi int64 dtype: object
data_zillow['Zhvi'] = data_zillow.Zhvi.astype(float)
data_zillow.dtypes
Date object RegionID int64 RegionName object State object Metro object County object SizeRank int64 Zhvi float64 dtype: object
Using the `dtype` parameter of the reading function, we can change the data type of any column, as in the example below:
data_zillow1 = pd.read_csv('../input/datasetsdifferent-format/data-zillow.csv', sep=',', dtype={'Zhvi':float})
data_zillow1.dtypes
Date object RegionID int64 RegionName object State object Metro object County object SizeRank int64 Zhvi float64 dtype: object
A column can be converted to the `date` data type by using pd.to_datetime():
# Parse the Date strings into datetime64 values. `infer_datetime_format` is
# deprecated since pandas 2.0 (format inference is now the default), so it is
# omitted; the parsed result is the same.
pd.to_datetime(data_zillow1.Date).head()
0 2017-05-31 1 2017-05-31 2 2017-05-31 3 2017-05-31 4 2017-05-31 Name: Date, dtype: datetime64[ns]
data = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
filter()
Using the filter() function:
filtered_data = data.filter(items=['State', 'Metro'])
filtered_data.head(6)
State | Metro | |
---|---|---|
0 | NY | New York |
1 | CA | Los Angeles-Long Beach-Anaheim |
2 | IL | Chicago |
3 | PA | Philadelphia |
4 | AZ | Phoenix |
5 | NV | Las Vegas |
filtered_data = data.filter(regex='Region', axis=1)
filtered_data.head()
RegionID | RegionName | |
---|---|---|
0 | 6181 | New York |
1 | 12447 | Los Angeles |
2 | 17426 | Chicago |
3 | 13271 | Philadelphia |
4 | 40326 | Phoenix |
price_filter_series = data['Zhvi'] > 500000
price_filter_series.head()
0 True 1 True 2 False 3 False 4 False Name: Zhvi, dtype: bool
data[price_filter_series].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
6 | 2017-05-31 | 54296 | San Diego | CA | San Diego | San Diego | 6 | 572100 |
8 | 2017-05-31 | 33839 | San Jose | CA | San Jose | Santa Clara | 8 | 877400 |
10 | 2017-05-31 | 20330 | San Francisco | CA | San Francisco | San Francisco | 10 | 1194300 |
data[data.Zhvi >= 1000000].head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
10 | 2017-05-31 | 20330 | San Francisco | CA | San Francisco | San Francisco | 10 | 1194300 |
181 | 2017-05-31 | 54626 | Sunnyvale | CA | San Jose | Santa Clara | 181 | 1509300 |
234 | 2017-05-31 | 13713 | Santa Clara | CA | San Jose | Santa Clara | 234 | 1071500 |
238 | 2017-05-31 | 16992 | Berkeley | CA | San Francisco | Alameda | 238 | 1102000 |
308 | 2017-05-31 | 13699 | San Mateo | CA | San Francisco | San Mateo | 308 | 1198300 |
data_zillow = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data_zillow.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data_zillow.loc[7, 'Metro']
'Dallas-Fort Worth'
data_zillow.iloc[7,4]
'Dallas-Fort Worth'
data_zillow.loc[7, ['Metro', 'County']]
Metro Dallas-Fort Worth County Dallas Name: 7, dtype: object
data_zillow.iloc[7, [4,5]]
Metro Dallas-Fort Worth County Dallas Name: 7, dtype: object
data_zillow.loc[11, :]
Date 2017-05-31 RegionID 10221 RegionName Austin State TX Metro Austin County Travis SizeRank 11 Zhvi 321600 Name: 11, dtype: object
data_zillow.loc[101:105, 'Metro']
101 Winston-Salem 102 Los Angeles-Long Beach-Anaheim 103 Richmond 104 Miami-Fort Lauderdale 105 Ventura Name: Metro, dtype: object
In `loc` we pass the column label to fetch data.
In `iloc` we pass the number (integer position) to fetch data.
data_zillow.loc[201:204, "State":"County"]
State | Metro | County | |
---|---|---|---|
201 | OH | Canton | Stark |
202 | LA | New Orleans | Jefferson |
203 | CA | Santa Maria-Santa Barbara | Santa Barbara |
204 | CA | Los Angeles-Long Beach-Anaheim | Los Angeles |
data_zillow.iloc[201:205, 3:6]
State | Metro | County | |
---|---|---|---|
201 | OH | Canton | Stark |
202 | LA | New Orleans | Jefferson |
203 | CA | Santa Maria-Santa Barbara | Santa Barbara |
204 | CA | Los Angeles-Long Beach-Anaheim | Los Angeles |
data_zillow.loc[201:205, ['RegionName', 'State']]
RegionName | State | |
---|---|---|
201 | Canton | OH |
202 | Metairie | LA |
203 | Santa Maria | CA |
204 | Inglewood | CA |
205 | Orange | CA |
data_zillow.loc[201:205, :]
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
201 | 2017-05-31 | 51260 | Canton | OH | Canton | Stark | 201 | 94400 |
202 | 2017-05-31 | 5914 | Metairie | LA | New Orleans | Jefferson | 202 | 232700 |
203 | 2017-05-31 | 47570 | Santa Maria | CA | Santa Maria-Santa Barbara | Santa Barbara | 203 | 354600 |
204 | 2017-05-31 | 45888 | Inglewood | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 204 | 470600 |
205 | 2017-05-31 | 33252 | Orange | CA | Los Angeles-Long Beach-Anaheim | Orange | 205 | 652000 |
data_zillow.loc[[0,5,10], :]
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
5 | 2017-05-31 | 18959 | Las Vegas | NV | Las Vegas | Clark | 5 | 216500 |
10 | 2017-05-31 | 20330 | San Francisco | CA | San Francisco | San Francisco | 10 | 1194300 |
data_zillow.loc[data_zillow.County=="Queens"]
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
data_zillow.loc[data_zillow.Metro=="New York", "County"].head()
0 Queens 63 Essex 72 Hudson 138 Westchester 176 Passaic Name: County, dtype: object
data_zillow = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data_zillow.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data_zillow.sort_values('Metro').head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
9851 | 2017-05-31 | 48458 | Westport | WA | Aberdeen | Grays Harbor | 9851 | 144600 |
4996 | 2017-05-31 | 36873 | Elma | WA | Aberdeen | Grays Harbor | 4996 | 175200 |
5090 | 2017-05-31 | 35514 | Hoquiam | WA | Aberdeen | Grays Harbor | 5090 | 95700 |
9401 | 2017-05-31 | 33215 | Ocean Shores | WA | Aberdeen | Grays Harbor | 9401 | 152400 |
9149 | 2017-05-31 | 18370 | Grayland | WA | Aberdeen | Grays Harbor | 9149 | 143900 |
# Sort by metro name in descending order. The result is not named `sorted`,
# because that would shadow the builtin sorted() for the rest of the session.
metro_desc = data_zillow.sort_values('Metro', ascending=False)
metro_desc.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
8064 | 2017-05-31 | 19538 | Nashport | OH | Zanesville | Muskingum | 8064 | 153800 |
10271 | 2017-05-31 | 15262 | Hopewell | OH | Zanesville | Muskingum | 10271 | 138700 |
10373 | 2017-05-31 | 49730 | Norwich | OH | Zanesville | Muskingum | 10373 | 145100 |
5423 | 2017-05-31 | 53527 | New Concord | OH | Zanesville | Muskingum | 5423 | 138300 |
7595 | 2017-05-31 | 17815 | Dresden | OH | Zanesville | Muskingum | 7595 | 118400 |
# Sort by metro, breaking ties by county (both ascending). The result is not
# named `sorted`, because that would shadow the builtin sorted().
by_metro_county = data_zillow.sort_values(by=['Metro', 'County'])
by_metro_county.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
2073 | 2017-05-31 | 30116 | Aberdeen | WA | Aberdeen | Grays Harbor | 2073 | 127800 |
4568 | 2017-05-31 | 56078 | Montesano | WA | Aberdeen | Grays Harbor | 4568 | 182000 |
4996 | 2017-05-31 | 36873 | Elma | WA | Aberdeen | Grays Harbor | 4996 | 175200 |
5090 | 2017-05-31 | 35514 | Hoquiam | WA | Aberdeen | Grays Harbor | 5090 | 95700 |
7108 | 2017-05-31 | 6275 | Oakville | WA | Aberdeen | Grays Harbor | 7108 | 186900 |
# Sort by metro and county ascending, then by Zhvi descending within each
# county. The result is not named `sorted`, because that would shadow the
# builtin sorted().
by_metro_county_zhvi = data_zillow.sort_values(
    by=['Metro', 'County', 'Zhvi'],
    ascending=[True, True, False],
)
by_metro_county_zhvi.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
7108 | 2017-05-31 | 6275 | Oakville | WA | Aberdeen | Grays Harbor | 7108 | 186900 |
4568 | 2017-05-31 | 56078 | Montesano | WA | Aberdeen | Grays Harbor | 4568 | 182000 |
4996 | 2017-05-31 | 36873 | Elma | WA | Aberdeen | Grays Harbor | 4996 | 175200 |
8420 | 2017-05-31 | 19269 | McCleary | WA | Aberdeen | Grays Harbor | 8420 | 170700 |
9401 | 2017-05-31 | 33215 | Ocean Shores | WA | Aberdeen | Grays Harbor | 9401 | 152400 |
regions = data_zillow.RegionID
type(regions)
pandas.core.series.Series
Let's sort the series
regions.head()
0 6181 1 12447 2 17426 3 13271 4 40326 Name: RegionID, dtype: int64
regions.sort_values().head()
3043 3301 4159 3304 4986 3305 1762 3310 3116 3312 Name: RegionID, dtype: int64
data = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
regions = data['RegionName']
type(regions)
pandas.core.series.Series
regions.head()
0 New York 1 Los Angeles 2 Chicago 3 Philadelphia 4 Phoenix Name: RegionName, dtype: object
region_n_state = data[['RegionName', 'State']]
region_n_state.head()
RegionName | State | |
---|---|---|
0 | New York | NY |
1 | Los Angeles | CA |
2 | Chicago | IL |
3 | Philadelphia | PA |
4 | Phoenix | AZ |
type(region_n_state)
pandas.core.frame.DataFrame
data.State.head()
0 NY 1 CA 2 IL 3 PA 4 AZ Name: State, dtype: object
data['Address'] = data.County + ', ' + data.Metro + ', ' + data.State
data.Address.head()
0 Queens, New York, NY 1 Los Angeles, Los Angeles-Long Beach-Anaheim, CA 2 Cook, Chicago, IL 3 Philadelphia, Philadelphia, PA 4 Maricopa, Phoenix, AZ Name: Address, dtype: object
data = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data.RegionName.str.contains('New').head()
0 True 1 False 2 False 3 False 4 False Name: RegionName, dtype: bool
data.RegionName.str.upper().head()
0 NEW YORK 1 LOS ANGELES 2 CHICAGO 3 PHILADELPHIA 4 PHOENIX Name: RegionName, dtype: object
data.RegionName.str.lower().head()
0 new york 1 los angeles 2 chicago 3 philadelphia 4 phoenix Name: RegionName, dtype: object
data.County.str.len().head()
0 6 1 11 2 4 3 12 4 8 Name: County, dtype: int64
data.RegionName.str.lstrip().head()
0 New York 1 Los Angeles 2 Chicago 3 Philadelphia 4 Phoenix Name: RegionName, dtype: object
data.RegionName.str.replace(' ', '').head()
0 NewYork 1 LosAngeles 2 Chicago 3 Philadelphia 4 Phoenix Name: RegionName, dtype: object
data = pd.read_table('../input/datasetsdifferent-format/data-zillow.csv', sep=',')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data.axes
[RangeIndex(start=0, stop=10830, step=1), Index(['Date', 'RegionID', 'RegionName', 'State', 'Metro', 'County', 'SizeRank', 'Zhvi'], dtype='object')]
# Column-wise mean. numeric_only=True is explicit because the frame also has
# string columns (Date, State, ...); pandas >= 2.0 raises on them instead of
# silently skipping them.
data.mean(axis=0, numeric_only=True)
RegionID 84344.818837 SizeRank 5414.500000 Zhvi 250307.590028 dtype: float64
# Row-wise mean over the numeric columns only. The string columns must be
# excluded explicitly, because pandas >= 2.0 no longer drops them silently.
data.mean(axis=1, numeric_only=True).head()
0 226193.666667 1 214116.000000 2 80042.666667 3 50191.333333 4 83876.666667 dtype: float64
# axis='rows' is an alias for axis=0 (one mean per column). numeric_only=True
# keeps the string columns out of the reduction, which pandas >= 2.0 requires.
data.mean(axis='rows', numeric_only=True)
RegionID 84344.818837 SizeRank 5414.500000 Zhvi 250307.590028 dtype: float64
# axis='columns' is an alias for axis=1 (one mean per row). numeric_only=True
# keeps the string columns out of the reduction, which pandas >= 2.0 requires.
data.mean(axis='columns', numeric_only=True).head()
0 226193.666667 1 214116.000000 2 80042.666667 3 50191.333333 4 83876.666667 dtype: float64
data.drop(0, axis=0).head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|---|
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
5 | 2017-05-31 | 18959 | Las Vegas | NV | Las Vegas | Clark | 5 | 216500 |
data.drop('Date', axis=1).head()
RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|
0 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data.drop('Date', axis=1).head()
RegionID | RegionName | State | Metro | County | SizeRank | Zhvi | |
---|---|---|---|---|---|---|---|
0 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data = pd.read_csv('../input/datasetsdifferent-format/data-titanic.csv')
data.head()
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
def func_lower(x):
    """Return the lower-cased version of the string *x*."""
    return x.lower()
data.Name.apply(func_lower).head()
0 braund, mr. owen harris 1 cumings, mrs. john bradley (florence briggs th... 2 heikkinen, miss. laina 3 futrelle, mrs. jacques heath (lily may peel) 4 allen, mr. william henry Name: Name, dtype: object
data[['Age', 'Pclass']].applymap(np.square).head()
Age | Pclass | |
---|---|---|
0 | 484.0 | 9 |
1 | 1444.0 | 1 |
2 | 676.0 | 9 |
3 | 1225.0 | 1 |
4 | 1225.0 | 9 |
def my_func(i):
    """Return *i* increased by 20.

    Intended for element-wise use, e.g. with ``DataFrame.applymap``.
    """
    return i + 20
data[['Age', 'Pclass']].applymap(my_func).head()
Age | Pclass | |
---|---|---|
0 | 42.0 | 23 |
1 | 58.0 | 21 |
2 | 46.0 | 23 |
3 | 55.0 | 21 |
4 | 55.0 | 23 |
# Anti-pattern (kept for demonstration): the boolean-mask selection is a copy,
# so assigning to its `.Age` raises SettingWithCopyWarning and leaves `data`
# unchanged -- the missing ages are still NaN afterwards. Use a single
# `.loc[row_mask, 'Age'] = value` assignment instead.
data[data.Age.isnull()].Age = data.Age.mean()
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy self[name] = value
data[data.Age.isnull()].Age.head()
5 NaN 17 NaN 19 NaN 26 NaN 28 NaN Name: Age, dtype: float64
# Fill missing ages with the column mean using one .loc assignment.
# `mean` must be *called*: the bare attribute is a bound method, and assigning
# it would store the method object in the NaN slots rather than a number.
data.loc[data.Age.isnull(), 'Age'] = data.Age.mean()
data[data.Age.isnull()]
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
---|
data = pd.read_csv("../input/datasetsdifferent-format/data-titanic.csv")
data.shape
(891, 12)
data.count()
PassengerId 891 Survived 891 Pclass 891 Name 891 Sex 891 Age 714 SibSp 891 Parch 891 Ticket 891 Fare 891 Cabin 204 Embarked 889 dtype: int64
data_missing_dropped = data.dropna()
data_missing_dropped.shape
(183, 12)
data_all_missing_dropped = data.dropna(how="all")
data_all_missing_dropped.shape
(891, 12)
data_filled_zeros = data.fillna(0)
data_filled_zeros.count()
PassengerId 891 Survived 891 Pclass 891 Name 891 Sex 891 Age 891 SibSp 891 Parch 891 Ticket 891 Fare 891 Cabin 891 Embarked 891 dtype: int64
# Work on a copy so the original frame keeps its missing values.
data_filled_in_mean = data.copy()
# Assign the filled column back explicitly. Calling fillna(inplace=True) on an
# attribute-accessed column mutates an intermediate object and is not
# guaranteed to update the parent frame.
data_filled_in_mean['Age'] = data_filled_in_mean['Age'].fillna(data['Age'].mean())
# Non-null count per column: 'Age' is now complete, other columns are untouched.
data_filled_in_mean.count()
PassengerId 891 Survived 891 Pclass 891 Name 891 Sex 891 Age 891 SibSp 891 Parch 891 Ticket 891 Fare 891 Cabin 204 Embarked 889 dtype: int64
data = pd.read_csv('../input/datasetsdifferent-format/data-titanic.csv')
data.head()
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
data.set_index('Name').head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
Name | |||||||||||
Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
data = pd.read_csv('../input/datasetsdifferent-format/data-titanic.csv', index_col=3)
data.head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
Name | |||||||||||
Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
data.loc['Braund, Mr. Owen Harris',:]
PassengerId 1 Survived 0 Pclass 3 Sex male Age 22 SibSp 1 Parch 0 Ticket A/5 21171 Fare 7.25 Cabin NaN Embarked S Name: Braund, Mr. Owen Harris, dtype: object
data.reset_index(inplace=True)
data.head()
Name | PassengerId | Survived | Pclass | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | 2 | 1 | 1 | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# Two small frames with the same columns but different row labels,
# used to demonstrate row-wise concatenation.
dataset1 = pd.DataFrame(
    data={
        'Age': ['32', '26', '29'],
        'Sex': ['F', 'M', 'F'],
        'State': ['CA', 'NY', 'OH'],
    },
    index=['Jane', 'John', 'Cathy'],
)
dataset2 = pd.DataFrame(
    data={
        'Age': ['34', '23', '24', '21'],
        'Sex': ['M', 'F', 'F', 'F'],
        'State': ['AZ', 'OR', 'CA', 'WA'],
    },
    index=['Dave', 'Kris', 'Xi', 'Jo'],
)
# Stack the rows of both frames (axis=0 is the default direction).
pd.concat([dataset1, dataset2], axis=0)
Age | Sex | State | |
---|---|---|---|
Jane | 32 | F | CA |
John | 26 | M | NY |
Cathy | 29 | F | OH |
Dave | 34 | M | AZ |
Kris | 23 | F | OR |
Xi | 24 | F | CA |
Jo | 21 | F | WA |
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to add the rows of one frame to another.
pd.concat([dataset1, dataset2])
Age | Sex | State | |
---|---|---|---|
Jane | 32 | F | CA |
John | 26 | M | NY |
Cathy | 29 | F | OH |
Dave | 34 | M | AZ |
Kris | 23 | F | OR |
Xi | 24 | F | CA |
Jo | 21 | F | WA |
# Two frames that share the same row labels but hold different columns,
# used to demonstrate column-wise concatenation.
dataset1 = pd.DataFrame(
    data={
        'Age': ['32', '26', '29'],
        'Sex': ['F', 'M', 'F'],
        'State': ['CA', 'NY', 'OH'],
    },
    index=['Jane', 'John', 'Cathy'],
)
dataset2 = pd.DataFrame(
    data={
        'City': ['SF', 'NY', 'Columbus'],
        'Work Status': ['No', 'Yes', 'Yes'],
    },
    index=['Jane', 'John', 'Cathy'],
)
# axis=1 places the frames side by side, aligning rows on the index.
pd.concat((dataset1, dataset2), axis=1)
Age | Sex | State | City | Work Status | |
---|---|---|---|---|---|
Jane | 32 | F | CA | SF | No |
John | 26 | M | NY | NY | Yes |
Cathy | 29 | F | OH | Columbus | Yes |
# Two frames keyed by a 'Name' column. 'Sarah' exists only in the first and
# 'Rob' only in the second, which makes the join types distinguishable.
dataset1 = pd.DataFrame(
    data={
        'Name': ['Jane', 'John', 'Cathy', 'Sarah'],
        'Age': ['32', '26', '29', '23'],
        'Sex': ['F', 'M', 'F', 'F'],
        'State': ['CA', 'NY', 'OH', 'TX'],
    }
)
dataset2 = pd.DataFrame(
    data={
        'Name': ['Jane', 'John', 'Cathy', 'Rob'],
        'City': ['SF', 'NY', 'Columbus', 'Austin'],
        'Work Status': ['No', 'Yes', 'Yes', 'Yes'],
    }
)
# Inner join: keep only the names present in both frames.
pd.merge(dataset1, dataset2, how='inner', on='Name')
Name | Age | Sex | State | City | Work Status | |
---|---|---|---|---|---|---|
0 | Jane | 32 | F | CA | SF | No |
1 | John | 26 | M | NY | NY | Yes |
2 | Cathy | 29 | F | OH | Columbus | Yes |
pd.merge(dataset1, dataset2, on='Name', how='left')
Name | Age | Sex | State | City | Work Status | |
---|---|---|---|---|---|---|
0 | Jane | 32 | F | CA | SF | No |
1 | John | 26 | M | NY | NY | Yes |
2 | Cathy | 29 | F | OH | Columbus | Yes |
3 | Sarah | 23 | F | TX | NaN | NaN |
pd.merge(dataset1, dataset2, on='Name', how='right')
Name | Age | Sex | State | City | Work Status | |
---|---|---|---|---|---|---|
0 | Jane | 32 | F | CA | SF | No |
1 | John | 26 | M | NY | NY | Yes |
2 | Cathy | 29 | F | OH | Columbus | Yes |
3 | Rob | NaN | NaN | NaN | Austin | Yes |
pd.merge(dataset1, dataset2, on='Name', how='outer')
Name | Age | Sex | State | City | Work Status | |
---|---|---|---|---|---|---|
0 | Jane | 32 | F | CA | SF | No |
1 | John | 26 | M | NY | NY | Yes |
2 | Cathy | 29 | F | OH | Columbus | Yes |
3 | Sarah | 23 | F | TX | NaN | NaN |
4 | Rob | NaN | NaN | NaN | Austin | Yes |
# The file is comma-separated, so use read_csv (default separator ',')
# instead of read_table with an overridden separator.
top_movies = pd.read_csv('../input/datasetsdifferent-format/data-movies-top-grossing.csv')
top_movies.head()
Rank | Title | Worldwide gross | Year | |
---|---|---|---|---|
0 | 1 | Avatar | $2,787,965,087 | 2009 |
1 | 2 | Titanic | $2,186,772,302 | 1997 |
2 | 3 | Star Wars: The Force Awakens | $2,068,223,624 | 2015 |
3 | 4 | Jurassic World | $1,671,713,208 | 2015 |
4 | 5 | The Avengers | $1,518,812,988 | 2012 |
top_movies.set_index('Rank').head()
Title | Worldwide gross | Year | |
---|---|---|---|
Rank | |||
1 | Avatar | $2,787,965,087 | 2009 |
2 | Titanic | $2,186,772,302 | 1997 |
3 | Star Wars: The Force Awakens | $2,068,223,624 | 2015 |
4 | Jurassic World | $1,671,713,208 | 2015 |
5 | The Avengers | $1,518,812,988 | 2012 |
top_movies.head()
Rank | Title | Worldwide gross | Year | |
---|---|---|---|---|
0 | 1 | Avatar | $2,787,965,087 | 2009 |
1 | 2 | Titanic | $2,186,772,302 | 1997 |
2 | 3 | Star Wars: The Force Awakens | $2,068,223,624 | 2015 |
3 | 4 | Jurassic World | $1,671,713,208 | 2015 |
4 | 5 | The Avengers | $1,518,812,988 | 2012 |
top_movies.set_index('Rank', inplace=True)
top_movies.head()
Title | Worldwide gross | Year | |
---|---|---|---|
Rank | |||
1 | Avatar | $2,787,965,087 | 2009 |
2 | Titanic | $2,186,772,302 | 1997 |
3 | Star Wars: The Force Awakens | $2,068,223,624 | 2015 |
4 | Jurassic World | $1,671,713,208 | 2015 |
5 | The Avengers | $1,518,812,988 | 2012 |
top_movies.rename(columns = {'Year': 'Release Year'}).head()
Title | Worldwide gross | Release Year | |
---|---|---|---|
Rank | |||
1 | Avatar | $2,787,965,087 | 2009 |
2 | Titanic | $2,186,772,302 | 1997 |
3 | Star Wars: The Force Awakens | $2,068,223,624 | 2015 |
4 | Jurassic World | $1,671,713,208 | 2015 |
5 | The Avengers | $1,518,812,988 | 2012 |
data = pd.read_csv('../input/datasetsdifferent-format/data-titanic.csv', index_col=3)
data.head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
Name | |||||||||||
Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
data.drop('Ticket', axis=1, inplace=True)
data.head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|
Name | ||||||||||
Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C85 | C |
Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | NaN | S |
data.drop(['Parch', 'Fare'], axis=1, inplace=True)
data.head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|
Name | ||||||||
Braund, Mr. Owen Harris | 1 | 0 | 3 | male | 22.0 | 1 | NaN | S |
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | C85 | C |
Heikkinen, Miss. Laina | 3 | 1 | 3 | female | 26.0 | 0 | NaN | S |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | NaN | S |
data.drop(['Braund, Mr. Owen Harris', 'Heikkinen, Miss. Laina'], inplace=True)
data.head()
PassengerId | Survived | Pclass | Sex | Age | SibSp | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|
Name | ||||||||
Cumings, Mrs. John Bradley (Florence Briggs Thayer) | 2 | 1 | 1 | female | 38.0 | 1 | C85 | C |
Futrelle, Mrs. Jacques Heath (Lily May Peel) | 4 | 1 | 1 | female | 35.0 | 1 | C123 | S |
Allen, Mr. William Henry | 5 | 0 | 3 | male | 35.0 | 0 | NaN | S |
Moran, Mr. James | 6 | 0 | 3 | male | NaN | 0 | NaN | Q |
McCarthy, Mr. Timothy J | 7 | 0 | 1 | male | 54.0 | 0 | E46 | S |
# Custom column labels to use in place of the ones stored in the file.
list_columns = ['Date', 'Region ID', 'Region Name', 'State',
                'City', 'County', 'Size Rank', 'Price']
# header=0 marks the file's first line as a header to be replaced by `names`.
# Without it that line is parsed as the first data row.
data = pd.read_csv('../input/datasetsdifferent-format/data-zillow1.csv',
                   names=list_columns, header=0)
data.head()
Date | Region ID | Region Name | State | City | County | Size Rank | Price | |
---|---|---|---|---|---|---|---|---|
0 | Date | RegionID | RegionName | State | Metro | County | SizeRank | Price |
1 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
2 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
3 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
4 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
data = pd.read_csv('../input/datasetsdifferent-format/data-zillow1.csv')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Price | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
data.columns
Index(['Date', 'RegionID', 'RegionName', 'State', 'Metro', 'County', 'SizeRank', 'Price'], dtype='object')
data.rename(columns={'RegionName':'Region', 'Metro':'City'}, inplace=True)
data.columns
Index(['Date', 'RegionID', 'Region', 'State', 'City', 'County', 'SizeRank', 'Price'], dtype='object')
data.columns = ['Date', 'Region ID', 'Region Name', 'State',
'City', 'County', 'Size Rank','Price']
data = pd.read_csv('../input/datasetsdifferent-format/data-zillow1.csv')
data.head()
Date | RegionID | RegionName | State | Metro | County | SizeRank | Price | |
---|---|---|---|---|---|---|---|---|
0 | 2017-05-31 | 6181 | New York | NY | New York | Queens | 0 | 672400 |
1 | 2017-05-31 | 12447 | Los Angeles | CA | Los Angeles-Long Beach-Anaheim | Los Angeles | 1 | 629900 |
2 | 2017-05-31 | 17426 | Chicago | IL | Chicago | Cook | 2 | 222700 |
3 | 2017-05-31 | 13271 | Philadelphia | PA | Philadelphia | Philadelphia | 3 | 137300 |
4 | 2017-05-31 | 40326 | Phoenix | AZ | Phoenix | Maricopa | 4 | 211300 |
# Mean 'Price' per 'State'; the result is a one-column frame indexed by state.
grouped_data = data.groupby('State')[['Price']].mean()
grouped_data.head()
Price | |
---|---|
State | |
AK | 237783.333333 |
AL | 137645.637584 |
AR | 136331.707317 |
AZ | 232353.921569 |
CA | 617425.392297 |
grouped_data = data[['State', 'Price']].groupby('State')
grouped_data.head(2)
State | Price | |
---|---|---|
0 | NY | 672400 |
1 | CA | 629900 |
2 | IL | 222700 |
3 | PA | 137300 |
4 | AZ | 211300 |
5 | NV | 216500 |
6 | CA | 572100 |
7 | TX | 164700 |
9 | FL | 152300 |
11 | TX | 321600 |
12 | MI | 41500 |
13 | OH | 128300 |
14 | TN | 81100 |
15 | NC | 183800 |
17 | MA | 554600 |
18 | WA | 670300 |
19 | MD | 121100 |
20 | CO | 383200 |
21 | DC | 555900 |
22 | TN | 228500 |
23 | WI | 107900 |
24 | AZ | 164800 |
25 | OR | 417900 |
26 | OK | 132700 |
27 | NE | 152100 |
28 | NM | 189600 |
33 | MO | 121600 |
34 | VA | 259500 |
35 | CO | 251200 |
36 | GA | 208100 |
... | ... | ... |
137 | IN | 108800 |
138 | NY | 439700 |
145 | MS | 52800 |
148 | KS | 262600 |
174 | OR | 232900 |
183 | NM | 140300 |
185 | CT | 164100 |
189 | DE | 223200 |
194 | SC | 129900 |
199 | UT | 202100 |
206 | CT | 161200 |
229 | MT | 204800 |
255 | NH | 215500 |
275 | ID | 155100 |
290 | ND | 217800 |
302 | IA | 131300 |
368 | WY | 205000 |
390 | NH | 251600 |
417 | MS | 92500 |
453 | AR | 111500 |
454 | RI | 223000 |
519 | DE | 135500 |
547 | HI | 633700 |
550 | ND | 263500 |
561 | MT | 160500 |
572 | WY | 178900 |
842 | AK | 221000 |
1007 | WV | 99400 |
3545 | WV | 96200 |
7646 | ME | 73900 |
96 rows × 2 columns
grouped_data.mean().head()
Price | |
---|---|
State | |
AK | 237783.333333 |
AL | 137645.637584 |
AR | 136331.707317 |
AZ | 232353.921569 |
CA | 617425.392297 |
grouped_data.describe().head()
Price | ||||||||
---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | |
State | ||||||||
AK | 12.0 | 237783.333333 | 41433.711205 | 175800.0 | 211700.0 | 222850.0 | 254950.0 | 323100.0 |
AL | 149.0 | 137645.637584 | 72538.539135 | 44700.0 | 103900.0 | 126400.0 | 155800.0 | 598900.0 |
AR | 82.0 | 136331.707317 | 42370.537394 | 65300.0 | 108175.0 | 128750.0 | 155050.0 | 268800.0 |
AZ | 102.0 | 232353.921569 | 173068.589203 | 81500.0 | 148875.0 | 211950.0 | 258425.0 | 1611700.0 |
CA | 701.0 | 617425.392297 | 604628.412673 | 74400.0 | 277000.0 | 453500.0 | 720200.0 | 6343800.0 |
grouped_data = data[['State',
'RegionName',
'Price']].groupby(['State','RegionName']).mean()
grouped_data.head()
Price | ||
---|---|---|
State | RegionName | |
AK | Anchor Point | 175800.0 |
Anchorage | 293900.0 | |
Fairbanks | 221000.0 | |
Juneau | 323100.0 | |
Kenai | 206500.0 |
grouped_data = data.groupby(['State']).size()
grouped_data.head()
State AK 12 AL 149 AR 82 AZ 102 CA 701 dtype: int64
grouped_data = data.groupby(data.dtypes, axis=1)
# list(grouped_data)
# for state, grouped_data in data.groupby('State'):
# print(state, '\n', grouped_data)
# Small sample frame; 'DOB' is stored as plain strings at this point.
dataset = pd.DataFrame(
    data={
        'DOB': ['1976-06-01', '1980-09-23', '1984-03-30',
                '1991-12-31', '1994-10-2', '1973-11-11'],
        'Sex': ['F', 'M', 'F', 'M', 'M', 'F'],
        'State': ['CA', 'NY', 'OH', 'OR', 'TX', 'CA'],
        'Name': ['Jane', 'John', 'Cathy', 'Jo', 'Sam', 'Tai'],
    }
)
dataset
DOB | Sex | State | Name | |
---|---|---|---|---|
0 | 1976-06-01 | F | CA | Jane |
1 | 1980-09-23 | M | NY | John |
2 | 1984-03-30 | F | OH | Cathy |
3 | 1991-12-31 | M | OR | Jo |
4 | 1994-10-2 | M | TX | Sam |
5 | 1973-11-11 | F | CA | Tai |
dataset.dtypes
DOB object Sex object State object Name object dtype: object
dataset.DOB = pd.to_datetime(dataset.DOB)
dataset.dtypes
DOB datetime64[ns] Sex object State object Name object dtype: object
dataset.set_index('DOB', inplace=True)
dataset
Sex | State | Name | |
---|---|---|---|
DOB | |||
1976-06-01 | F | CA | Jane |
1980-09-23 | M | NY | John |
1984-03-30 | F | OH | Cathy |
1991-12-31 | M | OR | Jo |
1994-10-02 | M | TX | Sam |
1973-11-11 | F | CA | Tai |
# Select the rows whose DOB falls in 1980. Row selection by partial date string
# must go through .loc; plain [] with a string is column lookup and the
# row-selection fallback was removed in pandas 2.0.
dataset.loc['1980']
Sex | State | Name | |
---|---|---|---|
DOB | |||
1980-09-23 | M | NY | John |
dataset['1980':]
Sex | State | Name | |
---|---|---|---|
DOB | |||
1980-09-23 | M | NY | John |
1984-03-30 | F | OH | Cathy |
1991-12-31 | M | OR | Jo |
1994-10-02 | M | TX | Sam |
dataset[:'1980']
Sex | State | Name | |
---|---|---|---|
DOB | |||
1976-06-01 | F | CA | Jane |
1980-09-23 | M | NY | John |
1973-11-11 | F | CA | Tai |
display(dataset['1980':'1984'])
dataset.reset_index(inplace=True)
Sex | State | Name | |
---|---|---|---|
DOB | |||
1980-09-23 | M | NY | John |
1984-03-30 | F | OH | Cathy |
dataset.DOB.dt.dayofyear
0 153 1 267 2 90 3 365 4 275 5 315 Name: DOB, dtype: int64
# Weekday name of each date. `dt.weekday_name` was removed in pandas 1.0;
# `dt.day_name()` is its replacement.
dataset.DOB.dt.day_name()
0 Tuesday 1 Tuesday 2 Friday 3 Tuesday 4 Sunday 5 Sunday Name: DOB, dtype: object
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
df = pd.read_csv('../input/datasetsdifferent-format/data-alcohol.csv')
df.head()
country | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol | |
---|---|---|---|---|---|
0 | Afghanistan | 0 | 0 | 0 | 0.0 |
1 | Albania | 89 | 132 | 54 | 4.9 |
2 | Algeria | 25 | 0 | 14 | 0.7 |
3 | Andorra | 245 | 138 | 312 | 12.4 |
4 | Angola | 217 | 57 | 45 | 5.9 |
sns.palplot(sns.color_palette())
plt.figure(figsize = (15,8))
sns.set()
sns.boxplot(data=df);
sns.set_palette("bright")
plt.figure(figsize = (15,8))
sns.boxplot(data=df);
sns.palplot(sns.color_palette("deep", 7))
sns.palplot(sns.color_palette("muted", 7))
sns.palplot(sns.color_palette("pastel", 7))
sns.palplot(sns.color_palette("bright", 7))
sns.palplot(sns.color_palette("dark", 7))
sns.palplot(sns.color_palette("colorblind", 7))
sns.palplot(sns.color_palette("RdBu", 7))
sns.palplot(sns.color_palette("Blues_d", 7))
sns.set_palette("Blues_d")
plt.figure(figsize = (15,8))
sns.boxplot(data=df);
my_palette = ['#4B0082', '#0000FF', '#00FF00', '#FFFF00', '#FF7F00', '#FF0000']
sns.set_palette(my_palette)
sns.palplot(sns.color_palette())
plt.figure(figsize = (15,8))
sns.boxplot(data=df);
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
df = pd.read_csv('../input/datasetsdifferent-format/data-alcohol.csv')
df.head()
country | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol | |
---|---|---|---|---|---|
0 | Afghanistan | 0 | 0 | 0 | 0.0 |
1 | Albania | 89 | 132 | 54 | 4.9 |
2 | Algeria | 25 | 0 | 14 | 0.7 |
3 | Andorra | 245 | 138 | 312 | 12.4 |
4 | Angola | 217 | 57 | 45 | 5.9 |
sns.distplot(df.beer_servings)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb885df9dd8>
sns.set()
sns.set_style("whitegrid")
sns.lmplot(x='beer_servings', y='wine_servings', data=df);
sns.set()
sns.set_style("dark")
sns.lmplot(x='beer_servings', y='wine_servings', data=df, fit_reg=False);
# sns.set()
sns.set_style("white")
plt.figure(figsize=(15,8))
sns.swarmplot(x='country', y='wine_servings', data=df);
plt.figure(figsize=(15,8))
sns.set_style("ticks")
sns.boxplot(data=df);
sns.axes_style()
{'axes.facecolor': 'white', 'axes.edgecolor': '.15', 'axes.grid': False, 'axes.axisbelow': True, 'axes.labelcolor': '.15', 'figure.facecolor': 'white', 'grid.color': '.8', 'grid.linestyle': '-', 'text.color': '.15', 'xtick.color': '.15', 'ytick.color': '.15', 'xtick.direction': 'out', 'ytick.direction': 'out', 'lines.solid_capstyle': 'round', 'patch.edgecolor': 'w', 'image.cmap': 'rocket', 'font.family': ['sans-serif'], 'font.sans-serif': ['Arial', 'DejaVu Sans', 'Liberation Sans', 'Bitstream Vera Sans', 'sans-serif'], 'patch.force_edgecolor': True, 'xtick.bottom': True, 'xtick.top': False, 'ytick.left': True, 'ytick.right': False, 'axes.spines.left': True, 'axes.spines.bottom': True, 'axes.spines.right': True, 'axes.spines.top': True}
plt.figure(figsize=(15,8))
sns.set_style("ticks", {"axes.facecolor": ".1"})
sns.boxplot(data=df);
sns.set()
sns.set_context("paper")
# lmplot is figure-level and creates its own figure, so a preceding
# plt.figure(figsize=...) only yields an empty extra figure. Size the plot via
# height/aspect instead (height 8 in, width = height * aspect = 15 in).
sns.lmplot(x='beer_servings', y='wine_servings', data=df, height=8, aspect=15 / 8);
<Figure size 1080x576 with 0 Axes>
sns.set()
sns.set_context("talk")
# lmplot is figure-level and creates its own figure, so a preceding
# plt.figure(figsize=...) only yields an empty extra figure. Size the plot via
# height/aspect instead (height 6 in, width = height * aspect = 8 in).
sns.lmplot(x='beer_servings', y='wine_servings', data=df, height=6, aspect=8 / 6);
<Figure size 576x432 with 0 Axes>
sns.set()
sns.set_context("poster")
# lmplot is figure-level and creates its own figure, so a preceding
# plt.figure(figsize=...) only yields an empty extra figure. Size the plot via
# height/aspect instead (height 6 in, width = height * aspect = 8 in).
sns.lmplot(x='beer_servings', y='wine_servings', data=df, height=6, aspect=8 / 6);
<Figure size 576x432 with 0 Axes>
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
df = pd.read_csv('../input/datasetsdifferent-format/data_simpsons_episodes.csv')
df.head()
id | title | original_air_date | production_code | season | number_in_season | number_in_series | us_viewers_in_millions | views | imdb_rating | imdb_votes | image_url | video_url | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 10 | Homer's Night Out | 1990-03-25 | 7G10 | 1 | 10 | 10 | 30.3 | 50816.0 | 7.4 | 1511.0 | http://static-media.fxx.com/img/FX_Networks_-_... | http://www.simpsonsworld.com/video/275197507879 |
1 | 12 | Krusty Gets Busted | 1990-04-29 | 7G12 | 1 | 12 | 12 | 30.4 | 62561.0 | 8.3 | 1716.0 | http://static-media.fxx.com/img/FX_Networks_-_... | http://www.simpsonsworld.com/video/288019523914 |
2 | 14 | Bart Gets an "F" | 1990-10-11 | 7F03 | 2 | 1 | 14 | 33.6 | 59575.0 | 8.2 | 1638.0 | http://static-media.fxx.com/img/FX_Networks_-_... | http://www.simpsonsworld.com/video/260539459671 |
3 | 17 | Two Cars in Every Garage and Three Eyes on Eve... | 1990-11-01 | 7F01 | 2 | 4 | 17 | 26.1 | 64959.0 | 8.1 | 1457.0 | http://static-media.fxx.com/img/FX_Networks_-_... | http://www.simpsonsworld.com/video/260537411822 |
4 | 19 | Dead Putting Society | 1990-11-15 | 7F08 | 2 | 6 | 19 | 25.4 | 50691.0 | 8.0 | 1366.0 | http://static-media.fxx.com/img/FX_Networks_-_... | http://www.simpsonsworld.com/video/260539459670 |
plt.figure(figsize=(20,8))
sns.stripplot(x="season", y="us_viewers_in_millions", data=df);
plt.figure(figsize=(20,8))
sns.swarmplot(x="season", y="us_viewers_in_millions", data=df);
plt.figure(figsize=(20,8))
sns.boxplot(x="season", y="us_viewers_in_millions", data=df);
# sns.boxenplot(x="season", y="us_viewers_in_millions", data=df);
plt.figure(figsize=(20,8))
sns.violinplot(x="season", y="us_viewers_in_millions", data=df);
plt.figure(figsize=(20,8))
sns.barplot(x="season", y="us_viewers_in_millions", data=df);
plt.figure(figsize=(20,8))
sns.countplot(x="season", data=df);
df = pd.read_csv('../input/datasetsdifferent-format/data-alcohol.csv')
df.head()
country | beer_servings | spirit_servings | wine_servings | total_litres_of_pure_alcohol | |
---|---|---|---|---|---|
0 | Afghanistan | 0 | 0 | 0 | 0.0 |
1 | Albania | 89 | 132 | 54 | 4.9 |
2 | Algeria | 25 | 0 | 14 | 0.7 |
3 | Andorra | 245 | 138 | 312 | 12.4 |
4 | Angola | 217 | 57 | 45 | 5.9 |
plt.figure(figsize=(20,8))
sns.boxplot(data=df, orient="h");
df = pd.read_csv('../input/datasetsdifferent-format/data-titanic.csv')
df.head()
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# One panel per value of "Sex"; within each panel the rows are split and
# colored by "Survived".
g = sns.FacetGrid(df, col="Sex", hue='Survived')
# Draw a histogram of "Age" for every facet/hue subset.
g.map(plt.hist, "Age");
# Add a legend for the hue variable.
g.add_legend();
mlb = pd.read_csv('../input/datasetsdifferent-format/data-mlb-players.csv')
mlb.head()
Position | Height | Weight | Age | |
---|---|---|---|---|
0 | Catcher | 74 | 180.0 | 22.99 |
1 | Catcher | 74 | 215.0 | 34.69 |
2 | Catcher | 72 | 210.0 | 30.78 |
3 | First_Baseman | 72 | 210.0 | 35.43 |
4 | First_Baseman | 73 | 188.0 | 35.71 |
# Grid of pairwise plots restricted to Height and Weight, colored by Position.
g = sns.PairGrid(mlb, vars=["Height", "Weight"], hue="Position")
# Use a scatter plot in every cell of the grid.
g.map(plt.scatter);
# Add a legend for the hue variable.
g.add_legend();
# Pairwise plots of the numeric columns, colored by Position.
# `height` (inches per facet) is the current name of the former `size` parameter.
sns.pairplot(mlb, hue="Position", height=2.5);
/opt/conda/lib/python3.6/site-packages/seaborn/axisgrid.py:2065: UserWarning: The `size` parameter has been renamed to `height`; pleaes update your code. warnings.warn(msg, UserWarning)