In [2]:
x = float(input("Enter some number: "))
print(x ** 2)
Enter some number: asfasdf
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-95e45c49c33b> in <module>
----> 1 x = float(input("Enter some number: "))
      2 print(x ** 2)

ValueError: could not convert string to float: 'asfasdf'
In [3]:
s = "12.34"
In [6]:
float("-12.34")
Out[6]:
-12.34
In [7]:
float("2e-3") # e-3 = 10^{-3} = 1/10^3=1/1000
Out[7]:
0.002
In [9]:
try:
    x = float(input("Enter some number: "))
    print(x ** 2)
except ValueError:
    print("This is not a number")
Enter some number: ajsdlfjas
This is not a number
In [16]:
try:
    x = float(input("Enter some number: "))
    print(1 / x)
except ValueError:
    print("This is not a number")
except ZeroDivisionError:
    print("Cannot divide by zero!")
Enter some number: 123
0.008130081300813009
In [19]:
try:
    x = float(input("Enter some number: "))
    print(1 / x)
except ZeroDivisionError:
    print("Zero division")
except:
    print("Unknown error occured")
Enter some number: asf
Unknown error occured
In [22]:
try:
    x = float(input("Enter some number: "))
    print(1 / x)
except Exception as e:
    print("Unkown error occured:", e)
Enter some number: 0
Unkown error occured: float division by zero
In [23]:
import requests
In [24]:
r = requests.get("http://hse.ru/alsdfjalsfjkalsfj")
if not r.ok:
    print("Page not found")
In [32]:
try:
    r = requests.get("http://hse.rururu/")
    if not r.ok:
        print("Page not found")
except Exception as e:
    print("Incorred address", e)
Incorred address HTTPConnectionPool(host='hse.rururu', port=80): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10fcc66d8>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))
In [36]:
try:
    r = requests.get("http://hse.rururu/")
except requests.ConnectionError:
    print("Connection Error Occured")
Connection Error Occured
In [37]:
import pandas as pd

pandas возвращается

In [57]:
df = pd.read_csv("https://bit.ly/2BYqr3c")[:3000]
In [58]:
df.dtypes
Out[58]:
color                         object
director_name                 object
num_critic_for_reviews       float64
duration                     float64
director_facebook_likes      float64
actor_3_facebook_likes       float64
actor_2_name                  object
actor_1_facebook_likes       float64
gross                        float64
genres                        object
actor_1_name                  object
movie_title                   object
num_voted_users                int64
cast_total_facebook_likes      int64
actor_3_name                  object
facenumber_in_poster         float64
plot_keywords                 object
movie_imdb_link               object
num_user_for_reviews         float64
language                      object
country                       object
content_rating                object
budget                       float64
title_year                   float64
actor_2_facebook_likes       float64
imdb_score                   float64
aspect_ratio                 float64
movie_facebook_likes           int64
dtype: object
In [59]:
df['color'].unique()
Out[59]:
array(['Color', nan, ' Black and White'], dtype=object)
In [60]:
float("NaN") == float("NaN")
Out[60]:
False
In [61]:
x = float("NaN")
if x == float("NaN"):
    print("x is NaN")
else:
    print("x is not NaN")
x is not NaN
In [62]:
pd.isna(float("NaN"))
Out[62]:
True
In [63]:
x = float("NaN")
if pd.isna(x):
    print("x is NaN")
else:
    print("x is not NaN")
x is NaN
In [64]:
df['color'].value_counts()
Out[64]:
Color               2910
 Black and White      83
Name: color, dtype: int64
In [79]:
df['color'].unique()[2]
Out[79]:
' Black and White'
In [66]:
df['duration'].mean()
Out[66]:
111.71328437917224
In [67]:
df.columns
Out[67]:
Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')
In [69]:
df['color'] == 'Color'
Out[69]:
0        True
1        True
2        True
3        True
4       False
5        True
6        True
7        True
8        True
9        True
10       True
11       True
12       True
13       True
14       True
15       True
16       True
17       True
18       True
19       True
20       True
21       True
22       True
23       True
24       True
25       True
26       True
27       True
28       True
29       True
        ...  
2970     True
2971     True
2972     True
2973     True
2974     True
2975     True
2976     True
2977     True
2978     True
2979     True
2980     True
2981    False
2982     True
2983     True
2984     True
2985     True
2986     True
2987     True
2988     True
2989     True
2990     True
2991     True
2992     True
2993     True
2994     True
2995    False
2996     True
2997     True
2998     True
2999     True
Name: color, Length: 3000, dtype: bool
In [71]:
color_films = df[df['color'] == 'Color']
In [72]:
color_films['color'].unique()
Out[72]:
array(['Color'], dtype=object)
In [73]:
len(color_films)
Out[73]:
2910
In [74]:
color_films['duration'].mean()
Out[74]:
111.43481252149982
In [76]:
bw_films = df[df['color'] == 'Black and White']
In [77]:
bw_films
Out[77]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes

0 rows × 28 columns

In [87]:
df['color'] = df['color'].str.strip()
In [88]:
df['color'].unique()
Out[88]:
array(['Color', nan, 'Black and White'], dtype=object)
In [89]:
bw_films = df[df['color'] == 'Black and White']
In [91]:
bw_films['duration'].mean()
Out[91]:
123.96385542168674
In [93]:
df['director_name'].value_counts()
Out[93]:
Steven Spielberg         25
Clint Eastwood           18
Woody Allen              17
Martin Scorsese          15
Ridley Scott             15
Robert Zemeckis          13
Tim Burton               13
Renny Harlin             13
Michael Bay              13
Ron Howard               12
Shawn Levy               12
Tony Scott               12
Richard Donner           11
Chris Columbus           11
Steven Soderbergh        11
Oliver Stone             11
Barry Levinson           11
Joel Schumacher          11
Robert Rodriguez         11
Rob Reiner               10
Rob Cohen                10
John McTiernan           10
David Fincher            10
Brett Ratner             10
Paul W.S. Anderson       10
Bobby Farrelly           10
Stephen Frears            9
Peter Jackson             9
Wes Craven                9
John Carpenter            9
                         ..
Michel Hazanavicius       1
Don Michael Paul          1
Todd Field                1
Andrew Jarecki            1
Bryan Barber              1
Perry Andelin Blake       1
Glen Morgan               1
Pou-Soi Cheang            1
Ulu Grosbard              1
Ash Brannon               1
Rick Rosenthal            1
Scott Waugh               1
Drew Barrymore            1
Joan Chen                 1
Jim Goddard               1
Jon Hess                  1
John Blanchard            1
Udayan Prasad             1
Joachim Rønning           1
Julien Temple             1
Tommy O'Haver             1
Adam Rifkin               1
Cedric Nicolas-Troyan     1
Frédéric Forestier        1
Eric Bress                1
Nick Gomez                1
Rob Zombie                1
Lexi Alexander            1
Dennis Iliadis            1
Peter Landesman           1
Name: director_name, Length: 1260, dtype: int64
In [104]:
%matplotlib inline
In [109]:
(df.groupby('director_name')
 .mean()
 .sort_values('num_critic_for_reviews', ascending=False)
 [['num_critic_for_reviews', 'num_user_for_reviews']]
 .plot(x='num_critic_for_reviews', y='num_user_for_reviews', 
       kind='scatter', ylim=(0, 700))
)
Out[109]:
<matplotlib.axes._subplots.AxesSubplot at 0x12ac70ba8>
In [99]:
max_sum = 7 + 6 + 12
#          10, 4, 10
obtained_hw = 7 + 4 + 10
project = 8
cw = 9
In [101]:
(obtained / max_sum) * 10 * 0.5 + project * 0.3 + cw * 0.2
Out[101]:
4.2
In [112]:
import sys
!"{sys.executable}" -m pip install plotly
!"{sys.executable}" -m pip install plotly_express
Requirement already satisfied: plotly in ./anaconda3/lib/python3.7/site-packages (3.8.1)
Requirement already satisfied: requests in ./anaconda3/lib/python3.7/site-packages (from plotly) (2.21.0)
Requirement already satisfied: retrying>=1.3.3 in ./anaconda3/lib/python3.7/site-packages (from plotly) (1.3.3)
Requirement already satisfied: six in ./anaconda3/lib/python3.7/site-packages (from plotly) (1.12.0)
Requirement already satisfied: nbformat>=4.2 in ./anaconda3/lib/python3.7/site-packages (from plotly) (4.4.0)
Requirement already satisfied: decorator>=4.0.6 in ./anaconda3/lib/python3.7/site-packages (from plotly) (4.3.0)
Requirement already satisfied: pytz in ./anaconda3/lib/python3.7/site-packages (from plotly) (2018.7)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly) (3.0.4)
Requirement already satisfied: idna<2.9,>=2.5 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly) (2.8)
Requirement already satisfied: urllib3<1.25,>=1.21.1 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly) (1.24.1)
Requirement already satisfied: certifi>=2017.4.17 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly) (2018.11.29)
Requirement already satisfied: ipython-genutils in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly) (0.2.0)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly) (2.6.0)
Requirement already satisfied: jupyter-core in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly) (4.4.0)
Requirement already satisfied: traitlets>=4.1 in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly) (4.3.2)
Collecting plotly_express
  Downloading https://files.pythonhosted.org/packages/2a/ef/2fe7317ac47f84c032eb70f9b0bb5331ef435c11cd323c79059e3cd0cdd7/plotly_express-0.1.7-py2.py3-none-any.whl (72kB)
    100% |████████████████████████████████| 81kB 745kB/s ta 0:00:01
Requirement already satisfied: patsy>=0.5 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (0.5.1)
Requirement already satisfied: scipy>=0.18 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (1.1.0)
Requirement already satisfied: pandas>=0.20.0 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (0.23.4)
Requirement already satisfied: plotly>=3.8.1 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (3.8.1)
Requirement already satisfied: numpy>=1.11 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (1.15.4)
Requirement already satisfied: statsmodels>=0.9.0 in ./anaconda3/lib/python3.7/site-packages (from plotly_express) (0.9.0)
Requirement already satisfied: six in ./anaconda3/lib/python3.7/site-packages (from patsy>=0.5->plotly_express) (1.12.0)
Requirement already satisfied: python-dateutil>=2.5.0 in ./anaconda3/lib/python3.7/site-packages (from pandas>=0.20.0->plotly_express) (2.7.5)
Requirement already satisfied: pytz>=2011k in ./anaconda3/lib/python3.7/site-packages (from pandas>=0.20.0->plotly_express) (2018.7)
Requirement already satisfied: requests in ./anaconda3/lib/python3.7/site-packages (from plotly>=3.8.1->plotly_express) (2.21.0)
Requirement already satisfied: decorator>=4.0.6 in ./anaconda3/lib/python3.7/site-packages (from plotly>=3.8.1->plotly_express) (4.3.0)
Requirement already satisfied: retrying>=1.3.3 in ./anaconda3/lib/python3.7/site-packages (from plotly>=3.8.1->plotly_express) (1.3.3)
Requirement already satisfied: nbformat>=4.2 in ./anaconda3/lib/python3.7/site-packages (from plotly>=3.8.1->plotly_express) (4.4.0)
Requirement already satisfied: urllib3<1.25,>=1.21.1 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly>=3.8.1->plotly_express) (1.24.1)
Requirement already satisfied: certifi>=2017.4.17 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly>=3.8.1->plotly_express) (2018.11.29)
Requirement already satisfied: idna<2.9,>=2.5 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly>=3.8.1->plotly_express) (2.8)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in ./anaconda3/lib/python3.7/site-packages (from requests->plotly>=3.8.1->plotly_express) (3.0.4)
Requirement already satisfied: traitlets>=4.1 in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly>=3.8.1->plotly_express) (4.3.2)
Requirement already satisfied: jupyter-core in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly>=3.8.1->plotly_express) (4.4.0)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly>=3.8.1->plotly_express) (2.6.0)
Requirement already satisfied: ipython-genutils in ./anaconda3/lib/python3.7/site-packages (from nbformat>=4.2->plotly>=3.8.1->plotly_express) (0.2.0)
Installing collected packages: plotly-express
Successfully installed plotly-express-0.1.7
In [113]:
import plotly_express as ply
In [126]:
critics_by_directors = (
    df.groupby('director_name')
    .mean()
    .sort_values('num_critic_for_reviews', ascending=False)
    [['num_critic_for_reviews', 'num_user_for_reviews']]
    .reset_index()
)
In [122]:
ply.scatter(critics_by_directors, 
            x='num_critic_for_reviews',
            y='num_user_for_reviews', 
            hover_name='director_name', )
In [129]:
critics_by_directors = (df.groupby('director_name')
    .mean()
    .sort_values('num_critic_for_reviews', ascending=False)
    [['num_critic_for_reviews', 'num_user_for_reviews']])
In [131]:
critics_by_directors['num_reviews_adjusted'] = (
    critics_by_directors['num_critic_for_reviews'] * 5 +
    critics_by_directors['num_user_for_reviews'])
In [133]:
critics_by_directors.sort_values('num_reviews_adjusted',
                                 ascending=False)
Out[133]:
num_critic_for_reviews num_user_for_reviews num_reviews_adjusted
director_name
Christopher Nolan 545.142857 2476.000000 5201.714286
George Lucas 321.000000 3466.333333 5071.333333
Nicolas Winding Refn 676.000000 1264.000000 4644.000000
Colin Trevorrow 644.000000 1290.000000 4510.000000
Joss Whedon 579.250000 1538.750000 4435.000000
Peter Jackson 410.555556 2179.444444 4232.222222
Drew Goddard 634.000000 986.000000 4156.000000
Tim Miller 579.000000 1058.000000 3953.000000
Marc Webb 547.000000 1146.000000 3881.000000
Zack Snyder 461.000000 1570.142857 3875.142857
Alfonso Cuarón 461.333333 1531.666667 3838.333333
Steve McQueen 597.000000 695.000000 3680.000000
J.J. Abrams 489.250000 1112.500000 3558.750000
Fede Alvarez 543.000000 789.000000 3504.000000
Michel Hazanavicius 576.000000 583.000000 3463.000000
Lana Wachowski 300.200000 1938.000000 3439.000000
Quentin Tarantino 440.833333 1206.666667 3410.833333
Darren Aronofsky 459.000000 1098.333333 3393.333333
Matt Reeves 456.666667 1059.333333 3342.666667
Joseph Kosinski 492.333333 740.666667 3202.333333
Sam Taylor-Johnson 362.000000 1360.000000 3170.000000
Tom Hooper 483.500000 740.500000 3158.000000
Ben Affleck 517.000000 557.500000 3142.500000
Alan Taylor 480.000000 699.500000 3099.500000
Neill Blomkamp 439.666667 901.000000 3099.333333
José Padilha 492.000000 630.000000 3090.000000
Christophe Gans 267.000000 1740.000000 3075.000000
Alex Garland 489.000000 611.000000 3056.000000
Mel Gibson 273.666667 1640.666667 3009.000000
Lee Unkrich 453.000000 733.000000 2998.000000
... ... ... ...
Randal Kleiser 10.000000 31.000000 81.000000
Pou-Soi Cheang 14.000000 9.000000 79.000000
John Cromwell 7.000000 44.000000 79.000000
Lionel C. Martin 11.000000 15.000000 70.000000
Terence Young 10.000000 16.000000 66.000000
Emile Ardolino 10.000000 15.000000 65.000000
Frédéric Auburtin 7.000000 22.000000 57.000000
Charles T. Kanganis 5.000000 26.000000 51.000000
Jamel Debbouze 9.000000 5.000000 50.000000
Mario Van Peebles 7.000000 11.000000 46.000000
Jérôme Deschamps 4.000000 24.000000 44.000000
Peter Cousens 5.000000 13.000000 38.000000
Yuefeng Song 4.000000 18.000000 38.000000
Rob Hawk 6.000000 6.000000 36.000000
Maksim Fadeev 5.000000 7.000000 32.000000
Daniele Luchetti 4.000000 4.000000 24.000000
James Fargo 2.000000 8.000000 18.000000
Hideaki Anno 1.000000 13.000000 18.000000
Andrés Couturier 3.000000 1.000000 16.000000
Matt Birch 2.500000 2.000000 14.500000
Tom Walsh 2.000000 3.000000 13.000000
Gnana Rajasekaran 2.000000 1.000000 11.000000
John H. Lee 2.000000 1.000000 11.000000
Christopher Barnard NaN NaN NaN
Dan Curtis NaN 56.000000 NaN
Doug Walker NaN NaN NaN
Jane Clark NaN NaN NaN
John Blanchard NaN NaN NaN
Jonathan Jakubowicz NaN 1.000000 NaN
Tony Kaye NaN NaN NaN

1260 rows × 3 columns

In [136]:
# Вопрос: какие страны снимают самые качественные фильмы с точки зрения среднего
# imdb_score ?
Out[136]:
color director_name num_critic_for_reviews duration director_facebook_likes actor_3_facebook_likes actor_2_name actor_1_facebook_likes gross genres ... num_user_for_reviews language country content_rating budget title_year actor_2_facebook_likes imdb_score aspect_ratio movie_facebook_likes
0 Color James Cameron 723.0 178.0 0.0 855.0 Joel David Moore 1000.0 760505847.0 Action|Adventure|Fantasy|Sci-Fi ... 3054.0 English USA PG-13 2.370000e+08 2009.0 936.0 7.9 1.78 33000
1 Color Gore Verbinski 302.0 169.0 563.0 1000.0 Orlando Bloom 40000.0 309404152.0 Action|Adventure|Fantasy ... 1238.0 English USA PG-13 3.000000e+08 2007.0 5000.0 7.1 2.35 0
2 Color Sam Mendes 602.0 148.0 0.0 161.0 Rory Kinnear 11000.0 200074175.0 Action|Adventure|Thriller ... 994.0 English UK PG-13 2.450000e+08 2015.0 393.0 6.8 2.35 85000
3 Color Christopher Nolan 813.0 164.0 22000.0 23000.0 Christian Bale 27000.0 448130642.0 Action|Thriller ... 2701.0 English USA PG-13 2.500000e+08 2012.0 23000.0 8.5 2.35 164000
4 NaN Doug Walker NaN NaN 131.0 NaN Rob Walker 131.0 NaN Documentary ... NaN NaN NaN NaN NaN NaN 12.0 7.1 NaN 0
5 Color Andrew Stanton 462.0 132.0 475.0 530.0 Samantha Morton 640.0 73058679.0 Action|Adventure|Sci-Fi ... 738.0 English USA PG-13 2.637000e+08 2012.0 632.0 6.6 2.35 24000
6 Color Sam Raimi 392.0 156.0 0.0 4000.0 James Franco 24000.0 336530303.0 Action|Adventure|Romance ... 1902.0 English USA PG-13 2.580000e+08 2007.0 11000.0 6.2 2.35 0
7 Color Nathan Greno 324.0 100.0 15.0 284.0 Donna Murphy 799.0 200807262.0 Adventure|Animation|Comedy|Family|Fantasy|Musi... ... 387.0 English USA PG 2.600000e+08 2010.0 553.0 7.8 1.85 29000
8 Color Joss Whedon 635.0 141.0 0.0 19000.0 Robert Downey Jr. 26000.0 458991599.0 Action|Adventure|Sci-Fi ... 1117.0 English USA PG-13 2.500000e+08 2015.0 21000.0 7.5 2.35 118000
9 Color David Yates 375.0 153.0 282.0 10000.0 Daniel Radcliffe 25000.0 301956980.0 Adventure|Family|Fantasy|Mystery ... 973.0 English UK PG 2.500000e+08 2009.0 11000.0 7.5 2.35 10000
10 Color Zack Snyder 673.0 183.0 0.0 2000.0 Lauren Cohan 15000.0 330249062.0 Action|Adventure|Sci-Fi ... 3018.0 English USA PG-13 2.500000e+08 2016.0 4000.0 6.9 2.35 197000
11 Color Bryan Singer 434.0 169.0 0.0 903.0 Marlon Brando 18000.0 200069408.0 Action|Adventure|Sci-Fi ... 2367.0 English USA PG-13 2.090000e+08 2006.0 10000.0 6.1 2.35 0
12 Color Marc Forster 403.0 106.0 395.0 393.0 Mathieu Amalric 451.0 168368427.0 Action|Adventure ... 1243.0 English UK PG-13 2.000000e+08 2008.0 412.0 6.7 2.35 0
13 Color Gore Verbinski 313.0 151.0 563.0 1000.0 Orlando Bloom 40000.0 423032628.0 Action|Adventure|Fantasy ... 1832.0 English USA PG-13 2.250000e+08 2006.0 5000.0 7.3 2.35 5000
14 Color Gore Verbinski 450.0 150.0 563.0 1000.0 Ruth Wilson 40000.0 89289910.0 Action|Adventure|Western ... 711.0 English USA PG-13 2.150000e+08 2013.0 2000.0 6.5 2.35 48000
15 Color Zack Snyder 733.0 143.0 0.0 748.0 Christopher Meloni 15000.0 291021565.0 Action|Adventure|Fantasy|Sci-Fi ... 2536.0 English USA PG-13 2.250000e+08 2013.0 3000.0 7.2 2.35 118000
16 Color Andrew Adamson 258.0 150.0 80.0 201.0 Pierfrancesco Favino 22000.0 141614023.0 Action|Adventure|Family|Fantasy ... 438.0 English USA PG 2.250000e+08 2008.0 216.0 6.6 2.35 0
17 Color Joss Whedon 703.0 173.0 0.0 19000.0 Robert Downey Jr. 26000.0 623279547.0 Action|Adventure|Sci-Fi ... 1722.0 English USA PG-13 2.200000e+08 2012.0 21000.0 8.1 1.85 123000
18 Color Rob Marshall 448.0 136.0 252.0 1000.0 Sam Claflin 40000.0 241063875.0 Action|Adventure|Fantasy ... 484.0 English USA PG-13 2.500000e+08 2011.0 11000.0 6.7 2.35 58000
19 Color Barry Sonnenfeld 451.0 106.0 188.0 718.0 Michael Stuhlbarg 10000.0 179020854.0 Action|Adventure|Comedy|Family|Fantasy|Sci-Fi ... 341.0 English USA PG-13 2.250000e+08 2012.0 816.0 6.8 1.85 40000
20 Color Peter Jackson 422.0 164.0 0.0 773.0 Adam Brown 5000.0 255108370.0 Adventure|Fantasy ... 802.0 English New Zealand PG-13 2.500000e+08 2014.0 972.0 7.5 2.35 65000
21 Color Marc Webb 599.0 153.0 464.0 963.0 Andrew Garfield 15000.0 262030663.0 Action|Adventure|Fantasy ... 1225.0 English USA PG-13 2.300000e+08 2012.0 10000.0 7.0 2.35 56000
22 Color Ridley Scott 343.0 156.0 0.0 738.0 William Hurt 891.0 105219735.0 Action|Adventure|Drama|History ... 546.0 English USA PG-13 2.000000e+08 2010.0 882.0 6.7 2.35 17000
23 Color Peter Jackson 509.0 186.0 0.0 773.0 Adam Brown 5000.0 258355354.0 Adventure|Fantasy ... 951.0 English USA PG-13 2.250000e+08 2013.0 972.0 7.9 2.35 83000
24 Color Chris Weitz 251.0 113.0 129.0 1000.0 Eva Green 16000.0 70083519.0 Adventure|Family|Fantasy ... 666.0 English USA PG-13 1.800000e+08 2007.0 6000.0 6.1 2.35 0
25 Color Peter Jackson 446.0 201.0 0.0 84.0 Thomas Kretschmann 6000.0 218051260.0 Action|Adventure|Drama|Romance ... 2618.0 English New Zealand PG-13 2.070000e+08 2005.0 919.0 7.2 2.35 0
26 Color James Cameron 315.0 194.0 0.0 794.0 Kate Winslet 29000.0 658672302.0 Drama|Romance ... 2528.0 English USA PG-13 2.000000e+08 1997.0 14000.0 7.7 2.35 26000
27 Color Anthony Russo 516.0 147.0 94.0 11000.0 Scarlett Johansson 21000.0 407197282.0 Action|Adventure|Sci-Fi ... 1022.0 English USA PG-13 2.500000e+08 2016.0 19000.0 8.2 2.35 72000
28 Color Peter Berg 377.0 131.0 532.0 627.0 Alexander Skarsgård 14000.0 65173160.0 Action|Adventure|Sci-Fi|Thriller ... 751.0 English USA PG-13 2.090000e+08 2012.0 10000.0 5.9 2.35 44000
29 Color Colin Trevorrow 644.0 124.0 365.0 1000.0 Judy Greer 3000.0 652177271.0 Action|Adventure|Sci-Fi|Thriller ... 1290.0 English USA PG-13 1.500000e+08 2015.0 2000.0 7.0 2.00 150000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2970 Color Wolfgang Petersen 96.0 293.0 249.0 18.0 Martin Semmelrogge 362.0 11433134.0 Adventure|Drama|Thriller|War ... 426.0 German West Germany R 1.400000e+07 1981.0 21.0 8.4 1.85 11000
2971 Color John Lee Hancock 106.0 137.0 102.0 877.0 Marc Blucas 2000.0 22406362.0 Drama|History|War|Western ... 267.0 English USA PG-13 1.070000e+08 2004.0 973.0 6.0 2.35 701
2972 Color Wallace Wolodarsky 53.0 93.0 32.0 503.0 Barry Watson 529.0 10198766.0 Comedy ... 105.0 English USA R 1.200000e+07 2002.0 526.0 5.4 1.85 507
2973 Color Richard Curtis 274.0 123.0 628.0 171.0 Tom Hollander 565.0 15294553.0 Drama|Fantasy|Romance ... 391.0 English UK R 1.200000e+07 2013.0 555.0 7.8 2.35 105000
2974 Color Yimou Zhang 248.0 119.0 611.0 4.0 Andy Lau 755.0 11041228.0 Action|Adventure|Drama|Romance ... 420.0 Mandarin China PG-13 1.000000e+08 2004.0 483.0 7.6 2.35 0
2975 Color Nicholas Jarecki 288.0 107.0 27.0 360.0 Curtiss Cook 664.0 7918283.0 Drama|Thriller ... 145.0 English USA R 1.200000e+07 2012.0 591.0 6.6 1.85 0
2976 Color Dean Israelite 177.0 106.0 16.0 265.0 Jonny Weston 452.0 22331028.0 Sci-Fi|Thriller ... 177.0 English USA PG-13 1.200000e+07 2015.0 328.0 6.4 2.35 0
2977 Color Darnell Martin 81.0 109.0 67.0 192.0 Veronika Dash 436.0 8134217.0 Biography|Drama|Music ... 90.0 English USA R 1.200000e+07 2008.0 223.0 7.0 2.35 0
2978 Color Scott Alexander 29.0 81.0 25.0 654.0 Dave Chappelle 931.0 6982680.0 Comedy|Crime ... 69.0 English USA PG-13 1.000000e+07 2000.0 744.0 5.7 1.85 262
2979 Color Stuart Gordon 58.0 95.0 216.0 968.0 Kurtwood Smith 1000.0 6739141.0 Action|Crime|Sci-Fi|Thriller ... 68.0 English Australia R 8.000000e+06 1992.0 1000.0 5.9 1.85 1000
2980 Color Christopher Guest 144.0 86.0 378.0 783.0 Catherine O'Hara 957.0 5542025.0 Comedy ... 150.0 English USA PG-13 1.200000e+07 2006.0 925.0 6.3 1.85 650
2981 Black and White Woody Allen 114.0 113.0 11000.0 99.0 Aleksa Palladino 537.0 5032496.0 Comedy|Drama ... 161.0 English USA R 1.200000e+07 1998.0 255.0 6.3 1.66 652
2982 Color Ryan Murphy 132.0 122.0 708.0 316.0 Joseph Cross 433.0 6754898.0 Comedy|Drama ... 188.0 English USA R 1.200000e+07 2006.0 398.0 6.2 2.35 0
2983 Color Robert Iscove 60.0 90.0 7.0 251.0 Kelly Clarkson 525.0 4922166.0 Comedy|Musical|Romance ... 304.0 English USA PG 1.200000e+07 2003.0 281.0 2.1 1.85 0
2984 Color Spike Lee 30.0 108.0 0.0 599.0 Debi Mazar 873.0 4903000.0 Comedy|Drama ... 33.0 English USA R 1.200000e+07 1996.0 680.0 5.0 1.85 251
2985 Color Jane Campion 138.0 113.0 319.0 48.0 Nick Damici 1000.0 4717455.0 Mystery|Thriller ... 354.0 English Australia R 1.200000e+07 2003.0 65.0 5.3 1.85 656
2986 Color James Gray 190.0 110.0 115.0 103.0 Vinessa Shaw 812.0 3148482.0 Drama|Romance ... 98.0 English USA R 1.200000e+07 2008.0 580.0 7.1 2.35 0
2987 Color Fred Schepisi 61.0 109.0 40.0 794.0 Ray Winstone 5000.0 2326407.0 Drama ... 99.0 English UK R 1.200000e+07 2001.0 1000.0 7.0 2.35 305
2988 Color Joon-ho Bong 363.0 110.0 584.0 74.0 Kang-ho Song 629.0 2201412.0 Comedy|Drama|Horror|Sci-Fi ... 279.0 Korean South Korea R 1.221550e+10 2006.0 398.0 7.0 1.85 7000
2989 Color Roger Spottiswoode 2.0 100.0 55.0 212.0 Treat Williams 3000.0 NaN Adventure|Crime|Thriller ... 10.0 English USA PG 1.200000e+07 1981.0 642.0 5.7 1.85 135
2990 Color Antonia Bird 131.0 101.0 61.0 611.0 Jeffrey Jones 769.0 2060953.0 Fantasy|Horror|Thriller ... 316.0 English Czech Republic R 1.200000e+07 1999.0 692.0 7.1 2.35 0
2991 Color Jon Poll 146.0 97.0 20.0 442.0 Megan Park 21000.0 3950294.0 Comedy|Drama|Romance ... 124.0 English USA R 1.200000e+07 2007.0 569.0 7.0 1.85 0
2992 Color Paolo Sorrentino 280.0 172.0 667.0 70.0 Sabrina Ferilli 211.0 2835886.0 Drama ... 124.0 Italian Italy Not Rated 9.200000e+06 2013.0 98.0 7.7 2.35 29000
2993 Color Peter Care 78.0 104.0 0.0 77.0 Michael Harding 1000.0 1779284.0 Comedy|Drama ... 122.0 English USA R 1.200000e+07 2002.0 165.0 7.1 1.85 677
2994 Color Chan-wook Park 469.0 99.0 0.0 520.0 Alden Ehrenreich 3000.0 1702277.0 Drama|Thriller ... 235.0 English UK R 1.200000e+07 2013.0 1000.0 6.8 2.35 27000
2995 Black and White Kar-Wai Wong 194.0 129.0 0.0 576.0 Tony Chiu Wai Leung 878.0 261481.0 Drama|Romance|Sci-Fi ... 210.0 Cantonese Hong Kong R 1.200000e+07 2004.0 643.0 7.5 2.35 0
2996 Color Ira Sachs 121.0 91.0 60.0 18.0 Erin Boyes 60.0 1506998.0 Crime|Drama|Romance ... 50.0 English USA PG-13 1.200000e+07 2007.0 46.0 6.3 1.85 314
2997 Color Carroll Ballard 38.0 100.0 19.0 393.0 Hope Davis 706.0 860002.0 Adventure|Drama|Family ... 43.0 English USA PG 1.200000e+07 2005.0 442.0 7.3 1.85 0
2998 Color Neil Jordan 123.0 111.0 277.0 289.0 Stephen Rea 845.0 548934.0 Drama|Mystery|Romance ... 57.0 English Ireland PG-13 1.200000e+07 2009.0 327.0 6.8 1.85 0
2999 Color Takeshi Kitano 81.0 114.0 0.0 403.0 Tatyana Ali 865.0 447750.0 Crime|Drama|Thriller ... 133.0 English USA R 1.000000e+07 2000.0 685.0 7.2 1.85 0

3000 rows × 28 columns

In [141]:
df.groupby('country')['imdb_score'].mean().sort_values(ascending=False)
Out[141]:
country
Libya             8.400000
Poland            8.250000
Iceland           8.200000
Soviet Union      8.100000
Norway            8.100000
West Germany      7.900000
India             7.733333
New Zealand       7.483333
Sweden            7.350000
Denmark           7.266667
Panama            7.200000
Taiwan            7.150000
Japan             6.960000
Italy             6.912500
Chile             6.900000
China             6.830000
Spain             6.820000
UK                6.754867
Hong Kong         6.750000
Czech Republic    6.750000
Ireland           6.740000
Greece            6.700000
France            6.626804
Iran              6.500000
Mexico            6.433333
South Africa      6.420000
Netherlands       6.400000
USA               6.383236
Australia         6.337037
Official site     6.300000
Thailand          6.200000
Germany           6.141538
Bulgaria          6.100000
Slovakia          6.000000
Canada            5.946809
Russia            5.842857
Hungary           5.800000
Switzerland       5.700000
Cambodia          5.600000
Belgium           5.600000
South Korea       5.562500
Peru              5.400000
Romania           4.900000
Aruba             4.800000
New Line          4.400000
Name: imdb_score, dtype: float64
In [142]:
df.groupby('country')['imdb_score'].count().sort_values(ascending=False)
Out[142]:
country
USA               2392
UK                 226
France              97
Germany             65
Canada              47
Australia           27
China               20
Japan               15
Spain               15
Hong Kong           10
Italy                8
South Korea          8
Russia               7
New Zealand          6
Ireland              5
South Africa         5
Belgium              4
India                3
Denmark              3
Mexico               3
Czech Republic       2
West Germany         2
Netherlands          2
Sweden               2
Taiwan               2
Poland               2
Official site        1
Soviet Union         1
Thailand             1
New Line             1
Bulgaria             1
Cambodia             1
Switzerland          1
Chile                1
Norway               1
Slovakia             1
Iran                 1
Romania              1
Libya                1
Greece               1
Peru                 1
Hungary              1
Iceland              1
Panama               1
Aruba                1
Name: imdb_score, dtype: int64
In [147]:
(df.groupby('country')['imdb_score']
 .agg(['mean', 'count'])
 .sort_values('count', ascending=False))
Out[147]:
mean count
country
USA 6.383236 2392
UK 6.754867 226
France 6.626804 97
Germany 6.141538 65
Canada 5.946809 47
Australia 6.337037 27
China 6.830000 20
Japan 6.960000 15
Spain 6.820000 15
Hong Kong 6.750000 10
South Korea 5.562500 8
Italy 6.912500 8
Russia 5.842857 7
New Zealand 7.483333 6
South Africa 6.420000 5
Ireland 6.740000 5
Belgium 5.600000 4
Mexico 6.433333 3
India 7.733333 3
Denmark 7.266667 3
Sweden 7.350000 2
Poland 8.250000 2
Taiwan 7.150000 2
West Germany 7.900000 2
Czech Republic 6.750000 2
Netherlands 6.400000 2
New Line 4.400000 1
Hungary 5.800000 1
Bulgaria 6.100000 1
Cambodia 5.600000 1
Thailand 6.200000 1
Chile 6.900000 1
Switzerland 5.700000 1
Greece 6.700000 1
Soviet Union 8.100000 1
Iceland 8.200000 1
Slovakia 6.000000 1
Iran 6.500000 1
Romania 4.900000 1
Libya 8.400000 1
Peru 5.400000 1
Panama 7.200000 1
Official site 6.300000 1
Norway 8.100000 1
Aruba 4.800000 1
In [154]:
from urllib.parse import quote
In [155]:
quote("Москва")
Out[155]:
'%D0%9C%D0%BE%D1%81%D0%BA%D0%B2%D0%B0'
In [181]:
tables = pd.read_html(
    "https://ru.wikipedia.org/wiki/" + quote("Москва"),header=1,
    decimal=',', thousands=None, 
)
In [184]:
ord('−')
Out[184]:
8722
In [185]:
ord('-')
Out[185]:
45
In [182]:
weather = tables[5]
In [194]:
weather.dtypes
Out[194]:
Показатель     object
Янв.           object
Фев.           object
Март           object
Апр.          float64
Май           float64
Июнь          float64
Июль          float64
Авг.          float64
Сен.          float64
Окт.          float64
Нояб.          object
Дек.           object
Год           float64
dtype: object
In [207]:
weather.dtypes[weather.dtypes == object].index
Out[207]:
Index(['Показатель'], dtype='object')
In [201]:
obj_columns = weather.dtypes[weather.dtypes == object].index

# weather.loc[:, obj_columns]
Out[201]:
Показатель Янв. Фев. Март Нояб. Дек.
0 Средний максимум, °C −6 −3,6 2.4 2.1 −2,4
1 Средняя температура, °C −7,9 −6 −1 0.7 −3,9
2 Средний минимум, °C −9,7 −8,3 −4,5 −0,7 −5,4
3 Норма осадков, мм 49 44 38 56 62
4 Источник: www.weatheronline.co.uk NaN NaN NaN NaN NaN
In [204]:
for column in obj_columns:
    try:
        weather[column] = (weather[column]
                                   .str.replace('−', '-')
                                   .str.replace(',', '.')
                                   .astype(float))
    except:
        print(column, "not processed")
Показатель not processed
In [223]:
weather.loc[0]
Out[223]:
Показатель    Средний максимум, °C
Янв.                            -6
Фев.                          -3.6
Март                           2.4
Апр.                          11.4
Май                           20.1
Июнь                          22.6
Июль                          25.8
Авг.                          23.9
Сен.                          16.7
Окт.                           7.9
Нояб.                          2.1
Дек.                          -2.4
Год                           10.1
Name: 0, dtype: object
In [219]:
weather.set_index('Показатель').T.drop('Год').plot()
Out[219]:
<matplotlib.axes._subplots.AxesSubplot at 0x12da82c50>
In [ ]: