#!/usr/bin/env python
# coding: utf-8
"""Scratch notebook: NumPy boolean masks and pandas indexing/sorting/grouping.

This looks like a Jupyter export; the ``# In[n]:`` markers are the original
cell numbers. Bare expressions are leftovers of cells whose value the notebook
displayed -- they have no visible effect when the file runs as a script.

The in-memory examples run at import time. The movie-dataset section needs
network access and therefore only runs when the file is executed directly.
"""

import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# NumPy: boolean masks, nonzero(), slices and reductions
# ---------------------------------------------------------------------------

# In[2]:
x = np.array([1, 2, 3, 4, 5])

# In[4]:
# Boolean-mask assignment: every element greater than 2 becomes 12.
x[x > 2] = 12

# In[5]:
x

# In[6]:
# bool is a subclass of int, so this evaluates to 2.
True + True

# In[7]:
False == 0

# In[9]:
x = np.array([1, 2, 3, 2, 1])
# nonzero() on a boolean mask returns the indices where the mask is True.
(x == 1).nonzero()

# In[12]:
# For a 2-D mask the result is one index array per axis.
(np.array([[1, 2, 3], [2, 3, 2]]) == 2).nonzero()

# In[13]:
x = np.array([1, 2, 3, 4, 5, 6])
s = x[1:3]

# In[14]:
s

# In[15]:
s.sum()

# In[16]:
np.sum(s)

# In[17]:
s.mean()

# ---------------------------------------------------------------------------
# pandas: building a small frame and indexing it
# ---------------------------------------------------------------------------

# In[19]:
df = pd.DataFrame([[1, 2, 3], [2, 3, 4]])

# In[20]:
df

# In[23]:
grades = pd.DataFrame(
    [[3, 4], [4, 3], [5, 3]],
    index=['Alice', 'Bob', 'Claudia'],
    columns=['Algebra', 'Geometry'],
)

# In[22]:
grades

# In[28]:
# A single label inside [] selects a COLUMN; a row label such as 'Alice'
# would raise KeyError here.
grades['Algebra']

# In[30]:
# A slice inside [] selects ROWS, even when the bounds are column names:
# the bounds are compared against the row index.
grades['Algebra':'Geometry']

# In[31]:
# Label slices include both endpoints.
grades['Alice':'Bob']

# In[32]:
grades[:2]

# In[33]:
grades['Geometry']

# In[34]:
grades[1:3]

# In[36]:
# The notebook tried grades[0] here, which raises KeyError because [] with a
# scalar looks up a column label and there is no column named 0. Positional
# access goes through .iloc instead.
grades.iloc[0]

# In[39]:
grades.loc['Alice']

# In[40]:
grades.loc[:, 'Algebra']

# In[42]:
grades.loc[:, 'Algebra':'Geometry']

# In[43]:
# The notebook tried grades.loc[1] here, which raises KeyError: .loc is
# label-based and the row labels are names, not integers. The positional
# equivalent is the .iloc call in the next cell.

# In[44]:
grades.iloc[1]

# In[45]:
grades.iloc[:, 1]

# In[47]:
grades.iloc[1, 1]

# In[48]:
grades.loc['Alice', 'Algebra']

# In[49]:
x = "Alice"
grades.loc[x]

# In[51]:
grades.iloc[1 + 1]

# In[56]:
# Boolean row filter: keep students whose mean grade exceeds 3.5.
grades[grades.mean(axis=1) > 3.5]

# In[57]:
grades.loc[grades.mean(axis=1) > 3.5]

# In[58]:
grades

# ---------------------------------------------------------------------------
# pandas: mutation, sorting and derived columns
# ---------------------------------------------------------------------------

# In[63]:
grades.iloc[1, 1] = 5

# In[64]:
grades

# In[68]:
grades['Calculus'] = [2, 3, 2]

# In[69]:
grades.T

# In[71]:
# Assigning to a new row label appends the row; values follow the current
# column order (Algebra, Geometry, Calculus).
grades.loc['Daniel'] = [2, 3, 5]

# In[74]:
# Returns a sorted copy; grades itself is unchanged until the inplace call.
grades.sort_index(axis=1)

# In[76]:
grades

# In[77]:
grades.sort_index(axis=1, inplace=True)

# In[78]:
grades

# In[80]:
grades.sort_values('Algebra', ascending=False)

# In[82]:
grades.sort_values(['Calculus', 'Algebra'], ascending=[False, True])

# In[87]:
grades.loc[['Alice', 'Claudia']]

# In[89]:
grades['mean'] = grades.mean(axis=1)

# In[91]:
grades.sort_values('mean')

# In[95]:
# Returns a copy without the column; grades still has 'mean' afterwards.
grades.drop('mean', axis=1)

# In[93]:
grades

# In[98]:
grades.drop('mean', axis=1, inplace=True)

# In[107]:
# Same ranking without mutating grades: assign() adds the column to a copy.
(
    grades
    .assign(mean=grades.mean(axis=1))
    .sort_values('mean')
    .head(2)
)

# In[112]:
# Label of the student with the lowest mean.
(
    grades
    .assign(mean=grades.mean(axis=1))
    .sort_values('mean')
    .head(2)
    .index[0]
)

# ---------------------------------------------------------------------------
# pandas: movie dataset (downloads a CSV, so it runs only as a script)
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    # In[113]: / In[114]:
    # The notebook first loaded https://bit.ly/2VbRAty and then overwrote that
    # frame before using it, so only the second download is kept.
    url = "https://github.com/Godoy/imdb-5000-movie-dataset/raw/master/data/movie_metadata.csv"
    df = pd.read_csv(url)

    # In[116]:
    df['color'].unique()

    # In[119]:
    # The literal carries a leading space -- presumably that is how the value
    # appears in the raw CSV; confirm against the unique() output above.
    df[df['color'] == ' Black and White'].iloc[0]

    # In[121]:
    df['color']

    # In[125]:
    df.color.value_counts(dropna=False)

    # In[133]:
    list(df['director_name'][:10])

    # In[137]:
    df.sort_values('imdb_score')

    # In[139]:
    df.columns

    # In[158]:
    # Ten countries with the highest mean IMDb score.
    (
        df
        .groupby('country')['imdb_score']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .index
    )

    # In[144]:
    df.loc[df['country'] == 'Kyrgyzstan', 'movie_title']

    # In[146]:
    g = df.groupby('country')

    # In[150]:
    # numeric_only=True: taking max() over text columns that also hold missing
    # values fails in recent pandas versions (assumption: this dataset has
    # such columns -- TODO confirm), and only numeric columns are used below.
    df.groupby('country').max(numeric_only=True)

    # In[149]:
    # Ten countries whose best movie has the highest IMDb score.
    (
        df
        .groupby('country')
        .max(numeric_only=True)
        .sort_values('imdb_score', ascending=False)
        .head(10)
        .index
    )

    # In[156]:
    # Double brackets return a one-column DataFrame rather than a Series.
    df[['color']]

    # In[164]:
    df.gross / df.budget

    # In[163]:
    df.assign(budget_delta=df.gross / df.budget)

    # In[168]:
    # Movie titles ordered by gross/budget ratio, highest first. The callable
    # form of assign() receives the frame being built.
    (
        df
        .assign(budget_delta=lambda frame: frame.gross / frame.budget)
        .sort_values('budget_delta', ascending=False)
    )['movie_title']