#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np


# In[2]:


# Boolean-mask assignment: every element greater than 2 is overwritten in place.
x = np.array(range(1, 6))


# In[4]:


x[x > 2] = 12


# In[5]:


# Notebook-style echo of the array after the masked assignment.
x


# In[6]:


# bool is an int subclass, so booleans add like 1 and 0.
True + True


# In[7]:


# ...and False compares equal to 0.
False == 0


# In[9]:


# nonzero() on a comparison yields the positions where the comparison holds.
x = np.array([1, 2, 3, 2, 1])
np.nonzero(x == 1)


# In[12]:


# For a 2-D input the result is one index array per axis (rows, then columns).
np.nonzero(np.array([[1, 2, 3], [2, 3, 2]]) == 2)


# In[13]:


# Basic slicing: `s` covers positions 1 and 2 of `x`.
x = np.array(range(1, 7))
s = x[slice(1, 3)]


# In[14]:


# Notebook-style echo of the slice.
s


# In[15]:


# Reduction called as an array method...
s.sum()


# In[16]:


# ...and the same reduction through the module-level function.
np.sum(s)


# In[17]:


# Arithmetic mean of the slice.
s.mean()


# In[18]:


import pandas as pd


# In[19]:


# Two-row frame built from nested lists; no labels are supplied, so pandas
# assigns default integer row and column labels.
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [2, 3, 4],
    ]
)


# In[20]:


# Notebook-style echo of the frame.
df


# In[23]:


# Small gradebook used for the indexing examples below:
# rows are labelled by student name, columns by subject.
grades = pd.DataFrame(
    [[3, 4],
     [4, 3],
     [5, 3]],
    index=['Alice', 'Bob', 'Claudia'],
    columns=['Algebra', 'Geometry'],
)


# In[22]:


grades


# In[28]:


# Plain [] with a single label selects a COLUMN.
grades['Algebra']
# grades['Alice']  # KeyError: 'Alice' is a row label, not a column


# In[30]:


# Plain [] with a slice selects ROWS, even when the bounds look like column
# names; neither bound is a row label here, so this is not a column selection.
grades['Algebra':'Geometry']
# grades['Alice':'Bob']


# In[31]:


# Label slices on rows include both endpoints.
grades['Alice':'Bob']


# In[32]:


# Integer slices inside [] are positional row slices.
grades[:2]


# In[33]:


grades['Geometry']


# In[34]:


grades[1:3]


# In[36]:


# Left commented out so the script can run past this cell: a scalar inside
# plain [] is looked up as a column label, and there is no column named 0.
# Use .iloc for positional row access instead.
# grades[0]  # KeyError: 0


# In[39]:


# .loc is label-based: first the row label, then (optionally) the column label.
grades.loc['Alice']


# In[40]:


grades.loc[:, 'Algebra']


# In[42]:


# With .loc a label slice in the second position selects columns.
grades.loc[:, 'Algebra':'Geometry']


# In[43]:


# Left commented out so the script can run past this cell: .loc looks up
# labels, and the row labels are names, so the integer 1 is not found.
# The positional equivalent is the .iloc call in the next cell.
# grades.loc[1]  # KeyError: 1


# In[44]:


# .iloc is position-based: row 1 is the second row.
grades.iloc[1]


# In[45]:


grades.iloc[:, 1]


# In[47]:


grades.iloc[1, 1]


# In[48]:


grades.loc['Alice', 'Algebra']


# In[49]:


# The key passed to .loc can be any expression that evaluates to a label.
x = "Alice"
grades.loc[f"{x}"]


# In[51]:


# Likewise for .iloc: the position is just an integer expression.
grades.iloc[1+1]


# In[56]:


# Boolean mask over rows: keep students whose row-wise mean exceeds 3.5.
grades[grades.mean(axis=1) > 3.5]


# In[57]:


# Same filter, spelled with .loc.
grades.loc[grades.mean(axis=1) > 3.5]


# In[58]:


grades


# In[63]:


# In-place scalar assignment by position (second row, second column).
grades.iloc[1, 1] = 5


# In[64]:


grades


# In[68]:


# Assigning to a new column label appends a column.
grades['Calculus'] = [2, 3, 2]


# In[69]:


grades.T


# In[71]:


# Assigning to a new row label through .loc appends a row; the values follow
# the column order at this point (Algebra, Geometry, Calculus).
grades.loc['Daniel'] = [2, 3, 5]


# In[74]:


# Returns a copy with columns sorted by name; `grades` itself is unchanged.
grades.sort_index(axis=1)


# In[76]:


grades


# In[77]:


# Same sort, but applied to `grades` in place.
grades.sort_index(axis=1, inplace=True)


# In[78]:


grades


# In[80]:


grades.sort_values('Algebra', ascending=False)


# In[82]:


# Sort by Calculus descending, breaking ties by Algebra ascending.
grades.sort_values(['Calculus', 'Algebra'],
                   ascending=[False, True])


# In[87]:


# A list of labels selects several rows at once.
grades.loc[['Alice', 'Claudia']]


# In[89]:


# Temporarily store each student's row-wise mean as an extra column.
grades['mean'] = grades.mean(axis=1)


# In[91]:


grades.sort_values('mean')


# In[95]:


# Returns a copy without the column; `grades` still has it afterwards.
grades.drop('mean', axis=1)


# In[93]:


grades


# In[98]:


# Now actually remove the helper column from `grades`.
grades.drop('mean', axis=1, inplace=True)


# In[107]:


# Same analysis without mutating `grades`: assign() adds the mean column to a
# copy, which is then sorted and cut to the two lowest means.
(grades
 .assign(mean=grades.mean(axis=1))
 .sort_values('mean')[:2]
)


# In[112]:


# Row label of the student with the lowest mean.
(grades
 .assign(mean=grades.mean(axis=1))
 .sort_values('mean')[:2].index[0]
)


# In[ ]:


# In[113]:


# The original cell loaded "https://bit.ly/2VbRAty" into `df` here, but the
# next cell overwrote `df` before anything read it. That redundant download
# through a short link has been dropped; the explicit URL below is the only load.


# In[114]:


# Movie metadata CSV fetched over the network; every cell below works on `df`.
url = "https://github.com/Godoy/imdb-5000-movie-dataset/raw/master/data/movie_metadata.csv"
df = pd.read_csv(url)


# In[116]:


# Distinct values stored in the `color` column.
df['color'].unique()


# In[119]:


# First row whose `color` equals ' Black and White' (the literal starts with
# a space).
df[df['color'] == ' Black and White'].iloc[0]


# In[121]:


df['color']


# In[125]:


# Frequency of each `color` value; dropna=False also counts missing entries.
df.color.value_counts(dropna=False)


# In[133]:


# First ten director names as a plain Python list.
list(df['director_name'][:10])


# In[137]:


# Rows ordered by ascending `imdb_score`.
df.sort_values('imdb_score')


# In[139]:


df.columns


# In[158]:


# The ten countries with the highest mean `imdb_score`.
(df
 .groupby('country')
 ['imdb_score']
 .mean()
 .sort_values(ascending=False))[:10].index


# In[144]:


# Titles of the rows whose `country` is 'Kyrgyzstan'.
df.loc[df['country'] == 'Kyrgyzstan', 'movie_title']


# In[146]:


# Reusable group-by object keyed on `country`.
g = df.groupby('country')


# In[150]:


# Column-wise maximum within each country.
# NOTE(review): this takes the max over every column, not only numeric ones;
# confirm it behaves as intended on the pandas version in use.
(df
 .groupby('country')
 .max())


# In[149]:


# The ten countries whose per-country maximum `imdb_score` is highest.
(df
 .groupby('country')
 .max()
 .sort_values('imdb_score', ascending=False))[:10].index


# In[156]:


# Double brackets select a one-column DataFrame rather than a Series.
df[['color']]


# In[164]:


# Element-wise ratio of `gross` to `budget`.
df.gross / df.budget


# In[163]:


# Same ratio attached as a new column on a copy; `df` itself is not modified.
df.assign(budget_delta=df.gross / df.budget)


# In[168]:


# Passing a callable to assign() computes the ratio from the frame being
# assigned to; the result is sorted by that ratio, highest first, and only
# the titles are kept.
(df
 .assign(budget_delta=lambda x: x.gross / x.budget)
 .sort_values('budget_delta', ascending=False))['movie_title']


# In[ ]: