import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Load the first sample dataset, using the CSV's first column as the row index.
df1 = pd.read_csv(
    'https://dadosdatascience.netlify.com/df1',
    index_col=0,
)
# Preview the first 12 rows.
df1.head(n=12)
| | A | B | C | D |
---|---|---|---|---|
2000-01-01 | 1.339091 | -0.163643 | -0.646443 | 1.041233 |
2000-01-02 | -0.774984 | 0.137034 | -0.882716 | -2.253382 |
2000-01-03 | -0.921037 | -0.482943 | -0.417100 | 0.478638 |
2000-01-04 | -1.738808 | -0.072973 | 0.056517 | 0.015085 |
2000-01-05 | -0.905980 | 1.778576 | 0.381918 | 0.291436 |
2000-01-06 | -0.891165 | 0.741147 | 0.237482 | -1.501909 |
2000-01-07 | 1.111132 | 0.287674 | 0.183012 | 1.359046 |
2000-01-08 | -1.290901 | -0.549247 | -0.825183 | 0.328940 |
2000-01-09 | -1.689655 | 0.818016 | 1.137704 | 0.334530 |
2000-01-10 | 0.416580 | -1.188162 | 0.753229 | -0.728224 |
2000-01-11 | 0.328968 | -1.087598 | 1.352827 | 0.997733 |
2000-01-12 | 0.263585 | -0.187620 | -1.083475 | -0.099250 |
# Load the second sample dataset, keeping the default integer row index.
df2 = pd.read_csv(
    'https://dadosdatascience.netlify.com/df2',
)
# Preview the first 10 rows.
df2.head(n=10)
| | a | b | c | d |
---|---|---|---|---|
0 | 0.039762 | 0.218517 | 0.103423 | 0.957904 |
1 | 0.937288 | 0.041567 | 0.899125 | 0.977680 |
2 | 0.780504 | 0.008948 | 0.557808 | 0.797510 |
3 | 0.672717 | 0.247870 | 0.264071 | 0.444358 |
4 | 0.053829 | 0.520124 | 0.552264 | 0.190008 |
5 | 0.286043 | 0.593465 | 0.907307 | 0.637898 |
6 | 0.430436 | 0.166230 | 0.469383 | 0.497701 |
7 | 0.312296 | 0.502823 | 0.806609 | 0.850519 |
8 | 0.187765 | 0.997075 | 0.895955 | 0.530390 |
9 | 0.908162 | 0.232726 | 0.414138 | 0.432007 |
# Dimensions of df2 as a (rows, columns) tuple.
df2.shape
(10, 4)
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df2.describe()
| | a | b | c | d |
---|---|---|---|---|
count | 10.000000 | 10.000000 | 10.000000 | 10.000000 |
mean | 0.460880 | 0.352935 | 0.587008 | 0.631597 |
std | 0.340793 | 0.301272 | 0.284332 | 0.258158 |
min | 0.039762 | 0.008948 | 0.103423 | 0.190008 |
25% | 0.212334 | 0.179302 | 0.427949 | 0.457694 |
50% | 0.371366 | 0.240298 | 0.555036 | 0.584144 |
75% | 0.753558 | 0.515799 | 0.873619 | 0.837267 |
max | 0.937288 | 0.997075 | 0.907307 | 0.977680 |
# Arithmetic mean of column A.
df1['A'].mean()
-0.017754837994150792
# Histogram of column A through Series.hist: blue bars with black outlines.
df1['A'].hist(
    figsize=(10, 8),
    color='blue',
    edgecolor='black',
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb693bbed0>
# Same histogram through the .plot accessor: green bars with gray outlines.
df1['A'].plot.hist(figsize=(10, 8), color='g', edgecolor='gray')
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb69190d10>
# Stacked area chart of every df2 column; alpha keeps the layers see-through.
df2.plot(kind='area', alpha=0.4, figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb646f8f50>
# One bar per row, with the columns stacked on top of each other.
df2.plot(kind='bar', stacked=True, figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb68e74410>
# Grouped bars: one group per row, one bar per column.
df2.plot(kind='bar', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb6464ba10>
# Scatter A against B with the marker area driven by column C.
# C contains negative values, and matplotlib marker sizes must be
# non-negative: a negative size turns into NaN and the point is not drawn.
# Using the magnitude of C keeps every observation visible.
df1.plot.scatter(x='A', y='B', s=df1['C'].abs() * 33, figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb6448bf10>
# Box-and-whisker plot for each df2 column.
df2.plot(kind='box', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb6544bd10>
# 1000 rows of standard-normal samples in two columns, 'a' and 'b'.
df = pd.DataFrame(
    data=np.random.randn(1000, 2),
    columns=['a', 'b'],
)
# Preview the first rows.
df.head()
| | a | b |
---|---|---|
0 | 1.847221 | -0.143557 |
1 | -0.422830 | -0.603359 |
2 | 1.483066 | 1.543320 |
3 | -0.002474 | 0.487647 |
4 | -0.752489 | -1.412335 |
# Hexagonal-bin density plot of a versus b on a 25-cell grid.
df.plot(kind='hexbin', x='a', y='b', gridsize=25, figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb65452390>
# Kernel density estimate of column a.
df2['a'].plot(kind='kde', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb6511e550>
# Density curves for every df2 column on shared axes.
df2.plot(kind='density', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64fad150>
# Load the third sample dataset, keeping the default integer row index.
df3 = pd.read_csv(
    'https://dadosdatascience.netlify.com/df3',
)
# Print the index range, column dtypes, non-null counts and memory usage.
df3.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 4 columns):
a    500 non-null float64
b    500 non-null float64
c    500 non-null float64
d    500 non-null float64
dtypes: float64(4)
memory usage: 15.7 KB
# Preview the first rows of df3.
df3.head()
| | a | b | c | d |
---|---|---|---|---|
0 | 0.336272 | 0.325011 | 0.001020 | 0.401402 |
1 | 0.980265 | 0.831835 | 0.772288 | 0.076485 |
2 | 0.480387 | 0.686839 | 0.000575 | 0.746758 |
3 | 0.502106 | 0.305142 | 0.768608 | 0.654685 |
4 | 0.856602 | 0.171448 | 0.157971 | 0.321231 |
# Wide scatter of a versus b with fixed-size red markers.
df3.plot(kind='scatter', x='a', y='b', s=50, c='red', figsize=(16, 5))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb77997350>
# Histogram of column a through Series.hist: gray bars with white outlines.
df3['a'].hist(
    figsize=(10, 8),
    color='gray',
    edgecolor='white',
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64e53390>
# Finer-grained histogram of column a: 50 bins with black outlines.
df3['a'].plot(kind='hist', bins=50, figsize=(10, 8), edgecolor='black')
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64c6a990>
# Switch matplotlib to the ggplot style sheet; it stays active for later plots.
plt.style.use('ggplot')
# Semi-transparent 20-bin histogram of column a.
df3['a'].plot(kind='hist', bins=20, alpha=0.6, figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64c1f690>
# Box plots restricted to columns a and b.
df3[['a', 'b']].plot(kind='box', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb78b54f50>
# Kernel density estimate of column d with the default line style.
df3['d'].plot(kind='kde', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64adc090>
# Same density curve drawn as a thick dashed line.
df3['d'].plot.kde(linewidth=5, linestyle='--', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb64a79890>
# Area chart of the rows labelled 0 through 30 (.loc slices include the end label).
df3.loc[0:30].plot(kind='area', figsize=(10, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7fbb649d75d0>
# Create the figure at the intended size and draw the area chart into its axes.
# Calling plt.figure() and then plotting with figsize= (and no ax=) makes pandas
# open a second figure, leaving the first one empty
# ("<Figure size 432x288 with 0 Axes>").
f = plt.figure(figsize=(10, 8))
df3.loc[0:30].plot.area(alpha=0.4, ax=f.gca())
# Anchor the legend's centre-left point at the right edge of the axes so it
# sits beside the chart instead of covering the data.
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
<Figure size 432x288 with 0 Axes>