import pandas as pd
import numpy as np
# Fix the RNG seed so the random table below is reproducible.
np.random.seed(101)
samples = list('ABCDE')
features = list('WXYZ')
# 5 rows (one per sample) x 4 columns (one per feature) of standard-normal draws.
df = pd.DataFrame(data=np.random.randn(5, 4), index=samples, columns=features)
df
W | X | Y | Z | |
---|---|---|---|---|
A | 2.706850 | 0.628133 | 0.907969 | 0.503826 |
B | 0.651118 | -0.319318 | -0.848077 | 0.605965 |
C | -2.018168 | 0.740122 | 0.528813 | -0.589001 |
D | 0.188695 | -0.758872 | -0.933237 | 0.955057 |
E | 0.190794 | 1.978757 | 2.605967 | 0.683509 |
# Select row 'A' by label; a single row label yields that row as a Series.
df.loc['A']
W 2.706850 X 0.628133 Y 0.907969 Z 0.503826 Name: A, dtype: float64
# Scalar lookup: the value at row label 'A', column label 'W'.
df.at['A', 'W']
2.706849839399938
# Column 'W' as a Series (bracket access instead of attribute access).
df['W']
A 2.706850 B 0.651118 C -2.018168 D 0.188695 E 0.190794 Name: W, dtype: float64
# Add a derived column holding the element-wise sum of columns X and Y.
df['X+Y'] = df['X'].add(df['Y'])
df
W | X | Y | Z | X+Y | |
---|---|---|---|---|---|
A | 2.706850 | 0.628133 | 0.907969 | 0.503826 | 1.536102 |
B | 0.651118 | -0.319318 | -0.848077 | 0.605965 | -1.167395 |
C | -2.018168 | 0.740122 | 0.528813 | -0.589001 | 1.268936 |
D | 0.188695 | -0.758872 | -0.933237 | 0.955057 | -1.692109 |
E | 0.190794 | 1.978757 | 2.605967 | 0.683509 | 4.584725 |
# Boolean frame: True wherever the value is strictly positive.
df.gt(0)
W | X | Y | Z | X+Y | |
---|---|---|---|---|---|
A | True | True | True | True | True |
B | True | False | False | True | False |
C | False | True | True | False | True |
D | True | False | False | True | False |
E | True | True | True | True | True |
# Keep strictly positive values; every other cell becomes NaN.
df.where(df > 0)
W | X | Y | Z | X+Y | |
---|---|---|---|---|---|
A | 2.706850 | 0.628133 | 0.907969 | 0.503826 | 1.536102 |
B | 0.651118 | NaN | NaN | 0.605965 | NaN |
C | NaN | 0.740122 | 0.528813 | NaN | 1.268936 |
D | 0.188695 | NaN | NaN | 0.955057 | NaN |
E | 0.190794 | 1.978757 | 2.605967 | 0.683509 | 4.584725 |
# Toy sales records: two people per company, one sales figure each.
data = {
    'company': ['Google', 'Google', 'Microsoft', 'Microsoft', 'Facebook', 'Facebook'],
    'Person': list('ABCDEF'),
    'Sales': [200, 300, 100, 120, 400, 500],
}
data
{'Person': ['A', 'B', 'C', 'D', 'E', 'F'], 'Sales': [200, 300, 100, 120, 400, 500], 'company': ['Google', 'Google', 'Microsoft', 'Microsoft', 'Facebook', 'Facebook']}
# Build the sales table: one column per dict key, default integer index.
df = pd.DataFrame(data=data)
df
Person | Sales | company | |
---|---|---|---|
0 | A | 200 | Google |
1 | B | 300 | Google |
2 | C | 100 | Microsoft |
3 | D | 120 | Microsoft |
4 | E | 400 | Facebook |
5 | F | 500 | Facebook |
# Per-company maximum of each remaining column (Person is compared as a string).
df.groupby(by='company').max()
Person | Sales | |
---|---|---|
company | ||
Facebook | F | 500 |
Google | B | 300 |
Microsoft | D | 120 |
# Mean sales per company. The numeric column is selected explicitly because
# the string column 'Person' cannot be averaged; recent pandas versions raise
# instead of silently dropping it.
df.groupby('company')[['Sales']].mean()
Sales | |
---|---|
company | |
Facebook | 450 |
Google | 250 |
Microsoft | 110 |
# Sample standard deviation of sales per company. The numeric column is
# selected explicitly because the string column 'Person' has no standard
# deviation; recent pandas versions raise instead of silently dropping it.
df.groupby('company')[['Sales']].std()
Sales | |
---|---|
company | |
Facebook | 70.710678 |
Google | 70.710678 |
Microsoft | 14.142136 |
# Summary statistics (count, mean, std, min, quartiles, max) per company.
df.groupby(by='company').describe()
Sales | ||||||||
---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | |
company | ||||||||
Facebook | 2.0 | 450.0 | 70.710678 | 400.0 | 425.0 | 450.0 | 475.0 | 500.0 |
Google | 2.0 | 250.0 | 70.710678 | 200.0 | 225.0 | 250.0 | 275.0 | 300.0 |
Microsoft | 2.0 | 110.0 | 14.142136 | 100.0 | 105.0 | 110.0 | 115.0 | 120.0 |
# Number of rows recorded for each company.
df['company'].value_counts()
Facebook 2 Google 2 Microsoft 2 Name: company, dtype: int64
# Re-display the sales table before sorting it.
df
Person | Sales | company | |
---|---|---|---|
0 | A | 200 | Google |
1 | B | 300 | Google |
2 | C | 100 | Microsoft |
3 | D | 120 | Microsoft |
4 | E | 400 | Facebook |
5 | F | 500 | Facebook |
# Rows ordered from the largest to the smallest sale; df itself is not modified.
df.sort_values('Sales', ascending=False)
Person | Sales | company | |
---|---|---|---|
5 | F | 500 | Facebook |
4 | E | 400 | Facebook |
1 | B | 300 | Google |
0 | A | 200 | Google |
3 | D | 120 | Microsoft |
2 | C | 100 | Microsoft |
# read_html returns a list with one DataFrame per HTML table it parses, so
# `df` is a list here and the failed-bank table is its first element.
df = pd.read_html(io='http://www.fdic.gov/bank/individual/failed/banklist.html')
df[0]
Bank Name | City | ST | CERT | Acquiring Institution | Closing Date | Updated Date | |
---|---|---|---|---|---|---|---|
0 | Washington Federal Bank for Savings | Chicago | IL | 30570 | Royal Savings Bank | December 15, 2017 | February 21, 2018 |
1 | The Farmers and Merchants State Bank of Argonia | Argonia | KS | 17719 | Conway Bank | October 13, 2017 | February 21, 2018 |
2 | Fayette County Bank | Saint Elmo | IL | 1802 | United Fidelity Bank, fsb | May 26, 2017 | July 26, 2017 |
3 | Guaranty Bank, (d/b/a BestBank in Georgia & Mi... | Milwaukee | WI | 30003 | First-Citizens Bank & Trust Company | May 5, 2017 | March 22, 2018 |
4 | First NBC Bank | New Orleans | LA | 58302 | Whitney Bank | April 28, 2017 | December 5, 2017 |
5 | Proficio Bank | Cottonwood Heights | UT | 35495 | Cache Valley Bank | March 3, 2017 | March 7, 2018 |
6 | Seaway Bank and Trust Company | Chicago | IL | 19328 | State Bank of Texas | January 27, 2017 | May 18, 2017 |
7 | Harvest Community Bank | Pennsville | NJ | 34951 | First-Citizens Bank & Trust Company | January 13, 2017 | May 18, 2017 |
8 | Allied Bank | Mulberry | AR | 91 | Today's Bank | September 23, 2016 | September 25, 2017 |
9 | The Woodbury Banking Company | Woodbury | GA | 11297 | United Bank | August 19, 2016 | June 1, 2017 |
10 | First CornerStone Bank | King of Prussia | PA | 35312 | First-Citizens Bank & Trust Company | May 6, 2016 | September 6, 2016 |
11 | Trust Company Bank | Memphis | TN | 9956 | The Bank of Fayette County | April 29, 2016 | September 6, 2016 |
12 | North Milwaukee State Bank | Milwaukee | WI | 20364 | First-Citizens Bank & Trust Company | March 11, 2016 | March 13, 2017 |
13 | Hometown National Bank | Longview | WA | 35156 | Twin City Bank | October 2, 2015 | February 19, 2018 |
14 | The Bank of Georgia | Peachtree City | GA | 35259 | Fidelity Bank | October 2, 2015 | July 9, 2018 |
15 | Premier Bank | Denver | CO | 34112 | United Fidelity Bank, fsb | July 10, 2015 | February 20, 2018 |
16 | Edgebrook Bank | Chicago | IL | 57772 | Republic Bank of Chicago | May 8, 2015 | July 12, 2016 |
17 | Doral Bank En Espanol | San Juan | PR | 32102 | Banco Popular de Puerto Rico | February 27, 2015 | May 13, 2015 |
18 | Capitol City Bank & Trust Company | Atlanta | GA | 33938 | First-Citizens Bank & Trust Company | February 13, 2015 | April 21, 2015 |
19 | Highland Community Bank | Chicago | IL | 20290 | United Fidelity Bank, fsb | January 23, 2015 | November 15, 2017 |
20 | First National Bank of Crestview | Crestview | FL | 17557 | First NBC Bank | January 16, 2015 | November 15, 2017 |
21 | Northern Star Bank | Mankato | MN | 34983 | BankVista | December 19, 2014 | January 3, 2018 |
22 | Frontier Bank, FSB D/B/A El Paseo Bank | Palm Desert | CA | 34738 | Bank of Southern California, N.A. | November 7, 2014 | November 10, 2016 |
23 | The National Republic Bank of Chicago | Chicago | IL | 916 | State Bank of Texas | October 24, 2014 | January 6, 2016 |
24 | NBRS Financial | Rising Sun | MD | 4862 | Howard Bank | October 17, 2014 | February 19, 2018 |
25 | GreenChoice Bank, fsb | Chicago | IL | 28462 | Providence Bank, LLC | July 25, 2014 | December 12, 2016 |
26 | Eastside Commercial Bank | Conyers | GA | 58125 | Community & Southern Bank | July 18, 2014 | October 6, 2017 |
27 | The Freedom State Bank | Freedom | OK | 12483 | Alva State Bank & Trust Company | June 27, 2014 | February 21, 2018 |
28 | Valley Bank | Fort Lauderdale | FL | 21793 | Landmark Bank, National Association | June 20, 2014 | February 14, 2018 |
29 | Valley Bank | Moline | IL | 10450 | Great Southern Bank | June 20, 2014 | June 26, 2015 |
... | ... | ... | ... | ... | ... | ... | ... |
525 | ANB Financial, NA | Bentonville | AR | 33901 | Pulaski Bank and Trust Company | May 9, 2008 | August 28, 2012 |
526 | Hume Bank | Hume | MO | 1971 | Security Bank | March 7, 2008 | August 28, 2012 |
527 | Douglass National Bank | Kansas City | MO | 24660 | Liberty Bank and Trust Company | January 25, 2008 | October 26, 2012 |
528 | Miami Valley Bank | Lakeview | OH | 16848 | The Citizens Banking Company | October 4, 2007 | September 12, 2016 |
529 | NetBank | Alpharetta | GA | 32575 | ING DIRECT | September 28, 2007 | August 28, 2012 |
530 | Metropolitan Savings Bank | Pittsburgh | PA | 35353 | Allegheny Valley Bank of Pittsburgh | February 2, 2007 | October 27, 2010 |
531 | Bank of Ephraim | Ephraim | UT | 1249 | Far West Bank | June 25, 2004 | April 9, 2008 |
532 | Reliance Bank | White Plains | NY | 26778 | Union State Bank | March 19, 2004 | April 9, 2008 |
533 | Guaranty National Bank of Tallahassee | Tallahassee | FL | 26838 | Hancock Bank of Florida | March 12, 2004 | April 17, 2018 |
534 | Dollar Savings Bank | Newark | NJ | 31330 | No Acquirer | February 14, 2004 | April 9, 2008 |
535 | Pulaski Savings Bank | Philadelphia | PA | 27203 | Earthstar Bank | November 14, 2003 | October 6, 2017 |
536 | First National Bank of Blanchardville | Blanchardville | WI | 11639 | The Park Bank | May 9, 2003 | June 5, 2012 |
537 | Southern Pacific Bank | Torrance | CA | 27094 | Beal Bank | February 7, 2003 | October 20, 2008 |
538 | Farmers Bank of Cheneyville | Cheneyville | LA | 16445 | Sabine State Bank & Trust | December 17, 2002 | October 20, 2004 |
539 | Bank of Alamo | Alamo | TN | 9961 | No Acquirer | November 8, 2002 | March 18, 2005 |
540 | AmTrade International Bank En Espanol | Atlanta | GA | 33784 | No Acquirer | September 30, 2002 | September 11, 2006 |
541 | Universal Federal Savings Bank | Chicago | IL | 29355 | Chicago Community Bank | June 27, 2002 | October 6, 2017 |
542 | Connecticut Bank of Commerce | Stamford | CT | 19183 | Hudson United Bank | June 26, 2002 | February 14, 2012 |
543 | New Century Bank | Shelby Township | MI | 34979 | No Acquirer | March 28, 2002 | March 18, 2005 |
544 | Net 1st National Bank | Boca Raton | FL | 26652 | Bank Leumi USA | March 1, 2002 | April 9, 2008 |
545 | NextBank, NA | Phoenix | AZ | 22314 | No Acquirer | February 7, 2002 | February 5, 2015 |
546 | Oakwood Deposit Bank Co. | Oakwood | OH | 8966 | The State Bank & Trust Company | February 1, 2002 | October 25, 2012 |
547 | Bank of Sierra Blanca | Sierra Blanca | TX | 22002 | The Security State Bank of Pecos | January 18, 2002 | November 6, 2003 |
548 | Hamilton Bank, NA En Espanol | Miami | FL | 24382 | Israel Discount Bank of New York | January 11, 2002 | September 21, 2015 |
549 | Sinclair National Bank | Gravette | AR | 34248 | Delta Trust & Bank | September 7, 2001 | October 6, 2017 |
550 | Superior Bank, FSB | Hinsdale | IL | 32646 | Superior Federal, FSB | July 27, 2001 | August 19, 2014 |
551 | Malta National Bank | Malta | OH | 6629 | North Valley Bank | May 3, 2001 | November 18, 2002 |
552 | First Alliance Bank & Trust Co. | Manchester | NH | 34264 | Southern New Hampshire Bank & Trust | February 2, 2001 | February 18, 2003 |
553 | National State Bank of Metropolis | Metropolis | IL | 3815 | Banterra Bank of Marion | December 14, 2000 | March 17, 2005 |
554 | Bank of Honolulu | Honolulu | HI | 21029 | Bank of the Orient | October 13, 2000 | March 17, 2005 |
555 rows × 7 columns
#!pip install plotly
#!pip install --upgrade pip
#!pip install cufflinks
#!pip install pandas-datareader
# Nine scores: each of 1, 2 and 3 repeated three times, in order.
data = {'score': [value for value in (1, 2, 3) for _ in range(3)]}
# Single-column frame with the default 0..8 integer index.
df = pd.DataFrame(data=data)
# Show it.
df
score | |
---|---|
0 | 1 |
1 | 1 |
2 | 1 |
3 | 2 |
4 | 2 |
5 | 2 |
6 | 3 |
7 | 3 |
8 | 3 |
# Two-point moving average: each output row is the mean of that row and the
# one before it, so the first row (which has no predecessor) is NaN.
df.rolling(2).mean()
score | |
---|---|
0 | NaN |
1 | 1.0 |
2 | 1.0 |
3 | 1.5 |
4 | 2.0 |
5 | 2.0 |
6 | 2.5 |
7 | 3.0 |
8 | 3.0 |
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default theme to the figures that follow.
sns.set()
# Raw scores, then their 2-point moving average on the same axes.
plt.plot(df.index, df['score'])
plt.plot(df.index, df['score'].rolling(window=2).mean())
[<matplotlib.lines.Line2D at 0x11c6f04e0>]
# Reproducible synthetic data: a line of slope 2 plus Gaussian noise scaled by 5.
np.random.seed(0)
x = np.linspace(start=1, stop=20, num=100)
y = 2 * x + 5 * np.random.standard_normal(size=100)
df = pd.DataFrame(dict(x=x, y=y))
df.head()
x | y | |
---|---|---|
0 | 1.000000 | 10.820262 |
1 | 1.191919 | 4.384624 |
2 | 1.383838 | 7.661367 |
3 | 1.575758 | 14.355981 |
4 | 1.767677 | 12.873143 |
# Noisy series overlaid with its 5-point rolling mean.
plt.plot(df['x'], df['y'])
plt.plot(df['x'], df['y'].rolling(5).mean())
[<matplotlib.lines.Line2D at 0x1a1ed57320>]
# Noisy series overlaid with its 10-point rolling mean.
plt.plot(df['x'], df['y'])
plt.plot(df['x'], df['y'].rolling(10).mean())
[<matplotlib.lines.Line2D at 0x11c6f0208>]
# Residual of y around its 10-point rolling mean, plotted against x.
plt.plot(df['x'], df['y'] - df['y'].rolling(10).mean())
[<matplotlib.lines.Line2D at 0x11c66c668>]
# Residuals: each y value minus the 10-point rolling mean ending at that row.
w = df['y'].sub(df['y'].rolling(window=10).mean())
w
0 NaN 1 NaN 2 NaN 3 NaN 4 NaN 5 NaN 6 NaN 7 NaN 8 NaN 9 0.090149 10 -0.432621 11 5.591471 12 2.234142 13 0.096937 14 2.419726 15 1.213305 16 6.743334 17 -1.725953 18 0.657034 19 -4.546436 20 -11.692390 21 4.740980 22 5.743369 23 -1.857718 24 12.288935 25 -5.437646 26 2.787135 27 1.613436 28 9.603395 29 8.124566 ... 70 7.581233 71 4.336427 72 8.412242 73 -3.704619 74 4.368761 75 -0.925483 76 -2.235119 77 -0.254566 78 0.784047 79 2.620526 80 -2.538929 81 7.405031 82 5.566080 83 -4.292742 84 10.286782 85 11.034617 86 6.424281 87 -0.568704 88 -4.643242 89 5.483636 90 -2.185494 91 5.781807 92 0.839650 93 3.425029 94 0.889609 95 3.235301 96 0.339075 97 8.233030 98 -0.660595 99 1.041023 Name: y, Length: 100, dtype: float64
# The 10-point rolling mean leaves NaN in the first 9 rows. Drop them by value
# rather than by a hard-coded offset, so this stays correct if the window
# size changes.
w = w.dropna()
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (histplot/displot
# are the replacements) — confirm against the installed seaborn version.
sns.distplot(w)
<matplotlib.axes._subplots.AxesSubplot at 0x11c6633c8>
# Horizontal box plot of the residuals. The series is passed as `x=` because
# the meaning of the first positional argument differs between seaborn
# versions, which can change the orientation of the box.
sns.boxplot(x=w)
<matplotlib.axes._subplots.AxesSubplot at 0x11c6557f0>
# Quartiles of the residuals and the 1.5*IQR outlier fences built from them.
q25, q75 = np.percentile(w, [25, 75])
iqr = q75 - q25
mini = q25 - 1.5 * iqr
maxi = q75 + 1.5 * iqr
q75, q25
(5.157466166849959, -0.7073976359559406)
# Interquartile range (q75 - q25) of the residuals.
iqr
5.8648638028059
# Lower fence: q25 - 1.5 * iqr.
mini
-9.50469334016479
# Upper fence: q75 + 1.5 * iqr.
maxi
13.954761871058809
# Horizontal box plot of the residuals. `x=` is passed by keyword so the box
# lies along the x-axis and the vertical reference lines below line up with
# it regardless of seaborn version.
sns.boxplot(x=w, whis=1.5)
# Red reference lines: the two 1.5*IQR fences, both quartiles, and the median.
plt.axvline(x=mini, c='r')
plt.axvline(x=maxi, c='r')
plt.axvline(x=q75, c='r')
plt.axvline(x=q25, c='r')
plt.axvline(x=w.median(), c='r')
<matplotlib.lines.Line2D at 0x11c5e48d0>
# Keep only residuals strictly inside the current fences, then recompute the
# quartiles, IQR and fences from that filtered series.
wr = w[(w > mini) & (w < maxi)]
q75, q25 = np.percentile(wr, [75, 25])
iqr = q75 - q25
mini = q25 - (iqr * 1.5)
maxi = q75 + (iqr * 1.5)
# `x=` is passed by keyword so the box is horizontal and the vertical
# reference lines below line up with it regardless of seaborn version.
# NOTE(review): the box is drawn from the unfiltered `w`, while the red lines
# come from the filtered `wr` — confirm this comparison is intended.
sns.boxplot(x=w, whis=1.5)
plt.axvline(x=mini, c='r')
plt.axvline(x=maxi, c='r')
plt.axvline(x=q75, c='r')
plt.axvline(x=q25, c='r')
plt.axvline(x=wr.median(), c='r')
<matplotlib.lines.Line2D at 0x11c41a240>