import pandas as pd
xls = pd.ExcelFile('code/titanic3.xls')
sheet_1 = xls.parse(0)
sheet_1.columns
Index(['pclass', 'survived', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked', 'boat', 'body', 'home.dest'], dtype='object')
print(type(sheet_1))
<class 'pandas.core.frame.DataFrame'>
sheet_1
pclass | survived | name | sex | age | sibsp | parch | ticket | fare | cabin | embarked | boat | body | home.dest | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 0 | 0 | 24160 | 211.3375 | B5 | S | 2 | NaN | St Louis, MO |
1 | 1 | 1 | Allison, Master. Hudson Trevor | male | 0.9167 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | 11 | NaN | Montreal, PQ / Chesterville, ON |
2 | 1 | 0 | Allison, Miss. Helen Loraine | female | 2.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | NaN | Montreal, PQ / Chesterville, ON |
3 | 1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 30.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | 135.0 | Montreal, PQ / Chesterville, ON |
4 | 1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0000 | 1 | 2 | 113781 | 151.5500 | C22 C26 | S | NaN | NaN | Montreal, PQ / Chesterville, ON |
5 | 1 | 1 | Anderson, Mr. Harry | male | 48.0000 | 0 | 0 | 19952 | 26.5500 | E12 | S | 3 | NaN | New York, NY |
6 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 63.0000 | 1 | 0 | 13502 | 77.9583 | D7 | S | 10 | NaN | Hudson, NY |
7 | 1 | 0 | Andrews, Mr. Thomas Jr | male | 39.0000 | 0 | 0 | 112050 | 0.0000 | A36 | S | NaN | NaN | Belfast, NI |
8 | 1 | 1 | Appleton, Mrs. Edward Dale (Charlotte Lamson) | female | 53.0000 | 2 | 0 | 11769 | 51.4792 | C101 | S | D | NaN | Bayside, Queens, NY |
9 | 1 | 0 | Artagaveytia, Mr. Ramon | male | 71.0000 | 0 | 0 | PC 17609 | 49.5042 | NaN | C | NaN | 22.0 | Montevideo, Uruguay |
10 | 1 | 0 | Astor, Col. John Jacob | male | 47.0000 | 1 | 0 | PC 17757 | 227.5250 | C62 C64 | C | NaN | 124.0 | New York, NY |
11 | 1 | 1 | Astor, Mrs. John Jacob (Madeleine Talmadge Force) | female | 18.0000 | 1 | 0 | PC 17757 | 227.5250 | C62 C64 | C | 4 | NaN | New York, NY |
12 | 1 | 1 | Aubart, Mme. Leontine Pauline | female | 24.0000 | 0 | 0 | PC 17477 | 69.3000 | B35 | C | 9 | NaN | Paris, France |
13 | 1 | 1 | Barber, Miss. Ellen "Nellie" | female | 26.0000 | 0 | 0 | 19877 | 78.8500 | NaN | S | 6 | NaN | NaN |
14 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 80.0000 | 0 | 0 | 27042 | 30.0000 | A23 | S | B | NaN | Hessle, Yorks |
15 | 1 | 0 | Baumann, Mr. John D | male | NaN | 0 | 0 | PC 17318 | 25.9250 | NaN | S | NaN | NaN | New York, NY |
16 | 1 | 0 | Baxter, Mr. Quigg Edmond | male | 24.0000 | 0 | 1 | PC 17558 | 247.5208 | B58 B60 | C | NaN | NaN | Montreal, PQ |
17 | 1 | 1 | Baxter, Mrs. James (Helene DeLaudeniere Chaput) | female | 50.0000 | 0 | 1 | PC 17558 | 247.5208 | B58 B60 | C | 6 | NaN | Montreal, PQ |
18 | 1 | 1 | Bazzani, Miss. Albina | female | 32.0000 | 0 | 0 | 11813 | 76.2917 | D15 | C | 8 | NaN | NaN |
19 | 1 | 0 | Beattie, Mr. Thomson | male | 36.0000 | 0 | 0 | 13050 | 75.2417 | C6 | C | A | NaN | Winnipeg, MN |
20 | 1 | 1 | Beckwith, Mr. Richard Leonard | male | 37.0000 | 1 | 1 | 11751 | 52.5542 | D35 | S | 5 | NaN | New York, NY |
21 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0000 | 1 | 1 | 11751 | 52.5542 | D35 | S | 5 | NaN | New York, NY |
22 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0000 | 0 | 0 | 111369 | 30.0000 | C148 | C | 5 | NaN | New York, NY |
23 | 1 | 1 | Bidois, Miss. Rosalie | female | 42.0000 | 0 | 0 | PC 17757 | 227.5250 | NaN | C | 4 | NaN | NaN |
24 | 1 | 1 | Bird, Miss. Ellen | female | 29.0000 | 0 | 0 | PC 17483 | 221.7792 | C97 | S | 8 | NaN | NaN |
25 | 1 | 0 | Birnbaum, Mr. Jakob | male | 25.0000 | 0 | 0 | 13905 | 26.0000 | NaN | C | NaN | 148.0 | San Francisco, CA |
26 | 1 | 1 | Bishop, Mr. Dickinson H | male | 25.0000 | 1 | 0 | 11967 | 91.0792 | B49 | C | 7 | NaN | Dowagiac, MI |
27 | 1 | 1 | Bishop, Mrs. Dickinson H (Helen Walton) | female | 19.0000 | 1 | 0 | 11967 | 91.0792 | B49 | C | 7 | NaN | Dowagiac, MI |
28 | 1 | 1 | Bissette, Miss. Amelia | female | 35.0000 | 0 | 0 | PC 17760 | 135.6333 | C99 | S | 8 | NaN | NaN |
29 | 1 | 1 | Bjornstrom-Steffansson, Mr. Mauritz Hakan | male | 28.0000 | 0 | 0 | 110564 | 26.5500 | C52 | S | D | NaN | Stockholm, Sweden / Washington, DC |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1279 | 3 | 0 | Vestrom, Miss. Hulda Amanda Adolfina | female | 14.0000 | 0 | 0 | 350406 | 7.8542 | NaN | S | NaN | NaN | NaN |
1280 | 3 | 0 | Vovk, Mr. Janko | male | 22.0000 | 0 | 0 | 349252 | 7.8958 | NaN | S | NaN | NaN | NaN |
1281 | 3 | 0 | Waelens, Mr. Achille | male | 22.0000 | 0 | 0 | 345767 | 9.0000 | NaN | S | NaN | NaN | Antwerp, Belgium / Stanton, OH |
1282 | 3 | 0 | Ware, Mr. Frederick | male | NaN | 0 | 0 | 359309 | 8.0500 | NaN | S | NaN | NaN | NaN |
1283 | 3 | 0 | Warren, Mr. Charles William | male | NaN | 0 | 0 | C.A. 49867 | 7.5500 | NaN | S | NaN | NaN | NaN |
1284 | 3 | 0 | Webber, Mr. James | male | NaN | 0 | 0 | SOTON/OQ 3101316 | 8.0500 | NaN | S | NaN | NaN | NaN |
1285 | 3 | 0 | Wenzel, Mr. Linhart | male | 32.5000 | 0 | 0 | 345775 | 9.5000 | NaN | S | NaN | 298.0 | NaN |
1286 | 3 | 1 | Whabee, Mrs. George Joseph (Shawneene Abi-Saab) | female | 38.0000 | 0 | 0 | 2688 | 7.2292 | NaN | C | C | NaN | NaN |
1287 | 3 | 0 | Widegren, Mr. Carl/Charles Peter | male | 51.0000 | 0 | 0 | 347064 | 7.7500 | NaN | S | NaN | NaN | NaN |
1288 | 3 | 0 | Wiklund, Mr. Jakob Alfred | male | 18.0000 | 1 | 0 | 3101267 | 6.4958 | NaN | S | NaN | 314.0 | NaN |
1289 | 3 | 0 | Wiklund, Mr. Karl Johan | male | 21.0000 | 1 | 0 | 3101266 | 6.4958 | NaN | S | NaN | NaN | NaN |
1290 | 3 | 1 | Wilkes, Mrs. James (Ellen Needs) | female | 47.0000 | 1 | 0 | 363272 | 7.0000 | NaN | S | NaN | NaN | NaN |
1291 | 3 | 0 | Willer, Mr. Aaron ("Abi Weller") | male | NaN | 0 | 0 | 3410 | 8.7125 | NaN | S | NaN | NaN | NaN |
1292 | 3 | 0 | Willey, Mr. Edward | male | NaN | 0 | 0 | S.O./P.P. 751 | 7.5500 | NaN | S | NaN | NaN | NaN |
1293 | 3 | 0 | Williams, Mr. Howard Hugh "Harry" | male | NaN | 0 | 0 | A/5 2466 | 8.0500 | NaN | S | NaN | NaN | NaN |
1294 | 3 | 0 | Williams, Mr. Leslie | male | 28.5000 | 0 | 0 | 54636 | 16.1000 | NaN | S | NaN | 14.0 | NaN |
1295 | 3 | 0 | Windelov, Mr. Einar | male | 21.0000 | 0 | 0 | SOTON/OQ 3101317 | 7.2500 | NaN | S | NaN | NaN | NaN |
1296 | 3 | 0 | Wirz, Mr. Albert | male | 27.0000 | 0 | 0 | 315154 | 8.6625 | NaN | S | NaN | 131.0 | NaN |
1297 | 3 | 0 | Wiseman, Mr. Phillippe | male | NaN | 0 | 0 | A/4. 34244 | 7.2500 | NaN | S | NaN | NaN | NaN |
1298 | 3 | 0 | Wittevrongel, Mr. Camille | male | 36.0000 | 0 | 0 | 345771 | 9.5000 | NaN | S | NaN | NaN | NaN |
1299 | 3 | 0 | Yasbeck, Mr. Antoni | male | 27.0000 | 1 | 0 | 2659 | 14.4542 | NaN | C | C | NaN | NaN |
1300 | 3 | 1 | Yasbeck, Mrs. Antoni (Selini Alexander) | female | 15.0000 | 1 | 0 | 2659 | 14.4542 | NaN | C | NaN | NaN | NaN |
1301 | 3 | 0 | Youseff, Mr. Gerious | male | 45.5000 | 0 | 0 | 2628 | 7.2250 | NaN | C | NaN | 312.0 | NaN |
1302 | 3 | 0 | Yousif, Mr. Wazli | male | NaN | 0 | 0 | 2647 | 7.2250 | NaN | C | NaN | NaN | NaN |
1303 | 3 | 0 | Yousseff, Mr. Gerious | male | NaN | 0 | 0 | 2627 | 14.4583 | NaN | C | NaN | NaN | NaN |
1304 | 3 | 0 | Zabour, Miss. Hileni | female | 14.5000 | 1 | 0 | 2665 | 14.4542 | NaN | C | NaN | 328.0 | NaN |
1305 | 3 | 0 | Zabour, Miss. Thamine | female | NaN | 1 | 0 | 2665 | 14.4542 | NaN | C | NaN | NaN | NaN |
1306 | 3 | 0 | Zakarian, Mr. Mapriededer | male | 26.5000 | 0 | 0 | 2656 | 7.2250 | NaN | C | NaN | 304.0 | NaN |
1307 | 3 | 0 | Zakarian, Mr. Ortin | male | 27.0000 | 0 | 0 | 2670 | 7.2250 | NaN | C | NaN | NaN | NaN |
1308 | 3 | 0 | Zimmerman, Mr. Leo | male | 29.0000 | 0 | 0 | 315082 | 7.8750 | NaN | S | NaN | NaN | NaN |
1309 rows × 14 columns
sheet_1.iloc[0]
pclass 1 survived 1 name Allen, Miss. Elisabeth Walton sex female age 29 sibsp 0 parch 0 ticket 24160 fare 211.338 cabin B5 embarked S boat 2 body NaN home.dest St Louis, MO Name: 0, dtype: object
from collections import Counter
c = Counter('baracadabra')
print(c)
d = c.most_common(3)
print(d)
Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1}) [('a', 5), ('b', 2), ('r', 2)]
counter = Counter(sheet_1["survived"].values)
print(counter)
print("survived:", counter[1])
print("survival rate:", counter[1]/(counter[0]+counter[1]))
Counter({0: 809, 1: 500}) survived: 500 survival rate: 0.3819709702062643
counter = Counter(sheet_1.loc[sheet_1["body"].notna(), "survived"].values)
print(counter)
Counter({0: 121})
print(sheet_1.groupby('pclass')['survived'].mean())
pclass 1 0.619195 2 0.429603 3 0.255289 Name: survived, dtype: float64
print(sheet_1.groupby('sex')['survived'].mean())
sex female 0.727468 male 0.190985 Name: survived, dtype: float64
print(sheet_1.groupby(["pclass","sex"])["survived"].mean())
sex female 0.727468 male 0.190985 Name: survived, dtype: float64 pclass sex 1 female 0.965278 male 0.340782 2 female 0.886792 male 0.146199 3 female 0.490741 male 0.152130 Name: survived, dtype: float64
print(sheet_1.groupby(["age"])["survived"].mean())
age 0.1667 1.000000 0.3333 0.000000 0.4167 1.000000 0.6667 1.000000 0.7500 0.666667 0.8333 1.000000 0.9167 1.000000 1.0000 0.700000 2.0000 0.333333 3.0000 0.714286 4.0000 0.700000 5.0000 0.800000 6.0000 0.500000 7.0000 0.500000 8.0000 0.666667 9.0000 0.400000 10.0000 0.000000 11.0000 0.250000 11.5000 0.000000 12.0000 1.000000 13.0000 0.600000 14.0000 0.500000 14.5000 0.000000 15.0000 0.833333 16.0000 0.421053 17.0000 0.350000 18.0000 0.358974 18.5000 0.000000 19.0000 0.379310 20.0000 0.347826 ... 46.0000 0.000000 47.0000 0.214286 48.0000 0.714286 49.0000 0.555556 50.0000 0.400000 51.0000 0.375000 52.0000 0.500000 53.0000 1.000000 54.0000 0.500000 55.0000 0.500000 55.5000 0.000000 56.0000 0.500000 57.0000 0.000000 58.0000 0.666667 59.0000 0.333333 60.0000 0.571429 60.5000 0.000000 61.0000 0.000000 62.0000 0.400000 63.0000 0.500000 64.0000 0.400000 65.0000 0.000000 66.0000 0.000000 67.0000 0.000000 70.0000 0.000000 70.5000 0.000000 71.0000 0.000000 74.0000 0.000000 76.0000 1.000000 80.0000 1.000000 Name: survived, Length: 98, dtype: float64
Counter(sheet_1["age"])
Counter({29.0: 30, 0.9167: 2, 2.0: 12, 30.0: 40, 25.0: 34, 48.0: 14, 63.0: 4, 39.0: 20, 53.0: 4, 71.0: 2, 47.0: 14, 18.0: 39, 24.0: 47, 26.0: 30, 80.0: 1, nan: 1, 50.0: 15, 32.0: 24, 36.0: 31, 37.0: 9, 42.0: 18, 19.0: 29, 35.0: 23, 28.0: 32, 45.0: 21, 40.0: 18, 58.0: 6, 22.0: 43, nan: 1, 41.0: 11, nan: 1, 44.0: 10, 59.0: 3, 60.0: 7, nan: 1, 33.0: 21, 17.0: 20, 11.0: 4, 14.0: 8, 49.0: 9, nan: 1, 76.0: 1, 46.0: 6, 27.0: 30, nan: 1, nan: 1, nan: 1, 64.0: 5, 55.0: 8, nan: 1, 70.0: 2, 38.0: 14, 51.0: 8, 31.0: 23, 4.0: 10, 54.0: 10, 23.0: 26, nan: 1, nan: 1, nan: 1, nan: 1, 43.0: 9, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 52.0: 6, nan: 1, 16.0: 19, nan: 1, 32.5: 4, nan: 1, nan: 1, nan: 1, 21.0: 41, 15.0: 6, nan: 1, nan: 1, 65.0: 3, nan: 1, 28.5: 3, nan: 1, 45.5: 2, 56.0: 4, nan: 1, nan: 1, nan: 1, 13.0: 5, 61.0: 5, nan: 1, nan: 1, 34.0: 16, nan: 1, 6.0: 6, 57.0: 5, nan: 1, 62.0: 5, nan: 1, 67.0: 1, nan: 1, nan: 1, nan: 1, nan: 1, 1.0: 10, 12.0: 3, 20.0: 23, 0.8333: 3, nan: 1, 8.0: 6, nan: 1, nan: 1, nan: 1, 0.6667: 1, 7.0: 4, nan: 1, nan: 1, nan: 1, 3.0: 7, nan: 1, nan: 1, nan: 1, 36.5: 2, nan: 1, nan: 1, nan: 1, 18.5: 3, nan: 1, 5.0: 5, 66.0: 1, nan: 1, nan: 1, 9.0: 10, 0.75: 3, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 70.5: 1, 22.5: 1, 0.3333: 1, nan: 1, nan: 1, 0.1667: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 40.5: 3, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 10.0: 4, nan: 1, nan: 1, nan: 1, 23.5: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 34.5: 2, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 20.5: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 30.5: 2, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 55.5: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 38.5: 1, nan: 1, 14.5: 2, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 24.5: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 60.5: 1, 74.0: 1, 0.4167: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 11.5: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, nan: 1, 26.5: 1})
print(sheet_1.shape)
sheet_1.count()
(1309, 14)
pclass 1309 survived 1309 name 1309 sex 1309 age 1046 sibsp 1309 parch 1309 ticket 1309 fare 1308 cabin 295 embarked 1307 boat 486 body 121 home.dest 745 dtype: int64
criteria = sheet_1.count() == sheet_1.shape[0]
print(criteria)
pclass True survived True name True sex True age False sibsp True parch True ticket True fare False cabin False embarked False boat False body False home.dest False dtype: bool
df = sheet_1[criteria.index[criteria]]
df
pclass | survived | name | sex | sibsp | parch | ticket | |
---|---|---|---|---|---|---|---|
0 | 1 | 1 | Allen, Miss. Elisabeth Walton | female | 0 | 0 | 24160 |
1 | 1 | 1 | Allison, Master. Hudson Trevor | male | 1 | 2 | 113781 |
2 | 1 | 0 | Allison, Miss. Helen Loraine | female | 1 | 2 | 113781 |
3 | 1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 1 | 2 | 113781 |
4 | 1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 1 | 2 | 113781 |
5 | 1 | 1 | Anderson, Mr. Harry | male | 0 | 0 | 19952 |
6 | 1 | 1 | Andrews, Miss. Kornelia Theodosia | female | 1 | 0 | 13502 |
7 | 1 | 0 | Andrews, Mr. Thomas Jr | male | 0 | 0 | 112050 |
8 | 1 | 1 | Appleton, Mrs. Edward Dale (Charlotte Lamson) | female | 2 | 0 | 11769 |
9 | 1 | 0 | Artagaveytia, Mr. Ramon | male | 0 | 0 | PC 17609 |
10 | 1 | 0 | Astor, Col. John Jacob | male | 1 | 0 | PC 17757 |
11 | 1 | 1 | Astor, Mrs. John Jacob (Madeleine Talmadge Force) | female | 1 | 0 | PC 17757 |
12 | 1 | 1 | Aubart, Mme. Leontine Pauline | female | 0 | 0 | PC 17477 |
13 | 1 | 1 | Barber, Miss. Ellen "Nellie" | female | 0 | 0 | 19877 |
14 | 1 | 1 | Barkworth, Mr. Algernon Henry Wilson | male | 0 | 0 | 27042 |
15 | 1 | 0 | Baumann, Mr. John D | male | 0 | 0 | PC 17318 |
16 | 1 | 0 | Baxter, Mr. Quigg Edmond | male | 0 | 1 | PC 17558 |
17 | 1 | 1 | Baxter, Mrs. James (Helene DeLaudeniere Chaput) | female | 0 | 1 | PC 17558 |
18 | 1 | 1 | Bazzani, Miss. Albina | female | 0 | 0 | 11813 |
19 | 1 | 0 | Beattie, Mr. Thomson | male | 0 | 0 | 13050 |
20 | 1 | 1 | Beckwith, Mr. Richard Leonard | male | 1 | 1 | 11751 |
21 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 1 | 1 | 11751 |
22 | 1 | 1 | Behr, Mr. Karl Howell | male | 0 | 0 | 111369 |
23 | 1 | 1 | Bidois, Miss. Rosalie | female | 0 | 0 | PC 17757 |
24 | 1 | 1 | Bird, Miss. Ellen | female | 0 | 0 | PC 17483 |
25 | 1 | 0 | Birnbaum, Mr. Jakob | male | 0 | 0 | 13905 |
26 | 1 | 1 | Bishop, Mr. Dickinson H | male | 1 | 0 | 11967 |
27 | 1 | 1 | Bishop, Mrs. Dickinson H (Helen Walton) | female | 1 | 0 | 11967 |
28 | 1 | 1 | Bissette, Miss. Amelia | female | 0 | 0 | PC 17760 |
29 | 1 | 1 | Bjornstrom-Steffansson, Mr. Mauritz Hakan | male | 0 | 0 | 110564 |
... | ... | ... | ... | ... | ... | ... | ... |
1279 | 3 | 0 | Vestrom, Miss. Hulda Amanda Adolfina | female | 0 | 0 | 350406 |
1280 | 3 | 0 | Vovk, Mr. Janko | male | 0 | 0 | 349252 |
1281 | 3 | 0 | Waelens, Mr. Achille | male | 0 | 0 | 345767 |
1282 | 3 | 0 | Ware, Mr. Frederick | male | 0 | 0 | 359309 |
1283 | 3 | 0 | Warren, Mr. Charles William | male | 0 | 0 | C.A. 49867 |
1284 | 3 | 0 | Webber, Mr. James | male | 0 | 0 | SOTON/OQ 3101316 |
1285 | 3 | 0 | Wenzel, Mr. Linhart | male | 0 | 0 | 345775 |
1286 | 3 | 1 | Whabee, Mrs. George Joseph (Shawneene Abi-Saab) | female | 0 | 0 | 2688 |
1287 | 3 | 0 | Widegren, Mr. Carl/Charles Peter | male | 0 | 0 | 347064 |
1288 | 3 | 0 | Wiklund, Mr. Jakob Alfred | male | 1 | 0 | 3101267 |
1289 | 3 | 0 | Wiklund, Mr. Karl Johan | male | 1 | 0 | 3101266 |
1290 | 3 | 1 | Wilkes, Mrs. James (Ellen Needs) | female | 1 | 0 | 363272 |
1291 | 3 | 0 | Willer, Mr. Aaron ("Abi Weller") | male | 0 | 0 | 3410 |
1292 | 3 | 0 | Willey, Mr. Edward | male | 0 | 0 | S.O./P.P. 751 |
1293 | 3 | 0 | Williams, Mr. Howard Hugh "Harry" | male | 0 | 0 | A/5 2466 |
1294 | 3 | 0 | Williams, Mr. Leslie | male | 0 | 0 | 54636 |
1295 | 3 | 0 | Windelov, Mr. Einar | male | 0 | 0 | SOTON/OQ 3101317 |
1296 | 3 | 0 | Wirz, Mr. Albert | male | 0 | 0 | 315154 |
1297 | 3 | 0 | Wiseman, Mr. Phillippe | male | 0 | 0 | A/4. 34244 |
1298 | 3 | 0 | Wittevrongel, Mr. Camille | male | 0 | 0 | 345771 |
1299 | 3 | 0 | Yasbeck, Mr. Antoni | male | 1 | 0 | 2659 |
1300 | 3 | 1 | Yasbeck, Mrs. Antoni (Selini Alexander) | female | 1 | 0 | 2659 |
1301 | 3 | 0 | Youseff, Mr. Gerious | male | 0 | 0 | 2628 |
1302 | 3 | 0 | Yousif, Mr. Wazli | male | 0 | 0 | 2647 |
1303 | 3 | 0 | Yousseff, Mr. Gerious | male | 0 | 0 | 2627 |
1304 | 3 | 0 | Zabour, Miss. Hileni | female | 1 | 0 | 2665 |
1305 | 3 | 0 | Zabour, Miss. Thamine | female | 1 | 0 | 2665 |
1306 | 3 | 0 | Zakarian, Mr. Mapriededer | male | 0 | 0 | 2656 |
1307 | 3 | 0 | Zakarian, Mr. Ortin | male | 0 | 0 | 2670 |
1308 | 3 | 0 | Zimmerman, Mr. Leo | male | 0 | 0 | 315082 |
1309 rows × 7 columns
print(sheet_1.shape)
print(df.shape)
(1309, 14) (1309, 7)
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
data = sheet_1[criteria.index[criteria]]
data.loc[0:data.shape[0],'sex'] = le.fit_transform(data.sex)
/Users/yueli/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy self.obj[item] = s
data = data.drop(['name','ticket'],axis=1)
data
pclass | survived | sex | sibsp | parch | |
---|---|---|---|---|---|
0 | 1 | 1 | 0 | 0 | 0 |
1 | 1 | 1 | 1 | 1 | 2 |
2 | 1 | 0 | 0 | 1 | 2 |
3 | 1 | 0 | 1 | 1 | 2 |
4 | 1 | 0 | 0 | 1 | 2 |
5 | 1 | 1 | 1 | 0 | 0 |
6 | 1 | 1 | 0 | 1 | 0 |
7 | 1 | 0 | 1 | 0 | 0 |
8 | 1 | 1 | 0 | 2 | 0 |
9 | 1 | 0 | 1 | 0 | 0 |
10 | 1 | 0 | 1 | 1 | 0 |
11 | 1 | 1 | 0 | 1 | 0 |
12 | 1 | 1 | 0 | 0 | 0 |
13 | 1 | 1 | 0 | 0 | 0 |
14 | 1 | 1 | 1 | 0 | 0 |
15 | 1 | 0 | 1 | 0 | 0 |
16 | 1 | 0 | 1 | 0 | 1 |
17 | 1 | 1 | 0 | 0 | 1 |
18 | 1 | 1 | 0 | 0 | 0 |
19 | 1 | 0 | 1 | 0 | 0 |
20 | 1 | 1 | 1 | 1 | 1 |
21 | 1 | 1 | 0 | 1 | 1 |
22 | 1 | 1 | 1 | 0 | 0 |
23 | 1 | 1 | 0 | 0 | 0 |
24 | 1 | 1 | 0 | 0 | 0 |
25 | 1 | 0 | 1 | 0 | 0 |
26 | 1 | 1 | 1 | 1 | 0 |
27 | 1 | 1 | 0 | 1 | 0 |
28 | 1 | 1 | 0 | 0 | 0 |
29 | 1 | 1 | 1 | 0 | 0 |
... | ... | ... | ... | ... | ... |
1279 | 3 | 0 | 0 | 0 | 0 |
1280 | 3 | 0 | 1 | 0 | 0 |
1281 | 3 | 0 | 1 | 0 | 0 |
1282 | 3 | 0 | 1 | 0 | 0 |
1283 | 3 | 0 | 1 | 0 | 0 |
1284 | 3 | 0 | 1 | 0 | 0 |
1285 | 3 | 0 | 1 | 0 | 0 |
1286 | 3 | 1 | 0 | 0 | 0 |
1287 | 3 | 0 | 1 | 0 | 0 |
1288 | 3 | 0 | 1 | 1 | 0 |
1289 | 3 | 0 | 1 | 1 | 0 |
1290 | 3 | 1 | 0 | 1 | 0 |
1291 | 3 | 0 | 1 | 0 | 0 |
1292 | 3 | 0 | 1 | 0 | 0 |
1293 | 3 | 0 | 1 | 0 | 0 |
1294 | 3 | 0 | 1 | 0 | 0 |
1295 | 3 | 0 | 1 | 0 | 0 |
1296 | 3 | 0 | 1 | 0 | 0 |
1297 | 3 | 0 | 1 | 0 | 0 |
1298 | 3 | 0 | 1 | 0 | 0 |
1299 | 3 | 0 | 1 | 1 | 0 |
1300 | 3 | 1 | 0 | 1 | 0 |
1301 | 3 | 0 | 1 | 0 | 0 |
1302 | 3 | 0 | 1 | 0 | 0 |
1303 | 3 | 0 | 1 | 0 | 0 |
1304 | 3 | 0 | 0 | 1 | 0 |
1305 | 3 | 0 | 0 | 1 | 0 |
1306 | 3 | 0 | 1 | 0 | 0 |
1307 | 3 | 0 | 1 | 0 | 0 |
1308 | 3 | 0 | 1 | 0 | 0 |
1309 rows × 5 columns
from sklearn import model_selection
X = data.drop(["survived"], axis=1).values
y = data["survived"].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(X,y,test_size=0.2, shuffle=True)
X_train
array([[1, 0, 1, 2], [2, 0, 0, 0], [3, 1, 0, 0], ..., [3, 1, 0, 0], [1, 1, 0, 0], [3, 0, 0, 0]])
from sklearn.linear_model import LogisticRegression
logitreg = LogisticRegression(solver='liblinear')
fit = logitreg.fit(X_train, y_train)
fit.coef_
array([[-0.80616445, -2.53244105, -0.20512107, 0.04807778]])
effect_size = pd.DataFrame(fit.coef_)
effect_size.columns = data.drop(["survived"], axis=1).columns
effect_size
pclass | sex | sibsp | parch | |
---|---|---|---|---|
0 | -0.806164 | -2.532441 | -0.205121 | 0.048078 |
y_train_pred = fit.predict(X_train)
y_test_pred = fit.predict(X_test)
The predictions are already thesholded to be either 0 or 1
print(y_test_pred)
[0 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 0 0 1 1 1 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0]
acc_train = sum(y_train_pred==y_train)/len(y_train)
acc_test = sum(y_test_pred==y_test)/len(y_test)
print(f"train accuracy: {acc_train:.3f}; \
test accuracy: {acc_test:.3f}")
train accuracy: 0.799; test accuracy: 0.760
To get probabilities from logistic regression, we need to call the following function
y_test_prob = fit.predict_proba(X_test)
y_test_prob
array([[0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.38132888, 0.61867112], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.48159095, 0.51840905], [0.75881787, 0.24118213], [0.77597541, 0.22402459], [0.92120054, 0.07879946], [0.24360185, 0.75639815], [0.38132888, 0.61867112], [0.77597541, 0.22402459], [0.25256948, 0.74743052], [0.25256948, 0.74743052], [0.35891714, 0.64108286], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.77597541, 0.22402459], [0.80961123, 0.19038877], [0.43075031, 0.56924969], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.60957787, 0.39042213], [0.12573608, 0.87426392], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.23485257, 0.76514743], [0.43075031, 0.56924969], [0.38132888, 0.61867112], [0.13111688, 0.86888312], [0.90496496, 0.09503504], [0.12573608, 0.87426392], [0.13111688, 0.86888312], [0.10486335, 0.89513665], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.38132888, 0.61867112], [0.10043541, 0.89956459], [0.38132888, 0.61867112], [0.38132888, 0.61867112], [0.13111688, 0.86888312], [0.88579894, 0.11420106], [0.4190028 , 0.5809972 ], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.59583071, 0.40416929], [0.88579894, 0.11420106], [0.8870884 , 0.1129116 ], [0.77597541, 0.22402459], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.21583994, 0.78416006], [0.88579894, 0.11420106], [0.90496496, 0.09503504], [0.88579894, 0.11420106], [0.65505215, 0.34494785], [0.60735332, 0.39264668], [0.90496496, 0.09503504], [0.90496496, 0.09503504], [0.77597541, 0.22402459], [0.60735332, 0.39264668], [0.59583071, 0.40416929], [0.77597541, 0.22402459], [0.55981643, 0.44018357], [0.88579894, 0.11420106], [0.13111688, 0.86888312], [0.13111688, 0.86888312], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.90496496, 0.09503504], [0.43075031, 0.56924969], [0.37005301, 0.62994699], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.21583994, 0.78416006], [0.40734641, 0.59265359], [0.88579894, 0.11420106], [0.90074883, 0.09925117], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.38132888, 0.61867112], [0.90496496, 0.09503504], [0.21583994, 0.78416006], [0.94379673, 0.05620327], [0.64410908, 0.35589092], [0.38132888, 0.61867112], [0.38132888, 0.61867112], [0.64410908, 0.35589092], [0.77597541, 0.22402459], [0.25256948, 0.74743052], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.32926956, 0.67073044], [0.38132888, 0.61867112], [0.90496496, 0.09503504], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.90074883, 0.09925117], [0.10946276, 0.89053724], [0.88579894, 0.11420106], [0.65505215, 0.34494785], [0.88579894, 0.11420106], [0.10946276, 0.89053724], [0.35891714, 0.64108286], [0.37005301, 0.62994699], [0.13111688, 0.86888312], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.12573608, 0.87426392], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.67924126, 0.32075874], [0.77597541, 0.22402459], [0.88579894, 0.11420106], [0.90496496, 0.09503504], [0.58420145, 0.41579855], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.90496496, 0.09503504], [0.88579894, 0.11420106], [0.10946276, 0.89053724], [0.38132888, 0.61867112], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.43075031, 0.56924969], [0.43075031, 0.56924969], [0.60735332, 0.39264668], [0.77597541, 0.22402459], [0.13111688, 0.86888312], [0.90496496, 0.09503504], [0.38132888, 0.61867112], [0.94379673, 0.05620327], [0.77597541, 0.22402459], [0.21583994, 0.78416006], [0.43075031, 0.56924969], [0.21583994, 0.78416006], [0.13111688, 0.86888312], [0.88579894, 0.11420106], [0.25256948, 0.74743052], [0.88579894, 0.11420106], [0.10946276, 0.89053724], [0.80208997, 0.19791003], [0.60735332, 0.39264668], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.77597541, 0.22402459], [0.38132888, 0.61867112], [0.77597541, 0.22402459], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.63301335, 0.36698665], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.43075031, 0.56924969], [0.88579894, 0.11420106], [0.94379673, 0.05620327], [0.24360185, 0.75639815], [0.20781382, 0.79218618], [0.43075031, 0.56924969], [0.88579894, 0.11420106], [0.91393181, 0.08606819], [0.19242812, 0.80757188], [0.94379673, 0.05620327], [0.52083139, 0.47916861], [0.38132888, 0.61867112], [0.60735332, 0.39264668], [0.21583994, 0.78416006], [0.77597541, 0.22402459], [0.10486335, 0.89513665], [0.40734641, 0.59265359], [0.88579894, 0.11420106], [0.24360185, 0.75639815], [0.88579894, 0.11420106], [0.21583994, 0.78416006], [0.13111688, 0.86888312], [0.88579894, 0.11420106], [0.92120054, 0.07879946], [0.77597541, 0.22402459], [0.17121611, 0.82878389], [0.35891714, 0.64108286], [0.88579894, 0.11420106], [0.59583071, 0.40416929], [0.88579894, 0.11420106], [0.13111688, 0.86888312], [0.92120054, 0.07879946], [0.11071774, 0.88928226], [0.21583994, 0.78416006], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.60735332, 0.39264668], [0.77597541, 0.22402459], [0.88579894, 0.11420106], [0.64410908, 0.35589092], [0.90074883, 0.09925117], [0.59583071, 0.40416929], [0.77597541, 0.22402459], [0.88579894, 0.11420106], [0.80208997, 0.19791003], [0.38132888, 0.61867112], [0.38132888, 0.61867112], [0.48159095, 0.51840905], [0.38132888, 0.61867112], [0.38132888, 0.61867112], [0.88579894, 0.11420106], [0.65505215, 0.34494785], [0.88579894, 0.11420106], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.13111688, 0.86888312], [0.65505215, 0.34494785], [0.10946276, 0.89053724], [0.88579894, 0.11420106], [0.38132888, 0.61867112], [0.92875611, 0.07124389], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.55981643, 0.44018357], [0.37005301, 0.62994699], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.88579894, 0.11420106], [0.87570526, 0.12429474], [0.35891714, 0.64108286], [0.90496496, 0.09503504], [0.60735332, 0.39264668], [0.92875611, 0.07124389], [0.95156933, 0.04843067], [0.65505215, 0.34494785], [0.77597541, 0.22402459], [0.77597541, 0.22402459], [0.77597541, 0.22402459], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.80961123, 0.19038877], [0.88579894, 0.11420106], [0.33710487, 0.66289513], [0.88579894, 0.11420106], [0.38132888, 0.61867112], [0.80961123, 0.19038877], [0.12573608, 0.87426392], [0.60735332, 0.39264668], [0.88579894, 0.11420106], [0.64410908, 0.35589092], [0.80208997, 0.19791003], [0.90074883, 0.09925117], [0.10946276, 0.89053724], [0.74286276, 0.25713724], [0.88579894, 0.11420106], [0.93187249, 0.06812751], [0.60735332, 0.39264668]])
y_test_prob[:,1]
array([0.11420106, 0.11420106, 0.61867112, 0.61867112, 0.11420106, 0.51840905, 0.24118213, 0.22402459, 0.07879946, 0.75639815, 0.61867112, 0.22402459, 0.74743052, 0.74743052, 0.64108286, 0.11420106, 0.11420106, 0.11420106, 0.22402459, 0.19038877, 0.56924969, 0.39264668, 0.11420106, 0.39042213, 0.87426392, 0.39264668, 0.11420106, 0.76514743, 0.56924969, 0.61867112, 0.86888312, 0.09503504, 0.87426392, 0.86888312, 0.89513665, 0.61867112, 0.11420106, 0.61867112, 0.89956459, 0.61867112, 0.61867112, 0.86888312, 0.11420106, 0.5809972 , 0.11420106, 0.11420106, 0.40416929, 0.11420106, 0.1129116 , 0.22402459, 0.61867112, 0.11420106, 0.11420106, 0.11420106, 0.78416006, 0.11420106, 0.09503504, 0.11420106, 0.34494785, 0.39264668, 0.09503504, 0.09503504, 0.22402459, 0.39264668, 0.40416929, 0.22402459, 0.44018357, 0.11420106, 0.86888312, 0.86888312, 0.11420106, 0.11420106, 0.09503504, 0.56924969, 0.62994699, 0.11420106, 0.11420106, 0.78416006, 0.59265359, 0.11420106, 0.09925117, 0.39264668, 0.11420106, 0.61867112, 0.09503504, 0.78416006, 0.05620327, 0.35589092, 0.61867112, 0.61867112, 0.35589092, 0.22402459, 0.74743052, 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.67073044, 0.61867112, 0.09503504, 0.11420106, 0.11420106, 0.09925117, 0.89053724, 0.11420106, 0.34494785, 0.11420106, 0.89053724, 0.64108286, 0.62994699, 0.86888312, 0.61867112, 0.11420106, 0.87426392, 0.11420106, 0.11420106, 0.11420106, 0.32075874, 0.22402459, 0.11420106, 0.09503504, 0.41579855, 0.61867112, 0.11420106, 0.09503504, 0.11420106, 0.89053724, 0.61867112, 0.39264668, 0.11420106, 0.56924969, 0.56924969, 0.39264668, 0.22402459, 0.86888312, 0.09503504, 0.61867112, 0.05620327, 0.22402459, 0.78416006, 0.56924969, 0.78416006, 0.86888312, 0.11420106, 0.74743052, 0.11420106, 0.89053724, 0.19791003, 0.39264668, 0.39264668, 0.11420106, 0.22402459, 0.61867112, 0.22402459, 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.36698665, 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.56924969, 0.11420106, 0.05620327, 0.75639815, 0.79218618, 0.56924969, 0.11420106, 0.08606819, 0.80757188, 0.05620327, 0.47916861, 0.61867112, 0.39264668, 0.78416006, 0.22402459, 0.89513665, 0.59265359, 0.11420106, 0.75639815, 0.11420106, 0.78416006, 0.86888312, 0.11420106, 0.07879946, 0.22402459, 0.82878389, 0.64108286, 0.11420106, 0.40416929, 0.11420106, 0.86888312, 0.07879946, 0.88928226, 0.78416006, 0.61867112, 0.11420106, 0.39264668, 0.22402459, 0.11420106, 0.35589092, 0.09925117, 0.40416929, 0.22402459, 0.11420106, 0.19791003, 0.61867112, 0.61867112, 0.51840905, 0.61867112, 0.61867112, 0.11420106, 0.34494785, 0.11420106, 0.39264668, 0.11420106, 0.11420106, 0.86888312, 0.34494785, 0.89053724, 0.11420106, 0.61867112, 0.07124389, 0.11420106, 0.11420106, 0.44018357, 0.62994699, 0.11420106, 0.11420106, 0.11420106, 0.12429474, 0.64108286, 0.09503504, 0.39264668, 0.07124389, 0.04843067, 0.34494785, 0.22402459, 0.22402459, 0.22402459, 0.39264668, 0.11420106, 0.19038877, 0.11420106, 0.66289513, 0.11420106, 0.61867112, 0.19038877, 0.87426392, 0.39264668, 0.11420106, 0.35589092, 0.19791003, 0.09925117, 0.89053724, 0.25713724, 0.11420106, 0.06812751, 0.39264668])
Often we don't want to set a single threshold but rather evaluate the model based on all thresholds. ROC is the a way to this.
At each threshold, it calculates two rates:
True Positive Rate = True Positives / (True Positives + False Negatives) False Positive Rate = False Positives / (False Positives + True Negatives)
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
# calculate AUC
probs = y_test_prob[:,1]
auc = roc_auc_score(y_test, probs)
print('AUC: %.3f' % auc)
pyplot.plot([0, 1], [0, 1], linestyle='--')
# plot the roc curve for the model
fpr, tpr, thresholds = roc_curve(y_test, probs)
pyplot.plot(fpr, tpr, marker='.')
pyplot.xlabel("False Positive Rate")
pyplot.ylabel("True Positive Rate")
pyplot.show()
AUC: 0.794