import numpy as np
arr = np.array([1,3,4,5,6])
arr
array([1, 3, 4, 5, 6])
arr.shape
(5,)
arr.dtype
dtype('int32')
arr = np.array([1,'st','er',3])
arr.dtype
dtype('<U11')
np.sum(arr)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-5-883ceacaba8a> in <module>() ----> 1 np.sum(arr) C:\Users\sharmatu\AppData\Local\Continuum\Anaconda\envs\Python3.5\lib\site-packages\numpy\core\fromnumeric.py in sum(a, axis, dtype, out, keepdims) 1812 return sum(axis=axis, dtype=dtype, out=out, **kwargs) 1813 return _methods._sum(a, axis=axis, dtype=dtype, -> 1814 out=out, **kwargs) 1815 1816 C:\Users\sharmatu\AppData\Local\Continuum\Anaconda\envs\Python3.5\lib\site-packages\numpy\core\_methods.py in _sum(a, axis, dtype, out, keepdims) 30 31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False): ---> 32 return umr_sum(a, axis, dtype, out, keepdims) 33 34 def _prod(a, axis=None, dtype=None, out=None, keepdims=False): TypeError: cannot perform reduce with flexible type
arr = np.array([[1,2,3],[2,4,6],[8,8,8]])
arr.shape
(3, 3)
arr
array([[1, 2, 3], [2, 4, 6], [8, 8, 8]])
arr = np.zeros((2,4))
arr
array([[ 0., 0., 0., 0.], [ 0., 0., 0., 0.]])
arr = np.ones((2,4))
arr
array([[ 1., 1., 1., 1.], [ 1., 1., 1., 1.]])
arr = np.identity(3)
arr
array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]])
arr = np.random.randn(3,4)
arr
array([[ 0.11069212, -1.3712359 , -0.35438971, 0.03397169], [ 0.35755146, -1.15864674, 0.49294546, -0.59452261], [ 0.85139437, 0.75329689, -0.57315488, -0.02419983]])
from io import BytesIO
b = BytesIO(b"2,23,33\n32,42,63.4\n35,77,12")
arr = np.genfromtxt(b, delimiter=",")
arr
array([[ 2. , 23. , 33. ], [ 32. , 42. , 63.4], [ 35. , 77. , 12. ]])
arr[1]
array([ 32. , 42. , 63.4])
arr = np.arange(12).reshape(2,2,3)
arr
array([[[ 0, 1, 2], [ 3, 4, 5]], [[ 6, 7, 8], [ 9, 10, 11]]])
arr[0]
array([[0, 1, 2], [3, 4, 5]])
arr = np.arange(10)
arr[5:]
array([5, 6, 7, 8, 9])
arr[5:8]
array([5, 6, 7])
arr[:-5]
array([0, 1, 2, 3, 4])
arr = np.arange(12).reshape(2,2,3)
arr
array([[[ 0, 1, 2], [ 3, 4, 5]], [[ 6, 7, 8], [ 9, 10, 11]]])
arr[1:2]
array([[[ 6, 7, 8], [ 9, 10, 11]]])
arr = np.arange(27).reshape(3,3,3)
arr
array([[[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8]], [[ 9, 10, 11], [12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23], [24, 25, 26]]])
arr[:,:,2]
array([[ 2, 5, 8], [11, 14, 17], [20, 23, 26]])
arr[...,2]
array([[ 2, 5, 8], [11, 14, 17], [20, 23, 26]])
arr = np.arange(9).reshape(3,3)
arr
array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr[[0,1,2],[1,0,0]]
array([1, 3, 6])
cities = np.array(["delhi","bangalore","mumbai","chennai","bhopal"])
city_data = np.random.randn(5,3)
city_data
array([[-0.04941315, -0.41476745, -0.60236098], [-1.75033842, 0.62559942, -0.58148095], [ 0.43502897, -0.06588454, -0.40865494], [-0.53978394, -0.7317352 , -0.66959325], [ 0.45550659, -0.53018559, -0.2241479 ]])
city_data[cities =="delhi"]
array([[-0.04941315, -0.41476745, -0.60236098]])
city_data[city_data >0]
array([ 0.62559942, 0.43502897, 0.45550659])
city_data[city_data >0] = 0
city_data
array([[-0.04941315, -0.41476745, -0.60236098], [-1.75033842, 0. , -0.58148095], [ 0. , -0.06588454, -0.40865494], [-0.53978394, -0.7317352 , -0.66959325], [ 0. , -0.53018559, -0.2241479 ]])
arr = np.arange(15).reshape(3,5)
arr
array([[ 0, 1, 2, 3, 4], [ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14]])
arr + 5
array([[ 5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17, 18, 19]])
arr * 2
array([[ 0, 2, 4, 6, 8], [10, 12, 14, 16, 18], [20, 22, 24, 26, 28]])
arr1 = np.arange(15).reshape(5,3)
arr2 = np.arange(5).reshape(5,1)
arr2 + arr1
array([[ 0, 1, 2], [ 4, 5, 6], [ 8, 9, 10], [12, 13, 14], [16, 17, 18]])
arr1
array([[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8], [ 9, 10, 11], [12, 13, 14]])
arr2
array([[0], [1], [2], [3], [4]])
arr1 = np.random.randn(5,3)
arr1
array([[-0.92631238, -0.75087049, 0.38818842], [ 1.34359452, -0.68896739, -0.58429706], [ 1.06638747, -0.40104143, 0.99089011], [ 0.26232893, 1.4349162 , -0.97503394], [ 0.35716111, 0.20198017, 0.08151897]])
np.modf(arr1)
(array([[-0.92631238, -0.75087049, 0.38818842], [ 0.34359452, -0.68896739, -0.58429706], [ 0.06638747, -0.40104143, 0.99089011], [ 0.26232893, 0.4349162 , -0.97503394], [ 0.35716111, 0.20198017, 0.08151897]]), array([[-0., -0., 0.], [ 1., -0., -0.], [ 1., -0., 0.], [ 0., 1., -0.], [ 0., 0., 0.]]))
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([[9,8,7],[6,5,4],[1,2,3]])
A.dot(B)
array([[ 24, 24, 24], [ 72, 69, 66], [120, 114, 108]])
A = np.arange(15).reshape(3,5)
A.T
array([[ 0, 5, 10], [ 1, 6, 11], [ 2, 7, 12], [ 3, 8, 13], [ 4, 9, 14]])
np.linalg.svd(A)
(array([[-0.15425367, 0.89974393, 0.40824829], [-0.50248417, 0.28432901, -0.81649658], [-0.85071468, -0.3310859 , 0.40824829]]), array([ 3.17420265e+01, 2.72832424e+00, 4.58204637e-16]), array([[-0.34716018, -0.39465093, -0.44214167, -0.48963242, -0.53712316], [-0.69244481, -0.37980343, -0.06716206, 0.24547932, 0.55812069], [ 0.33717486, -0.77044776, 0.28661392, 0.38941603, -0.24275704], [-0.36583339, 0.32092943, -0.08854543, 0.67763613, -0.54418674], [-0.39048565, 0.05843412, 0.8426222 , -0.29860414, -0.21196653]]))
a = np.array([[7,5,-3], [3,-5,2],[5,3,-7]])
b = np.array([16,-8,0])
x = np.linalg.solve(a, b)
x
array([ 1., 3., 2.])
np.allclose(np.dot(a, x), b)
True
import pandas as pd
d = [{'city':'Delhi',"data":1000},
{'city':'Bangalore',"data":2000},
{'city':'Mumbai',"data":1000}]
pd.DataFrame(d)
city | data | |
---|---|---|
0 | Delhi | 1000 |
1 | Bangalore | 2000 |
2 | Mumbai | 1000 |
df = pd.DataFrame(d)
city_data = pd.read_csv(filepath_or_buffer='simplemaps-worldcities-basic.csv')
city_data.head(n=10)
city | city_ascii | lat | lng | pop | country | iso2 | iso3 | province | |
---|---|---|---|---|---|---|---|---|---|
0 | Qal eh-ye Now | Qal eh-ye | 34.983000 | 63.133300 | 2997.0 | Afghanistan | AF | AFG | Badghis |
1 | Chaghcharan | Chaghcharan | 34.516701 | 65.250001 | 15000.0 | Afghanistan | AF | AFG | Ghor |
2 | Lashkar Gah | Lashkar Gah | 31.582998 | 64.360000 | 201546.0 | Afghanistan | AF | AFG | Hilmand |
3 | Zaranj | Zaranj | 31.112001 | 61.886998 | 49851.0 | Afghanistan | AF | AFG | Nimroz |
4 | Tarin Kowt | Tarin Kowt | 32.633298 | 65.866699 | 10000.0 | Afghanistan | AF | AFG | Uruzgan |
5 | Zareh Sharan | Zareh Sharan | 32.850000 | 68.416705 | 13737.0 | Afghanistan | AF | AFG | Paktika |
6 | Asadabad | Asadabad | 34.866000 | 71.150005 | 48400.0 | Afghanistan | AF | AFG | Kunar |
7 | Taloqan | Taloqan | 36.729999 | 69.540004 | 64256.0 | Afghanistan | AF | AFG | Takhar |
8 | Mahmud-E Eraqi | Mahmud-E Eraqi | 35.016696 | 69.333301 | 7407.0 | Afghanistan | AF | AFG | Kapisa |
9 | Mehtar Lam | Mehtar Lam | 34.650000 | 70.166701 | 17345.0 | Afghanistan | AF | AFG | Laghman |
city_data.tail()
city | city_ascii | lat | lng | pop | country | iso2 | iso3 | province | |
---|---|---|---|---|---|---|---|---|---|
7317 | Mutare | Mutare | -18.970019 | 32.650038 | 216785.0 | Zimbabwe | ZW | ZWE | Manicaland |
7318 | Kadoma | Kadoma | -18.330006 | 29.909947 | 56400.0 | Zimbabwe | ZW | ZWE | Mashonaland West |
7319 | Chitungwiza | Chitungwiza | -18.000001 | 31.100003 | 331071.0 | Zimbabwe | ZW | ZWE | Harare |
7320 | Harare | Harare | -17.817790 | 31.044709 | 1557406.5 | Zimbabwe | ZW | ZWE | Harare |
7321 | Bulawayo | Bulawayo | -20.169998 | 28.580002 | 697096.0 | Zimbabwe | ZW | ZWE | Bulawayo |
series_es = city_data.lat
type(series_es)
pandas.core.series.Series
series_es[1:10:2]
1 34.516701 3 31.112001 5 32.850000 7 36.729999 9 34.650000 Name: lat, dtype: float64
series_es[:7]
0 34.983000 1 34.516701 2 31.582998 3 31.112001 4 32.633298 5 32.850000 6 34.866000 Name: lat, dtype: float64
series_es[:-7315]
0 34.983000 1 34.516701 2 31.582998 3 31.112001 4 32.633298 5 32.850000 6 34.866000 Name: lat, dtype: float64
city_data[:7]
city | city_ascii | lat | lng | pop | country | iso2 | iso3 | province | |
---|---|---|---|---|---|---|---|---|---|
0 | Qal eh-ye Now | Qal eh-ye | 34.983000 | 63.133300 | 2997.0 | Afghanistan | AF | AFG | Badghis |
1 | Chaghcharan | Chaghcharan | 34.516701 | 65.250001 | 15000.0 | Afghanistan | AF | AFG | Ghor |
2 | Lashkar Gah | Lashkar Gah | 31.582998 | 64.360000 | 201546.0 | Afghanistan | AF | AFG | Hilmand |
3 | Zaranj | Zaranj | 31.112001 | 61.886998 | 49851.0 | Afghanistan | AF | AFG | Nimroz |
4 | Tarin Kowt | Tarin Kowt | 32.633298 | 65.866699 | 10000.0 | Afghanistan | AF | AFG | Uruzgan |
5 | Zareh Sharan | Zareh Sharan | 32.850000 | 68.416705 | 13737.0 | Afghanistan | AF | AFG | Paktika |
6 | Asadabad | Asadabad | 34.866000 | 71.150005 | 48400.0 | Afghanistan | AF | AFG | Kunar |
city_data.iloc[:5,:4]
city | city_ascii | lat | lng | |
---|---|---|---|---|
0 | Qal eh-ye Now | Qal eh-ye | 34.983000 | 63.133300 |
1 | Chaghcharan | Chaghcharan | 34.516701 | 65.250001 |
2 | Lashkar Gah | Lashkar Gah | 31.582998 | 64.360000 |
3 | Zaranj | Zaranj | 31.112001 | 61.886998 |
4 | Tarin Kowt | Tarin Kowt | 32.633298 | 65.866699 |
city_data[city_data['pop'] > 10000000][city_data.columns[pd.Series(city_data.columns).str.startswith('l')]]
lat | lng | |
---|---|---|
360 | -34.602502 | -58.397531 |
1171 | -23.558680 | -46.625020 |
2068 | 31.216452 | 121.436505 |
3098 | 28.669993 | 77.230004 |
3110 | 19.016990 | 72.856989 |
3492 | 35.685017 | 139.751407 |
4074 | 19.442442 | -99.130988 |
4513 | 24.869992 | 66.990009 |
5394 | 55.752164 | 37.615523 |
6124 | 41.104996 | 29.010002 |
7071 | 40.749979 | -73.980017 |
city_greater_10mil = city_data[city_data['pop'] > 10000000]
city_greater_10mil.rename(columns={'pop':'population'}, inplace=True)
city_greater_10mil.where(city_greater_10mil.population > 15000000)
C:\Users\sharmatu\AppData\Local\Continuum\Anaconda\envs\Python3.5\lib\site-packages\pandas\core\frame.py:2746: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy **kwargs)
city | city_ascii | lat | lng | population | country | iso2 | iso3 | province | |
---|---|---|---|---|---|---|---|---|---|
360 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1171 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2068 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3098 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3110 | Mumbai | Mumbai | 19.016990 | 72.856989 | 15834918.0 | India | IN | IND | Maharashtra |
3492 | Tokyo | Tokyo | 35.685017 | 139.751407 | 22006299.5 | Japan | JP | JPN | Tokyo |
4074 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4513 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5394 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
6124 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
7071 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
df = pd.DataFrame(np.random.randn(8, 3),
columns=['A', 'B', 'C'])
nparray = df.values
type(nparray)
numpy.ndarray
from numpy import nan
df.iloc[4,2] = nan
df
A | B | C | |
---|---|---|---|
0 | -1.279701 | -0.074395 | -1.370447 |
1 | 1.536038 | 0.060453 | 0.856685 |
2 | 0.475407 | 1.029245 | -0.420355 |
3 | -1.636635 | -0.385956 | -0.261129 |
4 | 1.259545 | 1.916660 | NaN |
5 | 1.591468 | 0.813209 | 0.605695 |
6 | -1.270361 | 0.200358 | 0.035595 |
7 | -0.189060 | -1.874718 | -1.088224 |
df.fillna(0)
A | B | C | |
---|---|---|---|
0 | -1.279701 | -0.074395 | -1.370447 |
1 | 1.536038 | 0.060453 | 0.856685 |
2 | 0.475407 | 1.029245 | -0.420355 |
3 | -1.636635 | -0.385956 | -0.261129 |
4 | 1.259545 | 1.916660 | 0.000000 |
5 | 1.591468 | 0.813209 | 0.605695 |
6 | -1.270361 | 0.200358 | 0.035595 |
7 | -0.189060 | -1.874718 | -1.088224 |
columns_numeric = ['lat','lng','pop']
city_data[columns_numeric].mean()
lat 20.662876 lng 10.711914 pop 265463.071633 dtype: float64
city_data[columns_numeric].sum()
lat 1.512936e+05 lng 7.843263e+04 pop 1.943721e+09 dtype: float64
city_data[columns_numeric].count()
lat 7322 lng 7322 pop 7322 dtype: int64
city_data[columns_numeric].median()
lat 26.792730 lng 18.617509 pop 61322.750000 dtype: float64
city_data[columns_numeric].quantile(0.8)
lat 46.852480 lng 89.900018 pop 269210.000000 Name: 0.8, dtype: float64
city_data[columns_numeric].sum(axis = 1).head()
0 3095.116300 1 15099.766702 2 201641.942998 3 49943.998999 4 10098.499997 dtype: float64
city_data[columns_numeric].describe()
lat | lng | pop | |
---|---|---|---|
count | 7322.000000 | 7322.000000 | 7.322000e+03 |
mean | 20.662876 | 10.711914 | 2.654631e+05 |
std | 29.134818 | 79.044615 | 8.287622e+05 |
min | -89.982894 | -179.589979 | -9.900000e+01 |
25% | -0.324710 | -64.788472 | 1.734425e+04 |
50% | 26.792730 | 18.617509 | 6.132275e+04 |
75% | 43.575448 | 73.103628 | 2.001726e+05 |
max | 82.483323 | 179.383304 | 2.200630e+07 |
city_data1 = city_data.sample(3)
city_data2 = city_data.sample(3)
city_data_combine = pd.concat([city_data1,city_data2])
city_data_combine
city | city_ascii | lat | lng | pop | country | iso2 | iso3 | province | |
---|---|---|---|---|---|---|---|---|---|
4857 | Shebekino | Shebekino | 50.414350 | 36.894378 | 41301.5 | Russia | RU | RUS | Belgorod |
1561 | Bouar | Bouar | 5.950010 | 15.599967 | 31476.5 | Central African Republic | CF | CAF | Nana-Mambéré |
6650 | Scottsbluff | Scottsbluff | 41.867508 | -103.660686 | 20172.0 | United States of America | US | USA | Nebraska |
964 | Janauba | Janauba | -15.799618 | -43.309977 | 38641.0 | Brazil | BR | BRA | Minas Gerais |
3896 | Altata | Altata | 24.636045 | -107.916215 | 750.0 | Mexico | MX | MEX | Sinaloa |
7201 | Tra Vinh | Tra Vinh | 9.934002 | 106.334002 | 131360.0 | Vietnam | VN | VNM | Trà Vinh |
df1 = pd.DataFrame({'col1': ['col10', 'col11', 'col12', 'col13'],
'col2': ['col20', 'col21', 'col22', 'col23'],
'col3': ['col30', 'col31', 'col32', 'col33'],
'col4': ['col40', 'col41', 'col42', 'col43']},
index=[0, 1, 2, 3])
df1
col1 | col2 | col3 | col4 | |
---|---|---|---|---|
0 | col10 | col20 | col30 | col40 |
1 | col11 | col21 | col31 | col41 |
2 | col12 | col22 | col32 | col42 |
3 | col13 | col23 | col33 | col43 |
df4 = pd.DataFrame({'col2': ['col22', 'col23', 'col26', 'col27'],
'Col4': ['Col42', 'Col43', 'Col46', 'Col47'],
'col6': ['col62', 'col63', 'col66', 'col67']},
index=[2, 3, 6, 7])
pd.concat([df1,df4], axis=1)
col1 | col2 | col3 | col4 | Col4 | col2 | col6 | |
---|---|---|---|---|---|---|---|
0 | col10 | col20 | col30 | col40 | NaN | NaN | NaN |
1 | col11 | col21 | col31 | col41 | NaN | NaN | NaN |
2 | col12 | col22 | col32 | col42 | Col42 | col22 | col62 |
3 | col13 | col23 | col33 | col43 | Col43 | col23 | col63 |
6 | NaN | NaN | NaN | NaN | Col46 | col26 | col66 |
7 | NaN | NaN | NaN | NaN | Col47 | col27 | col67 |
country_data = city_data[['iso3','country']].drop_duplicates()
country_data.shape
(223, 2)
country_data.head()
iso3 | country | |
---|---|---|
0 | AFG | Afghanistan |
33 | ALD | Aland |
34 | ALB | Albania |
60 | DZA | Algeria |
111 | ASM | American Samoa |
del(city_data['country'])
city_data.merge(country_data, 'inner').head()
city | city_ascii | lat | lng | pop | iso2 | iso3 | province | country | |
---|---|---|---|---|---|---|---|---|---|
0 | Qal eh-ye Now | Qal eh-ye | 34.983000 | 63.133300 | 2997.0 | AF | AFG | Badghis | Afghanistan |
1 | Chaghcharan | Chaghcharan | 34.516701 | 65.250001 | 15000.0 | AF | AFG | Ghor | Afghanistan |
2 | Lashkar Gah | Lashkar Gah | 31.582998 | 64.360000 | 201546.0 | AF | AFG | Hilmand | Afghanistan |
3 | Zaranj | Zaranj | 31.112001 | 61.886998 | 49851.0 | AF | AFG | Nimroz | Afghanistan |
4 | Tarin Kowt | Tarin Kowt | 32.633298 | 65.866699 | 10000.0 | AF | AFG | Uruzgan | Afghanistan |
from sklearn import datasets
diabetes = datasets.load_diabetes()
X = diabetes.data[:10]
y = diabetes.target
X[:5]
array([[ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235 , -0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613], [-0.00188202, -0.04464164, -0.05147406, -0.02632783, -0.00844872, -0.01916334, 0.07441156, -0.03949338, -0.06832974, -0.09220405], [ 0.08529891, 0.05068012, 0.04445121, -0.00567061, -0.04559945, -0.03419447, -0.03235593, -0.00259226, 0.00286377, -0.02593034], [-0.08906294, -0.04464164, -0.01159501, -0.03665645, 0.01219057, 0.02499059, -0.03603757, 0.03430886, 0.02269202, -0.00936191], [ 0.00538306, -0.04464164, -0.03638469, 0.02187235, 0.00393485, 0.01559614, 0.00814208, -0.00259226, -0.03199144, -0.04664087]])
y[:10]
array([ 151., 75., 141., 206., 135., 97., 138., 63., 110., 310.])
feature_names=['age', 'sex', 'bmi', 'bp',
's1', 's2', 's3', 's4', 's5', 's6']
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV
diabetes = datasets.load_diabetes()
X_train = diabetes.data[:310]
y_train = diabetes.target[:310]
X_test = diabetes.data[310:]
y_test = diabetes.target[310:]
lasso = Lasso(random_state=0)
alphas = np.logspace(-4, -0.5, 30)
scores = list()
scores_std = list()
estimator = GridSearchCV(lasso,
param_grid = dict(alpha=alphas))
estimator.fit(X_train, y_train)
GridSearchCV(cv=None, error_score='raise', estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000, normalize=False, positive=False, precompute=False, random_state=0, selection='cyclic', tol=0.0001, warm_start=False), fit_params={}, iid=True, n_jobs=1, param_grid={'alpha': array([ 1.00000e-04, 1.32035e-04, 1.74333e-04, 2.30181e-04, 3.03920e-04, 4.01281e-04, 5.29832e-04, 6.99564e-04, 9.23671e-04, 1.21957e-03, 1.61026e-03, 2.12611e-03, 2.80722e-03, 3.70651e-03, 4.89390e-03, 6.46167e-03, 8.... 7.88046e-02, 1.04050e-01, 1.37382e-01, 1.81393e-01, 2.39503e-01, 3.16228e-01])}, pre_dispatch='2*n_jobs', refit=True, return_train_score=True, scoring=None, verbose=0)
estimator.best_score_
0.46540637590235312
estimator.best_estimator_
Lasso(alpha=0.025929437974046669, copy_X=True, fit_intercept=True, max_iter=1000, normalize=False, positive=False, precompute=False, random_state=0, selection='cyclic', tol=0.0001, warm_start=False)
estimator.predict(X_test)
array([ 203.42104984, 177.6595529 , 122.62188598, 212.81136958, 173.61633075, 114.76145025, 202.36033584, 171.70767813, 164.28694562, 191.29091477, 191.41279009, 288.2772433 , 296.47009002, 234.53378413, 210.61427168, 228.62812055, 156.74489991, 225.08834492, 191.75874632, 102.81600989, 172.373221 , 111.20843429, 290.22242876, 178.64605207, 78.13722832, 86.35832297, 256.41378529, 165.99622543, 121.29260976, 153.48718848, 163.09835143, 180.0932902 , 161.4330553 , 155.80211635, 143.70181085, 126.13753819, 181.06471818, 105.03679977, 131.0479936 , 90.50606427, 252.66486639, 84.84786067, 59.41005358, 184.51368208, 201.46598714, 129.96333913, 90.65641478, 200.10932516, 55.2884802 , 171.60459062, 195.40750666, 122.14139787, 231.72783897, 159.49750022, 160.32104862, 165.53701866, 260.73217736, 259.77213787, 204.69526082, 185.66480969, 61.09821961, 209.9214333 , 108.50410841, 141.18424239, 126.10337002, 174.32819351, 214.4947322 , 162.1789921 , 160.57776438, 134.11449594, 171.63076427, 71.71500885, 263.46782314, 113.73653782, 112.76227977, 134.37721414, 110.67874472, 98.67153573, 157.2591359 , 78.32019218, 265.97090212, 57.85502185, 100.38532691, 101.91670102, 277.13032245, 168.6443445 , 64.75637937, 184.37359745, 174.74927914, 188.78215433, 181.56001383, 92.74463449, 145.41037529, 257.78620944, 196.57335354, 276.1920927 , 50.66776115, 179.12879963, 200.29366671, 167.29501922, 158.93206689, 156.08070427, 233.38241229, 125.30241353, 167.05404644, 171.66748431, 223.17843095, 156.7055944 , 103.29063169, 84.08205647, 139.87060658, 189.99648341, 200.20182211, 143.61906164, 170.00220231, 112.05886847, 160.76337573, 130.06232976, 261.83022688, 102.24589129, 115.12771477, 119.14505163, 225.96991263, 63.51874043, 134.88829709, 120.01764214, 55.32147904, 189.95346987, 105.8037979 , 120.46197038, 211.35568232, 56.78368048])
import numpy
import theano.tensor as T
from theano import function
x = T.dscalar('x')
y = T.dscalar('y')
z = x + y
f = function([x, y], z)
f(8, 2)
array(10.0)
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))
b'Hello, TensorFlow!'
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
X_train = cancer.data[:340]
y_train = cancer.target[:340]
X_test = cancer.data[340:]
y_test = cancer.target[340:]
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
Using TensorFlow backend.
model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
model.fit(X_train, y_train,
epochs=20,
batch_size=50)
Epoch 1/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 2/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 3/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 4/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 5/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 6/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 7/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 8/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 9/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 10/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 11/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 12/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 13/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 14/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 15/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 16/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 17/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 18/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 19/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382 Epoch 20/20 340/340 [==============================] - 0s - loss: 7.3616 - acc: 0.5382
<keras.callbacks.History at 0x1d49ea58be0>
predictions = model.predict_classes(X_test)
32/229 [===>..........................] - ETA: 1s
from sklearn import metrics
print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))
Accuracy: 0.759825327511 precision recall f1-score support 0 0.00 0.00 0.00 55 1 0.76 1.00 0.86 174 avg / total 0.58 0.76 0.66 229
C:\Program Files\Anaconda3\lib\site-packages\sklearn\metrics\classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 'precision', 'predicted', average, warn_for)
model = Sequential()
model.add(Dense(15, input_dim=30, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
model.fit(X_train, y_train,
epochs=20,
batch_size=50)
Epoch 1/20 340/340 [==============================] - 0s - loss: 3.3799 - acc: 0.3941 Epoch 2/20 340/340 [==============================] - 0s - loss: 1.3740 - acc: 0.6059 Epoch 3/20 340/340 [==============================] - 0s - loss: 0.4258 - acc: 0.8471 Epoch 4/20 340/340 [==============================] - 0s - loss: 0.2859 - acc: 0.8912 Epoch 5/20 340/340 [==============================] - 0s - loss: 0.2061 - acc: 0.9206 Epoch 6/20 340/340 [==============================] - 0s - loss: 0.2407 - acc: 0.8941 Epoch 7/20 340/340 [==============================] - 0s - loss: 0.2725 - acc: 0.9118 Epoch 8/20 340/340 [==============================] - 0s - loss: 0.5237 - acc: 0.8676 Epoch 9/20 340/340 [==============================] - 0s - loss: 0.2165 - acc: 0.9324 Epoch 10/20 340/340 [==============================] - 0s - loss: 0.2502 - acc: 0.9029 Epoch 11/20 340/340 [==============================] - 0s - loss: 0.3235 - acc: 0.8853 Epoch 12/20 340/340 [==============================] - 0s - loss: 0.3115 - acc: 0.8912 Epoch 13/20 340/340 [==============================] - 0s - loss: 0.2975 - acc: 0.9059 Epoch 14/20 340/340 [==============================] - 0s - loss: 0.3426 - acc: 0.9118 Epoch 15/20 340/340 [==============================] - 0s - loss: 0.3763 - acc: 0.9176 Epoch 16/20 340/340 [==============================] - 0s - loss: 0.2420 - acc: 0.9088 Epoch 17/20 340/340 [==============================] - 0s - loss: 0.4274 - acc: 0.8618 Epoch 18/20 340/340 [==============================] - 0s - loss: 0.1885 - acc: 0.9353 Epoch 19/20 340/340 [==============================] - 0s - loss: 0.2361 - acc: 0.9235 Epoch 20/20 340/340 [==============================] - 0s - loss: 0.3154 - acc: 0.9000
<keras.callbacks.History at 0x1d49ee45908>
predictions = model.predict_classes(X_test)
32/229 [===>..........................] - ETA: 1s
print('Accuracy:', metrics.accuracy_score(y_true=y_test, y_pred=predictions))
print(metrics.classification_report(y_true=y_test, y_pred=predictions))
Accuracy: 0.912663755459 precision recall f1-score support 0 0.78 0.89 0.83 55 1 0.96 0.92 0.94 174 avg / total 0.92 0.91 0.91 229