In [1]:
# IMPORT
import pandas as pd

Pandas Data Structure - Series

In [2]:
# Build a Series from a plain Python list. No index is passed, so pandas
# supplies the default integer labels.
sr = pd.Series(data=[1, 2, 3, -3, 4, 0])
print(sr)
0    1
1    2
2    3
3   -3
4    4
5    0
dtype: int64
In [3]:
# Get the raw values of the Series, without the index labels.
print(sr.values)
[ 1  2  3 -3  4  0]
In [4]:
# Get the index (the row labels) of the Series.
print(sr.index)
RangeIndex(start=0, stop=6, step=1)
In [5]:
# Some calculations on a Series: look up single elements by index label and
# combine them. `.loc` makes the label-based lookup explicit, because a bare
# `sr[2]` with an integer key can be read as either a label or a position.
print("2nd indexed value:", sr.loc[2],
      "\nadding 2nd + 4rd index:", sr.loc[2] + sr.loc[4])
2nd indexed value: 3 
adding 2nd + 4rd index: 7
In [6]:
# Create a Series whose index uses explicit string labels instead of the
# default integer labels.
sr1 = pd.Series(
    data=[1, 2, 0, -3, 4, -6],
    index=list("abcdef"),
)
print(sr1)
a    1
b    2
c    0
d   -3
e    4
f   -6
dtype: int64
In [7]:
# Walk the Series as (label, value) pairs. `items()` yields both together,
# so no separate `sr1[label]` lookup is needed on every iteration, and each
# value stays a scalar even if a label were repeated in the index.
for label, value in sr1.items():
    print(label, "\t", value)
a 	 1
b 	 2
c 	 0
d 	 -3
e 	 4
f 	 -6
In [8]:
# Boolean-mask filtering: keep only the non-negative entries
# (zero is included because the comparison is >=).
print(sr1.loc[sr1 >= 0])
a    1
b    2
c    0
e    4
dtype: int64
In [9]:
# A dictionary can be converted into a Series; start from a plain dict
# that maps a state name to a number.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}
print(sdata, type(sdata))
{'Texas': 71000, 'Oregon': 16000, 'Ohio': 35000, 'Utah': 5000} <class 'dict'>
In [10]:
# Convert the dict to a Series: the keys become the index labels and the
# values become the data.
sr2 = pd.Series(sdata)
print(sr2)
Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64
In [11]:
# Build a Series from the same dict, but with an explicit list of index labels.
# Labels with no matching key in sdata ('California', 'Alabama') come out as NaN,
# and keys that are not listed in ind ('Ohio', 'Utah') are left out.
ind = ['California', 'Alabama', 'Oregon', 'Texas']
sr3 = pd.Series(sdata, index=ind)
print(sr3)
California        NaN
Alabama           NaN
Oregon        16000.0
Texas         71000.0
dtype: float64
In [12]:
# California's and Alabama's values are NaN (Not a Number) because sdata has no entry for those index labels.
# Check each element for a missing (null) value.
print(pd.isnull(sr3))
California     True
Alabama        True
Oregon        False
Texas         False
dtype: bool
In [13]:
# The inverse check: True where a value is present, False where it is missing.
print(pd.notnull(sr3))
California    False
Alabama       False
Oregon         True
Texas          True
dtype: bool
In [14]:
# Arithmetic aligns the two Series by index label; a label that is absent
# (or NaN) in either operand gives NaN in the result.
print(sr2+sr3)
Alabama            NaN
California         NaN
Ohio               NaN
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64
In [15]:
# Series object can have Name and index Name
sr3.name = "States"
sr3.index.name = "Population"
print(sr3)
Population
California        NaN
Alabama           NaN
Oregon        16000.0
Texas         71000.0
Name: States, dtype: float64
In [16]:
# Stats about Series
print(sr3.name,"\t",sr3.shape,"\t",len(sr3))  #print name, shape and length
States 	 (4,) 	 4
In [17]:
print(sr3.index,"\t",sr3.index.name)  #print indexes and index name
Index(['California', 'Alabama', 'Oregon', 'Texas'], dtype='object', name='Population') 	 Population
In [18]:
print(sr3.dtype)  # print the data type of the values
float64
In [19]:
print(sr3.values) # print the raw Series values, without the index labels
[    nan     nan  16000.  71000.]