In [ ]:

#########################################################################
##  Introduction to Database #1 - Applying Filtering and Targeting data
##  Atul Singh
##  www.datagenx.net
#########################################################################

In [69]:

# import
from sqlalchemy import create_engine, Table, MetaData, select, or_, and_, desc, func
# pip install psycopg2  #PostgreSQL driver
import psycopg2
import pandas as pd

Connecting PostgreSQL¶

In [2]:

# dialect and driver ('postgresql+psycopg2://'), 
# followed by the username and password ('student:datacamp'), 
# followed by the host and port ('@postgresql.csrrinzqubik.us-east-1.rds.amazonaws.com:5432/'), 
# and finally, the database name ('census').

# creating engine
engine = create_engine("postgresql+psycopg2://postgres:postgres@localhost:5432/test")

# reading table names from database
print(engine.table_names())

['Employee']

In [3]:

### Importing metadata with Table and MetaData object
metadata = MetaData()

# getting albums metadata
emp = Table('Employee', metadata, autoload=True, autoload_with=engine)
connection = engine.connect()

In [4]:

print(repr(emp))

Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=<Employee>), Column('LastName', VARCHAR(), table=<Employee>), Column('Gender', String(), table=<Employee>), Column('EmpId', INTEGER(), table=<Employee>), schema=None)

In [6]:

# we can get the same metadata details from metadata.tables dictionaty
print(repr(metadata.tables['Employee']))

Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=<Employee>), Column('LastName', VARCHAR(), table=<Employee>), Column('Gender', String(), table=<Employee>), Column('EmpId', INTEGER(), table=<Employee>), schema=None)

In [15]:

## Let's query on table albums
stmt = select([emp])
stmt = stmt.where(emp.columns.Gender == 'F')

In [16]:

#results = connection.execute(stmt).fetchall()
result_proxy = connection.execute(stmt)
print(result_proxy)

<sqlalchemy.engine.result.ResultProxy object at 0x000001939727A828>

In [17]:

results = result_proxy.fetchall()
print(results)

[('Priya', 'Rajawat', 'F', 3), ('Divya', 'Patel', 'F', 4)]

In [19]:

# get all the column names
print(result_proxy.keys())

['FirstName', 'LastName', 'Gender', 'EmpId']

In [20]:

# get total no of rows in tables
print(len(results))

In [21]:

# get first row
result1 = results[0]
print(result1)

('Priya', 'Rajawat', 'F', 3)

In [22]:

# print column names
print(result1.keys())

['FirstName', 'LastName', 'Gender', 'EmpId']

In [24]:

# access particulat column data
print(result1.Gender)

In [26]:

# printing the results
for result in results:
    print(result.EmpId, result[0], result.LastName, result.Gender)

3 Priya Rajawat F
4 Divya Patel F

More complex Join and other conjuntion¶

In [30]:

gend = ['M']

In [32]:

# Now I have to fetch all the records which match with Gender list - gender
stmt = select([emp])
stmt = stmt.where(emp.columns.Gender.in_(gend))

In [33]:

# Here no need to use fetch all funct
for result in connection.execute(stmt):
    print(result)

('Atul', 'Singh', 'M', 1)
('Rahul', 'Singh', 'M', 2)

In [36]:

# same way we can use or_(), and_(), not_(), like() and between()
# let's see a example of or_()
stmt = select([emp])
stmt = stmt.where(or_(emp.columns.Gender=='M', emp.columns.Gender=='F'))
for result in connection.execute(stmt):
    print(result)

('Atul', 'Singh', 'M', 1)
('Rahul', 'Singh', 'M', 2)
('Priya', 'Rajawat', 'F', 3)
('Divya', 'Patel', 'F', 4)

In [39]:

# and statement
stmt = select([emp])
stmt = stmt.where(and_(emp.columns.Gender=='M', emp.columns.LastName=='Singh'))
for result in connection.execute(stmt):
    print(result)

('Atul', 'Singh', 'M', 1)
('Rahul', 'Singh', 'M', 2)

In [40]:

# like statement
stmt = select([emp])
stmt = stmt.where(emp.columns.LastName.like('P%'))
for result in connection.execute(stmt):
    print(result)

('Divya', 'Patel', 'F', 4)

Overview of Ordering¶

In [44]:

stmt = select([emp])
stmt = stmt.order_by(emp.columns.LastName)
results = connection.execute(stmt).fetchall()
print(results)

[('Divya', 'Patel', 'F', 4), ('Priya', 'Rajawat', 'F', 3), ('Atul', 'Singh', 'M', 1), ('Rahul', 'Singh', 'M', 2)]

In [45]:

# selecting any two columns
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(emp.columns.LastName)
results = connection.execute(stmt).fetchall()
print(results)

[('Divya', 'Patel'), ('Priya', 'Rajawat'), ('Atul', 'Singh'), ('Rahul', 'Singh')]

In [47]:

# selecting two columns in reverse order
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(desc(emp.columns.LastName))
results = connection.execute(stmt).fetchall()
print(results)

[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]

In [49]:

# one column in ascending order and one in desc
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(desc(emp.columns.LastName), emp.columns.FirstName)
results = connection.execute(stmt).fetchall()
print(results)

[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]

Aggregating the data¶

In [51]:

# import func
# from sqlalchemy import func

In [57]:

# counting distinct gender in table Employee
stmt = select([func.count(emp.columns.Gender.distinct())])
results = connection.execute(stmt)
print(results)

<sqlalchemy.engine.result.ResultProxy object at 0x00000193972B6588>

In [58]:

results = results.scalar()
print(results)

In [62]:

## taking the gender count per records
stmt = select([emp.columns.Gender, func.count(emp.columns.Gender)])
stmt = stmt.group_by(emp.columns.Gender)
results = connection.execute(stmt).fetchall()
print(results)
print(results[0].keys())

[('M', 2), ('F', 2)]
['Gender', 'count_1']

In [68]:

# labeling the count columns
stmt = select([emp.columns.Gender, func.count(emp.columns.Gender).label('Gender_Count')])
stmt = stmt.group_by(emp.columns.Gender)
results = connection.execute(stmt).fetchall()
print(results)
print(results[0].keys())

[('M', 2), ('F', 2)]
['Gender', 'Gender_Count']

Conveting results to DataFrame¶

In [ ]:

# importing pandas
# import pandas as pd

In [70]:

df = pd.DataFrame(results)
df.columns = results[0].keys()
print(df)

  Gender  Gender_Count
0      M             2
1      F             2

Connecting to Oracle¶

In [1]:

# installation of Oracle Driver
# pip install cx_Oracle
import cx_Oracle

In [6]:

con = cx_Oracle.connect('C##ATUL/atul@127.0.0.1/smpl')  #user/password@host/network_alias
print(con.version)

12.1.0.2.0

In [12]:

# creating query 
cur = con.cursor()
cur.execute('SELECT COURSE_ID, TITLE, DEPT_NAME, CREDITS FROM COURSE WHERE DEPT_NAME=\'Comp. Sci.\'')

Out[12]:

<cx_Oracle.Cursor on <cx_Oracle.Connection to C##ATUL@127.0.0.1/smpl>>

In [13]:

# fetching single records with fetchone()
print(cur.fetchone())

('539', 'International Finance', 'Comp. Sci.', 3)

In [15]:

# fetching mamy rows
print(cur.fetchmany(numRows=2))

[('274', 'Corporate Law', 'Comp. Sci.', 4), ('949', 'Japanese', 'Comp. Sci.', 3)]

In [16]:

# fetching all rows
print(cur.fetchall())

[('647', 'Service-Oriented Architectures', 'Comp. Sci.', 4), ('747', 'International Practicum', 'Comp. Sci.', 4), ('584', 'Computability Theory', 'Comp. Sci.', 3), ('276', 'Game Design', 'Comp. Sci.', 4), ('359', 'Game Programming', 'Comp. Sci.', 4), ('284', 'Topology', 'Comp. Sci.', 4), ('571', 'Plastics', 'Comp. Sci.', 4)]

In [17]:

# printing results   
for res in cur:
    print(res)  # this will not print any records 

In [ ]:

con.close()

In [ ]:

############################################################
## Atul Singh  | www.datagenx.net | lnked.in/atulsingh
############################################################