#########################################################################
## Introduction to Database #1 - Applying Filtering and Targeting data
## Atul Singh
## www.datagenx.net
#########################################################################
# import
from sqlalchemy import create_engine, Table, MetaData, select, or_, and_, desc, func
# pip install psycopg2 #PostgreSQL driver
import psycopg2
import pandas as pd
# dialect and driver ('postgresql+psycopg2://'),
# followed by the username and password ('student:datacamp'),
# followed by the host and port ('@postgresql.csrrinzqubik.us-east-1.rds.amazonaws.com:5432/'),
# and finally, the database name ('census').
# creating engine
engine = create_engine("postgresql+psycopg2://postgres:postgres@localhost:5432/test")
# reading table names from database
print(engine.table_names())
['Employee']
### Importing metadata with Table and MetaData object
metadata = MetaData()
# getting albums metadata
emp = Table('Employee', metadata, autoload=True, autoload_with=engine)
connection = engine.connect()
print(repr(emp))
Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=<Employee>), Column('LastName', VARCHAR(), table=<Employee>), Column('Gender', String(), table=<Employee>), Column('EmpId', INTEGER(), table=<Employee>), schema=None)
# we can get the same metadata details from metadata.tables dictionaty
print(repr(metadata.tables['Employee']))
Table('Employee', MetaData(bind=None), Column('FirstName', VARCHAR(), table=<Employee>), Column('LastName', VARCHAR(), table=<Employee>), Column('Gender', String(), table=<Employee>), Column('EmpId', INTEGER(), table=<Employee>), schema=None)
## Let's query on table albums
stmt = select([emp])
stmt = stmt.where(emp.columns.Gender == 'F')
#results = connection.execute(stmt).fetchall()
result_proxy = connection.execute(stmt)
print(result_proxy)
<sqlalchemy.engine.result.ResultProxy object at 0x000001939727A828>
results = result_proxy.fetchall()
print(results)
[('Priya', 'Rajawat', 'F', 3), ('Divya', 'Patel', 'F', 4)]
# get all the column names
print(result_proxy.keys())
['FirstName', 'LastName', 'Gender', 'EmpId']
# get total no of rows in tables
print(len(results))
2
# get first row
result1 = results[0]
print(result1)
('Priya', 'Rajawat', 'F', 3)
# print column names
print(result1.keys())
['FirstName', 'LastName', 'Gender', 'EmpId']
# access particulat column data
print(result1.Gender)
F
# printing the results
for result in results:
print(result.EmpId, result[0], result.LastName, result.Gender)
3 Priya Rajawat F 4 Divya Patel F
gend = ['M']
# Now I have to fetch all the records which match with Gender list - gender
stmt = select([emp])
stmt = stmt.where(emp.columns.Gender.in_(gend))
# Here no need to use fetch all funct
for result in connection.execute(stmt):
print(result)
('Atul', 'Singh', 'M', 1) ('Rahul', 'Singh', 'M', 2)
# same way we can use or_(), and_(), not_(), like() and between()
# let's see a example of or_()
stmt = select([emp])
stmt = stmt.where(or_(emp.columns.Gender=='M', emp.columns.Gender=='F'))
for result in connection.execute(stmt):
print(result)
('Atul', 'Singh', 'M', 1) ('Rahul', 'Singh', 'M', 2) ('Priya', 'Rajawat', 'F', 3) ('Divya', 'Patel', 'F', 4)
# and statement
stmt = select([emp])
stmt = stmt.where(and_(emp.columns.Gender=='M', emp.columns.LastName=='Singh'))
for result in connection.execute(stmt):
print(result)
('Atul', 'Singh', 'M', 1) ('Rahul', 'Singh', 'M', 2)
# like statement
stmt = select([emp])
stmt = stmt.where(emp.columns.LastName.like('P%'))
for result in connection.execute(stmt):
print(result)
('Divya', 'Patel', 'F', 4)
stmt = select([emp])
stmt = stmt.order_by(emp.columns.LastName)
results = connection.execute(stmt).fetchall()
print(results)
[('Divya', 'Patel', 'F', 4), ('Priya', 'Rajawat', 'F', 3), ('Atul', 'Singh', 'M', 1), ('Rahul', 'Singh', 'M', 2)]
# selecting any two columns
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(emp.columns.LastName)
results = connection.execute(stmt).fetchall()
print(results)
[('Divya', 'Patel'), ('Priya', 'Rajawat'), ('Atul', 'Singh'), ('Rahul', 'Singh')]
# selecting two columns in reverse order
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(desc(emp.columns.LastName))
results = connection.execute(stmt).fetchall()
print(results)
[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]
# one column in ascending order and one in desc
stmt = select([emp.columns.FirstName, emp.columns.LastName])
stmt = stmt.order_by(desc(emp.columns.LastName), emp.columns.FirstName)
results = connection.execute(stmt).fetchall()
print(results)
[('Atul', 'Singh'), ('Rahul', 'Singh'), ('Priya', 'Rajawat'), ('Divya', 'Patel')]
# import func
# from sqlalchemy import func
# counting distinct gender in table Employee
stmt = select([func.count(emp.columns.Gender.distinct())])
results = connection.execute(stmt)
print(results)
<sqlalchemy.engine.result.ResultProxy object at 0x00000193972B6588>
results = results.scalar()
print(results)
2
## taking the gender count per records
stmt = select([emp.columns.Gender, func.count(emp.columns.Gender)])
stmt = stmt.group_by(emp.columns.Gender)
results = connection.execute(stmt).fetchall()
print(results)
print(results[0].keys())
[('M', 2), ('F', 2)] ['Gender', 'count_1']
# labeling the count columns
stmt = select([emp.columns.Gender, func.count(emp.columns.Gender).label('Gender_Count')])
stmt = stmt.group_by(emp.columns.Gender)
results = connection.execute(stmt).fetchall()
print(results)
print(results[0].keys())
[('M', 2), ('F', 2)] ['Gender', 'Gender_Count']
# importing pandas
# import pandas as pd
df = pd.DataFrame(results)
df.columns = results[0].keys()
print(df)
Gender Gender_Count 0 M 2 1 F 2
# installation of Oracle Driver
# pip install cx_Oracle
import cx_Oracle
con = cx_Oracle.connect('C##ATUL/atul@127.0.0.1/smpl') #user/password@host/network_alias
print(con.version)
12.1.0.2.0
# creating query
cur = con.cursor()
cur.execute('SELECT COURSE_ID, TITLE, DEPT_NAME, CREDITS FROM COURSE WHERE DEPT_NAME=\'Comp. Sci.\'')
<cx_Oracle.Cursor on <cx_Oracle.Connection to C##ATUL@127.0.0.1/smpl>>
# fetching single records with fetchone()
print(cur.fetchone())
('539', 'International Finance', 'Comp. Sci.', 3)
# fetching mamy rows
print(cur.fetchmany(numRows=2))
[('274', 'Corporate Law', 'Comp. Sci.', 4), ('949', 'Japanese', 'Comp. Sci.', 3)]
# fetching all rows
print(cur.fetchall())
[('647', 'Service-Oriented Architectures', 'Comp. Sci.', 4), ('747', 'International Practicum', 'Comp. Sci.', 4), ('584', 'Computability Theory', 'Comp. Sci.', 3), ('276', 'Game Design', 'Comp. Sci.', 4), ('359', 'Game Programming', 'Comp. Sci.', 4), ('284', 'Topology', 'Comp. Sci.', 4), ('571', 'Plastics', 'Comp. Sci.', 4)]
# printing results
for res in cur:
print(res) # this will not print any records
con.close()
############################################################
## Atul Singh | www.datagenx.net | lnked.in/atulsingh
############################################################