import psycopg2
import pandas as pd
import sqlalchemy as sa
import time
import seaborn as sns
import re
! pip install pandasql
Requirement already satisfied: pandasql in c:\users\dell\anaconda3\lib\site-packages Requirement already satisfied: pandas in c:\users\dell\anaconda3\lib\site-packages (from pandasql) Requirement already satisfied: sqlalchemy in c:\users\dell\anaconda3\lib\site-packages (from pandasql) Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages (from pandasql) Requirement already satisfied: python-dateutil>=2 in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql) Requirement already satisfied: pytz>=2011k in c:\users\dell\anaconda3\lib\site-packages (from pandas->pandasql) Requirement already satisfied: six>=1.5 in c:\users\dell\anaconda3\lib\site-packages (from python-dateutil>=2->pandas->pandasql)
! pip install psycopg2
Requirement already satisfied: psycopg2 in c:\users\dell\anaconda3\lib\site-packages
parameters = {
'username': 'postgres',
'password': 'root',
'server': 'localhost',
'database': 'ajay'
}
connection= 'postgresql://{username}:{password}@{server}:5432/{database}'.format(**parameters)
print (connection)
postgresql://postgres:root@localhost:5432/ajay
engine = sa.create_engine(connection, encoding="utf-8")
insp = sa.inspect(engine)
print(insp)
<sqlalchemy.dialects.postgresql.base.PGInspector object at 0x000000000B2E5278>
db_list = insp.get_schema_names()
print(db_list)
['information_schema', 'public']
engine.table_names()
['iris', 'temp', 'sales']
data3= pd.read_sql_query('select * from "sales" limit 10',con=engine)
data3.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10 entries, 0 to 9 Data columns (total 4 columns): customer_id 10 non-null int64 sales 10 non-null int64 date 10 non-null object product_id 10 non-null int64 dtypes: int64(3), object(1) memory usage: 400.0+ bytes
data3
customer_id | sales | date | product_id | |
---|---|---|---|---|
0 | 10001 | 5230 | 2017-02-07 | 524 |
1 | 10002 | 2781 | 2017-05-12 | 469 |
2 | 10003 | 2083 | 2016-12-18 | 917 |
3 | 10004 | 214 | 2015-01-19 | 354 |
4 | 10005 | 9407 | 2016-09-26 | 292 |
5 | 10006 | 4705 | 2015-10-17 | 380 |
6 | 10007 | 4729 | 2016-01-02 | 469 |
7 | 10008 | 7715 | 2015-09-12 | 480 |
8 | 10009 | 9898 | 2015-04-05 | 611 |
9 | 10010 | 5797 | 2015-08-13 | 959 |
data5= pd.read_sql_query('select * from "sales" limit 20',con=engine)
data5
customer_id | sales | date | product_id | |
---|---|---|---|---|
0 | 10001 | 5230 | 2017-02-07 | 524 |
1 | 10002 | 2781 | 2017-05-12 | 469 |
2 | 10003 | 2083 | 2016-12-18 | 917 |
3 | 10004 | 214 | 2015-01-19 | 354 |
4 | 10005 | 9407 | 2016-09-26 | 292 |
5 | 10006 | 4705 | 2015-10-17 | 380 |
6 | 10007 | 4729 | 2016-01-02 | 469 |
7 | 10008 | 7715 | 2015-09-12 | 480 |
8 | 10009 | 9898 | 2015-04-05 | 611 |
9 | 10010 | 5797 | 2015-08-13 | 959 |
10 | 10011 | 1283 | 2016-04-22 | 950 |
11 | 10012 | 2751 | 2015-01-01 | 322 |
12 | 10013 | 4422 | 2017-07-11 | 965 |
13 | 10014 | 6235 | 2015-03-07 | 783 |
14 | 10015 | 7302 | 2016-04-06 | 792 |
15 | 10016 | 6408 | 2016-10-21 | 347 |
16 | 10017 | 1880 | 2015-08-23 | 187 |
17 | 10018 | 3738 | 2017-03-12 | 222 |
18 | 10019 | 900 | 2015-07-20 | 236 |
19 | 10020 | 5516 | 2017-05-10 | 828 |
import pandasql as pdsql
str1="select * from data5 limit 5;"
df11=pdsql.sqldf(str1)
df11
customer_id | sales | date | product_id | |
---|---|---|---|---|
0 | 10001 | 5230 | 2017-02-07 | 524 |
1 | 10002 | 2781 | 2017-05-12 | 469 |
2 | 10003 | 2083 | 2016-12-18 | 917 |
3 | 10004 | 214 | 2015-01-19 | 354 |
4 | 10005 | 9407 | 2016-09-26 | 292 |
type(data5)
pandas.core.frame.DataFrame
data5= pd.read_sql_query('select * from "sales" limit 250',con=engine)
data5.head()
customer_id | sales | date | product_id | |
---|---|---|---|---|
0 | 10001 | 5230 | 2017-02-07 | 524 |
1 | 10002 | 2781 | 2017-05-12 | 469 |
2 | 10003 | 2083 | 2016-12-18 | 917 |
3 | 10004 | 214 | 2015-01-19 | 354 |
4 | 10005 | 9407 | 2016-09-26 | 292 |
str2="select avg(sales) from data5 ;"
df111=pdsql.sqldf(str2)
df111
avg(sales) | |
---|---|
0 | 5226.868 |