import pandas
import pandas_gbq
##excel1 = 'Spark Q419 dj reads sample.xlsx'
##excel2 = 'Spark Q419 local radio sample.xlsx'
excel1 = 'APEX Q320 sample.xlsx'
excel2 = 'APEX Sep 2020 sample.xlsx'
project_id = 'marketing-bigquery-project'
df1 = pandas.read_excel(excel1)
df2 = pandas.read_excel(excel2)
##df3 = pandas.read_excel(excel3)
values1 = df1[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]
values2 = df2[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]
##values3 = df3[['CLI','MARKET NAME','STA','STA TYPE NAME','DEMO #1','DP','LEN','AIR DATE','AFF','FULL TIME','ISCI CODE']]
dataframes = [values1, values2]
joinedData = pandas.concat(dataframes)
##joinedData = values1
Below code extracts the first 3 letters from the AIR DATE column and creates a new column Month with the corresponding number of the month
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JAN', 'MONTH'] = '1'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'FEB', 'MONTH'] = '2'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'MAR', 'MONTH'] = '3'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'APR', 'MONTH'] = '4'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'MAY', 'MONTH'] = '5'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JUN', 'MONTH'] = '6'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'JUL', 'MONTH'] = '7'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'AUG', 'MONTH'] = '8'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'SEP', 'MONTH'] = '9'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'OCT', 'MONTH'] = '10'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'NOV', 'MONTH'] = '11'
joinedData.loc[joinedData['AIR DATE'].str.slice(0,3) == 'DEC', 'MONTH'] = '12'
Create Date column by extracting the last 5 characters from the AIR DATE string
joinedData['Date'] = joinedData['AIR DATE'].str.slice(3,6) + '20' + joinedData['AIR DATE'].str.slice(6,8)
Create full date column by concatenating the MONTH and Date column
joinedData['full date'] = joinedData['MONTH'] + '/' + joinedData['Date']
Create time column by extracting and formatting the time from the TIME column
joinedData['time'] = joinedData['FULL TIME'].str.slice(0,2) + ':' + joinedData['FULL TIME'].str.slice(2,4)
joinedData['am/pm'] = joinedData['FULL TIME'].str.slice(4,5)
Replace spaces with underscore because GBQ does not accept spaces in the column names
joinedDataFormatted = joinedData.rename(columns = {'MARKET NAME': 'MARKET_NAME', 'STA TYPE NAME': 'STA_TYPE_NAME', 'DEMO #1': 'DEMO',
'DP': 'daypart', 'AIR DATE': 'AIR_DATE', 'AFF': 'DAY', 'FULL TIME': 'FULL_TIME',
'ISCI CODE': 'ISCI_CODE', 'full date': 'full_date', 'am/pm': 'am_pm'} )
joinedDataFormatted.head(20)
pandas_gbq.to_gbq(joinedDataFormatted, 'Spike_Attribution_Model.fct_radio_log_data_q3_2020', project_id = project_id,
table_schema = [{'name': 'CLI', 'type': 'STRING'},
{'name': 'MARKET_NAME', 'type': 'STRING'},
{'name': 'STA', 'type': 'STRING'},
{'name': 'STA_TYPE_NAME', 'type': 'STRING'},
{'name': 'DEMO', 'type': 'STRING'},
{'name': 'daypart', 'type': 'STRING'},
{'name': 'LEN', 'type': 'STRING'},
{'name': 'AIR_DATE', 'type': 'STRING'},
{'name': 'DAY', 'type': 'STRING'},
{'name': 'FULL_TIME', 'type': 'STRING'},
{'name': 'ISCI_CODE', 'type': 'STRING'},
{'name': 'MONTH', 'type': 'STRING'},
{'name': 'DATE', 'type': 'STRING'},
{'name': 'full_date', 'type': 'STRING'},
{'name': 'time', 'type': 'STRING'},
{'name': 'am_pm', 'type': 'STRING'}], if_exists = 'replace')