import pandas as pd
import numpy as np
import json
import seaborn as sns
from glob import glob
from fastai.conv_learner import *
# Root of the local Yelp data directory (Windows-style path; each
# directory constant keeps its trailing separator so file names can be
# appended directly).
PATH = "d:\\data\\yelpdata\\"
#PATH = "/d/data/yelpdata/"

# Sub-directories derived from the root.
DATA = PATH + 'dataset\\'              # CSV tables
PHOT = PATH + 'photos\\'               # photos directory
WEAT = DATA + 'weather\\'              # raw weather files
PROC = DATA + 'processed_weather\\'    # weather files after preprocessing

# Show which CSV files are present in the dataset directory.
for f in glob(DATA + "*.csv"):
    print(f)
# Output of the CSV listing above:
#   d:\data\yelpdata\dataset\business_on.csv
#   d:\data\yelpdata\dataset\checkin_on.csv
#   d:\data\yelpdata\dataset\photos_on.csv
#   d:\data\yelpdata\dataset\review_on.csv
#   d:\data\yelpdata\dataset\review_on_small.csv
#   d:\data\yelpdata\dataset\tip_on.csv
#   d:\data\yelpdata\dataset\user_on.csv
#   d:\data\yelpdata\dataset\user_on_friends.csv
def merge_ts_csv(input_files=(), *args, output_file, index_col='Date/Time'):
    """Merge several time-series CSV files into a single CSV file.

    Each input file is read with pandas, indexed by ``index_col`` and
    folded into an accumulator with ``DataFrame.combine_first``. For a
    timestamp/column pair present in more than one file, the value from
    the earliest file in ``input_files`` is kept; later files only fill
    in rows, columns or missing values that earlier files did not provide.

    Args:
        input_files: Iterable of paths of the CSV files to merge. Every
            file must contain a column named ``index_col``.
        *args: Accepted for backward compatibility and ignored.
        output_file: Path the merged CSV is written to (keyword-only).
            An existing file at that path is overwritten.
        index_col: Name of the column holding the timestamps
            (keyword-only). Defaults to ``'Date/Time'``.

    Raises:
        KeyError: If an input file has no ``index_col`` column.
    """
    merged = pd.DataFrame()
    for path in input_files:
        frame = pd.read_csv(path).set_index(index_col)
        merged = merged.combine_first(frame)

    # The seed frame's index is unnamed, and pandas may drop an index name
    # when combining indexes whose names differ. Name it explicitly so the
    # timestamp column always keeps its header in the output file.
    merged.index.name = index_col
    merged.to_csv(output_file)
# Re-save every raw weather file under PROC with the same file name,
# skipping the first 16 lines of each file on the way in.
files = os.listdir(WEAT)
for f in files:
    current = pd.read_csv(WEAT+f, skiprows=16)
    # NOTE(review): to_csv also writes the row index as an unnamed first
    # column, which is read back as "Unnamed: 0" and ends up in the merged
    # file. Confirm nothing downstream relies on it before adding
    # index=False here.
    current.to_csv(PROC+f)

# The merged output is written into PROC as well, so on a re-run it would
# be listed as one of its own inputs. Leave it out of the input list.
merged_name = 'all_weather.csv'
proc_files = [str(PROC)+name for name in os.listdir(PROC) if name != merged_name]
merge_ts_csv(input_files=proc_files, output_file=f'{PROC}{merged_name}')