from contextlib import closing
from operator import itemgetter

try:  # Python 2
    from urllib import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

# NOTE(review): `ols` is the legacy top-level pandas OLS entry point; it is
# not available in recent pandas releases -- confirm the pinned version.
from pandas import read_csv, DatetimeIndex, ols


def get_index(gindex, startdate=20040101):
    """
    API wrapper for Google Domestic Trends data.
    https://www.google.com/finance/domestic_trends

    Available Indices:

    'ADVERT', 'AIRTVL', 'AUTOBY', 'AUTOFI', 'AUTO', 'BIZIND', 'BNKRPT',
    'COMLND', 'COMPUT', 'CONSTR', 'CRCARD', 'DURBLE', 'EDUCAT', 'INVEST',
    'FINPLN', 'FURNTR', 'INSUR', 'JOBS', 'LUXURY', 'MOBILE', 'MTGE',
    'RLEST', 'RENTAL', 'SHOP', 'TRAVEL', 'UNEMPL'

    Parameters:
        gindex: index code appended to ``GOOGLEINDEX_US:`` in the request URL.
        startdate: value sent as the ``startdate`` query parameter.

    Returns:
        The single non-constant ``Close`` column of the downloaded CSV,
        renamed to ``gindex`` and indexed by a sorted ``DatetimeIndex``.

    Raises:
        KeyError: if, after dropping constant columns, the data does not
            contain a column named ``gindex`` (i.e. it did not reduce to a
            lone ``Close`` column).
    """
    base_url = 'http://www.google.com/finance/historical?q=GOOGLEINDEX_US:'
    full_url = '%s%s&output=csv&startdate=%s' % (base_url, gindex, startdate)

    # `closing` guarantees the HTTP response is released even if parsing
    # fails; on Python 2 the response object is not itself a context manager.
    with closing(urlopen(full_url)) as response:
        dframe = read_csv(response, index_col=0)

    dframe.index = DatetimeIndex(dframe.index)
    dframe = dframe.sort_index(axis=0)

    # Columns holding a single repeated value carry no information; collect
    # them first so the frame is not modified while its columns are iterated.
    constant_cols = [col for col in dframe.columns
                     if len(dframe[col].unique()) == 1]
    dframe = dframe.drop(constant_cols, axis=1)

    if len(dframe.columns) == 1 and dframe.columns[0] == 'Close':
        dframe.columns = [gindex]

    if gindex not in dframe.columns:
        raise KeyError(
            'expected a single non-constant Close column for %r, got %r'
            % (gindex, list(dframe.columns)))
    return dframe[gindex]


autobuyers = get_index('AUTOBY')  # https://www.google.com/finance?q=GOOGLEINDEX_US:AUTOBY
autofinancing = get_index('AUTOFI')  # https://www.google.com/finance?q=GOOGLEINDEX_US:AUTOFI

# Run OLS testing if searches for queries related to people looking to
# purchase a car are predictive of searches for automotive financing.
model = ols(y=autofinancing, x={'Automotive Buyers': autobuyers})
print(model.summary)

# Plot actual Y vs. predicted Y
pred = model.predict()
pred.plot(color='b')
autofinancing.plot(color='g')

# Plot the residual
err = model.resid
err.plot(color='r')

# Test simple model of 1-period autocorrelation
err_t1 = err.tshift(1, freq='D')
autocorr_model = ols(y=err, x={'err_t1': err_t1})
print(autocorr_model.summary)

# Test model with up to 14-period autocorrelation
err_terms = {}
for lag in range(1, 15):
    err_terms['err_%s' % lag] = err.tshift(lag, freq='D')
autocorr_model = ols(y=err, x=err_terms)
print(autocorr_model.summary)

# Find the lagged error terms that are significant at the 95% level
significantvals = [
    (term, paramval)
    for pval, term, paramval in zip(autocorr_model.p_value,
                                    autocorr_model.beta.index,
                                    autocorr_model.beta)
    if pval < 0.05
]

# Sort them by the size of the coefficient
sorted_significantvals = sorted(significantvals, key=itemgetter(1))
for sv in sorted_significantvals:
    print('%s: %s' % (sv[0], sv[1]))