Running multiple queries in parallel across threads does not seem to yield much of an improvement.
Clearly I'm doing something wrong.
import sqlalchemy
import pandas as pd
import concurrent.futures
# Module-level engine shared by every thread; note it is rebound to a
# PostgreSQL engine further down, and query() reads it as a global.
engine = sqlalchemy.create_engine('mysql://root@localhost/indiatoday')
def query():
    """Fetch the distinct (ordering, cat_id) pairs as a DataFrame.

    Uses the module-level ``engine`` global, so the target database is
    whatever ``engine`` is bound to at call time.
    """
    sql = 'SELECT DISTINCT ordering, cat_id FROM jos_article_section'
    return pd.read_sql(sql, engine)
def parallel(workers=1, count=4, query_fn=None):
    """Run ``count`` copies of a query concurrently on a thread pool.

    Parameters
    ----------
    workers : int
        Number of threads in the pool.
    count : int
        How many times the query is submitted.
    query_fn : callable, optional
        Zero-argument callable returning a DataFrame.  Defaults to the
        module-level ``query``, preserving the original behavior.

    Returns
    -------
    int
        Total row count across all successful results; 0 if every
        submitted query raised.
    """
    if query_fn is None:
        query_fn = query
    results = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        # A plain list suffices — the original mapped futures to an index
        # that was never used.
        futures = [executor.submit(query_fn) for _ in range(count)]
        for future in concurrent.futures.as_completed(futures):
            try:
                results.append(future.result())
            except Exception as exc:
                # Best-effort: report the failure and keep collecting.
                print('Exception: %s' % exc)
    if not results:
        # pd.concat raises ValueError on an empty list; treat "all
        # queries failed" as zero rows instead of crashing.
        return 0
    return len(pd.concat(results))
%timeit -r1 parallel(workers=1, count=4)
1 loops, best of 1: 8.75 s per loop
%timeit -r1 parallel(workers=4, count=4)
1 loops, best of 1: 7.14 s per loop
# Rebind the shared global engine to PostgreSQL; the query() defined
# just below picks it up at call time.
engine = sqlalchemy.create_engine('postgresql://gitlab@localhost/gitlabhq_production')
def query():
    """Load every row of the events table into a DataFrame.

    Reads the module-level ``engine`` global (PostgreSQL at this point
    in the session).
    """
    sql = 'SELECT * FROM events'
    return pd.read_sql(sql, engine)
%timeit -r1 parallel(workers=1, count=4)
1 loops, best of 1: 2.03 s per loop
%timeit -r1 parallel(workers=4, count=4)
1 loops, best of 1: 1.97 s per loop