Yeah, the line that failed was "%time tmpDf=pd.read_pickle('{}/{}.pickle.gz'.format(base_mergedBySRR_dir,chunkSize))".
import pandas as pd
import numpy as np
from tqdm import tqdm
uploadDir='/cellar/users/btsui/Data/merged/snp/hg38/mergedBySRR/'  # directory holding the mergedBySRR pickle chunks
# RuntimeWarning emitted on import:
# /cellar/users/btsui/anaconda3/envs/deep_nlp_cpu/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
#   return f(*args, **kwds)
tmpDir='/cellar/users/btsui/Data/merged/snp/hg38/mergedBySRR/100000.pickle.gz'
tmpDf=pd.read_pickle(tmpDir)  # load the 100000 chunk directly by its full path
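Before converting formats it's worth a quick sanity check that the chunk loaded intact; this is just a sketch and assumes nothing about the column names.
print(tmpDf.shape)                                    # rows x columns in this 100000 chunk
print(tmpDf.dtypes)                                   # dtypes matter for the HDF5/parquet export
print(tmpDf.memory_usage(deep=True).sum()/1e9, 'GB')  # rough in-memory footprint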
"""
library(rhdf5)
mydata <- h5read("/tmp/100000.hdf5", "/mygroup/mydata")
"""
#!rm -f /tmp/100000.hdf5   # drop any stale copy first
tmpDf.to_hdf('/tmp/100000.hdf5',format='fixed',key='chunk')  # export the chunk as HDF5 so R can read it with rhdf5
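To confirm which paths the R side should ask for, the file that was just written can be walked with h5py (a sketch; assumes h5py is available in the same env).
import h5py
with h5py.File('/tmp/100000.hdf5','r') as f:
    f.visititems(lambda name, obj: print(name, obj))  # expect chunk/axis0, chunk/axis1, chunk/block0_values, ...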
#!chmod 400 /tmp/100000.hdf5   # optionally make the export read-only
#pd.read_hdf('/tmp/100000.hdf5',key='chunk')   # sanity check: read it back in pandas
#tmpDf.to_parquet('/tmp/100000.parquet',engine='pyarrow')   # parquet alternative; see the sketch below
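If the fixed-format HDF5 layout turns out awkward on the R side, the commented-out parquet route is easy to check; a sketch of the round trip, assuming pyarrow is installed (R could then read the file with the arrow package):
tmpDf.to_parquet('/tmp/100000.parquet', engine='pyarrow')
roundTrip=pd.read_parquet('/tmp/100000.parquet', engine='pyarrow')
pd.testing.assert_frame_equal(tmpDf, roundTrip)  # raises if anything changed in the round trip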
#https://stackoverflow.com/questions/37010212/what-is-the-fastest-way-to-upload-a-big-csv-file-in-notebook-to-work-with-python/37012035#37012035