%%time
import csv

import pandas as pd

# Load the log one physical line per row into a single "raw" column.
# The file is read in chunks of 100000 rows; the chunks are concatenated
# into one frame afterwards.
# NOTE(review): newer pandas releases reportedly reject "\n" as a separator --
# confirm against the installed version before upgrading.
dfs = pd.read_csv(
    "../../linux/git_diff.log",
    skip_blank_lines=False,  # keep blank lines as rows instead of dropping them
    sep="\n",
    encoding="latin-1",
    chunksize=100000,
    names=["raw"],
    # Treat '"' as an ordinary character: with the default quoting, a line
    # that starts with a double quote loses its quotes and can swallow the
    # following lines up to the next closing quote.
    quoting=csv.QUOTE_NONE,
)
df = pd.concat(dfs)
df.tail()
Wall time: 2min 33s
# Print a concise summary of df: index range, columns, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 112519384 entries, 0 to 112519383 Data columns (total 1 columns): raw object dtypes: object(1) memory usage: 858.5+ MB