Combine Multiple Excel Worksheets Into a Single Pandas Dataframe

Article posted here

In [1]:
import pandas as pd
In [2]:
workbook_url = 'https://github.com/chris1610/pbpython/raw/master/data/2018_Sales_Total_Tabs.xlsx'
In [3]:
single_df =  pd.read_excel(workbook_url, sheet_name='Sheet1')
In [4]:
single_df.head()
Out[4]:
account number name sku quantity unit price ext price date
0 412290 Jerde-Hilpert S2-77896 43 76.66 3296.38 2018-03-04 23:10:28
1 383080 Will LLC S1-93683 28 90.86 2544.08 2018-03-05 05:11:49
2 729833 Koepp Ltd S1-30248 13 44.84 582.92 2018-03-05 17:33:52
3 424914 White-Trantow S2-82423 38 50.93 1935.34 2018-03-05 21:40:10
4 672390 Kuhn-Gusikowski S1-50961 34 48.20 1638.80 2018-03-06 11:59:00
In [5]:
all_dfs = pd.read_excel(workbook_url, sheet_name=None)
In [6]:
type(all_dfs)
Out[6]:
collections.OrderedDict
In [7]:
type(all_dfs)
Out[7]:
collections.OrderedDict
In [8]:
all_dfs.keys()
Out[8]:
odict_keys(['Sheet1', 'Sheet2', 'Sheet3', 'Sheet4', 'Sheet5', 'Sheet6'])
In [9]:
all_dfs['Sheet1'].head()
Out[9]:
account number name sku quantity unit price ext price date
0 412290 Jerde-Hilpert S2-77896 43 76.66 3296.38 2018-03-04 23:10:28
1 383080 Will LLC S1-93683 28 90.86 2544.08 2018-03-05 05:11:49
2 729833 Koepp Ltd S1-30248 13 44.84 582.92 2018-03-05 17:33:52
3 424914 White-Trantow S2-82423 38 50.93 1935.34 2018-03-05 21:40:10
4 672390 Kuhn-Gusikowski S1-50961 34 48.20 1638.80 2018-03-06 11:59:00
In [10]:
all_dfs['Sheet2'].head()
Out[10]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2018-01-01 07:21:51
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2018-01-01 10:00:47
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2018-01-01 13:24:58
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2018-01-01 15:05:22
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2018-01-01 23:26:55
In [11]:
for sheet in all_dfs:
    print(f"{sheet} - {all_dfs[sheet].shape}")
Sheet1 - (39, 7)
Sheet2 - (35, 7)
Sheet3 - (47, 7)
Sheet4 - (47, 7)
Sheet5 - (81, 7)
Sheet6 - (50, 7)
In [12]:
df = pd.concat(all_dfs)
In [13]:
df.shape
Out[13]:
(299, 7)
In [14]:
df.head()
Out[14]:
account number name sku quantity unit price ext price date
Sheet1 0 412290 Jerde-Hilpert S2-77896 43 76.66 3296.38 2018-03-04 23:10:28
1 383080 Will LLC S1-93683 28 90.86 2544.08 2018-03-05 05:11:49
2 729833 Koepp Ltd S1-30248 13 44.84 582.92 2018-03-05 17:33:52
3 424914 White-Trantow S2-82423 38 50.93 1935.34 2018-03-05 21:40:10
4 672390 Kuhn-Gusikowski S1-50961 34 48.20 1638.80 2018-03-06 11:59:00
In [15]:
df.tail()
Out[15]:
account number name sku quantity unit price ext price date
Sheet6 45 239344 Stokes LLC S1-82801 41 78.90 3234.90 2018-03-04 01:06:20
46 218895 Kulas Inc S2-78676 38 89.02 3382.76 2018-03-04 01:17:11
47 642753 Pollich LLC S2-10342 40 56.85 2274.00 2018-03-04 01:49:22
48 737550 Fritsch, Russel and Anderson S2-83881 12 63.60 763.20 2018-03-04 15:26:20
49 146832 Kiehn-Spinka B1-53636 5 72.16 360.80 2018-03-04 21:18:04
In [16]:
pd.concat(pd.read_excel(workbook_url, sheet_name=None), ignore_index=True)
Out[16]:
account number name sku quantity unit price ext price date
0 412290 Jerde-Hilpert S2-77896 43 76.66 3296.38 2018-03-04 23:10:28
1 383080 Will LLC S1-93683 28 90.86 2544.08 2018-03-05 05:11:49
2 729833 Koepp Ltd S1-30248 13 44.84 582.92 2018-03-05 17:33:52
3 424914 White-Trantow S2-82423 38 50.93 1935.34 2018-03-05 21:40:10
4 672390 Kuhn-Gusikowski S1-50961 34 48.20 1638.80 2018-03-06 11:59:00
5 239344 Stokes LLC S2-10342 34 36.93 1255.62 2018-03-06 23:49:16
6 218895 Kulas Inc S2-34077 30 99.73 2991.90 2018-03-07 05:15:29
7 672390 Kuhn-Gusikowski B1-05914 25 89.86 2246.50 2018-03-07 06:25:52
8 740150 Barton LLC S1-82801 29 60.81 1763.49 2018-03-07 10:24:54
9 218895 Kulas Inc B1-20000 23 99.57 2290.11 2018-03-07 13:34:00
10 257198 Cronin, Oberbrunner and Spencer S1-30248 14 91.37 1279.18 2018-03-07 16:21:58
11 672390 Kuhn-Gusikowski S2-83881 26 88.38 2297.88 2018-03-07 20:18:18
12 424914 White-Trantow B1-53636 44 59.93 2636.92 2018-03-07 21:16:24
13 307599 Kassulke, Ondricka and Metz S2-10342 4 12.99 51.96 2018-03-07 23:59:26
14 383080 Will LLC B1-20000 45 29.90 1345.50 2018-03-08 01:50:36
15 527099 Sanford and Sons S2-82423 44 23.30 1025.20 2018-03-09 00:18:55
16 146832 Kiehn-Spinka S2-77896 27 70.76 1910.52 2018-03-09 11:05:47
17 642753 Pollich LLC B1-05914 15 65.50 982.50 2018-03-09 11:22:09
18 786968 Frami, Hills and Schmidt S1-65481 14 44.67 625.38 2018-03-09 19:13:11
19 527099 Sanford and Sons S1-82801 35 95.17 3330.95 2018-03-09 19:13:20
20 257198 Cronin, Oberbrunner and Spencer S2-23246 43 87.40 3758.20 2018-03-10 05:39:36
21 527099 Sanford and Sons B1-65551 6 58.04 348.24 2018-03-10 05:52:17
22 218895 Kulas Inc S1-47412 21 94.45 1983.45 2018-03-10 11:02:14
23 729833 Koepp Ltd S2-10342 12 93.64 1123.68 2018-03-10 13:08:45
24 412290 Jerde-Hilpert S1-82801 26 74.79 1944.54 2018-03-11 02:49:11
25 642753 Pollich LLC S2-77896 13 48.14 625.82 2018-03-11 06:19:26
26 218895 Kulas Inc B1-33364 17 93.01 1581.17 2018-03-11 08:20:57
27 307599 Kassulke, Ondricka and Metz S1-50961 28 34.87 976.36 2018-03-11 10:49:24
28 146832 Kiehn-Spinka S1-93683 17 10.63 180.71 2018-03-11 12:43:03
29 383080 Will LLC S1-82801 3 77.06 231.18 2018-03-11 16:38:10
... ... ... ... ... ... ... ...
269 141962 Herman LLC B1-69924 21 95.00 1995.00 2018-02-26 19:08:02
270 383080 Will LLC S2-23246 47 10.93 513.71 2018-02-26 20:41:02
271 383080 Will LLC B1-20000 4 35.04 140.16 2018-02-26 23:56:15
272 672390 Kuhn-Gusikowski S1-93683 10 34.96 349.60 2018-02-27 02:58:58
273 424914 White-Trantow S1-93683 -1 25.01 -25.01 2018-02-27 10:32:25
274 383080 Will LLC S1-93683 41 97.29 3988.89 2018-02-27 14:54:11
275 714466 Trantow-Barrows S2-11481 46 39.61 1822.06 2018-02-27 16:12:51
276 740150 Barton LLC S1-30248 21 14.05 295.05 2018-02-28 02:29:03
277 218895 Kulas Inc B1-69924 44 72.43 3186.92 2018-02-28 08:42:56
278 383080 Will LLC B1-53102 6 32.80 196.80 2018-02-28 17:35:23
279 688981 Keeling LLC S2-23246 18 64.71 1164.78 2018-02-28 23:21:04
280 642753 Pollich LLC B1-04202 8 95.86 766.88 2018-02-28 23:47:32
281 163416 Purdy-Kunde S1-30248 19 65.03 1235.57 2018-03-01 16:07:40
282 527099 Sanford and Sons S2-82423 3 76.21 228.63 2018-03-01 17:18:01
283 527099 Sanford and Sons B1-50809 8 70.78 566.24 2018-03-01 18:53:09
284 737550 Fritsch, Russel and Anderson B1-50809 20 50.11 1002.20 2018-03-01 23:47:17
285 688981 Keeling LLC B1-86481 -1 97.16 -97.16 2018-03-02 01:46:44
286 729833 Koepp Ltd S1-82801 26 50.29 1307.54 2018-03-02 02:59:26
287 307599 Kassulke, Ondricka and Metz S1-93683 -1 86.38 -86.38 2018-03-02 03:29:04
288 412290 Jerde-Hilpert S2-23246 27 58.87 1589.49 2018-03-02 12:10:30
289 307599 Kassulke, Ondricka and Metz S2-34077 48 53.36 2561.28 2018-03-02 14:12:28
290 141962 Herman LLC S2-10342 49 31.39 1538.11 2018-03-02 23:08:02
291 257198 Cronin, Oberbrunner and Spencer S2-16558 7 26.21 183.47 2018-03-03 00:29:24
292 257198 Cronin, Oberbrunner and Spencer S2-23246 40 49.19 1967.60 2018-03-03 10:17:24
293 688981 Keeling LLC B1-38851 25 74.68 1867.00 2018-03-03 18:18:54
294 239344 Stokes LLC S1-82801 41 78.90 3234.90 2018-03-04 01:06:20
295 218895 Kulas Inc S2-78676 38 89.02 3382.76 2018-03-04 01:17:11
296 642753 Pollich LLC S2-10342 40 56.85 2274.00 2018-03-04 01:49:22
297 737550 Fritsch, Russel and Anderson S2-83881 12 63.60 763.20 2018-03-04 15:26:20
298 146832 Kiehn-Spinka B1-53636 5 72.16 360.80 2018-03-04 21:18:04

299 rows × 7 columns

In [ ]: