#!/usr/bin/env python # coding: utf-8 # In[1]: import pandas as pd import numpy as np import xlsxwriter # Read in the data to a dataframe # In[2]: df = pd.read_csv("pandasComtradeGarmentsNW.csv", index_col="index") # ## Step 1. Make a list of the EU country codes # In[3]: EU_Codes = ["AUT", "BEL", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", "FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA", "LVA", "LTU", "LUX", "MLT", "NLD", "POL", "PRT", "ROU","SVK", "SVN", "ESP", "SWE", "GBR"] # ## Step 2. Figure out and append the fibre classes based on their UN commodity codes # In[4]: commodityCode = { "Cotton": [610120, 611020, 611595, 610220, 610462, 610342, 610910, 610711, 610510, 610610, 610442, 610322, 610332, 610422, 610432, 610452, 610721, 610791, 610821, 610831, 610891], "Man-made": [610130, 610290, 611030, 610712, 610230, 610520, 610620, 610822, 610892, 610722, 610832, 610811], "Other": [611599, 610329, 610339, 610349, 610990, 610419, 610190, 611090, 610429, 610449, 610459, 610469, 610690, 610799, 610439, 610590, 610719, 610729, 610819, 610829, 610839, 610899], "Synthetic": [611596, 610343, 610443, 610463, 610444, 610323, 610423, 610433, 610453, 610333, 610413], "Wool": [610210, 610431, 610441, 610331, 610341, 610451, 610461, 611594] } # In[5]: ccDict = {} for k in commodityCode.keys(): for i in commodityCode[k]: ccDict[i]=k # In[6]: def coding(col, codeDict): colCoded = pd.Series(col, copy=True) for key, value in codeDict.items(): colCoded.replace(key, value, inplace=True) return colCoded # In[7]: df["Class"] = coding(df["Commodity Code"], ccDict) # ## Step 3. Add all of the possible combinations to the bottom to ensure square matrices # In[9]: reporters = list(df['Reporter ISO'].unique()) partners = list(df['Partner ISO'].unique()) #Get a list of all the countries in either categories all_countries = list(reporters) all_countries.extend(partners) all_countries = list(set(all_countries)) classes = list(df['Class'].unique()) # In[10]: df_extra = pd.DataFrame([{'Reporter ISO':y, 'Partner ISO': x, 'Class':z} for x in all_countries for y in all_countries for z in classes]) # In[11]: df=df.append(df_extra) # ## Step 4. Aggregate the values to return trade value for reporter, partner, fibre combos # In[12]: df_aggregate = pd.DataFrame(df.groupby(['Reporter ISO', 'Partner ISO', 'Class']).aggregate(sum)['Netweight (kg)']).reset_index() # ## Step 5. Create leontif matrices for each fibre # In[13]: fibres = list(df['Class'].unique()) # In[14]: split_df = {fibre : df_aggregate[df_aggregate['Class'] == fibre] for fibre in fibres} # In[15]: split_pivot = {fibre : split_df[fibre].pivot(index='Partner ISO', columns='Reporter ISO', values='Netweight (kg)').fillna(0) for fibre in fibres} # In[16]: # turn pivots into coefficient tables split_coeff = {fibre : split_pivot[fibre].divide(split_pivot[fibre].sum(axis=0)).fillna(0) for fibre in fibres} # In[17]: split_matrix = {fibre : split_coeff[fibre].values for fibre in fibres} # In[18]: # the leontif matrix is inverse of I-A (here A is the matrix from above) split_leontif = {fibre : np.linalg.inv(np.identity(len(split_matrix[fibre]))-split_matrix[fibre]) for fibre in fibres} # ## Step 6. Figure out the resolved demands for each country # In[19]: countries = list(split_pivot[fibres[0]].index.values) # In[20]: country_series = pd.Series(countries) country_series.name = 'index' # In[21]: demand_vectors = {c1 : np.array([1 if c1 == c else 0 for c in countries]) for c1 in countries} # In[22]: final_demands = {fibre : pd.DataFrame({country : split_leontif[fibre].dot(demand_vectors[country]) for country in countries}).set_index(country_series) for fibre in fibres} # ## Step 7. Filter EU out of rows and non-EU out of columns # In[23]: simple_tables = {fibre: final_demands[fibre][EU_Codes][~(final_demands[fibre].index.isin(EU_Codes))] for fibre in fibres} # ## Write the results to excel to have a look at # In[24]: # Create a Pandas Excel writer using XlsxWriter as the engine. result_filename = 'GarmentTradetoEUMemberStates.xlsx' writer = pd.ExcelWriter(result_filename, engine='xlsxwriter') for table in final_demands: simple_tables[table].to_excel(writer, sheet_name='{}'.format(table)) writer.save()