In [1]:
import pandas as pd
import numpy as np
import xlsxwriter

Read in the data to a dataframe

In [2]:
df = pd.read_csv("pandasComtradeGarmentsNW.csv", index_col="index")

Step 1. Make a list of the EU country codes

In [3]:
EU_Codes = ["AUT", "BEL", "BGR", "HRV", "CYP",
            "CZE", "DNK", "EST", "FIN", "FRA",
            "DEU", "GRC", "HUN", "IRL", "ITA",
            "LVA", "LTU", "LUX", "MLT", "NLD",
            "POL", "PRT", "ROU","SVK", "SVN",
            "ESP", "SWE", "GBR"]

Step 2. Figure out and append the fibre classes based on their UN commodity codes

In [4]:
commodityCode = {
"Cotton": [610120, 611020, 611595, 610220, 610462, 610342, 610910, 610711, 610510, 610610, 610442, 610322, 610332, 610422, 610432, 610452, 610721, 610791, 610821, 610831, 610891],
"Man-made": [610130, 610290, 611030, 610712, 610230, 610520, 610620, 610822, 610892, 610722, 610832, 610811],
"Other": [611599, 610329, 610339, 610349, 610990, 610419, 610190, 611090, 610429, 610449, 610459, 610469, 610690, 610799, 610439, 610590, 610719, 610729, 610819, 610829, 610839, 610899],
"Synthetic": [611596, 610343, 610443, 610463, 610444, 610323, 610423, 610433, 610453, 610333, 610413],
"Wool": [610210, 610431, 610441, 610331, 610341, 610451, 610461, 611594]
}
In [5]:
ccDict = {}

for k in commodityCode.keys():
    for i in commodityCode[k]:
        ccDict[i]=k
In [6]:
def coding(col, codeDict):
    colCoded = pd.Series(col, copy=True)
    for key, value in codeDict.items():
        colCoded.replace(key, value, inplace=True)
    return colCoded
In [7]:
df["Class"] = coding(df["Commodity Code"], ccDict)

Step 3. Add all of the possible combinations to the bottom to ensure square matrices

In [9]:
reporters = list(df['Reporter ISO'].unique())
partners = list(df['Partner ISO'].unique())

#Get a list of all the countries in either categories
all_countries = list(reporters)
all_countries.extend(partners)
all_countries = list(set(all_countries))

classes = list(df['Class'].unique())
In [10]:
df_extra = pd.DataFrame([{'Reporter ISO':y, 'Partner ISO': x, 'Class':z} for x in all_countries for y in all_countries for z in classes])
In [11]:
df=df.append(df_extra)

Step 4. Aggregate the values to return trade value for reporter, partner, fibre combos

In [12]:
df_aggregate =  pd.DataFrame(df.groupby(['Reporter ISO', 'Partner ISO', 'Class']).aggregate(sum)['Netweight (kg)']).reset_index()

Step 5. Create leontif matrices for each fibre

In [13]:
fibres = list(df['Class'].unique())
In [14]:
split_df = {fibre : df_aggregate[df_aggregate['Class'] == fibre] for fibre in fibres}
In [15]:
split_pivot = {fibre : split_df[fibre].pivot(index='Partner ISO', columns='Reporter ISO', values='Netweight (kg)').fillna(0) for fibre in fibres}
In [16]:
# turn pivots into coefficient tables
split_coeff = {fibre : split_pivot[fibre].divide(split_pivot[fibre].sum(axis=0)).fillna(0) for fibre in fibres}
In [17]:
split_matrix = {fibre : split_coeff[fibre].values for fibre in fibres}
In [18]:
# the leontif matrix is inverse of I-A (here A is the matrix from above)
split_leontif = {fibre : np.linalg.inv(np.identity(len(split_matrix[fibre]))-split_matrix[fibre]) for fibre in fibres}

Step 6. Figure out the resolved demands for each country

In [19]:
countries = list(split_pivot[fibres[0]].index.values)
In [20]:
country_series = pd.Series(countries)
country_series.name = 'index'
In [21]:
demand_vectors = {c1 : np.array([1 if c1 == c else 0 for c in countries]) for c1 in countries}
In [22]:
final_demands = {fibre : pd.DataFrame({country : split_leontif[fibre].dot(demand_vectors[country]) for country in countries}).set_index(country_series) for fibre in fibres}

Step 7. Filter EU out of rows and non-EU out of columns

In [23]:
simple_tables = {fibre: final_demands[fibre][EU_Codes][~(final_demands[fibre].index.isin(EU_Codes))] for fibre in fibres}

Write the results to excel to have a look at

In [24]:
# Create a Pandas Excel writer using XlsxWriter as the engine.

result_filename = 'GarmentTradetoEUMemberStates.xlsx'

writer = pd.ExcelWriter(result_filename, engine='xlsxwriter')
for table in final_demands:
    simple_tables[table].to_excel(writer, sheet_name='{}'.format(table))
    
writer.save()