import pandas as pd
def get_df() -> pd.DataFrame:
    """Return a freshly built sample DataFrame.

    Returns:
        A new five-row DataFrame with two string columns:
        ``'col'``, whose values have the form ``'<5-digit code> <name>'``,
        and ``'type'``, one of ``'country'``, ``'state'`` or ``'county'``.
        A new object is built on every call, so callers may mutate it freely.
    """
    return pd.DataFrame({
        'col': [
            '00000 UNITED STATES',
            '01000 ALABAMA',
            '01001 Autauga County, AL',
            '01003 Baldwin County, AL',
            '01005 Barbour County, AL',
        ],
        'type': ['country', 'state', 'county', 'county', 'county'],
    })
# Split every 'col' value once, at its first run of whitespace (pat=None),
# into two new columns 'A' and 'B'; the original 'col' column is kept.
df = get_df()
print(df)
df[['A', 'B']] = df['col'].str.split(pat=None, n=1, expand=True)
print(df)
# Output:
#                         col     type
# 0       00000 UNITED STATES  country
# 1             01000 ALABAMA    state
# 2  01001 Autauga County, AL   county
# 3  01003 Baldwin County, AL   county
# 4  01005 Barbour County, AL   county
#                         col     type      A                   B
# 0       00000 UNITED STATES  country  00000       UNITED STATES
# 1             01000 ALABAMA    state  01000             ALABAMA
# 2  01001 Autauga County, AL   county  01001  Autauga County, AL
# 3  01003 Baldwin County, AL   county  01003  Baldwin County, AL
# 4  01005 Barbour County, AL   county  01005  Barbour County, AL
# If the original column is to be removed, there are two approaches.
# method 1: use pop()
# pop() removes 'col' from the frame and returns it as a Series. The right-hand
# side is evaluated first, so 'col' is already gone when 'A'/'B' are assigned.
df = get_df()
print(df)
df[['A', 'B']] = df.pop('col').str.split(pat=None, n=1, expand=True)
print(df)
# Output:
#                         col     type
# 0       00000 UNITED STATES  country
# 1             01000 ALABAMA    state
# 2  01001 Autauga County, AL   county
# 3  01003 Baldwin County, AL   county
# 4  01005 Barbour County, AL   county
#       type      A                   B
# 0  country  00000       UNITED STATES
# 1    state  01000             ALABAMA
# 2   county  01001  Autauga County, AL
# 3   county  01003  Baldwin County, AL
# 4   county  01005  Barbour County, AL
# method 2: add the new columns first, then drop the original column
df = get_df()
print(df)
df[['A', 'B']] = df['col'].str.split(n=1, expand=True)
# drop() returns a new DataFrame and leaves `df` unchanged unless inplace=True,
# so the result must be assigned back for 'col' to actually be removed.
df = df.drop('col', axis=1)
print(df)
# Output:
#                         col     type
# 0       00000 UNITED STATES  country
# 1             01000 ALABAMA    state
# 2  01001 Autauga County, AL   county
# 3  01003 Baldwin County, AL   county
# 4  01005 Barbour County, AL   county
#       type      A                   B
# 0  country  00000       UNITED STATES
# 1    state  01000             ALABAMA
# 2   county  01001  Autauga County, AL
# 3   county  01003  Baldwin County, AL
# 4   county  01005  Barbour County, AL