import feather
import pandas as pd
import saspy
pd.show_versions()
INSTALLED VERSIONS ------------------ commit: None python: 3.6.3.final.0 python-bits: 64 OS: Linux OS-release: 3.10.0-693.11.1.el7.x86_64 machine: x86_64 processor: x86_64 byteorder: little LC_ALL: None LANG: en_US.UTF-8 LOCALE: en_US.UTF-8 pandas: 0.20.3 pytest: 3.2.1 pip: 9.0.1 setuptools: 38.2.4 Cython: 0.26.1 numpy: 1.13.3 scipy: 0.19.1 xarray: None IPython: 6.2.1 sphinx: 1.6.3 patsy: 0.4.1 dateutil: 2.6.1 pytz: 2017.2 blosc: None bottleneck: 1.2.1 tables: 3.4.2 numexpr: 2.6.2 feather: 0.4.0 matplotlib: 2.1.0 openpyxl: 2.4.8 xlrd: 1.1.0 xlwt: 1.3.0 xlsxwriter: 1.0.2 lxml: 4.1.0 bs4: 4.6.0 html5lib: 0.999999999 sqlalchemy: 1.1.13 pymysql: None psycopg2: None jinja2: 2.9.6 s3fs: None pandas_gbq: None pandas_datareader: None
sas = saspy.SASsession()
Using SAS Config named: default SAS Connection established. Subprocess id is 4942
cars = sas.sasdata('cars', libref='sashelp')
print(type(cars))
<class 'saspy.sasbase.SASdata'>
SAS dataset SASHELP.CARS converted to panda dataframe car_df
car_df = cars.to_df()
print(type(car_df))
<class 'pandas.core.frame.DataFrame'>
panda dataframe car_df converted to SAS dataset WORK.CARS
car_df2 = sas.df2sd(car_df, 'cars')
print(type(car_df2))
<class 'saspy.sasbase.SASdata'>
Read a dataframe from disk with the feather library created previously from a R dataframe
path = '/home/sas/notebook/r_staff.feather'
pd_staff = feather.read_dataframe(path)
pd_staff.dtypes
employee category salary float64 startdate datetime64[ns] stringsASFactors bool dtype: object
print(type(pd_staff))
<class 'pandas.core.frame.DataFrame'>
print(pd_staff)
employee salary startdate stringsASFactors 0 Thomas Gunther 27500.0 2010-11-01 False 1 Nicholas Harbinger 33900.0 2008-03-25 False 2 Gisela Benito 28000.0 2007-03-14 False 3 Herbert Rudelich 35000.0 2011-01-24 False 4 Emily Sirignano 36350.0 2009-12-15 False 5 Michael Morrison 40000.0 2009-10-10 False 6 Jacqueline Onieda 36500.0 2015-01-02 False
The dataframe pd_staff appears acceptable to Python, but is not acceptable to the sas.df2sd call
sd_staff = sas.df2sd(pd_staff, 'staff')
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call last) <ipython-input-13-7607062bf09b> in <module>() ----> 1 sd_staff = sas.df2sd(pd_staff, 'staff') /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in df2sd(self, df, table, libref, results) 618 :return: SASdata object 619 """ --> 620 return self.dataframe2sasdata(df, table, libref, results) 621 622 def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '', /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in dataframe2sasdata(self, df, table, libref, results) 637 return None 638 else: --> 639 self._io.dataframe2sasdata(df, table, libref) 640 641 if self.exist(table, libref): /root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py in dataframe2sasdata(self, df, table, libref) 894 input += "'"+df.columns[name]+"'n " 895 if df.dtypes[df.columns[name]].kind in ('O','S','U','V'): --> 896 col_l = df[df.columns[name]].map(len, 'ignore').max() 897 if col_l == 0: 898 col_l = 8 /root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in map(self, arg, na_action) 2127 values = self._values 2128 if na_action is not None: -> 2129 raise NotImplementedError 2130 map_f = lambda values, f: values.map(f) 2131 else: NotImplementedError:
Copy the pd_staff dataframe with default deep=True
copy_deep_true = pd_staff.copy()
id(pd_staff)
140465635419640
print(type(copy_deep_true))
id(copy_deep_true)
<class 'pandas.core.frame.DataFrame'>
140464967919432
Call the sas.df2sd method. The copied dataframe is not acceptable to this call.
cdt = sas.df2sd(copy_deep_true, 'staff_t')
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call last) <ipython-input-16-9d457eae85df> in <module>() ----> 1 cdt = sas.df2sd(copy_deep_true, 'staff_t') /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in df2sd(self, df, table, libref, results) 618 :return: SASdata object 619 """ --> 620 return self.dataframe2sasdata(df, table, libref, results) 621 622 def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '', /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in dataframe2sasdata(self, df, table, libref, results) 637 return None 638 else: --> 639 self._io.dataframe2sasdata(df, table, libref) 640 641 if self.exist(table, libref): /root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py in dataframe2sasdata(self, df, table, libref) 894 input += "'"+df.columns[name]+"'n " 895 if df.dtypes[df.columns[name]].kind in ('O','S','U','V'): --> 896 col_l = df[df.columns[name]].map(len, 'ignore').max() 897 if col_l == 0: 898 col_l = 8 /root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in map(self, arg, na_action) 2127 values = self._values 2128 if na_action is not None: -> 2129 raise NotImplementedError 2130 map_f = lambda values, f: values.map(f) 2131 else: NotImplementedError:
Copy pd_staff operation with deep=false parameter
copy_deep_false = pd_staff.copy(deep=False)
id(pd_staff)
140465635419640
print(type(copy_deep_false))
id(copy_deep_false)
<class 'pandas.core.frame.DataFrame'>
140464967921000
Call the sas.df2sd method. copy_deep_false dataframe is not acceptable.
cdf = sas.df2sd(copy_deep_false, 'staff_f')
--------------------------------------------------------------------------- NotImplementedError Traceback (most recent call last) <ipython-input-19-7cbfd61b3812> in <module>() ----> 1 cdf = sas.df2sd(copy_deep_false, 'staff_f') /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in df2sd(self, df, table, libref, results) 618 :return: SASdata object 619 """ --> 620 return self.dataframe2sasdata(df, table, libref, results) 621 622 def dataframe2sasdata(self, df: 'pd.DataFrame', table: str = '_df', libref: str = '', /root/anaconda3/lib/python3.6/site-packages/saspy/sasbase.py in dataframe2sasdata(self, df, table, libref, results) 637 return None 638 else: --> 639 self._io.dataframe2sasdata(df, table, libref) 640 641 if self.exist(table, libref): /root/anaconda3/lib/python3.6/site-packages/saspy/sasiostdio.py in dataframe2sasdata(self, df, table, libref) 894 input += "'"+df.columns[name]+"'n " 895 if df.dtypes[df.columns[name]].kind in ('O','S','U','V'): --> 896 col_l = df[df.columns[name]].map(len, 'ignore').max() 897 if col_l == 0: 898 col_l = 8 /root/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in map(self, arg, na_action) 2127 values = self._values 2128 if na_action is not None: -> 2129 raise NotImplementedError 2130 map_f = lambda values, f: values.map(f) 2131 else: NotImplementedError:
Create the medals dataframe with the read.csv method
df_medals = pd.read_csv("http://winterolympicsmedals.com/medals.csv")
df_medals.head()
Year | City | Sport | Discipline | NOC | Event | Event gender | Medal | |
---|---|---|---|---|---|---|---|---|
0 | 1924 | Chamonix | Skating | Figure skating | AUT | individual | M | Silver |
1 | 1924 | Chamonix | Skating | Figure skating | AUT | individual | W | Gold |
2 | 1924 | Chamonix | Skating | Figure skating | AUT | pairs | X | Gold |
3 | 1924 | Chamonix | Bobsleigh | Bobsleigh | BEL | four-man | M | Bronze |
4 | 1924 | Chamonix | Ice Hockey | Ice Hockey | CAN | ice hockey | M | Gold |
Call the sas.df2sd method to create the SAs dataset WORK.medals2
sd_medals = sas.df2sd(df_medals, 'medals2')
sd_medals.contents()
{'Attributes': Member Label1 \ 0 WORK.MEDALS2 Data Set Name 1 WORK.MEDALS2 Member Type 2 WORK.MEDALS2 Engine 3 WORK.MEDALS2 Created 4 WORK.MEDALS2 Last Modified 5 WORK.MEDALS2 Protection 6 WORK.MEDALS2 Data Set Type 7 WORK.MEDALS2 Label 8 WORK.MEDALS2 Data Representation 9 WORK.MEDALS2 Encoding cValue1 nValue1 \ 0 WORK.MEDALS2 NaN 1 DATA NaN 2 V9 NaN 3 01/14/2018 15:12:11 1.831562e+09 4 01/14/2018 15:12:11 1.831562e+09 5 NaN 6 NaN 7 NaN 8 SOLARIS_X86_64, LINUX_X86_64, ALPHA_TRU64, LIN... NaN 9 latin1 Western (ISO) NaN Label2 cValue2 nValue2 0 Observations 2311 2311.0 1 Variables 8 8.0 2 Indexes 0 0.0 3 Observation Length 96 96.0 4 Deleted Observations 0 0.0 5 Compressed NO NaN 6 Sorted NO NaN 7 0.0 8 0.0 9 0.0 , 'Enginehost': Member Label1 \ 0 WORK.MEDALS2 Data Set Page Size 1 WORK.MEDALS2 Number of Data Set Pages 2 WORK.MEDALS2 First Data Page 3 WORK.MEDALS2 Max Obs per Page 4 WORK.MEDALS2 Obs in First Data Page 5 WORK.MEDALS2 Number of Data Set Repairs 6 WORK.MEDALS2 Filename 7 WORK.MEDALS2 Release Created 8 WORK.MEDALS2 Host Created 9 WORK.MEDALS2 Inode Number 10 WORK.MEDALS2 Access Permission 11 WORK.MEDALS2 Owner Name 12 WORK.MEDALS2 File Size 13 WORK.MEDALS2 File Size (bytes) cValue1 nValue1 0 65536 65536.0 1 4 4.0 2 1 1.0 3 681 681.0 4 652 652.0 5 0 0.0 6 /home/sas/tmp/SAS_work41FB0000134E_localhost.l... NaN 7 9.0401M4 NaN 8 Linux NaN 9 149271914 149271914.0 10 rw-rw-r-- NaN 11 sas NaN 12 320KB NaN 13 327680 327680.0 , 'Variables': Member Num Variable Type Len Pos 0 WORK.MEDALS2 2 City Char 22 8 1 WORK.MEDALS2 4 Discipline Char 15 40 2 WORK.MEDALS2 6 Event Char 31 58 3 WORK.MEDALS2 7 Event gender Char 1 89 4 WORK.MEDALS2 8 Medal Char 6 90 5 WORK.MEDALS2 5 NOC Char 3 55 6 WORK.MEDALS2 3 Sport Char 10 30 7 WORK.MEDALS2 1 Year Num 8 0}
sas
Access Method = STDIO SAS Config name = default WORK Path = /home/sas/tmp/SAS_work41FB0000134E_localhost.localdomain/ SAS Version = 9.04.01M4P11092016 SASPy Version = 2.2.1 Teach me SAS = False Batch = False Results = Pandas SAS Session Encoding = LATIN1 Python Encoding value = utf-8