# Push the egg file to user HDFS directory in the cluster
import os
import dsx_core_utils
# Egg located under the project directory, and the HDFS path it is uploaded to.
egg_source = os.environ['DSX_PROJECT_DIR']+'/SpamFilterScikit/dist/SpamFilterScikit-1.0-py2.7.egg'
egg_target = "/user/user1/SpamFilterScikit-1.0-py2.7.egg"
# WebHDFS URL the upload is sent through.
webhdfs_endpoint = "https://zinc1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/webhdfs/v1"
dsx_core_utils.upload_hdfs_file(
    source_path=egg_source,
    target_path=egg_target,
    webhdfsurl=webhdfs_endpoint,
)
upload success
# Push the dataset to user HDFS directory in the cluster
import dsx_core_utils
# CSV located under the project's datasets folder, and the HDFS path it is uploaded to.
dataset_source = os.environ['DSX_PROJECT_DIR']+'/datasets/SMSSpamCollection.csv'
dataset_target = "/user/user1/SMSSpamCollection.csv"
# WebHDFS URL the upload is sent through.
webhdfs_endpoint = "https://zinc1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/webhdfs/v1"
dsx_core_utils.upload_hdfs_file(
    source_path=dataset_source,
    target_path=dataset_target,
    webhdfsurl=webhdfs_endpoint,
)
upload success
# Load the sparkmagic IPython extension so the %manage_spark / %%spark magics
# used below are available in this notebook.
%load_ext sparkmagic.magics
from dsx_core_utils import proxy_util,dsxhi_util
# Configure sparkmagic's Livy settings through the DSX helper; it reports
# "success configuring sparkmagic livy." when done (see the captured output).
proxy_util.configure_proxy_livy()
The sparkmagic.magics extension is already loaded. To reload it, use: %reload_ext sparkmagic.magics success configuring sparkmagic livy.
# Show the Livy endpoint URLs that can be used from this notebook; the captured
# output is a list of ".../livy/v1" and ".../livy2/v1" gateway URLs.
dsxhi_util.list_livy_endpoints()
['https://becks1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://becks1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://cdh513edge11.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://cdh514edge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://cdh515edge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://cdh515edge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://centos74edge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://centos74edge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://rated3.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://yccdh5.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://yccdh5.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://ycedge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://ycedge1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1', 'https://zinc1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy/v1', 'https://zinc1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1']
# Print the descriptor of the pushed Python 2 image; per the captured output it
# holds the "imageId", "scriptCommand" and "libPaths" fields.
!cat /user-home/_global_/.remote-images/dsx-hi/dsx-scripted-ml-python2.json
{ "imageId": "968c2101554e0d1e0d4fdd3720aaa565a2910cb46f4d7ed61188b6ceeec22930", "scriptCommand": "anaconda2/bin/python2.7", "libPaths": ["usr/local/spark-2.0.2-bin-hadoop2.7/python","user-home/.scripts/common-helpers/batch/pmml","user-home/.scripts/common-helpers/saas","user-home/_global_/python-2.7"] }
Using values from dsx-scripted-ml-python2.json, we'll need to:
scriptCommand
Example Livy Properties for using dsx-scripted-ml-python2.tar.gz Virtual Environment:
{"proxyUser": "user1", "archives": ["/user/dsxhi/environments/26611bf7fe595f786139d6d2132de070fc813f6a0ef7a4e25857b79c8cd4b565/dsx-scripted-ml-python2.tar.gz"],"conf":{"spark.yarn.appMasterEnv.PYSPARK_PYTHON":"dsx-scripted-ml-python2.tar.gz/anaconda2/bin/python"}}
/user/dsxhi/environments/26611bf7fe595f786139d6d2132de070fc813f6a0ef7a4e25857b79c8cd4b565/dsx-scripted-ml-python2.tar.gz
/user/dsxhi/environments/pythonAddons/pythonAddons.tar.gz
# Open the sparkmagic management widget to add a Livy endpoint and start a
# session; once it is running, the session exposes a SparkSession as 'spark'.
%manage_spark
MagicsControllerWidget(children=(Tab(children=(ManageSessionWidget(children=(HTML(value=u'<br/>'), HTML(value=…
Added endpoint https://zinc1.fyre.ibm.com:8443/gateway/mjoudsx336-master-1/livy2/v1 Starting Spark application
SparkSession available as 'spark'.
%%spark
# Register the egg uploaded to HDFS with the SparkContext so that the
# SpamFilterScikit package can be imported in the following cell.
sc.addPyFile("hdfs:///user/user1/SpamFilterScikit-1.0-py2.7.egg")
%%spark
import SpamFilterScikit
import sys
import os
from pyspark import SparkContext
from pyspark.sql import SparkSession
# The model class comes from the egg registered with the SparkContext.
from SpamFilterScikit import LRModelScikit

# HDFS location of the SMS spam dataset.
filename = "hdfs:///user/user1/SMSSpamCollection.csv"

# Build the model object, then run it against the dataset using the
# session-provided `spark` handle.
spam_model = LRModelScikit()
spam_model.execute(spark, filename)
Accuracy: 93.00% y_test y_pred count 1 1 94 0 1 4 1 0 10 0 0 92 /hadoop/yarn/local/usercache/user1/appcache/application_1533478912530_0781/container_e32_1533478912530_0781_01_000001/dsx-scripted-ml-python2.tar.gz/anaconda2/lib/python2.7/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)