# Exploratory PySpark snippet. It relies on `spark` and `sqlContext` being
# already defined by the surrounding session (they are not created here).
# Recorded cell outputs are kept as comments so the file stays valid Python.

# Read the genome-scores CSV. header=True takes the column names from the
# first row; no schema is supplied, and the sample output below shows every
# value coming back as a string.
scores = spark.read.csv(
    "/Users/blairhudson/Downloads/ml-20m/genome-scores.csv",
    header=True,
)

# Peek at the first five rows.
scores.take(5)
# Recorded output:
# [Row(movieId='1', tagId='1', relevance='0.025000000000000022'),
#  Row(movieId='1', tagId='2', relevance='0.025000000000000022'),
#  Row(movieId='1', tagId='3', relevance='0.057750000000000024'),
#  Row(movieId='1', tagId='4', relevance='0.09675'),
#  Row(movieId='1', tagId='5', relevance='0.14675')]

# Register a Python lambda under the SQL name "stringLengthString" so it can
# be called from SQL text. No return type is passed; the recorded output
# below shows the result arriving as the string '4', not an integer.
# NOTE(review): PySpark documents registerFunction as deprecated since 2.3 in
# favour of spark.udf.register -- consider migrating once confirmed safe.
sqlContext.registerFunction("stringLengthString", lambda x: len(x))

# Call the registered function from a SQL query.
sqlContext.sql("SELECT stringLengthString('test')").collect()
# Recorded output:
# [Row(stringLengthString(test)='4')]