In [5]:
%lsmagic
Available line magics:
%lsmagic %showtypes %showoutput %adddeps %truncation %addjar

Available cell magics:
%%sql %%html %%javascript %%dataframe %%scala

Type %<magic_name> for usage info.
         
In [36]:
sc.version
Out[36]:
2.4.3

pivot using .pivot

In [102]:
val unpivotedDf = Seq(
    ("john", "notebook", 2),
    ("gary", "notebook", 3),
    ("john", "small_phone", 2),
    ("mary", "small_phone", 3),
    ("john", "large_phone", 3),
    ("john", "camera", 3)
).toDF("salesperson","device", "amount_sold").sort("salesperson","device")
unpivotedDf = [salesperson: string, device: string ... 1 more field]
Out[102]:
[salesperson: string, device: string ... 1 more field]
In [103]:
%%dataframe
unpivotedDf
Out[103]:
salespersondeviceamount_sold
garynotebook3
johncamera3
johnlarge_phone3
johnnotebook2
johnsmall_phone2
marysmall_phone3
In [105]:
%%dataframe
unpivotedDf.groupBy("salesperson").pivot("device").sum("amount_sold")
Out[105]:
salespersoncameralarge_phonenotebooksmall_phone
garynullnull3null
marynullnullnull3
john3322

unpivot using stack

In [63]:
val pivotedDf = Seq(
    ("gary", None,   None,   Some(3),None),
    ("mary", None,   None,   None,   Some(3)),
    ("john", Some(3),Some(3),Some(2),Some(2))
).toDF("salesperson", "camera", "large_phone", "notebook", "small_phone")
pivotedDf = [salesperson: string, camera: int ... 3 more fields]
Out[63]:
[salesperson: string, camera: int ... 3 more fields]
In [93]:
%%dataframe
pivotedDf
Out[93]:
salespersoncameralarge_phonenotebooksmall_phone
garynullnull3null
marynullnullnull3
john3322
In [96]:
val unpivotedDf = pivotedDf
    .selectExpr("salesperson","stack(4,'camera',camera,'large_phone',large_phone,'notebook',notebook,'small_phone',small_phone)")
    .withColumnRenamed("col0","device") // default name of this column is col0
    .withColumnRenamed("col1","amount_sold") // default name of this column is col1
    .filter($"amount_sold".isNotNull) // must explicitly remove nulls
unpivotedDf = [salesperson: string, device: string ... 1 more field]
Out[96]:
[salesperson: string, device: string ... 1 more field]
In [104]:
%%dataframe --limit=100
unpivotedDf.sort("salesperson","device")
Out[104]:
salespersondeviceamount_sold
garynotebook3
johncamera3
johnlarge_phone3
johnnotebook2
johnsmall_phone2
marysmall_phone3
In [ ]: