using DataFrames
using FreqTables

# Walk-through of basic DataFrame operations on the `auto.csv` data set.
# Trailing semicolons only suppress REPL echo; they do not change results.

# ---- Load the data and look at its structure ----
auto = readtable("auto.csv");
showcols(auto)
num_obs = size(auto, 1)     # row count
num_vars = size(auto, 2)    # column count

# ---- Selecting columns and rows ----
auto[:price]                                        # one column, by name
auto[2]                                             # one column, by position
auto[:, [:price, :mpg]]                             # several columns, all rows
head(auto, 4)
tail(auto, 4)
auto[[1; 2; 4; 15], [:headroom, :trunk]]            # explicit row indices
auto[(auto[:, :mpg] .< 20), [:headroom, :trunk]]    # rows where mpg < 20

# ---- Summaries and tabulations ----
describe(auto)
colwise(mean, auto)
round(colwise(mean, auto[:, 2:end]), 3)     # same, leaving out the first column
countmap(auto[:foreign])
by(auto, :foreign, nrow)
# Two-way table of rep78 by foreign, restricted to rows where rep78 is not NA.
freqtable(auto, :rep78, :foreign, subset = !isna(auto[:rep78]))

# ---- Dropping rows and columns ----
# Keep only the rows whose rep78 is not NA.
auto1 = auto[!isna(auto[:, :rep78]), :];
showcols(auto1)
# Keep every column except price and mpg.
auto1 = auto1[setdiff(names(auto1), [:price, :mpg])];
showcols(auto1)     # was `showcols(autod1)`: undefined name, typo for auto1
# Remove weight and length in place.
delete!(auto1, [:weight, :length]);
showcols(auto1)

# ---- Renaming, creating and reordering columns ----
auto2 = auto[:, [:make, :mpg, :displacement, :gear_ratio]];
showcols(auto2)
rename!(auto2, [:make, :displacement], [:make_name, :CCs]);
showcols(auto2)
auto2[:mpg_same] = auto2[:mpg];
showcols(auto2)
# Read from auto2 like the neighbouring statements; auto2[:mpg] holds the same
# values as auto[:mpg] at this point because auto2 has not been reordered yet.
auto2[:mpgSquared] = map(temp -> temp.^2, auto2[:mpg]);
head(auto2[:, [:mpg, :mpgSquared]])
# Flag rows that have both mpg < 20 and gear_ratio < 3.
auto2[:dummy_var] = map(
    (tempx, tempy) -> (tempx .< 20) & (tempy .< 3), auto2[:mpg], auto2[:gear_ratio]
);
showcols(auto2)
# Move the first column so that it comes after the fourth one.
auto2 = auto2[:, [2; 3; 4; 1; 5:end]];
showcols(auto2)
# Sort by mpg ascending, breaking ties by make_name descending.
sort!(auto2, cols = [:mpg, :make_name], rev = [false, true]);
head(auto2, 4)

# ---- Reshaping between wide and long form ----
reshape1 = DataFrame(
    id = 1:3,
    sex = [0; 1; 0],
    inc1980 = [5000; 2000; 3000],
    inc1981 = [5500; 2200; 2000],
    inc1982 = [6000; 4400; 1000],
)
# Wide -> long: the inc* columns are stacked, id and sex identify each record.
longform1A = stack(reshape1, [:inc1980, :inc1981, :inc1982], [:id, :sex])
sort!(longform1A, cols = [:id, :variable])
# Long -> wide again.
wideform1A = unstack(longform1A, :variable, :value)

# ---- Joining tables on a shared key ----
name = DataFrame(
    ID = [1, 2, 3, 4, 5, 6],
    Name = ["John", "Jane", "Mark", "Ann", "Vlad", "Maria"],
)
jobs = DataFrame(
    ID = [1, 2, 3, 4, 5, 6],
    Job = ["Lawyer", "Doctor", "Mechanic", "Doctor", "Judge", "Pilot"],
)
# ID repeats here (several siblings per person) and ID 4 does not appear at all.
siblings = DataFrame(
    ID = [1, 1, 2, 3, 5, 5, 5, 6],
    Sibling = [
        "Eric", "Ryan", "Jennifer", "Heather", "Carl", "Dmitri", "Andrei", "Pedro",
    ],
)
mergedNameJobs = join(name, jobs, on = :ID, kind = :inner)
mergedNameSibsInner = join(name, siblings, on = :ID, kind = :inner)
mergedNameSibsOuter = join(name, siblings, on = :ID, kind = :outer)
# ---- Remaining join kinds ----
# `name` and `siblings` are the tables built earlier in this script.
# Left join: every row of `name` is kept.
mergedNameSibsLeft = join(name, siblings, on = :ID, kind = :left)
# Right join: every row of `siblings` is kept. The result used to be assigned
# to `mergedNameSibsOuter`, which overwrote the outer-join result; it now has
# its own name, following the Left/Semi/Anti/Cross pattern.
mergedNameSibsRight = join(name, siblings, on = :ID, kind = :right)
# Semi join: rows of `name` that have a match in `siblings`.
mergedNameSibsSemi = join(name, siblings, on = :ID, kind = :semi)
# Anti join: rows of `name` that have no match in `siblings`.
mergedNameSibsAnti = join(name, siblings, on = :ID, kind = :anti)
# Cross join: every pairing of a `name` row with a `siblings` row (no key).
mergedNameSibsCross = join(name, siblings, kind = :cross)

# ---- Reshaping with two groups of yearly columns ----
reshape2 = DataFrame(
    id = 1:3,
    sex = [0; 1; 0],
    inc1980 = [5000; 2000; 3000],
    inc1981 = [5500; 2200; 2000],
    inc1982 = [6000; 4400; 1000],
    ue1980 = [0; 1; 0],
    ue1981 = [1; 0; 0],
    ue1982 = [0; 0; 1],
)
# Wide -> long: all inc* and ue* columns are stacked, id and sex identify
# each record.
longform2 = stack(
    reshape2,
    [:inc1980, :inc1981, :inc1982, :ue1980, :ue1981, :ue1982],
    [:id, :sex],
)