subtypes(Number)
subtypes(Real)
subtypes(Integer)

# Floating Point
@show 5/3
# Mathematical Constant
@show pi
# Rational
@show 2//3 + 1
# BigInt
@show big(2) ^ 1000 ;

subtypes(String)

s = "Hello World"
@show typeof(s)
@show s[7] ;

# Unicode Names and Values
你好 = "(。◕_◕。)ノ "
@show typeof(你好)
@show 你好 ^ 3 ;

# A composite type with two fields
type NewType
    i::Integer
    s::String
end

new_t = NewType(33, "this is a NewType")
@show new_t.i
@show new_t.s ;

# Vectors
v = [1, 1]

# Vector Operations
@show v + [2, 0]    # vector addition
@show v + 1         # same as v + [1,1]
@show 5*v           # scalar multiplication
println( "Dot Product : ", dot(v, v) )
println( "Norm : ", norm(v) )

# Matrices
M = [1 1 ; 0 1]

# Matrix Addition
M + 1 , M + [0 0 ; 5 5]

# Matrix Multiplication
2M , M ^ 2 , M * v

# Gaussian Elimination
b = M * v
M \ b    # solve back for v

# Named functions
f(x) = 10x

function g(x)
    return x * 10
end

@show f(5)
@show g(5) ;

# Anonymous functions assigned to variables
h = x -> x * 10

i = function(x)
    x * 10
end

@show h(5)
@show i(5) ;

# Operators are functions
+(4,5)
p = +
p(2,3)

# Multiple dispatch: one function name, one method per argument type
bar(x::String) = println("You entered the string: $x")
bar(x::Integer) = x * 10
bar(x::NewType) = println(x.s)

methods(bar)

bar("Hello")
bar(new_t)
bar(5)

# Adding strings
"Hello" + "World"    # throws a MethodError: + is not defined for two strings (yet)

# But the addition operator is a function, so we can apply multiple dispatch
import Base: +    # extending a Base function requires an explicit import
+(a::String, b::String) = a * b
"Hello" + "World"

+(a::Number, b::String) = string(a) + b
+(a::String, b::Number) = a + string(b)
99 + "bottles"

# Method Overloading
type SimpleObject
    data::Union(Integer, String)
    set::Function

    function SimpleObject()
        this = new()
        this.data = ""

        function setter(x::Integer)
            println("Setting an integer")
            this.data = x
        end
        function setter(x::String)
            println("Setting a string")
            this.data = x
        end
        this.set = setter

        return this
    end
end

obj = SimpleObject()
obj.set(99)
obj.set("hello")

# Sum of odd integers between 1 and 5
values = 1:5

myMapper  = x -> x              # identity map
myFilter  = x -> x % 2 == 1     # keep odd values
myReducer = (x,y) -> x + y      # sum

mapped   = map( myMapper, values )
filtered = filter( myFilter, mapped )
reduced  = reduce( myReducer, filtered )

# Code Generation
# Functions for exponentiating to the powers of 1 to 5
for n in 1:5
    s = "power$n(x) = x ^ $n"
    println(s)
    expression = parse(s)
    eval(expression)
end

power5( 2 )
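# A variant sketch of the same idea: instead of building and parsing strings,
# @eval can splice the function name and exponent directly into a definition.
# The helper variable fname is introduced only for this sketch.
for n in 1:5
    fname = symbol("power$n")      # e.g. :power3
    @eval $fname(x) = x ^ $n       # defines power<n>(x) = x ^ n in global scope
end
@show power3(2) ;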
# Macros: Crude Timer Example
macro timeit(expression)
    quote
        t = time()
        result = $expression    # evaluation
        elapsed = time() - t
        println( "elapsed time: ", elapsed )
        result                  # the block's last expression is its value
    end
end

@timeit cos(2pi)
@timeit cos(2pi)

# Descriptive Statistics
using StatsBase

x = rand(100)    # uniform distribution [0,1)
println( "mean: ", mean(x) )
println( "variance: ", var(x) )
println( "skewness: ", skewness(x) )
println( "kurtosis: ", kurtosis(x) )

describe(x)

# Probability Distributions
using Distributions

distr = Normal(0, 2)
println( "pdf @ origin = ", pdf(distr, 0.0) )
println( "cdf @ origin = ", cdf(distr, 0.0) )

x = rand(distr, 1000)
fit_mle(Normal, x)

# Tabular Data
using DataFrames

df = DataFrame(
    A = [6, 3, 4],
    B = ["a", "b", "c"],
    C = [1//2, 3//4, 5//6],
    D = [true, true, false]
)

df[:C][2] = NA
df

# Joins
names = DataFrame(ID = [5, 4], Name = ["Jack", "Jill"])
jobs  = DataFrame(ID = [5, 4], Job  = ["Lawyer", "Doctor"])
full  = join(names, jobs, on = :ID)

# The iris dataset
using RDatasets

iris = dataset("datasets", "iris")
head(iris)

# Group by Species, then compute mean of PetalLength per group
by( iris, :Species, df -> mean(df[:PetalLength]) )

# Plotting with ASCIIPlots, Winston, and Gadfly
using ASCIIPlots

x = iris[:PetalLength]
y = iris[:PetalWidth]
scatterplot(x, y)

using Winston

scatter(x, y, ".")
xlabel("PetalLength")
ylabel("PetalWidth")

using Gadfly

set_default_plot_size(20cm, 12cm)
plot(iris, x = "PetalLength", y = "PetalWidth",
     color = "Species", Geom.point)

# K-means Clustering
using Clustering

# Clustering.jl expects one column per sample, hence the transpose
features = array(iris[:, 1:4])'    # use matrix() on Julia v0.2
result = kmeans( features, 3 )     # into 3 clusters

plot(iris, x = "PetalLength", y = "PetalWidth",
     color = result.assignments, Geom.point)

# Principal Component Analysis
using MultivariateStats

pc = fit(PCA, features; maxoutdim = 2)
reduced = transform(pc, features)
@show size(reduced)

plot(iris, x = reduced[1,:], y = reduced[2,:],
     color = "Species", Geom.point)

# Linear Regression
using MultivariateStats

# Generate a noisy linear system
features = rand(1000, 3)                           # feature matrix
coeffs = rand(3)                                   # ground truth of weights
targets = features * coeffs + 0.1 * randn(1000)    # generate response

# Linear Least Squares Regression
coeffs_llsq = llsq(features, targets; bias=false)

# Ridge Regression
coeffs_ridge = ridge(features, targets, 0.1; bias=false)    # regularization coef = 0.1

@show coeffs
@show coeffs_llsq
@show coeffs_ridge ;

# Cross Validation: K-Fold Example
using MLBase, MultivariateStats

n = length(targets)

# Define training and error evaluation functions
function training(inds)
    coeffs = ridge(features[inds, :], targets[inds], 0.1; bias=false)
    return coeffs
end

function error_evaluation(coeffs, inds)
    y = features[inds, :] * coeffs
    rms_error = sqrt(mean(abs2(targets[inds] .- y)))
    return rms_error
end

# Cross validate
scores = cross_validate(
    inds -> training(inds),
    (coeffs, inds) -> error_evaluation(coeffs, inds),
    n,              # total number of samples
    Kfold(n, 3))    # cross validation plan: 3-fold

# Get the mean and std of scores
@show scores
@show mean_and_std(scores) ;
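# A variant sketch: MLBase provides other cross-validation plans besides Kfold,
# e.g. LOOCV (leave-one-out). This reuses training/error_evaluation from above;
# with n = 1000 samples it runs 1000 folds, so it is noticeably slower.
scores_loo = cross_validate(
    inds -> training(inds),
    (coeffs, inds) -> error_evaluation(coeffs, inds),
    n,            # total number of samples
    LOOCV(n))     # leave-one-out plan: n folds, each holding out one sample
@show mean_and_std(scores_loo) ;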
# Model Tuning: Grid Search
using MLBase, MultivariateStats

# Hold out 20% of records for testing
n_test = int(length(targets) * 0.2)
train_rows = shuffle([1:length(targets)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
targets_train, targets_test = targets[train_rows], targets[!train_rows]

# Define estimation function
function estfun(regcoef, bias)
    coeffs = ridge(features_train, targets_train, regcoef; bias=bias)
    return bias ? (coeffs[1:end-1], coeffs[end]) : (coeffs, 0.0)
end

# Define error evaluation function as mean squared deviation
evalfun(coeffs) = msd(features_test * coeffs[1] + coeffs[2], targets_test)

result = gridtune(estfun, evalfun,
                  ("regcoef", [0.01, 0.1, 1.0]),
                  ("bias", [true, false]);
                  ord=Reverse,     # smaller msd value indicates a better model
                  verbose=true)    # show progress information

best_model, best_config, best_score = result

# Print results
coeffs, bias = best_model
println("Best model:")
println(" coeffs = $(coeffs')")
println(" bias = $bias")
println("Best config: regcoef = $(best_config[1]), bias = $(best_config[2])")
println("Best score: $(best_score)")

# Regression Tree
using DecisionTree

# Train model, make predictions on test records
model = build_tree(targets_train, features_train)
predictions = apply_tree(model, features_test)

@show cor(targets_test, predictions)
@show R2(targets_test, predictions)

scatter(targets_test, predictions, ".")
xlabel("actual"); ylabel("predicted")

# Support Vector Machine
using LIBSVM

features = array(iris[:, 1:4])
labels = array(iris[:Species])

# Hold out 20% of records for testing
n_test = int(length(labels) * 0.2)
train_rows = shuffle([1:length(labels)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
labels_train, labels_test = labels[train_rows], labels[!train_rows]

# LIBSVM expects one column per sample, hence the transposes
model = svmtrain(labels_train, features_train')
(predictions, decision_values) = svmpredict(model, features_test')

confusion_matrix(labels_test, predictions)

# Random Forest
using DecisionTree

# Train forest using 2 random features per split and 10 trees
model = build_forest(labels_train, features_train, 2, 10)
predictions = apply_forest(model, features_test)

# Pretty print of one tree in the forest
print_tree(model.trees[1])

confusion_matrix(labels_test, predictions)
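# A closing sketch: DecisionTree also bundles n-fold cross validation helpers.
# This call assumes the nfoldCV_forest(labels, features, n_subfeatures, n_trees,
# n_folds) signature of DecisionTree.jl releases from this era; it prints a
# confusion matrix per fold and returns the per-fold accuracies.
accuracies = nfoldCV_forest(labels, features, 2, 10, 3)
@show mean(accuracies) ;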