# One-time setup: uncomment the next line to install the DataFrames package.
#Pkg.add("DataFrames")
using DataFrames
# Read the weather CSV (path is relative to the working directory) into `df`.
df = readtable("input/weather.csv")
Station | Date | Tmax | Tmin | Tavg | Depart | DewPoint | WetBulb | Heat | Cool | Sunrise | Sunset | CodeSum | Depth | Water1 | SnowFall | PrecipTotal | StnPressure | SeaLevel | ResultSpeed | ResultDir | AvgSpeed | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 2007-05-01 | 83 | 50 | 67 | 14 | 51 | 56 | 0 | 2 | 0448 | 1849 | 0 | M | 0.0 | 0.00 | 29.10 | 29.82 | 1.7 | 27 | 9.2 | |
2 | 2 | 2007-05-01 | 84 | 52 | 68 | M | 51 | 57 | 0 | 3 | - | - | M | M | M | 0.00 | 29.18 | 29.82 | 2.7 | 25 | 9.6 | |
3 | 1 | 2007-05-02 | 59 | 42 | 51 | -3 | 42 | 47 | 14 | 0 | 0447 | 1850 | BR | 0 | M | 0.0 | 0.00 | 29.38 | 30.09 | 13.0 | 4 | 13.4 |
4 | 2 | 2007-05-02 | 60 | 43 | 52 | M | 42 | 47 | 13 | 0 | - | - | BR HZ | M | M | M | 0.00 | 29.44 | 30.08 | 13.3 | 2 | 13.4 |
5 | 1 | 2007-05-03 | 66 | 46 | 56 | 2 | 40 | 48 | 9 | 0 | 0446 | 1851 | 0 | M | 0.0 | 0.00 | 29.39 | 30.12 | 11.7 | 7 | 11.9 | |
6 | 2 | 2007-05-03 | 67 | 48 | 58 | M | 40 | 50 | 7 | 0 | - | - | HZ | M | M | M | 0.00 | 29.46 | 30.12 | 12.9 | 6 | 13.2 |
7 | 1 | 2007-05-04 | 66 | 49 | 58 | 4 | 41 | 50 | 7 | 0 | 0444 | 1852 | RA | 0 | M | 0.0 | T | 29.31 | 30.05 | 10.4 | 8 | 10.8 |
8 | 2 | 2007-05-04 | 78 | 51 | M | M | 42 | 50 | M | M | - | - | M | M | M | 0.00 | 29.36 | 30.04 | 10.1 | 7 | 10.4 | |
9 | 1 | 2007-05-05 | 66 | 53 | 60 | 5 | 38 | 49 | 5 | 0 | 0443 | 1853 | 0 | M | 0.0 | T | 29.40 | 30.10 | 11.7 | 7 | 12.0 | |
10 | 2 | 2007-05-05 | 66 | 54 | 60 | M | 39 | 50 | 5 | 0 | - | - | M | M | M | T | 29.46 | 30.09 | 11.2 | 7 | 11.5 | |
11 | 1 | 2007-05-06 | 68 | 49 | 59 | 4 | 30 | 46 | 6 | 0 | 0442 | 1855 | 0 | M | 0.0 | 0.00 | 29.57 | 30.29 | 14.4 | 11 | 15.0 | |
12 | 2 | 2007-05-06 | 68 | 52 | 60 | M | 30 | 46 | 5 | 0 | - | - | M | M | M | 0.00 | 29.62 | 30.28 | 13.8 | 10 | 14.5 | |
13 | 1 | 2007-05-07 | 83 | 47 | 65 | 10 | 41 | 54 | 0 | 0 | 0441 | 1856 | RA | 0 | M | 0.0 | T | 29.38 | 30.12 | 8.6 | 18 | 10.5 |
14 | 2 | 2007-05-07 | 84 | 50 | 67 | M | 39 | 53 | 0 | 2 | - | - | M | M | M | 0.00 | 29.44 | 30.12 | 8.5 | 17 | 9.9 | |
15 | 1 | 2007-05-08 | 82 | 54 | 68 | 12 | 58 | 62 | 0 | 3 | 0439 | 1857 | BR | 0 | M | 0.0 | 0.00 | 29.29 | 30.03 | 2.7 | 11 | 5.8 |
16 | 2 | 2007-05-08 | 80 | 60 | 70 | M | 57 | 63 | 0 | 5 | - | - | HZ | M | M | M | T | 29.36 | 30.02 | 2.5 | 8 | 5.4 |
17 | 1 | 2007-05-09 | 77 | 61 | 69 | 13 | 59 | 63 | 0 | 4 | 0438 | 1858 | BR HZ | 0 | M | 0.0 | 0.13 | 29.21 | 29.94 | 3.9 | 9 | 6.2 |
18 | 2 | 2007-05-09 | 76 | 63 | 70 | M | 60 | 63 | 0 | 5 | - | - | BR HZ | M | M | M | 0.02 | 29.28 | 29.93 | 3.9 | 7 | 5.9 |
19 | 1 | 2007-05-10 | 84 | 56 | 70 | 14 | 52 | 60 | 0 | 5 | 0437 | 1859 | BR | 0 | M | 0.0 | 0.00 | 29.20 | 29.92 | 0.7 | 17 | 4.1 |
20 | 2 | 2007-05-10 | 83 | 59 | 71 | M | 52 | 61 | 0 | 6 | - | - | BR HZ | M | M | M | 0.00 | 29.26 | 29.91 | 2.0 | 9 | 3.9 |
21 | 1 | 2007-05-11 | 70 | 51 | 61 | 4 | 42 | 51 | 4 | 0 | 0436 | 1860 | 0 | M | 0.0 | 0.00 | 29.33 | 30.04 | 11.3 | 3 | 12.9 | |
22 | 2 | 2007-05-11 | 73 | 49 | 61 | M | 44 | 51 | 4 | 0 | - | - | M | M | M | 0.00 | 29.39 | 30.03 | 11.7 | 36 | 12.8 | |
23 | 1 | 2007-05-12 | 64 | 46 | 55 | -2 | 36 | 46 | 10 | 0 | 0435 | 1901 | 0 | M | 0.0 | 0.00 | 29.49 | 30.20 | 12.4 | 3 | 12.9 | |
24 | 2 | 2007-05-12 | 65 | 47 | 56 | M | 37 | 46 | 9 | 0 | - | - | M | M | M | 0.00 | 29.54 | 30.19 | 12.7 | 1 | 13.0 | |
25 | 1 | 2007-05-13 | 69 | 43 | 56 | -2 | 33 | 46 | 9 | 0 | 0434 | 1902 | 0 | M | 0.0 | 0.00 | 29.49 | 30.24 | 6.6 | 14 | 8.1 | |
26 | 2 | 2007-05-13 | 69 | 44 | 57 | M | 32 | 46 | 8 | 0 | - | - | M | M | M | 0.00 | 29.55 | 30.24 | 6.4 | 11 | 7.6 | |
27 | 1 | 2007-05-14 | 90 | 56 | 73 | 15 | 47 | 59 | 0 | 8 | 0433 | 1903 | 0 | M | 0.0 | 0.00 | 29.23 | 29.97 | 16.9 | 21 | 17.3 | |
28 | 2 | 2007-05-14 | 90 | 54 | 72 | M | 45 | 58 | 0 | 7 | - | - | M | M | M | 0.00 | 29.31 | 29.98 | 14.1 | 21 | 14.6 | |
29 | 1 | 2007-05-15 | 80 | 57 | 69 | 11 | 56 | 61 | 0 | 4 | 0432 | 1904 | RA BR | 0 | M | 0.0 | 0.38 | 29.13 | 29.84 | 8.1 | 27 | 12.3 |
30 | 2 | 2007-05-15 | 82 | 56 | 69 | M | 56 | 61 | 0 | 4 | - | - | TSRA RA BR | M | M | M | 0.60 | 29.19 | 29.83 | 8.1 | 25 | 10.8 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
# NOTE(review): a single range index is presumably taken as a column selector
# by this DataFrames version (column 1, then columns 2-10) - confirm.
df[1:1]
df[2:10]
# Two indices: rows first, then columns. This shows rows 1-2 of columns 2-10
# (Date through Cool).
df[1:2,2:10]
Date | Tmax | Tmin | Tavg | Depart | DewPoint | WetBulb | Heat | Cool | |
---|---|---|---|---|---|---|---|---|---|
1 | 2007-05-01 | 83 | 50 | 67 | 14 | 51 | 56 | 0 | 2 |
2 | 2007-05-01 | 84 | 52 | 68 | M | 51 | 57 | 0 | 3 |
# List the column names of `df` as Symbols.
names(df)
22-element Array{Symbol,1}: :Station :Date :Tmax :Tmin :Tavg :Depart :DewPoint :WetBulb :Heat :Cool :Sunrise :Sunset :CodeSum :Depth :Water1 :SnowFall :PrecipTotal :StnPressure :SeaLevel :ResultSpeed :ResultDir :AvgSpeed
# Rename the Date column to Day in place, then fetch that column by name.
rename!(df,:Date,:Day)
df[:Day]
2944-element DataArray{UTF8String,1}: "2007-05-01" "2007-05-01" "2007-05-02" "2007-05-02" "2007-05-03" "2007-05-03" "2007-05-04" "2007-05-04" "2007-05-05" "2007-05-05" "2007-05-06" "2007-05-06" "2007-05-07" ⋮ "2014-10-26" "2014-10-26" "2014-10-27" "2014-10-27" "2014-10-28" "2014-10-28" "2014-10-29" "2014-10-29" "2014-10-30" "2014-10-30" "2014-10-31" "2014-10-31"
# All rows, restricted to the Tmax and Depart columns.
df[:,[:Tmax,:Depart]]
Tmax | Depart | |
---|---|---|
1 | 83 | 14 |
2 | 84 | M |
3 | 59 | -3 |
4 | 60 | M |
5 | 66 | 2 |
6 | 67 | M |
7 | 66 | 4 |
8 | 78 | M |
9 | 66 | 5 |
10 | 66 | M |
11 | 68 | 4 |
12 | 68 | M |
13 | 83 | 10 |
14 | 84 | M |
15 | 82 | 12 |
16 | 80 | M |
17 | 77 | 13 |
18 | 76 | M |
19 | 84 | 14 |
20 | 83 | M |
21 | 70 | 4 |
22 | 73 | M |
23 | 64 | -2 |
24 | 65 | M |
25 | 69 | -2 |
26 | 69 | M |
27 | 90 | 15 |
28 | 90 | M |
29 | 80 | 11 |
30 | 82 | M |
⋮ | ⋮ | ⋮ |
# Rows whose average temperature is 68.
# Tavg is read as a string column because it contains the "M" placeholder,
# so the comparison must use the string "68"; comparing with the integer 68
# never matches and selects no rows.
df[df[:Tavg].=="68",:]
Station | Day | Tmax | Tmin | Tavg | Depart | DewPoint | WetBulb | Heat | Cool | Sunrise | Sunset | CodeSum | Depth | Water1 | SnowFall | PrecipTotal | StnPressure | SeaLevel | ResultSpeed | ResultDir | AvgSpeed |
---|
# Cool entries that hold the "M" placeholder (presumably "missing" - confirm
# against the dataset description).
df[df[:Cool].=="M",:Cool]
11-element DataArray{UTF8String,1}: "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M"
# Replace the "M" placeholder in the Cool column with NA.
# The row mask is built with isequal so that it is a plain Bool vector:
# `.==` yields NA wherever the column already holds NA (for example when this
# cell is run a second time), and indexing with such a mask raises
# "non-boolean (NAtype) used in boolean context".
df[Bool[isequal(x, "M") for x in df[:Cool]], :Cool] = NA
type: non-boolean (NAtype) used in boolean context
# Start again from the raw file and replace the "M" placeholder with NA in
# every column.
df = readtable("input/weather.csv")
for name in names(df)
    # isequal gives a plain Bool mask even for columns that already contain NA;
    # `.==` would put NA into the mask and make the row indexing fail.
    ismarker = Bool[isequal(x, "M") for x in df[name]]
    df[ismarker, name] = NA
end
# RDatasets provides example datasets grouped by package; list those packages.
using RDatasets
RDatasets.packages()
Package | Title | |
---|---|---|
1 | COUNT | Functions, data and code for count data. |
2 | Ecdat | Data sets for econometrics |
3 | HSAUR | A Handbook of Statistical Analyses Using R (1st Edition) |
4 | HistData | Data sets from the history of statistics and data visualization |
5 | ISLR | Data for An Introduction to Statistical Learning with Applications in R |
6 | KMsurv | Data sets from Klein and Moeschberger (1997), Survival Analysis |
7 | MASS | Support Functions and Datasets for Venables and Ripley's MASS |
8 | SASmixed | Data sets from "SAS System for Mixed Models" |
9 | Zelig | Everyone's Statistical Software |
10 | adehabitatLT | Analysis of Animal Movements |
11 | boot | Bootstrap Functions (Originally by Angelo Canty for S) |
12 | car | Companion to Applied Regression |
13 | cluster | Cluster Analysis Extended Rousseeuw et al. |
14 | datasets | The R Datasets Package |
15 | gap | Genetic analysis package |
16 | ggplot2 | An Implementation of the Grammar of Graphics |
17 | lattice | Lattice Graphics |
18 | lme4 | Linear mixed-effects models using Eigen and S4 |
19 | mgcv | Mixed GAM Computation Vehicle with GCV/AIC/REML smoothness estimation |
20 | mlmRev | Examples from Multilevel Modelling Software Review |
21 | nlreg | Higher Order Inference for Nonlinear Heteroscedastic Models |
22 | plm | Linear Models for Panel Data |
23 | plyr | Tools for splitting, applying and combining data |
24 | pscl | Political Science Computational Laboratory, Stanford University |
25 | psych | Procedures for Psychological, Psychometric, and Personality Research |
26 | quantreg | Quantile Regression |
27 | reshape2 | Flexibly Reshape Data: A Reboot of the Reshape Package. |
28 | robustbase | Basic Robust Statistics |
29 | rpart | Recursive Partitioning and Regression Trees |
30 | sandwich | Robust Covariance Matrix Estimators |
⋮ | ⋮ | ⋮ |
# List the datasets available in the COUNT package, with their row and column counts.
RDatasets.datasets("COUNT")
Package | Dataset | Title | Rows | Columns | |
---|---|---|---|---|---|
1 | COUNT | affairs | affairs | 601 | 18 |
2 | COUNT | azdrg112 | azdrg112 | 1798 | 4 |
3 | COUNT | azpro | azpro | 3589 | 6 |
4 | COUNT | badhealth | badhealth | 1127 | 3 |
5 | COUNT | fasttrakg | fasttrakg | 15 | 9 |
6 | COUNT | lbw | lbw | 189 | 10 |
7 | COUNT | lbwgrp | lbwgrp | 6 | 7 |
8 | COUNT | loomis | loomis | 410 | 11 |
9 | COUNT | mdvis | mdvis | 2227 | 13 |
10 | COUNT | medpar | medpar | 1495 | 10 |
11 | COUNT | rwm | rwm | 27326 | 4 |
12 | COUNT | rwm5yr | rwm5yr | 19609 | 17 |
13 | COUNT | ships | ships | 40 | 7 |
14 | COUNT | titanic | titanic | 1316 | 4 |
15 | COUNT | titanicgrp | titanicgrp | 12 | 5 |
# Load the "lbw" dataset from the COUNT package into `lbw`.
lbw = dataset("COUNT", "lbw")
Low | Smoke | Race | Age | LWt | PTL | Ht | UI | FTV | BWt | |
---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 0 | 2 | 19 | 182 | 0 | 0 | 1 | 0 | 2523 |
2 | 0 | 0 | 3 | 33 | 155 | 0 | 0 | 0 | 3 | 2551 |
3 | 0 | 1 | 1 | 20 | 105 | 0 | 0 | 0 | 1 | 2557 |
4 | 0 | 1 | 1 | 21 | 108 | 0 | 0 | 1 | 2 | 2594 |
5 | 0 | 1 | 1 | 18 | 107 | 0 | 0 | 1 | 0 | 2600 |
6 | 0 | 0 | 3 | 21 | 124 | 0 | 0 | 0 | 0 | 2622 |
7 | 0 | 0 | 1 | 22 | 118 | 0 | 0 | 0 | 1 | 2637 |
8 | 0 | 0 | 3 | 17 | 103 | 0 | 0 | 0 | 1 | 2637 |
9 | 0 | 1 | 1 | 29 | 123 | 0 | 0 | 0 | 1 | 2663 |
10 | 0 | 1 | 1 | 26 | 113 | 0 | 0 | 0 | 0 | 2665 |
11 | 0 | 0 | 3 | 19 | 95 | 0 | 0 | 0 | 0 | 2722 |
12 | 0 | 0 | 3 | 19 | 150 | 0 | 0 | 0 | 1 | 2733 |
13 | 0 | 0 | 3 | 22 | 95 | 0 | 1 | 0 | 0 | 2750 |
14 | 0 | 0 | 3 | 30 | 107 | 1 | 0 | 1 | 2 | 2750 |
15 | 0 | 1 | 1 | 18 | 100 | 0 | 0 | 0 | 0 | 2769 |
16 | 0 | 1 | 1 | 18 | 100 | 0 | 0 | 0 | 0 | 2769 |
17 | 0 | 0 | 2 | 15 | 98 | 0 | 0 | 0 | 0 | 2778 |
18 | 0 | 1 | 1 | 25 | 118 | 0 | 0 | 0 | 3 | 2782 |
19 | 0 | 0 | 3 | 20 | 120 | 0 | 0 | 1 | 0 | 2807 |
20 | 0 | 1 | 1 | 28 | 120 | 0 | 0 | 0 | 1 | 2821 |
21 | 0 | 0 | 3 | 32 | 121 | 0 | 0 | 0 | 2 | 2835 |
22 | 0 | 0 | 1 | 31 | 100 | 0 | 0 | 1 | 3 | 2835 |
23 | 0 | 0 | 1 | 36 | 202 | 0 | 0 | 0 | 1 | 2836 |
24 | 0 | 0 | 3 | 28 | 120 | 0 | 0 | 0 | 0 | 2863 |
25 | 0 | 0 | 3 | 25 | 120 | 0 | 0 | 1 | 2 | 2877 |
26 | 0 | 0 | 1 | 28 | 167 | 0 | 0 | 0 | 0 | 2877 |
27 | 0 | 1 | 1 | 17 | 122 | 0 | 0 | 0 | 0 | 2906 |
28 | 0 | 0 | 1 | 29 | 150 | 0 | 0 | 0 | 2 | 2920 |
29 | 0 | 1 | 2 | 26 | 168 | 0 | 0 | 0 | 0 | 2920 |
30 | 0 | 0 | 2 | 17 | 113 | 0 | 0 | 0 | 1 | 2920 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |