# Install prerequisites
# From CRAN
install.packages(c("remotes", "RstoxData", "data.table", "worms"))
# From GitHub: the "develop" branch of the Sea2Data/Rstox repository
remotes::install_github("Sea2Data/Rstox", ref = "develop")
Updating HTML index of packages in '.Library' Making 'packages.html' ... done Skipping install of 'Rstox' from a github remote, the SHA1 (e2246bf3) has not changed since last install. Use `force = TRUE` to force installation
# Load libraries
library(Rstox)
library(RstoxData)
library(data.table)
Rstox_1.11.1 ********** WARNING: This version of Rstox is an unofficial/developer version and bugs should be expected. If problems with Java Memory such as java.lang.OutOfMemoryError occurs, see ?setJavaMemory. ********** Attaching package: ‘RstoxData’ The following object is masked from ‘package:Rstox’: readErsFile
#
# (1) Function for adding missing scientific names
#
# Replace the scientific names of the catchsample table with names looked up
# from its Aphia IDs.
#
# Args:
#   data: list of biotic tables holding a data.table `catchsample` with an
#     `aphia` column (e.g. the result of getSurveySeriesData() called with
#     combine_year = TRUE).
#
# Returns:
#   `data` with `catchsample` replaced by the merge of the original table
#   (minus any existing `scientificname` column) and the looked-up names.
#   Rows whose Aphia ID is missing or has no match get NA as scientific name.
#   If the table holds no non-missing Aphia IDs, `data` is returned unchanged
#   with a warning and nothing is downloaded.
#
# Note: the taxa annotation script is fetched from GitHub on every lookup, so
# network access is required.
getScientificNames <- function(data) {
  catch <- data$catchsample
  if (is.null(catch) || !("aphia" %in% names(catch))) {
    stop("'data$catchsample' must be a table with an 'aphia' column",
         call. = FALSE)
  }

  # Get the list of unique, non-missing aphias
  aphias <- unlist(unique(catch[!is.na(aphia), "aphia"]))
  if (length(aphias) == 0) {
    # Nothing to look up: skip the download and the remote request entirely
    warning("No Aphia IDs found in 'data$catchsample'; nothing to look up",
            call. = FALSE)
    return(data)
  }

  # Source Edvin's script (expected to define makeTaxaTable())
  source("https://github.com/Sea2Data/cruisetools/raw/master/taxaAnnotation/annotateTaxa.R")

  # Make taxa table
  taxaTable <- makeTaxaTable(aphias)

  # Drop the old scientificname column only if it is actually there, so the
  # function also works on tables that never had one
  keep <- setdiff(names(catch), "scientificname")

  # Merge with the NMD biotic catchsample; all.x keeps rows without a match
  data$catchsample <- merge(
    catch[, keep, with = FALSE],
    taxaTable[, c("AphiaID", "scientificname")],
    by.x = "aphia", by.y = "AphiaID", all.x = TRUE
  )

  # Now scientificname is properly populated
  data
}
#
# (2) Function for downloading all cruises of the selected cruise series
#
# Download all cruises of an NMD cruise series and read their biotic files.
#
# Args:
#   surveyName: name of the cruise series; passed to getNMDdata() as `cruise`.
#   combine_year: if TRUE, row-bind each table level (fishstation,
#     catchsample, individual, agedetermination) across all biotic files.
#   combine_table: if TRUE together with combine_year, additionally merge the
#     four combined tables into a single table. Has no effect (a warning is
#     issued) when combine_year is FALSE.
#
# Returns:
#   - combine_year = FALSE: the output of ReadBiotic() for all files.
#   - combine_year = TRUE: a list with the elements fishstation, catchsample,
#     individual and agedetermination.
#   - combine_year = TRUE and combine_table = TRUE: one merged table.
getSurveySeriesData <- function(surveyName, combine_year = FALSE, combine_table = FALSE) {
  if (combine_table && !combine_year) {
    # Previously this combination was ignored silently
    warning("'combine_table = TRUE' has no effect unless 'combine_year = TRUE'",
            call. = FALSE)
  }

  # Download cruise series as ReadBiotic only project into StoX directory
  modelBio <- list("ReadBioticXML")
  projects <- getNMDdata(cruise = surveyName, group = "all", model = modelBio,
                         abbrev = FALSE, subdir = TRUE, ow = TRUE)

  # Load all biotic files (projects should contain the full path to the files)
  files <- list.files(paste0(projects, "/input/biotic/"), full.names = TRUE)
  biotic <- ReadBiotic(files) # This can take a while for a long cruise series

  if (!combine_year) {
    return(biotic)
  }

  # Stack one table level across all files using data.table's rbindlist.
  # Each level is bound exactly once; no per-file loop is needed because
  # lapply() already visits every file.
  bindLevel <- function(level) {
    rbindlist(lapply(biotic, "[[", level))
  }
  station.dt <- bindLevel("fishstation")
  catch.dt <- bindLevel("catchsample")
  individual.dt <- bindLevel("individual")
  age.dt <- bindLevel("agedetermination")

  if (!combine_table) {
    return(list(
      fishstation = station.dt,
      catchsample = catch.dt,
      individual = individual.dt,
      agedetermination = age.dt
    ))
  }

  # Merge all data (all.x means include empty stations too)
  all.dt <- merge(station.dt, catch.dt, all.x = TRUE)
  all.dt <- merge(all.dt, individual.dt,
                  by = intersect(names(all.dt), names(individual.dt)),
                  all.x = TRUE)

  # Age is a bit tricky to merge: besides the shared columns, the
  # individual's preferredagereading is matched against agedeterminationid
  sharedCols <- intersect(names(all.dt), names(age.dt))
  all.dt <- merge(all.dt, age.dt,
                  by.x = c(sharedCols, "preferredagereading"),
                  by.y = c(sharedCols, "agedeterminationid"),
                  all.x = TRUE)
  all.dt
}
# Start downloading data
# List all available cruise series and show them as a one-column table
CS <- getNMDinfo("cs", recursive=FALSE)
as.data.frame(CS)
CS |
---|
<chr> |
Atlantic Ocean West of British Isles INT blue whiting spawning survey in spring |
Barents Sea NOR demersal fish cruise in August-September |
Barents Sea NOR demersel fish cruise in October-November |
Barents Sea NOR-RUS 0-group cruise in autumn |
Barents Sea NOR-RUS ecosystem cruise in autumn |
North Sea International ecosystem cruise in Q2_Q3 |
North Sea International IBTS cruise in Q1 |
North Sea International IBTS cruise in Q2_Q3 |
North Sea International IBTS cruise in Q4 |
North Sea NOR mackerel cruise in summer |
North Sea NOR Sandeel cruise in Apr_May |
North Sea NOR seiskalle cruise in spring |
North Sea NOR shrimp NDSK cruise in Jan_Nov |
Norwegian Sea continental slope NOR deep-sea fish cruise in autumn |
Norwegian Sea International ecosystem cruise in May |
Norwegian Sea NOR mackerel cruise in summer |
Norwegian Sea NOR Norwegian spring-spawning herring spawning cruise in Feb_Mar |
Norwegian Sea NOR pelagic deep-sea fish cruise in summer |
Norwegian Sea NOR salmon cruise in summer |
Skagerrak NOR beach seine survey in autumn |
Varanger Stad NOR coastal cruise in autumn |
Barents Sea NOR-RUS demersal fish cruise in winter |
Lofoten NOR demersal fish cruise in Mar_Apr |
North Sea NOR Herring Acoustic Survey in summer |
Norwegian Sea continental slope NOR deep-sea fish cruise in spring |
Porsangerfjorden Tanafjorden Kvænangen NOR ecosystem mapping in spring_autumn |
Barents Sea NOR capelin spawning cruise in spring |
Coast of Norway costal sprat acoustic survey yearly |
# Pick the "Barents Sea NOR-RUS ecosystem cruise in autumn" cruise series.
# It is selected by name rather than by position (it was no. 5 in the list
# when this was written), so the choice does not silently change if the
# order of the list returned by NMD changes.
csName <- "Barents Sea NOR-RUS ecosystem cruise in autumn"
csIndex <- match(csName, CS)
if (is.na(csIndex)) {
  stop("Cruise series not found in NMD: ", csName, call. = FALSE)
}
myCS <- CS[csIndex]
# Show the cruises that belong to the selected cruise series
getNMDinfo(c("cs", myCS))
code | Cruise | ShipName | Year | |
---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | |
3 | 1 | 0087_2003_UFVZ_TSIVI | Tsivilsk | 2003 |
6 | 2 | 2003110 | G.O.Sars | 2003 |
1 | 3 | 2003209 | Johan Hjort | 2003 |
4 | 4 | 2003703 | Jan Mayen | 2003 |
2 | 5 | 2003705 | Jan Mayen | 2003 |
5 | 6 | 0115_2003_UFFJ_SMOLE | Smolensk | 2003 |
9 | 1 | 2004210 | Johan Hjort | 2004 |
7 | 2 | 2004702 | Jan Mayen | 2004 |
10 | 3 | 2004703 | Jan Mayen | 2004 |
8 | 4 | 0118_2004_UFFJ_SMOLE | Smolensk | 2004 |
11 | 5 | 0088_2004_UANA_NANSE | Fridtjof Nansen | 2004 |
15 | 1 | 2005111 | G.O.Sars | 2005 |
12 | 2 | 2005209 | Johan Hjort | 2005 |
16 | 3 | 2005702 | Jan Mayen | 2005 |
13 | 4 | 2005703 | Jan Mayen | 2005 |
17 | 5 | 0093_2005_UANA_NANSE | Fridtjof Nansen | 2005 |
14 | 6 | 0092_2005_UFJJ_SMOLE | Smolensk | 2005 |
18 | 1 | 2006702 | Jan Mayen | 2006 |
21 | 2 | 2006211 | Johan Hjort | 2006 |
19 | 3 | 2006704 | Jan Mayen | 2006 |
23 | 4 | 2006113 | G.O.Sars | 2006 |
20 | 5 | 0095_2006_UFJJ_SMOLE | Smolensk | 2006 |
22 | 6 | 0094_2006_UANA_NANSE | Fridtjof Nansen | 2006 |
26 | 1 | 2007110 | G.O.Sars | 2007 |
24 | 2 | 2007210 | Johan Hjort | 2007 |
27 | 3 | 2007702 | Jan Mayen | 2007 |
25 | 4 | 0096_2007_UFJJ_SMOLE | Smolensk | 2007 |
28 | 5 | 0097_2007_UFJN_VILNY | Vilnyus | 2007 |
29 | 1 | 2008106 | G.O.Sars | 2008 |
32 | 2 | 2008703 | Jan Mayen | 2008 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
54 | 3 | 2013111 | G.O.Sars | 2013 |
53 | 4 | 0112_2013_UFJN_VILNY | Vilnyus | 2013 |
55 | 1 | 2014212 | Johan Hjort | 2014 |
57 | 2 | 2014806 | Helmer Hanssen | 2014 |
56 | 3 | 2014116 | G.O.Sars | 2014 |
58 | 4 | 0116_2014_UFJN_VILNY | Vilnyus | 2014 |
61 | 1 | 2015210 | Johan Hjort | 2015 |
59 | 2 | 2015843 | Helmer Hanssen | 2015 |
62 | 3 | 2015114 | G.O.Sars | 2015 |
60 | 4 | 0117_2015_UFJN_VILNY | Vilnyus | 2015 |
63 | 1 | 2016209 | Johan Hjort | 2016 |
65 | 2 | 2016847 | Helmer Hanssen | 2016 |
64 | 3 | 2016842 | Eros | 2016 |
66 | 4 | 0142_2016_UANA_NANSE | Fridtjof Nansen | 2016 |
69 | 1 | 2017209 | Johan Hjort | 2017 |
67 | 2 | 2017113 | G.O.Sars | 2017 |
70 | 3 | 2017856 | Helmer Hanssen | 2017 |
68 | 4 | 0143_2017_UFJN_VLNY | Vilnyus | 2017 |
71 | 1 | 2018209 | Johan Hjort | 2018 |
72 | 2 | 2018110 | G.O.Sars | 2018 |
73 | 3 | 2018838 | Helmer Hanssen | 2018 |
74 | 4 | 0145_2018_UFJN_VLNY | Vilnyus | 2018 |
75 | 1 | 2019113 | G.O.Sars | 2019 |
76 | 2 | 2019209 | Johan Hjort | 2019 |
77 | 3 | 2019813 | Helmer Hanssen | 2019 |
78 | 4 | 0147_2019_UFJN_VLNY | Vilnyus | 2019 |
79 | 1 | 2020111 | G.O.Sars | 2020 |
80 | 2 | 2020209 | Johan Hjort | 2020 |
81 | 3 | 0151_2020_UFJN_VILN | Vilnyus | 2020 |
82 | 4 | 0152_2020_UHOB_ATLA | Atlantniro | 2020 |
# Get all data from the survey; combine_year = TRUE stacks all cruises into
# one table per level (fishstation, catchsample, individual, agedetermination)
surveyData <- getSurveySeriesData(myCS, combine_year = TRUE)
Downloading cruise series 'Barents Sea NOR-RUS ecosystem cruise in autumn' ... Searching for files (164 runs):
Overwriting: /home/jovyan/workspace/stox/project/Barents Sea NOR-RUS ecosystem cruise in autumn_Alldata
Downloading... (1 runs):
# Examine the sample count of the survey: the number of rows in each table,
# transposed so that every table gets its own row
sampleCount <- lapply(surveyData, nrow)
t(as.data.frame(sampleCount))
fishstation | 13917 |
---|---|
catchsample | 243012 |
individual | 3606935 |
agedetermination | 261539 |
# Check whether we have missing scientific names (answer: yes)
unique(surveyData$catchsample$scientificname)
# Let's populate them from the Aphia IDs in the catchsample table
newSurveyData <- getScientificNames(surveyData)
Loading required package: httr Loading required package: plyr
REQUESTING 817 ITEMS BY ID from World Register of Marine Species (www.marinespecies.org), 13/01/2021 12:09:14 PM (CC-BY) ,126436,126441,126437,126444,126461,126439,151324,127137,127144,105865 ,107649,233889,127254,274100,126758,126759,19494,126735,127255,126757 ,127191,1135,126417,126433,127212,127193,127110,154675,127115,127073 ,11723,126435,126505,127214,1128,127206,127235,127104,127205,106669 ,127218,105863,127199,125589,234519,125566,126580,127103,127072,11760 ,127113,125909,126752,135220,127136,127070,138481,106903,126088,127111 ,127112,106782,123082,125575,126102,140692,125517,125912,127203,101800 ,127143,127190,127139,126154,127207,126160,254529,126150,139178,110708 ,123080,123586,100653,101,105,127217,105410,107315,110690,123084 ,126104,127211,51,1806,127198,127215,127098,1248,140223,1302 ,107013,123083,558,123081,883,125732,1066,159523,110711,138281 ,138036,153097,105894,127119,126352,126555,126450,126627,107012,107566 ,124612,100694,118283,119036,107567,107563,126715,123258,123915,124321 ,127095,107568,117145,135301,106994,107533,107521,107651,101170,127186 ,123203,138314,156819,1358,110381,1207,101592,130769,127312,103732 ,139437,101759,123776,298380,101174,107504,127200,107569,104054,11716 ,134204,146941,158269,1268,2081,11707,159920,111600,272298,101173 ,1839,123851,123221,101528,124154,124160,123815,105919,106738,1137 ,11718,124156,124167,125158,101417,124135,135306,104,140474,196874 ,124147,146142,111598,101157,123908,124149,1130,107052,107497,1341 ,128529,125171,125166,102181,1762,105868,127387,127379,110679,126447 ,126175,1082,106671,1337,107044,125594,124978,138820,1267,126716 ,126152,127096,141449,123338,106057,106215,126438,126507,124967,125587 ,124151,123321,125170,1365,123276,117703,55,107322,110364,124043 ,124798,140525,254728,138866,139490,124934,124404,137704,102146,130097 ,138933,101537,127219,146943,125147,123177,104055,111723,156103,129491 ,181299,346965,133185,125125,160212,141905,423255,141896,141902,146851 
,137710,110911,124969,124900,147023,124324,124223,793,156452,107677 ,107676,126156,101408,102224,130099,138592,103105,152391,139525,126432 ,136051,138938,137701,882,134586,138906,100790,138875,140536,141820 ,140227,146420,124710,101160,213,106210,101156,138858,138899,254656 ,423556,123506,154825,149668,127140,123896,100951,140596,156383,510534 ,138923,139523,138709,111367,283798,100954,1567,138908,153083,127194 ,124933,124655,156264,139015,127142,124943,140480,127220,123142,140467 ,138878,254378,141989,138864,140103,139008,138859,157012,228,137858 ,988,111711,985,101881,138855,138904,101078,129289,124966,146953 ,101395,1292,131673,123207,138797,102200,123881,127116,1131,129437 ,138122,233981,123188,153095,124860,106983,156345,102152,123335,156101 ,107125,102145,274046,102864,107498,125825,126451,138265,140579,129924 ,124446,1803,103259,106039,229,927,123441,140430,110349,138902 ,126736,142001,107323,141999,159519,293567,107240,254617,1363,107552 ,129379,254583,127108,127236,137658,134366,140717,111544,1360,149 ,134669,134214,134224,134360,141904,1566,523706,134659,133882,134591 ,133910,254510,131779,134023,170671,126751,132005,1614,107205,1269 ,107232,138818,140524,160143,107531,124020,1367,127099,126472,127231 ,293648,127023,126103,134240,141580,130467,981,233984,1608,138873 ,138860,110377,130801,117597,160141,124443,127101,127222,127102,124121 ,100817,182882,100982,101163,101383,123390,126453,125498,135263,150637 ,135294,125595,125447,11709,105711,127138,126822,125516,126458,125579 ,125603,127105,126501,158960,127146,105869,102281,105913,105824,127309 ,105883,106673,125100,138339,140649,2,123479,123128,123813,106854 ,123574,104062,106835,123250,123143,129496,123231,1205,102297,137943 ,167882,492171,754175,144192,129553,265027,112120,1065,102563,101400 ,145725,13552,393810,164811,138266,382226,754174,102,118827,101397 ,135259,126833,125606,830,125743,127251,178738,154444,123106,147109 ,123206,123622,237,559,23123,125269,125333,143755,126617,125436 
,158656,105766,125390,119039,125601,125469,105874,126361,293018,126484 ,10329,126612,293496,105872,126440,274423,22626,127213,126725,126527 ,105870,274476,158950,159262,126831,126449,126375,411,123160,10313 ,10216,137134,126448,126996,398381,300735,110676,1364,1340,1648 ,135252,2041,105408,105450,104174,110342,110783,1778,125508,122323 ,1080,135302,110695,1371,110680,117212,106350,1250,106674,106358 ,156451,117849,107163,143751,104566,106790,110674,153091,107590,107678 ,1071,107021,138483,107643,107011,125815,106331,117070,117059,103251 ,101796,117143,11782,101445,111041,206633,104023,123291,123336,131834 ,100706,100681,129370,423901,144197,144129,136025,117728,130305,144199 ,939,923,129155,111037,110749,104040,124782,131629,100688,901 ,129487,134691,110398,102870,146940,103483,132046,132095,1606,100655 ,134711,10309,137793,110709,106341,104464,102644,110908,1609,1277 ,129233,131644,132077,138238,119962,117103,100757,107007,342212,106766 ,101506,100834,134687,146762,956,119842,158351,514209,106829,131774 ,106377,101519,119822,123121,11776,123540,123970,123353,123333,138068 ,118451,145,123244,123633,123587,149765,123219,123561,123503,103714 ,103488,134106,106989,138672,156818,107374,593072,106122,889,140702 ,173,138288,172,123297,118420,103509,137885,138673,110842,9904 ,104022,138268,137673,175024,124418,391543,100854,140168,130500,123386 ,532031,100839,138824,127182,124085,137871,123974,170674,124535,106918 ,134033,134022,137714,135144,106845,150642,104057,126459,117994,118394 ,118396,103862,1296,140600,10331,123182,117870,106687,938,135233 ,103256,1069,274967,103443,127374,159526,266489,16327,123158,106149 ,135226,123456,101798,116115,117487,100782,101063,101764,126446,110671 ,100803,138482,129566,106393,867958,21263,134741 by id .............................................. DONE
Warning message in makeTaxaTable(aphias): “Some provided Aphia IDs encode non-accepted scientific names.”
# Re-check the scientific names, shown as a one-column table
as.data.frame(unique(newSurveyData$catchsample$scientificname))
unique(newSurveyData$catchsample$scientificname) |
---|
<chr> |
NA |
Actiniidae |
Actinostolidae |
Sagartiidae |
Epizoanthidae |
Actinia |
Urticina |
Hormathia |
Cerianthus |
Epizoanthus |
Actinia equina |
Bolocera tuediae |
Urticina felina |
Actinostola callosa |
Stomphia coccinea |
Hormathia digitata |
Hormathia nodosa |
Metridium senile |
Gastropoda |
Priapulida |
Priapulidae |
Halicryptus spinulosus |
Priapulopsis bicaudatus |
Priapulus caudatus |
Petromyzontidae |
Myxine glutinosa |
Lethenteron camtschaticum |
Petromyzon marinus |
Gammaridae |
Lysianassidae |
⋮ |
Neptunea denselirata |
Phascoliidae |
Trigoniidae |
Mollusca |
Echinidea |
Pasiphaea affinis |
Boreoscala greenlandica |
Gracilechinus acutus |
Polyplacophora |
Porifera |
Calcarea |
Liponema multicorne |
Errantia |
Sedentaria |
Platyhelminthes |
Phaeophyceae |
Liparis bathyarcticus |
Annelida |
Polychaeta |
Spionida |
Sabellida |
Maldanidae |
Aphroditiformia |
Aphroditidae |
Polynoidae |
Nephtyidae |
Ampharetidae |
Sabellidae |
Serpulidae |
Dorididae |