-
Notifications
You must be signed in to change notification settings - Fork 1
/
5_lake_modelling.Rmd
118 lines (95 loc) · 4.64 KB
/
5_lake_modelling.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
---
title: "5_LakeModelling"
author: "Simon Topp"
date: "9/11/2018"
output: html_document
editor_options:
chunk_output_type: console
---
## Connect all of the lake info to LakeCat to get model inputs
```{r, eval = F}
# Load in lakeCat data
lc.files <- list.files('in/lakeCat/unzip', full.names = T)
# Join lakes to nhd plus LakeCat data, lake.join and lakes.up are defined in 00_ParameterConfiguration
for(i in lc.files){
if(i == first(lc.files)){
lc <- read.csv(i) %>% mutate(COMID = factor(COMID))
lake.join <- lakes.up %>%
left_join(lc , by = 'COMID')
}else{
lc <- read.csv(i) %>% mutate(COMID = factor(COMID))%>%
select(-c(CatAreaSqKm, WsAreaSqKm, CatPctFull,WsPctFull,inStreamCat))
lake.join <- lake.join %>%
left_join(lc, by = 'COMID')}
}
names.in <- names(lake.join)
## Remove unwanted parameters and munge a couple of date related ones.
lake.join <- lake.join %>%
filter(!is.na(CatAreaSqKm)) %>% ##
select(-c(grep(names.in, pattern = '*Ws', value = T),
grep(names.in, pattern = '2011Cat', value = T),
## 2011 values aren't representative of the study period
grep(names.in, pattern = 'PctFire', value = T),
## While potentially important, the forest metrics lack
grep(names.in, pattern = 'PctFrstLoss', value = T),
##the temporal resolution we need.
grep(names.in, pattern = 'NABD', value = T),
##Dam info acts as identifiers and are redundant with lake_type/area
grep(names.in, pattern = 'Dam', value = T),
##Dam info acts as identifiers and are redundant with lake_type/area
'Precip08Cat', 'Precip09Cat', 'Tmean08Cat', 'Tmean09Cat'))
#Use long term averages not these
#Round the majority of values to the nearest 1 to avoid variables becoming 'unique ID's)
round1 <- names(lake.join %>% select(c(CatAreaSqKm:CatPctFull,PctImp2006Cat, PctCarbResidCat:WetIndexCat)))
## Round a few to 1 because they're variation is real big and larger values with
#Round a subset to the nearest 10th because their values more or less range from 0 to 1.
round.1 <- names(lake.join %>% select(c(AgKffactCat, KffactCat, MineDensCat)))
lake.join <- lake.join %>%
mutate_at(round1, round, digits = 0) %>%
mutate_at(round.1, round, digits = 1)
lake.join[lake.join == -9998] = NA
if(lakeSamp == 'EcoReg2000'){
write_feather(lake.join, 'out/EcoReg2000LakesFull.feather')
}else if(lakeSamp == 'NLA'){
write_feather(lake.join, 'out/NLA2012LakesFull.feather')
}else if(lakeSamp == 'Over10'){
write_feather(lake.join, 'out/Over10LakesFull.feather')
}
plan(multiprocess)
lut <- lakesDown %>% future_map_dfr(aodLUT, .progress = T)
plan(sequential)
#write_feather(lut, 'out/aodLUT_NLA2012.feather')
```
## Model clarity for each lake pulled down from EE
```{r}
## Bring in all the files needed in the script
# Landscape variables ranked by RF feature importance
ffsVariables <- read_feather('out/ffsResultsFull.feather')
features <- ffsVariables[ffsVariables$RMSE == min(ffsVariables$RMSE),] %>%
select(-c(nvar, RMSE, SE)) %>%
paste(.) %>% .[.!= 'NA']
#For NLA_2012 full lake medians
lakesDown = list.files('lake_data/NLA2012', full.names = T)
plan(multiprocess(workers = availableCores()-1))
Preds.out <- ids %>% future_map_dfr(~EvalPreds(id = .,paths = lakesDown, lakesUp = lake.join, log = log, model = model, features = features, lakeSamp = lakeSamp), .progress = T)
plan(sequential)
#write_feather(Preds.out, paste0('out/TS_Preds/', lakeSamp, '_',iteration,'.feather'))
Preds.out<- read_feather(paste0('out/TS_Preds/TS_Predictions_',iteration,'.feather'))
```
```{r}
##2007 NLA wasn't part of the original pipeline, quickly generate preds for it here
# ids <- list.files('lake_data/NLA2007_cntr') %>% strsplit(split = '.csv', fixed = T) %>% unlist()
# lakesDown <- list.files('lake_data/NLA2007_cntr', full.names = T)
# lake.join <- read_feather('out/NLA2007LakesFull.feather')
# ## Bad form, but you need to comment out part of the function for this because
# ## there's no l8 involved and it'll throw errors otherwise.
# plan(multiprocess(workers = availableCores()-4))
#
# nla.2007.preds <- ids %>% future_map_dfr(~EvalPreds(id = .,paths = lakesDown, lakesUp = lake.join, log = log, model = model, features = features, lakeSamp = lakeSamp), .progress = T)
#
# plan(sequential)
#
# nla.2007.preds <- nla.2007.preds %>% mutate(region = factor(region,
# labels = c("Coastal Plain", "Northern Appalachians", "Northern Plains", "Southern Appalachians", "Southern Plains", "Temperate Plains", "Upper Midwest","Western Mountains","Xeric West")))
# write_feather(nla.2007.preds, 'out/nla2007_preds.feather')
```