-
Notifications
You must be signed in to change notification settings - Fork 13
/
7b_temp_merge.yml
79 lines (61 loc) · 2.69 KB
/
7b_temp_merge.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Target remake builds when none is named explicitly.
target_default: 7b_temp_merge

# R packages attached for this step's target commands.
packages:
  - scipiper
  - dplyr
  - feather
  - googledrive

# R scripts sourced for this pipeline step.
sources:
  - 7b_temp_merge/src/merge_all_temp_dat.R
  - 7b_temp_merge/src/munge_source_ids.R
targets:
  # Umbrella target for this step: building it builds the three
  # indicator files listed under `depends`.
  7b_temp_merge:
    depends:
      - 7b_temp_merge/out/merged_temp_data_daily.feather.ind
      - 7b_temp_merge/out/temp_data_with_sources.feather.ind
      - 7b_temp_merge/out/source_metadata_for_release.csv.ind

  # -- get WQP and coop data and merge them -- #
  # -- removes (coarsely defined) duplicates -- #
  # -- removes egregious outliers in Jan/Feb (>10 deg C) and
  #    July/Aug (surface, <10 deg C) -- #
  # -- returns all times -- #
  7b_temp_merge/out/merged_temp_data_alltimes.feather.ind:
    command: merge_temp_data(
      outind = target_name,
      wqp_ind = '7a_wqp_munge/out/temp_wqp_munged_linked.feather.ind',
      coop_ind = '7a_temp_coop_munge/out/all_coop_dat_linked.feather.ind')

  # -- resolve multiple unique obs per lake/date/depth -- #
  7b_temp_merge/out/merged_temp_data_daily.feather.ind:
    command: reduce_temp_data(
      outind = target_name,
      inind = '7b_temp_merge/out/merged_temp_data_alltimes.feather.ind')

  # -- clean up data sources for data release -- #
  # Create a version of the daily temperature observations with new
  # source IDs for the data release.

  # Download and indicate the source metadata.
  # The string below is handed to as_id(); `googledrive` is in this file's
  # package list, so it is presumably a Google Drive file ID.
  # NOTE(review): the dependency on the coop fetch task log presumably forces
  # a refresh of the metadata whenever the coop fetch changes - confirm.
  source_metadata_loc:
    command: as_id(I('1gX9ejUr18_w4mYTCORj0_ZuXdyqUhcZvpsV5G0Yu26U'))
    depends:
      - 6_temp_coop_fetch/log/6_temp_coop_fetch_tasks.ind

  7b_temp_merge/in/source_metadata.csv.ind:
    command: gd_download_and_indicate(
      source_metadata_loc,
      out_ind = target_name)

  # Merge the daily data with the new source_ids.
  7b_temp_merge/out/temp_data_with_sources.feather.ind:
    command: add_source_ids(
      datin_ind = '7b_temp_merge/out/merged_temp_data_daily.feather.ind',
      metadata_ind = '7b_temp_merge/in/source_metadata.csv.ind',
      datout_ind = target_name)

  # Create a metadata file for the data release.
  7b_temp_merge/out/source_metadata_for_release.csv.ind:
    command: summarize_sources(
      datin_ind = '7b_temp_merge/out/temp_data_with_sources.feather.ind',
      metadata_ind = '7b_temp_merge/in/source_metadata.csv.ind',
      datout_ind = target_name)

  # Reservoir data - reduce to dailies.
  # Kept separate because 1) there are some oddities with depth data from
  # reservoirs, and 2) these reservoirs are in the eastern time zone while
  # the reduction above selects noon central time. Not a huge issue, but
  # this lets us select noon local time.
  # NOTE(review): this target is not among the `7b_temp_merge` depends in
  # this file - confirm it is requested from elsewhere.
  7b_temp_merge/out/drb_daily_reservoir_temps.rds.ind:
    command: reduce_reservoir_data(target_name,
      drb_ind = '7a_nwis_munge/out/drb_reservoirs_temp_munged.rds.ind',
      nyc_dep_ind = '7a_nwis_munge/out/NYC_DEP_reservoir_temperature_data.rds.ind')