-
Notifications
You must be signed in to change notification settings - Fork 11
/
01-create-yaml.Rmd
130 lines (102 loc) · 3.96 KB
/
01-create-yaml.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
---
title: "01-Create-yaml"
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
# Default chunk option for this document: show each chunk's source code in
# the rendered output.
knitr::opts_chunk$set(echo = TRUE)
# stringr provides str_replace_all(), used below when building the output
# file names.
library(stringr)
```
## R Markdown - Create YAML files for Bioc2022 website
### STEPS
* The original input is a Google Sheets spreadsheet: https://docs.google.com/spreadsheets/d/1tGtGffcbCRxQFjE3ej42IcWlN4FJCsuQeZvETt9g0oA/edit#gid=0
* Export this file as TSV. Save file in "input" folder.
* NOTE - For the "time" column, set format to custom "hh:mm" (IMPORTANT)
* Import TSV file into R. Run the code
* Output files are generated in the "yaml_output" folder
### import TSV file
```{r}
## Load the schedule into `scheduleBlank`, a data frame in which missing
## cells are empty strings instead of NA.
##
## requireNamespace() is used for the availability check: it is much cheaper
## than scanning installed.packages() and also returns FALSE when the package
## is installed but cannot be loaded.
if (requireNamespace("googlesheets4", quietly = TRUE)) {
  ## This option requires authentication with a google account
  ## No need to download the google spreadsheet
  ss <- "https://docs.google.com/spreadsheets/d/1tGtGffcbCRxQFjE3ej42IcWlN4FJCsuQeZvETt9g0oA/edit#gid=0"
  google_sheet <- googlesheets4::read_sheet(ss, sheet = 1)
  scheduleBlank <- as.data.frame(google_sheet)
} else {
  ## Fallback: read the tab-separated export from its published URL
  file_path <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vRWk1K6nVoGeeYWmosAkTBuS4shpV7eZ7T7cQElCiV3ZOa9q5HdH1IR2h4Y7x1G-_wRpgoQ7TAeANp9/pub?gid=0&single=true&output=tsv"
  schedule <- read.csv(
    file = file_path,
    sep = "\t",
    stringsAsFactors = FALSE,
    na.strings = ""
  )
  ## Keep the raw import in `schedule`; all edits happen on the copy
  scheduleBlank <- schedule
}

## Use "" instead of NAs (same treatment for both input routes)
scheduleBlank[is.na(scheduleBlank)] <- ""

## Datetime to character: a value shaped like "<date> hh:mm:00" is reduced to
## "hh:mm". Values without a space (already "hh:mm") do not match the pattern
## and are left unchanged.
scheduleBlank$time <- sub("^.+ (.+):00", "\\1", scheduleBlank$time)
```
## Create a directory to store output
```{r}
## Directory that receives the generated YAML files. The trailing slash is
## kept because file paths are later built by pasting the file name directly
## onto this value.
output_dir <- "yaml_output/"

## Use a slash-free path for the filesystem calls: on Windows, file.exists()
## reports FALSE for a directory name that ends in a slash, so an existing
## output folder would never be cleared.
output_path <- sub("/+$", "", output_dir)

if (file.exists(output_path)) {
  ## Overwrite whatever output was generated before
  unlink(output_path, recursive = TRUE)
}
dir.create(output_path)
```
### Loop through each entry in the input file. A while loop is used for easy debugging
```{r}
## Write one YAML file per schedule row into `output_dir`. Every column of
## the row becomes one "key: value" line, in column order.

## Render a character vector as YAML double-quoted scalars. Backslashes and
## double quotes inside a value are escaped so that the resulting line is
## still valid YAML; values without those characters are simply wrapped in
## quotes.
yaml_quote <- function(x) {
  x <- gsub("\\", "\\\\", x, fixed = TRUE)
  x <- gsub("\"", "\\\"", x, fixed = TRUE)
  paste0("\"", x, "\"")
}

## Kept as an index-driven while loop: setting iCount by hand and running the
## loop body regenerates the file for a single row.
iCount <- 1
while (iCount <= nrow(scheduleBlank)) {
  ## drop = FALSE keeps a data frame even when the input has a single column
  oneRow <- scheduleBlank[iCount, , drop = FALSE]

  ## File name layout: <day>_<time>_<session_type>_<paper>.yaml
  fileNameSpace <- paste0(
    oneRow$day, "_",
    oneRow$time, "_",
    oneRow$session_type, "_",
    oneRow$paper,
    ".yaml"
  )
  ## Remove spaces and colons (e.g. the ":" in the time) from the file name
  fileNameFinal <- str_replace_all(
    string = fileNameSpace,
    pattern = " |:",
    replacement = ""
  )
  ## Rows without a paper value leave a trailing underscore before the
  ## extension; drop it.
  fileNameFinal <- sub("_\\.yaml$", ".yaml", fileNameFinal)

  ## Build all "key: value" lines for this row
  yamlLines <- unlist(
    lapply(seq_along(oneRow), function(iCol) {
      oneValueName <- names(oneRow)[iCol]
      rawValue <- as.character(unlist(oneRow[iCol]))
      if (any(grepl("paper", rawValue))) {
        ## Values containing "paper" are written without surrounding quotes;
        ## an empty value still becomes "".
        ## NOTE(review): the test is on the value, not on the column name --
        ## presumably meant for ids in the `paper` column; confirm.
        oneValue <- ifelse(nzchar(rawValue), rawValue, "\"\"")
      } else {
        oneValue <- yaml_quote(rawValue)
      }
      paste0(oneValueName, ": ", oneValue)
    }),
    use.names = FALSE
  )

  ## Write the file in one go; an existing file of the same name is replaced
  writeLines(yamlLines, con = paste0(output_dir, fileNameFinal))

  iCount <- iCount + 1
} # end of loop over rows
```