Skip to content

Commit

Permalink
Adapt code for changes to opencyto behaviour (addresses #1)
Browse files Browse the repository at this point in the history
  • Loading branch information
malisas committed Jun 21, 2022
1 parent ad0756c commit 56d241a
Show file tree
Hide file tree
Showing 8 changed files with 624 additions and 559 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,14 @@ stringr
survival # required by coin
svglite # for saving plots
tidyr
tidyverse
```

You can use bioconductor or `devtools::install_github()` to install the relevant flow cytometry packages:

```
COMPASS
CytoML
flowCore
flowWorkspace
ggcyto
Expand All @@ -79,3 +81,19 @@ ncdfFlow
[openCyto](https://bioconductor.org/packages/release/bioc/html/openCyto.html) should install flowWorkspace, flowCore, and ncdfFlow.
[ggcyto](https://bioconductor.org/packages/release/bioc/html/ggcyto.html) for bivariate flow dot plots.
[COMPASS](https://bioconductor.org/packages/release/bioc/html/COMPASS.html)
[CytoML](https://bioconductor.org/packages/release/bioc/html/CytoML.html)

Quick install:
```
# Note: on ubuntu 20.04, you may need to install libfontconfig1-dev first
install.packages(c("BH", "RcppArmadillo", "coin", "cowplot", "data.table", "extrafont",
"ggplot2", "ggsignif", "here", "plyr", "stringr", "survival",
"svglite", "tidyr", "tidyverse))
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("openCyto")
BiocManager::install("CytoML")
BiocManager::install("ggcyto")
BiocManager::install("COMPASS")
```
848 changes: 424 additions & 424 deletions data/ImmPort_FCS_FileMapping.tsv

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions scripts/0_Copy_and_Rename_FCS_Files.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ ImmPort_dl_path <- file.path(projectDir, "data/Flow_cytometry_result")
subfolder_paths <- unique(file_map$New_Folder_Path)
for(subfolder_path in subfolder_paths) {
subfolder_path_abs <- file.path(projectDir, "data", subfolder_path)
print(sprintf("Copying and renaming files to %s", subfolder_path_abs))
message(sprintf("Copying and renaming files to %s", subfolder_path_abs))
if(!dir.exists(subfolder_path_abs)) {
dir.create(subfolder_path_abs, recursive = T)
}
curRows <- subset(file_map, New_Folder_Path == subfolder_path)
file.copy(file.path(ImmPort_dl_path, curRows$ImmPort_Name), subfolder_path_abs)
file.rename(file.path(subfolder_path_abs, curRows$ImmPort_Name), file.path(subfolder_path_abs, curRows$Original_Name))
}
}
64 changes: 40 additions & 24 deletions scripts/1_QC_SetupGSList_TBAgs.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env Rscript
library(CytoML)
library(here)
library(flowWorkspace)
library(plyr)
library(tidyverse)

projectDir <- here::here()
source(file.path(projectDir, "scripts/flowHelperFunctions.R")) # for boxplot.cell.counts and prepare.gating.set.list.4.compass
Expand All @@ -13,42 +14,57 @@ flowJoXmlPath2 <- file.path(projectDir, "data/TBAgs/20170612_RSTR_OMIP14_ICS_Bat
batch1FcsDirPath <- file.path(projectDir, "data/TBAgs/20170605_RSTR_OMIP14_ICS_Batch1/20170607_RSTR_OMIP14_ICS")
batch2FcsDirPath <- file.path(projectDir, "data/TBAgs/20170612_RSTR_OMIP14_ICS_Batch2/20170614_RSTR_OMIP14_ICS_Batch2")

file_map <- read.table(here::here("data/ImmPort_FCS_FileMapping.tsv"), sep = "\t", header = T,
colClasses = c("character", "character", "character", "character", "numeric"))

# Additional variables
qcOutDir <- file.path(projectDir, "out/QC/TBAgs")
patientStatusFilePath <- file.path(projectDir, "data/20170518_HiRisk_VisitA_Only_1.txt")
gatingSetListOutDir <- file.path(projectDir, "out/GatingSets/AllBatchesForCompass_TBAgs")
flowJoWorkspace_sampleID_FileMapping <- read.table(file.path(projectDir, "data/flowJoWorkspace_sampleID_FileMapping.tsv"), sep = "\t",
header = T, colClasses = c("character", "numeric", "character"))


# First read in the flowJoXmlPaths with desired keywords
ws1 <- openWorkspace(flowJoXmlPath1)
ws1 <- open_flowjo_xml(flowJoXmlPath1)
keywords2import=c("PATIENT ID", "Comp", "Antigen", "Status", "PLATE NAME", "TUBE NAME", "WELL ID")
sampleGroup <- "Samples"
ws1_filemap <- subset(flowJoWorkspace_sampleID_FileMapping, Experiment == "TBAgs_B1")[, c("sampleID", "file")]
missingFiles_b1 <- c("Specimen_001_B2_B02_012.fcs", "Specimen_001_B4_B04_014.fcs", "Specimen_001_B6_B06_016.fcs", "Specimen_001_B8_B08_018.fcs", "Specimen_001_B10_B10_020.fcs", "Specimen_001_C6_C06_066.fcs", "Specimen_001_C8_C08_068.fcs", "Specimen_001_A2_A02_112.fcs", "Specimen_001_A4_A04_114.fcs", "Specimen_001_A6_A06_116.fcs", "Specimen_001_A8_A08_118.fcs", "Specimen_001_A10_A10_120.fcs", "Specimen_001_A2_A02_192.fcs", "Specimen_001_A4_A04_194.fcs", "Specimen_001_A6_A06_196.fcs", "Specimen_001_A8_A08_198.fcs", "Specimen_001_A10_A10_200.fcs")
ws1_filemap <- ws1_filemap[which(!(ws1_filemap$file %in% missingFiles_b1)),]
ws1_filemap$file <- file.path(batch1FcsDirPath, ws1_filemap$file)
gs1 <- parseWorkspace(ws1, name=sampleGroup, keywords=keywords2import,
path=ws1_filemap) # batch1FcsDirPath

cs1 <- cytoset()
quietly(pmap(file_map %>% dplyr::filter(grepl("^TBAgs", New_Folder_Path) & grepl("Batch1", New_Folder_Path) & !grepl("Compensation", Original_Name)) %>%
dplyr::select(New_Folder_Path, Original_Name, flowJo_xml_sampleID),
function(New_Folder_Path, Original_Name, flowJo_xml_sampleID) {
cat(sprintf("Loading %s/%s for sampleID %s\n", New_Folder_Path, Original_Name, flowJo_xml_sampleID))
cf_tmp <- load_cytoframe_from_fcs(here::here("data", New_Folder_Path, Original_Name))
flowJo_xml_sample_name <- fj_ws_get_samples(ws1) %>% dplyr::filter(sampleID == flowJo_xml_sampleID) %>% dplyr::pull(name)
# below, flowjo_to_gatingset will use the name stored in flowJo_xml_sample_name to match the cytoframe to the flowjo workspace entry
cs_add_cytoframe(cs = cs1, sn = flowJo_xml_sample_name, cf = cf_tmp)
}))

gs1 <- flowjo_to_gatingset(ws1, name=sampleGroup, cytoset = cs1, keywords=keywords2import)

#########################

ws2 <- openWorkspace(flowJoXmlPath2)
ws2 <- open_flowjo_xml(flowJoXmlPath2)
keywords2import=c("PATIENT ID", "Comp", "Antigen", "Status", "PLATE NAME", "TUBE NAME", "WELL ID")
sampleGroup <- "Samples"
ws2_filemap <- subset(flowJoWorkspace_sampleID_FileMapping, Experiment == "TBAgs_B2")[, c("sampleID", "file")]
missingFiles_b2 <- c("Specimen_001_F10_F10_100.fcs")
ws2_filemap <- ws2_filemap[which(!(ws2_filemap$file %in% missingFiles_b2)),]
ws2_filemap$file <- file.path(batch2FcsDirPath, ws2_filemap$file)
gs2 <- parseWorkspace(ws2, name=sampleGroup, keywords=keywords2import,
path=ws2_filemap)

cs2 <- cytoset()
quietly(pmap(file_map %>% dplyr::filter(grepl("^TBAgs", New_Folder_Path) & grepl("Batch2", New_Folder_Path) & !grepl("Compensation", Original_Name)) %>%
dplyr::select(New_Folder_Path, Original_Name, flowJo_xml_sampleID),
function(New_Folder_Path, Original_Name, flowJo_xml_sampleID) {
cat(sprintf("Loading %s/%s for sampleID %s\n", New_Folder_Path, Original_Name, flowJo_xml_sampleID))
cf_tmp <- load_cytoframe_from_fcs(here::here("data", New_Folder_Path, Original_Name))
flowJo_xml_sample_name <- fj_ws_get_samples(ws2) %>% dplyr::filter(sampleID == flowJo_xml_sampleID) %>% dplyr::pull(name)
# below, flowjo_to_gatingset will use the name stored in flowJo_xml_sample_name to match the cytoframe to the flowjo workspace entry
cs_add_cytoframe(cs = cs2, sn = flowJo_xml_sample_name, cf = cf_tmp)
}))

gs2 <- flowjo_to_gatingset(ws2, name=sampleGroup, cytoset = cs2, keywords=keywords2import)

png(file.path(qcOutDir, "Batch2GatingTree.png"))
plot(gs2)
dev.off()

# The only inconsistency between the batches is the extra "8+/4+" gate in Batch 2. Remove it:
Rm("8+/4+", gs2)
gs_pop_remove(gs2, "8+/4+")

# We want to run COMPASS on DNeg and DPos populations as well.
# Cytokine gates don't exist on DNeg and DPos, so
Expand All @@ -59,8 +75,8 @@ nodes2Copy <- c("4+/TNF+", "4+/IL17a+", "4+/IL4+", "4+/IL2+", "4+/IFNg+", "4+/CD
parents <- c("DPos", "DNeg")
for (parent in parents) {
for (nodeName in nodes2Copy) {
add(gs1, getGate(gs1, nodeName), parent=parent)
add(gs2, getGate(gs2, nodeName), parent=parent)
gs_pop_add(gs1, gs_pop_get_gate(gs1, nodeName), parent=parent)
gs_pop_add(gs2, gs_pop_get_gate(gs2, nodeName), parent=parent)
}
recompute(gs1, parent)
recompute(gs2, parent)
Expand All @@ -82,7 +98,7 @@ pData(gs2)$trt <- sapply(pData(gs2)$Antigen, getTrt)
# Read in the patient status mappings and add to GatingSet metadata
patientStatusFile <- read.table(patientStatusFilePath, header = T, sep = "\t")[,c("RS_SUB_ACCESSION_NO", "STATUS_TST")]
colnames(patientStatusFile) <- c("PATIENT ID", "Status")
patientStatusFile$Status <- revalue(patientStatusFile$Status, c("PERSISTENT NEG." = "RSTR", "TST+ CONTROL" = "LTBI"))
patientStatusFile$Status <- dplyr::recode(patientStatusFile$Status, "PERSISTENT NEG." = "RSTR", "TST+ CONTROL" = "LTBI")
pData(gs1)$Batch <- "Batch 1"
pData(gs2)$Batch <- "Batch 2"
pData(gs1) <- subset(pData(gs1), select = -c(Status))
Expand Down Expand Up @@ -205,9 +221,9 @@ boxplot.cell.counts(gatingSet=subset(gs2, `PLATE NAME` == "Plate 4"),
subpopulation="/S/LV/L/3+/4+",
batch="Batch 2 Plate 4",
threshold=3000)
cd4CountsBatch1 <- merge(getPopStats(gs1)[which(getPopStats(gs1)$Population == "4+"),], pData(gs1), by.x = "name", by.y = "tmpRownames")
cd4CountsBatch1 <- merge(gs_pop_get_count_fast(gs1)[which(gs_pop_get_count_fast(gs1)$Population == "/S/LV/L/3+/4+"),], pData(gs1), by.x = "name", by.y = "tmpRownames")
cd4CountsBatch1 <- cd4CountsBatch1[order(cd4CountsBatch1$Count),]
cd4CountsBatch2 <- merge(getPopStats(gs2)[which(getPopStats(gs2)$Population == "4+"),], pData(gs2), by.x = "name", by.y = "tmpRownames")
cd4CountsBatch2 <- merge(gs_pop_get_count_fast(gs2)[which(gs_pop_get_count_fast(gs2)$Population == "/S/LV/L/3+/4+"),], pData(gs2), by.x = "name", by.y = "tmpRownames")
cd4CountsBatch2 <- cd4CountsBatch2[order(cd4CountsBatch2$Count),]

# Rename columns, e.g. Count and ParentCount columns to CD4_Count and CD3_Count.
Expand Down
Loading

0 comments on commit 56d241a

Please sign in to comment.