Skip to content

Commit

Permalink
use latest ascat
Browse files Browse the repository at this point in the history
  • Loading branch information
Shixiang Wang (王诗翔) committed May 15, 2024
1 parent 42f216d commit 2fc8b59
Show file tree
Hide file tree
Showing 15 changed files with 126 additions and 61 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: gcap
Title: Gene-level Circular Amplicon Prediction
Version: 1.1.4
Version: 1.2.0
Authors@R:
person("Shixiang", "Wang", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-9855-7357"))
Expand Down Expand Up @@ -45,7 +45,7 @@ LinkingTo:
Rcpp
Remotes:
git::https://bitbucket.org/sequenzatools/sequenza.git@master,
github::ShixiangWang/ascat/ASCAT@v3-for-gcap-v1,
github::VanLoo-lab/ascat/ASCAT,
github::ShixiangWang/copynumber,
github::ShixiangWang/facets,
github::ShixiangWang/IDConverter
Expand All @@ -55,4 +55,4 @@ Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd",
"roxytest::testthat_roclet"))
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ RUN mamba install -y -c conda-forge -c bioconda r-base=4.3 python=3.10 r-remotes
R -e 'BiocManager::install("jokergoo/GetoptLong", update = FALSE, force = TRUE)' &&\
R -e 'BiocManager::install("ShixiangWang/copynumber", update = FALSE, force = TRUE)' &&\
R -e 'BiocManager::install("ShixiangWang/facets", update = FALSE, force = TRUE)' &&\
R -e 'BiocManager::install("ShixiangWang/ascat@v3-for-gcap-v1", subdir = "ASCAT", dependencies = TRUE, update = FALSE)' &&\
R -e 'BiocManager::install("VanLoo-lab/ascat", subdir = "ASCAT", dependencies = TRUE, update = FALSE)' &&\
R -e 'BiocManager::install("ShixiangWang/gcap", dependencies = TRUE, update = FALSE)' &&\
cd /opt/conda/lib/R/library/facets/extcode/ &&\
g++ -std=c++11 -I/opt/conda/include snp-pileup.cpp -L/opt/conda/lib -lhts -Wl,-rpath=/opt/conda/lib -o snp-pileup &&\
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# gcap 1.2.0

- Utilized the latest ASCAT version from [VanLoo-lab/ascat](https://github.com/VanLoo-lab/ascat) for default copy number calling. This help address issue mentioned at #43.
- Updated documentation.
- Added a whole workflow for debugging purpose.
- Enhanced path parsing like `~`.
- Fixed bug in `gcap-bam.R`.

# gcap 1.1.4

- Removed the XGBOOST version limits, instead, a warning is posted.
Expand Down
104 changes: 75 additions & 29 deletions R/ascat.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
#' Note, for multiple tumor-normal pairs, the first 5 arguments should
#' be a vector with same length.
#'
#' @param g1000allelesprefix Prefix path to the allele data (e.g. "G1000_alleles_chr").
#' @param g1000lociprefix Prefix path to the loci data (e.g. "G1000_loci_chr").
#' @inheritParams ASCAT::ascat.prepareHTS
#' @inheritParams ASCAT::ascat.aspcf
#' @inheritParams ASCAT::ascat.GCcorrect
#' @param GCcontentfile File containing the GC content around every SNP for increasing window sizes.
#' @param replictimingfile File containing replication timing at every SNP for various cell lines.
#' @param tumourseqfile Full path to the tumour BAM file.
#' @param normalseqfile Full path to the normal BAM file.
#' @param jobname job name, typically an unique name for a tumor-normal pair.
Expand All @@ -25,6 +28,7 @@
#' considered (optional, default=1:22).
#' @param skip_finished_ASCAT if `TRUE`, skipped finished ASCAT calls
#' to save time.
#' @param genome_build "hg38" or "hg19".
#'
#' @importFrom ASCAT ascat.prepareHTS ascat.loadData ascat.GCcorrect
#' ascat.plotRawData ascat.aspcf ascat.plotSegmentedData ascat.predictGermlineGenotypes
Expand Down Expand Up @@ -57,6 +61,7 @@ gcap.runASCAT <- function(tumourseqfile,
min_base_qual = 20,
min_map_qual = 35,
penalty = 70,
genome_build = "hg38",
skip_finished_ASCAT = FALSE) {
stopifnot(all(gender %in% c("XX", "XY")))
if (!dir.exists(outdir)) dir.create(outdir, recursive = TRUE)
Expand Down Expand Up @@ -128,34 +133,70 @@ gcap.runASCAT <- function(tumourseqfile,
if (!all(file.exists(tfile, nfile))) {
lg$fatal("Not all bam files exist")
}
ascat.prepareHTS(
tumourseqfile = tfile,
normalseqfile = nfile,
tumourname = tn,
normalname = nn,
allelecounter_exe = allelecounter_exe,
g1000allelesprefix = g1000allelesprefix,
g1000lociprefix = g1000lociprefix,
nthreads = nthreads,
minCounts = minCounts,
BED_file = BED_file,
probloci_file = probloci_file,
gender = gd,
chrom_names = chrom_names,
min_base_qual = min_base_qual,
min_map_qual = min_map_qual,
skip_allele_counting_tumour = FALSE,
skip_allele_counting_normal = skip_norm
)

ascat.bc <- ascat.loadData(
paste0(tn, "_tumourLogR.txt"), paste0(tn, "_tumourBAF.txt"),
if (skip_norm) NULL else paste0(tn, "_normalLogR.txt"),
if (skip_norm) NULL else paste0(tn, "_normalBAF.txt"),
chrs = chrom_names,
gender = gender
) # New parameter genomeVersion in the latest version of ASCAT
ascat.bc <- ascat.GCcorrect(ascat.bc, GCcontentfile, replictimingfile)

if (packageVersion("ASCAT") > "3.1.0") {
ascat.prepareHTS(
tumourseqfile = tfile,
normalseqfile = nfile,
tumourname = tn,
normalname = nn,
allelecounter_exe = allelecounter_exe,
alleles.prefix = g1000allelesprefix,
loci.prefix = g1000lociprefix,
nthreads = nthreads,
minCounts = minCounts,
BED_file = BED_file,
probloci_file = probloci_file,
gender = gd,
chrom_names = chrom_names,
min_base_qual = min_base_qual,
min_map_qual = min_map_qual,
skip_allele_counting_tumour = FALSE,
skip_allele_counting_normal = skip_norm
)

ascat.bc <- ascat.loadData(
paste0(tn, "_tumourLogR.txt"), paste0(tn, "_tumourBAF.txt"),
if (skip_norm) NULL else paste0(tn, "_normalLogR.txt"),
if (skip_norm) NULL else paste0(tn, "_normalBAF.txt"),
chrs = chrom_names,
gender = gender,
genomeVersion = genome_build
)

ascat.bc <- ASCAT::ascat.correctLogR(ascat.bc, GCcontentfile, replictimingfile)

} else {
ascat.prepareHTS(
tumourseqfile = tfile,
normalseqfile = nfile,
tumourname = tn,
normalname = nn,
allelecounter_exe = allelecounter_exe,
g1000allelesprefix = g1000allelesprefix,
g1000lociprefix = g1000lociprefix,
nthreads = nthreads,
minCounts = minCounts,
BED_file = BED_file,
probloci_file = probloci_file,
gender = gd,
chrom_names = chrom_names,
min_base_qual = min_base_qual,
min_map_qual = min_map_qual,
skip_allele_counting_tumour = FALSE,
skip_allele_counting_normal = skip_norm
)

ascat.bc <- ascat.loadData(
paste0(tn, "_tumourLogR.txt"), paste0(tn, "_tumourBAF.txt"),
if (skip_norm) NULL else paste0(tn, "_normalLogR.txt"),
if (skip_norm) NULL else paste0(tn, "_normalBAF.txt"),
chrs = chrom_names,
gender = gender
)
ascat.bc <- ascat.GCcorrect(ascat.bc, GCcontentfile, replictimingfile)
}

ascat.plotRawData(ascat.bc)
if (skip_norm) {
# https://github.com/VanLoo-lab/ascat/issues/73
Expand All @@ -177,6 +218,11 @@ gcap.runASCAT <- function(tumourseqfile,
ascat.bc <- ascat.aspcf(ascat.bc, ascat.gg = gg, penalty = penalty)
ascat.plotSegmentedData(ascat.bc)
ascat.output <- ascat.runAscat(ascat.bc, gamma = 1L, pdfPlot = TRUE)
if (packageVersion("ASCAT") > "3.1.0") {
QC = ASCAT::ascat.metrics(ascat.bc, ascat.output)
ascat.output = c(ascat.output, list(QC = QC))
}

saveRDS(ascat.output, file = paste0(id, ".ASCAT.rds"))

lg$info("job {id} done")
Expand Down
1 change: 1 addition & 0 deletions R/workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ gcap.workflow <- function(tumourseqfile, normalseqfile,
},
min_base_qual = min_base_qual,
min_map_qual = min_map_qual,
genome_build = genome_build,
penalty = penalty,
skip_finished_ASCAT
)
Expand Down
19 changes: 14 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,20 @@ if you use **conda** or other approaches, please set the path when you use corre

### Install ASCAT (required)

Install **ASCAT** v3.0 (modified and adapted for GCAP workflow in HPC) in R console from GitHub with:
#### Latest ASCAT

From v1.2, GCAP uses the latest version of ASCAT. Install **ASCAT** v3 in R console from GitHub with:

```r
# install.packages("remotes")
remotes::install_github('VanLoo-lab/ascat/ASCAT')
```

We have provided generated reference files above, but sometimes you may want to generate the reference data for yourself, in such case, please refer to <https://github.com/VanLoo-lab/ascat> for generating the required allele-specific copy number data.

#### A fixed version of ASCAT

In our manuscript, we used a fixed version of ASCAT for the GCAP data pre-processing (modified and adapted for GCAP workflow in HPC). It does not fit the R version `>=4.3`.

```r
# This is a forked version ASCAT
Expand All @@ -51,10 +64,6 @@ remotes::install_github("ShixiangWang/ascat@v3-for-gcap-v1", subdir = "ASCAT")
# See https://github.com/ShixiangWang/gcap/issues/27
```

> Here we used a fixed version of ASCAT for the GCAP data pre-processing, if you want to adopted the latest
> updates in processing your data, please refer to <https://github.com/VanLoo-lab/ascat> for generating the required
> allele-specific copy number data, and refer to [`?gcap.ASCNworkflow()`](https://shixiangwang.github.io/gcap/reference/gcap.ASCNworkflow.html) for downstream analysis.
### Alternatives to ASCAT

For the latest version of GCAP, [sequenza](https://shixiangwang.github.io/gcap/reference/gcap.workflow.seqz.html) or [facets](https://shixiangwang.github.io/gcap/reference/gcap.workflow.facets.html) are supported for preprocessing the bam data, please refer to the provided links for usage.
Expand Down
3 changes: 2 additions & 1 deletion man/gcap-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/gcap.ASCNworkflow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions man/gcap.runASCAT.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 5 additions & 6 deletions man/gcap.workflow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions tests/testthat/test-roxytest-testexamples-ASCNworkflow.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by roxytest: Do not edit by hand!
# Generated by roxytest: do not edit by hand!

context("File R/ASCNworkflow.R: @testexamples")
# File R/ASCNworkflow.R: @testexamples

test_that("Function gcap.ASCNworkflow() @ L85", {

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-roxytest-testexamples-auc.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by roxytest: Do not edit by hand!
# Generated by roxytest: do not edit by hand!

context("File R/auc.R: @testexamples")
# File R/auc.R: @testexamples

test_that("Function get_auc() @ L23", {

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-roxytest-testexamples-prediction.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by roxytest: Do not edit by hand!
# Generated by roxytest: do not edit by hand!

context("File R/prediction.R: @testexamples")
# File R/prediction.R: @testexamples

test_that("Function gcap.runPrediction() @ L18", {

Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-roxytest-testexamples-scoring.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by roxytest: Do not edit by hand!
# Generated by roxytest: do not edit by hand!

context("File R/scoring.R: @testexamples")
# File R/scoring.R: @testexamples

test_that("Function gcap.runScoring() @ L28", {

Expand Down

0 comments on commit 2fc8b59

Please sign in to comment.