-
Notifications
You must be signed in to change notification settings - Fork 13
/
QDNAseq_from_bam_chrX.R
95 lines (63 loc) · 3.15 KB
/
QDNAseq_from_bam_chrX.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("QDNAseq")
# BiocManager::install("QDNAseq.hg19")
# install.packages("BSgenome.Hsapiens.UCSC.hg19")
# install.packages("BSgenome.Hsapiens.UCSC.hg38")
# BiocManager::install("Biobase")
# BiocManager::install("DNAcopy")
# BiocManager::install("CGHcall")
library("Biobase")
# library("BSgenome.Hsapiens.UCSC.hg19")
library("QDNAseq")
library("DNAcopy")
library("CGHcall")
##### initialization #####
args = commandArgs(trailingOnly=TRUE)
path_to_bam_file = args[1]
inputPath = normalizePath(dirname(path_to_bam_file))
NAMEEE_intermediary1 = gsub("\\", "/", normalizePath(path_to_bam_file), fixed = TRUE) # for window path
NAMEEE_intermediary2 = sub("*/*.bam", "", NAMEEE_intermediary1)
NAMEEE = sub(".*/", "", NAMEEE_intermediary2)
output_relative_Path = args[2]
outputPath = normalizePath(output_relative_Path)
bin_size = args[3]
inputPath
NAMEEE
outputPath
bin_size
##### QDNAseq script #####
bin_size = as.numeric(bin_size)
bins <- getBinAnnotations(binSize=bin_size)
paste0(inputPath,"/",NAMEEE,".bam")
readCounts <- binReadCounts(bins, bamfiles=paste0(inputPath,"/",NAMEEE,".bam"))
# exportBins(readCounts, file="D038R4_readcount_unfiltered_10kb.tsv",
# format="tsv", type=c("copynumber"), logTransform = FALSE)
readCountsFiltered <- applyFilters(readCounts, chromosomes=c("Y", "MT"),
residual=TRUE, blacklist=TRUE)
# exportBins(readCountsFiltered, file="D038R4_readcount_filtered_10kb.tsv",
# format="tsv", type=c("copynumber"), logTransform = FALSE)
readCountsFiltered <- estimateCorrection(readCountsFiltered)
copyNumbers <- correctBins(readCountsFiltered)
copyNumbersNormalized <- normalizeBins(copyNumbers)
copyNumbersSmooth <- smoothOutlierBins(copyNumbersNormalized)
copyNumbersSegmented <- segmentBins(copyNumbersSmooth, transformFun = 'none',
alpha = 0.05, nperm = 10000, p.method = "hybrid",
min.width=5, kmax=25, nmin=200, eta=0.05,
trim = 0.025, undo.splits = "sdundo", undo.SD=1)
copyNumbersSegmented <- normalizeSegmentedBins(copyNumbersSegmented)
exportBins(copyNumbersSegmented, file=paste0(outputPath,"/",NAMEEE,".bam_CN"), format="tsv", type=c("copynumber")) # exporte les données au format VCF DEL and DUP
exportBins(copyNumbersSegmented, file=paste0(outputPath,"/",NAMEEE,".bam_seg"), format="tsv", type=c("segments"))
X = read.table(paste0(outputPath,"/",NAMEEE,".bam_CN"), sep = "\t", header = TRUE)
Y = read.table(paste0(outputPath,"/",NAMEEE,".bam_seg"), sep = "\t", header = TRUE)
X = cbind(X, Y[,5])
X = X[,-4]
X = X[,-1]
print(X)
colnames(X) <- c("chromosome", "start", "ratio", "ratio_median")
X[,1] <- gsub('X','23',X[,1])
X[,1] = as.numeric(as.character(X[,1]))
write.table(X, file = paste0(outputPath,"/",NAMEEE,".bam_ratio.txt"), sep = "\t", row.names = FALSE)
file.remove(paste0(outputPath,"/",NAMEEE,".bam_CN"))
file.remove(paste0(outputPath,"/",NAMEEE,".bam_seg"))