-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy path02-fragment_gc.r
35 lines (26 loc) · 988 Bytes
/
02-fragment_gc.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
tx <-as.numeric(Sys.getenv("SGE_TASK_ID"))
galpdir <- "/dcl01/scharpf1/data/galignmentpairs/Low_Coverage_WGS"
files <- list.files(galpdir, full.names=TRUE)
file <- files[tx]
names <- strsplit(basename(file), "_")[[1]][1]
outdir <- "/dcl01/scharpf1/data/galignmentpairs/Low_Coverage_WGS/fragments"
# if(file.exists(file.path(outdir, paste0(names, "_frags.rds")))) q('no')
library(GenomicAlignments)
library(GenomicRanges)
library(Rsamtools)
library(devtools)
library(Homo.sapiens)
library(biovizBase)
library(BSgenome.Hsapiens.UCSC.hg19)
class(Homo.sapiens)
galp <- readRDS(file)
frags <- granges(keepSeqlevels(galp, paste0("chr", 1:22), pruning.mode="coarse"),
on.discordant.seqnames="drop")
## filter outliers
w.all <- width(frags)
q.all <- quantile(w.all, c(0.001, 0.999))
frags <- frags[which(w.all > q.all[1] & w.all < q.all[2])]
gcs <- GCcontent(Hsapiens, unstrand(frags))
frags$gc <- gcs
saveRDS(frags, file.path(outdir, paste0(names, "_frags.rds")) )
q('no')