-
Notifications
You must be signed in to change notification settings - Fork 4
/
Makefile.inc
164 lines (136 loc) · 5.89 KB
/
Makefile.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Name of the environment directory under usb-modules-v2/env to load.
# A value defined before this point (command line, environment, earlier
# makefile) is kept; otherwise it defaults to "scicore".
ifeq ($(origin HPC),undefined)
HPC = scicore
endif
# Environment-specific settings for the selected HPC value.
include usb-modules-v2/env/$(HPC)/Makefile.inc
# GLOBAL CONF:
############################################
######## DEFINE SAMPLES/SETS ###############
############################################
ifndef MAKEFILE_INC
# Project identifier derived from the working directory: everything up to and
# including ".../projects/", ".../data/" or "...kinglab/" is removed and the
# remaining "/" separators are replaced by "_".
# Simply expanded (:=) so the pwd | sed pipeline runs once while this file is
# parsed instead of once per $(PROJECT_NAME) reference.
PROJECT_NAME := $(shell pwd | sed 's:.*/projects/::; s:.*/data/::; s:.*kinglab/::; s:/:_:g')
# Load the sample lists from their files.
# For each list: when the file exists and the variable is not defined yet
# (the same condition ?= tests), read the file and drop every line that
# starts with "#".
# The lists are simply expanded (:=) so each file is read once at parse time;
# with ?= the value would be recursive and sed would be re-run on every
# reference to the variable.

# SAMPLES <- SAMPLE_FILE
ifneq ($(wildcard $(SAMPLE_FILE)),)
ifeq ($(origin SAMPLES),undefined)
SAMPLES := $(shell sed '/^\#/d' $(SAMPLE_FILE))
endif
endif

# PANEL_OF_NORMAL_SAMPLES <- SAMPLE_PON_FILE
ifneq ($(wildcard $(SAMPLE_PON_FILE)),)
ifeq ($(origin PANEL_OF_NORMAL_SAMPLES),undefined)
PANEL_OF_NORMAL_SAMPLES := $(shell sed '/^\#/d' $(SAMPLE_PON_FILE))
endif
endif

# POOLED_NORM_SAMPLES <- SAMPLE_POOLEDNORM_FILE
ifneq ($(wildcard $(SAMPLE_POOLEDNORM_FILE)),)
ifeq ($(origin POOLED_NORM_SAMPLES),undefined)
POOLED_NORM_SAMPLES := $(shell sed '/^\#/d' $(SAMPLE_POOLEDNORM_FILE))
endif
endif

# MERGE_SAMPLES <- MERGE_SAMPLE_FILE
ifneq ($(wildcard $(MERGE_SAMPLE_FILE)),)
ifeq ($(origin MERGE_SAMPLES),undefined)
MERGE_SAMPLES := $(shell sed '/^\#/d' $(MERGE_SAMPLE_FILE))
endif
endif
# $(call get_tumors,samples) -> every word of the list except the last one.
# A dummy word is prepended and words 2..N are taken, which drops the last
# word using make built-ins only: no shell is forked, and an empty or
# one-word list yields an empty result instead of a zero or negative
# wordlist bound.
get_tumors = $(wordlist 2,$(words $1),x $1)
# $(call get_normal,samples) -> the last word of the list.
get_normal = $(lastword $1)
# $(call get_space_sets,n) -> n-th non-comment line of SAMPLE_SET_FILE with
# every whitespace run collapsed to one space and trailing whitespace removed.
get_space_sets = $(shell sed '/^\#/d; s/\s\+/ /g; s/\s\+$$//;' $(SAMPLE_SET_FILE) | sed -n '$(1)p')
# $(call get_underscore_sets,n) -> n-th non-comment line of SAMPLE_SET_FILE
# with its words joined by "_".
# Leading and trailing whitespace is removed BEFORE joining; otherwise it would
# itself be turned into "_" and leave a stray underscore on the set name.
get_underscore_sets = $(shell sed '/^\#/d; s/^\s\+//; s/\s\+$$//; s/\s\+/_/g;' $(SAMPLE_SET_FILE) | sed -n '$(1)p')
ifneq ($(wildcard $(SAMPLE_SET_FILE)),)
# Number of sample sets = number of non-comment lines in SAMPLE_SET_FILE.
# $(strip) is used instead of `cut -f 1 -d' '`: when wc left-pads its count
# with blanks, the first space-delimited field is empty and the count is lost.
NUM_SETS := $(strip $(shell sed '/^\#/d' $(SAMPLE_SET_FILE) | wc -l))
# 1 2 ... NUM_SETS, used as line indices into the sample set file.
SETS_SEQ := $(shell seq 1 $(NUM_SETS))
# set.<n> := space-separated samples found on the n-th set line.
$(foreach i,$(SETS_SEQ), \
  $(eval set.$i := $(call get_space_sets,$i)))
# set_lookup.<sample> := index of the set line that lists <sample>.
# Assignments happen in index order, so a sample listed on several lines keeps
# the index of the last line that mentions it.
$(foreach set_idx,$(SETS_SEQ),$(foreach member,$(set.$(set_idx)),$(eval set_lookup.$(member) := $(set_idx))))
# define SAMPLE_SETS to contain sample_sample_normal
# For every set index, the underscore-joined name of that set line
# (get_underscore_sets) is appended to SAMPLE_SETS. The append goes through
# $(eval), so each name is computed while the loop runs and SAMPLE_SETS ends up
# holding the literal names in file order.
$(foreach i,$(SETS_SEQ), \
$(eval SAMPLE_SETS += $(call get_underscore_sets,$i)))
# Set-level lookups, keyed by the underscore-joined set name:
#   tumor.<SET>  := every sample of the set except the last one (get_tumors)
#   normal.<SET> := the last sample of the set (get_normal)
$(foreach idx,$(SETS_SEQ), \
  $(eval tumor.$(call get_underscore_sets,$(idx)) := $(call get_tumors,$(set.$(idx)))) \
  $(eval normal.$(call get_underscore_sets,$(idx)) := $(call get_normal,$(set.$(idx)))))
# Last sample of every set, with the literal name "poolednorm" filtered out.
NORMAL_SAMPLES = $(filter-out poolednorm,$(foreach idx,$(SETS_SEQ),$(call get_normal,$(set.$(idx)))))
# All samples of every set except each set's last one.
TUMOR_SAMPLES = $(foreach idx,$(SETS_SEQ),$(call get_tumors,$(set.$(idx))))
# One <tumor>_<normal> name per tumor of every set in SAMPLE_SETS.
SAMPLE_PAIRS = $(foreach grp,$(SAMPLE_SETS),$(addsuffix _$(normal.$(grp)),$(tumor.$(grp))))
# Pair-level lookups. For every pair name <tumor>_<normal> built from a set:
#   tumor.<pair>  := the tumor sample of the pair
#   normal.<pair> := the normal sample of the pair
$(foreach grp,$(SAMPLE_SETS),$(foreach tum,$(tumor.$(grp)),$(eval tumor.$(tum)_$(normal.$(grp)) := $(tum))))
$(foreach grp,$(SAMPLE_SETS),$(foreach tum,$(tumor.$(grp)),$(eval normal.$(tum)_$(normal.$(grp)) := $(normal.$(grp)))))
# tumor_normal.$(tumor) to retrieve tumor_normal
# First loop: tumor_normal.<tumor> := <tumor>_<normal> for every tumor of
# every set in SAMPLE_SETS.
$(foreach set,$(SAMPLE_SETS), \
$(foreach tumor,$(tumor.$(set)), \
$(eval tumor_normal.$(tumor) := $(tumor)_$(normal.$(set)))))
# Second loop: the same pair name is also stored under the set's normal
# (tumor_normal.<normal>). It is reassigned for each tumor in turn, so a normal
# that belongs to several pairs ends up mapped to the pair visited last.
$(foreach set,$(SAMPLE_SETS), \
$(foreach tumor,$(tumor.$(set)), \
$(eval tumor_normal.$(normal.$(set)) := $(tumor)_$(normal.$(set)))))
# set.$(sample) to retrieve sample sets (underscores)
# set.<sample> := underscore-joined name of the set line that lists <sample>.
# The name is taken from the n-th set line itself (get_underscore_sets), the
# same key used for tumor.<SET> / normal.<SET>, rather than from the n-th word
# of SAMPLE_SETS: that positional lookup only lines up when SAMPLE_SETS holds
# exactly one word per set line, which is not the case after a blank set line
# or when SAMPLE_SETS was given a value from outside this file.
# The middle foreach binds the name once per set, so the shell runs once per
# set and not once per sample.
$(foreach i,$(SETS_SEQ), \
  $(foreach set_name,$(call get_underscore_sets,$i), \
    $(foreach sample,$(set.$i), \
      $(eval set.$(sample) := $(set_name)))))
# Names that occur exactly once across SAMPLE_FILE and SAMPLE_SET_FILE
# (comment lines dropped).
# The files are split on any whitespace (tr -s '[:space:]'), not just single
# blanks, so tab-separated set lines are tokenised the same way as in
# get_space_sets / get_underscore_sets.
UNMATCHED_SAMPLES = $(shell sed '/^\#/d' $(SAMPLE_FILE) $(SAMPLE_SET_FILE) | tr -s '[:space:]' '\n' | sort | uniq -u)
# Sorted, duplicate-free union of pair names and set names. $(sort) both
# orders and de-duplicates, so no shell pipeline is needed.
SAMPLE_SET_PAIRS = $(sort $(SAMPLE_PAIRS) $(SAMPLE_SETS))
# samples.<name> := the name with every "_" replaced by a space, i.e. the
# individual samples that make up a pair or set.
$(foreach set,$(SAMPLE_SET_PAIRS), \
  $(eval samples.$(set) := $(subst _, ,$(set))))
endif
# Optional split-sample table (SAMPLE_SPLIT_FILE, tab-separated, "#" lines
# ignored). Only processed when the file exists.
ifneq ($(wildcard $(SAMPLE_SPLIT_FILE)),)
# Column 1 and column 2 of the table. Simply expanded (:=) so the file is read
# once; as recursive variables they would re-run sed | cut on every reference,
# including twice per iteration of the loop below.
A := $(shell sed '/^\#/d' $(SAMPLE_SPLIT_FILE) | cut -f1)
B := $(shell sed '/^\#/d' $(SAMPLE_SPLIT_FILE) | cut -f2)
# split.<column-1 name> accumulates the column-2 names of all rows that share
# that column-1 name (rows are matched by word position in A and B).
$(foreach i,$(shell seq 1 $(words $(A))),$(eval split.$(word $i,$(A)) += $(word $i,$(B))))
# Every column-2 entry, in file order.
UNSPLIT_SAMPLES = $(B)
# Sorted, duplicate-free column-1 entries.
SPLIT_SAMPLES = $(sort $(A))
endif
# Pair names followed by the individual sample names.
ALL_SAMPLES = $(SAMPLE_PAIRS) $(SAMPLES)
# $(call COUNT_SAMPLES,path) -> number of "_" characters in the name, plus one.
# The name is the argument with everything from its first "." onwards removed
# and then any leading directory part stripped.
# Uses a here-string (<<<), so $(shell) has to run a shell that supports it
# (e.g. bash).
COUNT_SAMPLES = $(shell expr `sed 's/\..*//; s:.*/::' <<< $1 | grep -o "_" | wc -l` + 1)
# Fallbacks: when these lists are still undefined at this point (?= only
# assigns to undefined variables), use the normals of the sample sets.
PANEL_OF_NORMAL_SAMPLES ?= $(NORMAL_SAMPLES)
POOLED_NORM_SAMPLES ?= $(NORMAL_SAMPLES)
#######################################################
########### DEFINE GENOME AND RESOURCES ###############
#######################################################
# RESOURCE_REQ keeps any value defined before this point and otherwise
# defaults to "medium".
ifeq ($(origin RESOURCE_REQ),undefined)
RESOURCE_REQ = medium
endif
# Definitions for the reference selected by $(REF), then the shared resource
# definitions, read in that order.
include $(addprefix usb-modules-v2/,genome_inc/$(REF).inc resources.inc)
#####################################
### DEFINE BASIC FUNCTIONS ##########
#####################################
# Removal helpers used by recipes.
# NO_RM=true replaces both helpers with `touch`, so files that would have been
# removed are kept (and touched) instead; any other value selects real removal.
# All four assignments are simply expanded (:=); the NO_RM=true branch used a
# recursive "=" for RMR only, which was inconsistent with the rest.
NO_RM ?= false
ifeq ($(NO_RM),true)
RM := touch
RMR := touch
else
RM := rm -f
RMR := rm -r
endif
# define $(,) and $( ) for escaping commas and spaces
# A variable literally named "," whose value is a comma: $(,) can be written
# inside a function argument, where a bare comma would separate arguments.
, := ,
# `space` starts empty and then has nothing appended with +=; this idiom is
# relied on to leave a single blank as its value.
space :=
space +=
# The same idiom again, this time for a variable whose NAME is the value of
# $(space), so that $( ) can be written where a literal blank is needed.
$(space) :=
$(space) +=
# $(call strip-suffix,filename)
# Returns the part of filename before its first "." (every "." is turned into
# a blank and the first resulting word is kept).
strip-suffix = $(firstword $(subst ., ,$(1)))
# $(call LINK,source,link)
# Shell fragment: creates (or replaces, -f) a symbolic link named `link` that
# points at the bare file name of `source`, then touches `source`.
# NOTE(review): the link target is relative ($(notdir ...)), so this presumably
# expects `link` to live in the same directory as `source` - confirm.
LINK = ln -svf $(notdir $1) $(2) && touch $1
# Date in `date +%F` format, captured once when this file is parsed.
NOW := $(shell date +"%F")
# Directory creation command: parents are created as needed, mode 775.
MKDIR = mkdir -p -m 775
# Creates the current target's directory and the matching directory under
# $(LOGDIR). Uses $(@D), so it is only meaningful inside a recipe.
MKDIRS = $(MKDIR) $(LOGDIR)/$(@D) $(@D)
# Log file path for the current target: $(LOGDIR)/<target>.log.
LOG = $(LOGDIR)/$(@).log
# umask applied by INIT.
UMASK = 002
# Recipe prologue: unload all modules, set the umask, create the target and
# log directories, and enable pipefail in the recipe's shell. The trailing ";"
# lets further commands be appended directly after $(INIT).
INIT = module purge; umask $(UMASK); $(MKDIRS); set -o pipefail;
# $(call PURGE_AND_LOAD,modules): unload all modules, then load the given ones.
PURGE_AND_LOAD = module purge && module load $1
#PURGE_AND_LOAD = export LMOD_DISABLE_SAME_NAME_AUTOSWAP='no' && module purge && module load $1
# Recipe fragment for a *.md5 target: checksums the file named like the target
# without its ".md5" suffix and writes the result to the target.
MD5 = md5sum $(@:.md5=) > $@
# Recipe fragment: verifies every *.md5 prerequisite with `md5sum -c`, retrying
# up to 21 times (i = 0..20) with a 5 second pause after each failed attempt.
# {0..20} is brace expansion, so the recipe shell must support it.
# NOTE(review): when all attempts fail the loop still ends with the status of
# `sleep 5`, so this fragment by itself does not appear to fail the recipe -
# confirm this best-effort behavior is intended.
CHECK_MD5 = for i in {0..20}; do if md5sum -c $(filter %.md5,$^); then break; fi; sleep 5; done;
# Automatic-variable helpers with a trailing ".md5" removed from each name:
# $(<M) first prerequisite, $(^M) all prerequisites, $(@M) target.
<M = $(<:.md5=)
^M = $(^:.md5=)
@M = $(@:.md5=)
# $(<<M) / $(<<<M): second / third word of $(^M).
<<M = $(word 2,$(^M))
<<<M = $(word 3,$(^M))
# $(call CHECK_VCF,vcf.file,target,command)
# Shell fragment. If vcf.file holds no records (no line without a leading "#")
# and has exactly one "#CHROM" header line, it is copied to target unchanged;
# in every other case command is run.
CHECK_VCF = if [ `grep -v '^\#' $1 | wc -l` -eq 0 ] && [ `grep '^\#CHROM' $1 | wc -l` -eq 1 ]; then cp $1 $2; else $3; fi
# $(call CHECK_MAF,maf.file,target,command)
# Same idea for MAF: if maf.file has exactly one line without a leading "#" and
# exactly one line starting with "Hugo_Symbol", it is copied to target;
# otherwise command is run.
CHECK_MAF = if [ `grep -v '^\#' $1 | wc -l` -eq 1 ] && [ `grep '^Hugo_Symbol' $1 | wc -l` -eq 1 ]; then cp $1 $2; else $3; fi
# $(call CHECK_VCF_CMD,vcf.file,command_if_no_records,command_otherwise)
# Like CHECK_VCF, but runs a caller-supplied command instead of cp when the
# file has a "#CHROM" header and no records.
CHECK_VCF_CMD = if [ `grep -v '^\#' $1 | wc -l` -eq 0 ] && [ `grep '^\#CHROM' $1 | wc -l` -eq 1 ]; then $2; else $3; fi
# $(call CHECK_VCF_HIGH_MODERATE_CMD,vcf.file,command_if_none,command_otherwise)
# Runs command_if_none when the file has a "#CHROM" header and none of its
# records contains "HIGH" or "MODERATE"; otherwise runs command_otherwise.
# Uses `grep -E` rather than `egrep`, which GNU grep marks as obsolescent.
CHECK_VCF_HIGH_MODERATE_CMD = if [ `grep -v '^\#' $1 | grep -E "HIGH|MODERATE" | wc -l` -eq 0 ] && [ `grep '^\#CHROM' $1 | wc -l` -eq 1 ]; then $2; else $3; fi
# Positional prerequisite helpers, extending $< (the first prerequisite):
# $(<<), $(<<<), $(<<<<) and $(<<<<<) are the 2nd, 3rd, 4th and 5th word of $^.
# They are recursive, so they pick up the $^ of the rule whose recipe uses them.
<< = $(word 2,$^)
<<< = $(word 3,$^)
<<<< = $(word 4,$^)
<<<<< = $(word 5,$^)
# R preamble (the body below is R code, emitted verbatim after make expands
# $(@D)): creates the current target's directory recursively without warnings
# and defines qw(), which splits a string on runs of whitespace and returns the
# pieces as a vector.
define R_INIT
dir.create('$(@D)', showWarnings = F, recursive = T)
qw <- function(x) unlist(strsplit(x, "[[:space:]]+"))
endef
endif
# Include-guard flag: once set, the `ifndef MAKEFILE_INC` section of this file
# is skipped if the file is read again.
MAKEFILE_INC := true