forked from WEHI-ResearchComputing/wehi-gdc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlist_file_metadata.py
executable file
·80 lines (64 loc) · 1.75 KB
/
list_file_metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
'''
List metadata for all files associated with the cases of a GDC project and
write it, together with the case records, to a JSON file.
'''
from helpers import GDCIterator
import json
import sys
from argparse import ArgumentParser
def build_parser():
    '''
    Build the command line argument parser.

    Both options are mandatory:

    --output-file     name of the output file (stored as ``output_file``)
    --gdc-project-id  GDC project id, e.g. TCGA-SKCM (stored as
                      ``gdc_project_id``)

    Returns the configured ArgumentParser; nothing is parsed here.
    '''
    parser = ArgumentParser()
    parser.add_argument('--output-file',
                        help='Output file name',
                        dest='output_file',
                        required=True)
    # No explicit default: the option is required, so argparse would never
    # fall back to one.
    parser.add_argument('--gdc-project-id',
                        dest='gdc_project_id',
                        help='The GDC project id, e.g. TCGA-SKCM, TCGA-LUAD, etc',
                        type=str,
                        required=True)
    return parser
# Filter passed to GDCIterator('cases', ...): an equality test on the
# 'project.project_id' field. 'value' is a placeholder that main() overwrites
# with the --gdc-project-id option before the filter is used.
case_filters = {
    'op': '=',
    'content': {
        'field': 'project.project_id',
        'value': None,
    },
}
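# Alternative case filter, currently disabled: it tests the
# 'files.experimental_strategy' field instead of the project id.
# case_filters = {
#     'op': '=',
#     'content': {
#         'field': 'files.experimental_strategy',
#         'value': None
#     }
# }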
# Filter passed to GDCIterator('files', ...): an equality test on the
# 'cases.submitter_id' field. 'value' is a placeholder (kept explicit so the
# template has the same shape as case_filters) that main() overwrites with
# each case's submitter id before the filter is used.
file_filters = {
    'op': '=',
    'content': {
        'field': 'cases.submitter_id',
        'value': None,
    },
}
def main(argv):
    '''
    Collect case and file records for one GDC project and write them to a
    JSON file.

    argv is the list of command line arguments without the program name; see
    build_parser() for the accepted options.

    For each case yielded by GDCIterator('cases', ...) under the project
    filter, every record yielded by GDCIterator('files', ...) for that case's
    submitter id is gathered. The output file holds a single JSON object of
    the form {"cases": [{"case_id": ..., "case": ..., "files": [...]}, ...]}.

    Note: the module-level case_filters and file_filters dictionaries are
    updated in place with the project id and the current submitter id.
    '''
    options = build_parser().parse_args(args=argv)
    output_file = options.output_file

    # Fill the requested project id into the shared case filter.
    case_filters['content']['value'] = options.gdc_project_id

    cases = []
    for case in GDCIterator('cases', case_filters):
        submitter_id = case['submitter_id']
        case_id = case['case_id']
        print(f'case_id: {case_id}, submitter_id: {submitter_id}')

        # Re-point the shared file filter at this case, then collect every
        # record the iterator yields for it.
        file_filters['content']['value'] = submitter_id
        files = list(GDCIterator('files', file_filters))

        cases.append({'case_id': case_id, 'case': case, 'files': files})

    # json.dump writes straight to the file; the explicit newline keeps the
    # output identical to the previous print(json.dumps(...)) form.
    with open(output_file, 'w') as f:
        json.dump({'cases': cases}, f, indent=2)
        f.write('\n')
# Script entry point: when the file is executed directly, hand the command
# line arguments (without the program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])