forked from OBOFoundry/OBOFoundry.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sort-ontologies.py
executable file
·92 lines (76 loc) · 2.77 KB
/
sort-ontologies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
import csv
import sys
from argparse import ArgumentParser
import yaml
def main(args):
    """Sort the ontologies in a YAML file according to a metadata grid.

    Parses the command line, reads the sort order from the metadata grid,
    loads the unsorted YAML, reorders its ontologies, and writes the result
    to the output YAML file.

    Args:
        args: Full argument vector in ``sys.argv`` form, i.e. the program
            name first followed by the three positional arguments. ``None``
            makes argparse fall back to reading ``sys.argv`` itself.
    """
    parser = ArgumentParser(
        description="""
    Takes a YAML file containing information for various ontologies and a metadata file specifying
    the sorting order for ontologies, and then produces a sorted version input YAML"""
    )
    parser.add_argument(
        "unsorted_yaml",
        type=str,
        help="Unsorted YAML file containing information for ontologies",
    )
    parser.add_argument(
        "metadata_grid",
        type=str,
        help="CSV or TSV file containing metadata information for ontologies",
    )
    parser.add_argument(
        "output_yaml",
        type=str,
        help="Name of output YAML file that will contain sorted ontology information",
    )

    # Honour the argument vector we were handed instead of silently ignoring
    # it; element 0 is the program name, which argparse must not see.
    argv = None if args is None else list(args)[1:]
    options = parser.parse_args(argv)

    sort_order = get_sort_order(options.metadata_grid)
    data = load_data(options.unsorted_yaml)
    data = sort_ontologies(data, sort_order)
    write_data(data, options.output_yaml)
def get_sort_order(grid):
    """Return the ontology IDs listed in the metadata grid, in file order.

    Args:
        grid: Path to the metadata grid. The delimiter is chosen from the
            file extension (case-insensitive): ``.csv`` is comma-separated,
            ``.tsv`` and ``.txt`` are tab-separated.

    Returns:
        A list holding the first-column value of every non-empty row after
        the header row. The order of the list is the order of the rows.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the
        extension is none of the supported ones.
    """
    # Test the suffix rather than a substring so that a directory name such
    # as "exports.csv/" cannot decide the delimiter of the file inside it.
    lowered = grid.lower()
    if lowered.endswith(".csv"):
        separator = ","
    elif lowered.endswith((".tsv", ".txt")):
        separator = "\t"
    else:
        print("%s must be tab- or comma-separated." % grid, file=sys.stderr)
        sys.exit(1)

    # newline="" lets the csv module do its own line-ending handling.
    with open(grid, "r", newline="") as f:
        reader = csv.reader(f, delimiter=separator)
        # Ignore the header row; the default keeps an empty file from raising.
        next(reader, None)
        # Ontology IDs are in the first column. Blank lines parse to an empty
        # row and carry no ID, so they are skipped.
        return [row[0] for row in reader if row]
def load_data(data_file):
    """Parse the YAML file at *data_file* and return its contents.

    Args:
        data_file: Path to the YAML file to read.

    Returns:
        Whatever the YAML document deserialises to under the safe loader.
        NOTE(review): the rest of this script indexes the result with
        ``["ontologies"]``, so it presumably expects a mapping; an empty
        document would yield ``None`` instead.
    """
    # The context manager closes the handle even if parsing raises; the
    # previous version left the file open.
    with open(data_file, "r") as stream:
        return yaml.load(stream, Loader=yaml.SafeLoader)
def sort_ontologies(data, sort_order):
    """Reorder ``data["ontologies"]`` to follow *sort_order*.

    Args:
        data: Mapping whose ``"ontologies"`` entry is a list of mappings,
            each carrying an ``"id"`` key.
        sort_order: Iterable of ontology IDs in the desired order.

    Returns:
        The same *data* object, modified in place: its ``"ontologies"``
        list is replaced by one holding the entry for each ID in
        *sort_order*, in that order. Ontologies whose ID is not in
        *sort_order* are left out of the result.

    Raises:
        IndexError: If an ID in *sort_order* has no matching ontology.
    """
    # Index once instead of rescanning the whole list for every ID. We assume
    # ontology ids are unique; if one repeats, the last entry wins.
    ontologies_by_id = {ont["id"]: ont for ont in data["ontologies"]}

    ontologies = []
    for ont_id in sort_order:
        try:
            ontologies.append(ontologies_by_id[ont_id])
        except KeyError:
            # Keep the exception type callers may already handle, but say
            # which ID was missing.
            raise IndexError(
                "ontology id %r from the sort order was not found in the data" % (ont_id,)
            ) from None

    data["ontologies"] = ontologies
    return data
def write_data(data, output):
    """Serialise *data* as YAML into the file at *output*.

    Args:
        data: The ontologies data to dump.
        output: Path of the YAML file to write; it is opened in ``"w"`` mode.
    """
    with open(output, "w") as out_file:
        # allow_unicode=True is passed through to the dumper unchanged.
        yaml.safe_dump(data, stream=out_file, allow_unicode=True)
# Script entry point: run the sorter only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)