-
Notifications
You must be signed in to change notification settings - Fork 1
/
databases.py
101 lines (83 loc) · 3.41 KB
/
databases.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from namespace_registry import NamespaceRegistry as ns
# - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Database:
    """Description of one cross-referenced external resource.

    Attributes:
        abbrev: abbreviation of the resource, as given by the caller.
        rdf_id: value returned by ``ns.xref.cleanDb(abbrev)``.
        name:   name of the resource.
        url:    server URL of the resource.
        cat:    category label of the resource.
        in_up:  flag supplied by the caller (``Databases`` passes whether
                ``abbrev`` is present in its UniProt abbreviation set).
    """

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self, abbrev, name, url, cat, in_up):
        """Store the given fields and derive ``rdf_id`` from ``abbrev``."""
        self.abbrev = abbrev
        self.rdf_id = ns.xref.cleanDb(abbrev)
        self.name, self.url, self.cat, self.in_up = name, url, cat, in_up

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __str__(self):
        """Return a one-line, human-readable summary of every field."""
        positional = ", ".join(
            f"{field}" for field in (self.abbrev, self.name, self.url, self.cat)
        )
        return f"Database({positional}, rdf_id={self.rdf_id}, in_up={self.in_up})"
# - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Databases:
    """In-memory index of the external resources listed in a cross-reference file.

    After construction the instance exposes:
        uniprot_set: set of abbreviations read from the UniProt abbreviation file.
        db_dict:     dict mapping each abbreviation to its ``Database`` object.
        cats:        dict mapping each category label to a record
                     ``{"label": ..., "count": ..., "IRI": ...}`` where ``count``
                     is the number of databases carrying that category.
    """

    # Labels of the record fields that are kept; any other line is ignored.
    _XREF_FIELDS = ("Abbrev", "Name", "Server", "Cat")

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self, src_file="data_in/cellosaurus_xrefs.txt",
                 uniprot_file="uniprot-db-abbr.txt"):
        """Load both input files and build the database and category indexes.

        Args:
            src_file:     path of the cross-reference description file.
            uniprot_file: path of the file listing one UniProt database
                          abbreviation per line (``#`` lines are comments).

        Raises:
            OSError: if either file cannot be opened.
        """
        # `with` guarantees the handles are closed even if parsing fails.
        with open(uniprot_file) as f_in:
            self.uniprot_set = self._parse_uniprot_abbrevs(f_in)

        self.db_dict = dict()
        with open(src_file) as f_in:
            for abbrev, name, url, cat in self._parse_xref_records(f_in):
                in_up = abbrev in self.uniprot_set
                self.db_dict[abbrev] = Database(abbrev, name, url, cat, in_up)

        self.cats = dict()
        for db in self.db_dict.values():
            cat = db.cat
            if cat not in self.cats:
                self.cats[cat] = {"label": cat, "count": 0, "IRI": get_db_category_IRI(cat)}
            self.cats[cat]["count"] += 1

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _parse_uniprot_abbrevs(lines):
        """Return the set of non-empty, non-comment (``#``) stripped lines."""
        abbrevs = set()
        for raw in lines:
            line = raw.strip()
            if line and not line.startswith("#"):
                abbrevs.add(line)
        return abbrevs

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    @staticmethod
    def _parse_xref_records(lines):
        """Yield ``(abbrev, name, url, cat)`` for each record found in *lines*.

        A record starts at an ``Abbrev`` line and ends at the next line that
        starts with ``//``. Fields missing from a record default to ``""``.
        Field lines have the form ``<label><padding>: <value>``; the label is
        split off at the first colon so any amount of padding is accepted
        instead of relying on a fixed 8-character prefix.
        """
        record = None  # None while no "Abbrev" line is pending
        for raw in lines:
            line = raw.strip()
            if line.startswith("//"):
                # A terminator with no open record (e.g. at the very top of
                # the file) is ignored rather than failing on unset fields.
                if record is not None:
                    yield (record["Abbrev"], record["Name"],
                           record["Server"], record["Cat"])
                record = None
                continue
            label, colon, value = line.partition(":")
            label = label.rstrip()
            if not colon or label not in Databases._XREF_FIELDS:
                continue
            if not value.startswith(" "):
                continue  # the colon must be followed by a space and a value
            value = value[1:]
            if label == "Abbrev":
                record = {"Abbrev": value, "Name": "", "Server": "", "Cat": ""}
            elif record is not None:
                # Fields outside a record would otherwise leak into the
                # previously emitted one.
                record[label] = value

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def categories(self):
        """Return the dict of category records keyed by category label."""
        return self.cats

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def keys(self):
        """Return a view of all known database abbreviations."""
        return self.db_dict.keys()

    # - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def get(self, abbrev):
        """Return the ``Database`` for *abbrev*, or ``None`` if it is unknown."""
        return self.db_dict.get(abbrev)
# - - - - - - - - - - - - - - - - - - - - - - - - - - -
def get_db_category_IRI(label):
    """Build the prefixed IRI for a database category label.

    The label is title-cased, then every space, parenthesis, slash and
    hyphen is removed; the result is appended to ``ns.cello.pfx`` after
    a ``:`` separator.
    """
    pfx = ns.cello.pfx
    # One translation pass deletes all the unwanted characters at once.
    dropped_chars = str.maketrans("", "", " ()/-")
    local_name = label.title().translate(dropped_chars)
    return pfx + ":" + local_name
# = = = = = = = = = = = = = = = = = = = = = = = = = = =
if __name__ == '__main__':
    # = = = = = = = = = = = = = = = = = = = = = = = = = = =
    # Manual check: dump every loaded database, then every category record.
    dbs = Databases()
    for key in dbs.keys():
        print(key, "==>", dbs.get(key))
    print("----- cats -----")
    for cat_record in dbs.categories().values():
        print(cat_record)