-
Notifications
You must be signed in to change notification settings - Fork 0
/
structure2.py
57 lines (48 loc) · 1.77 KB
/
structure2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
'''
#One species has several genes, one gene has several proteins, and each protein corresponds to one transcript.
'''
import csv
class Protein:
    """A single protein, identified by name, together with its length.

    Only the name and the length are stored; no exon information is kept
    on the instance.
    """

    def __init__(self, name, len):
        # ``len`` shadows the builtin inside this method only; the parameter
        # name is kept so existing keyword callers continue to work.
        self.name = name
        self.len = len

    def __repr__(self):
        return "%s(name=%r, len=%r)" % (
            self.__class__.__name__, self.name, self.len)
class Gene:
    """A gene, identified by name, holding its proteins.

    ``proteins`` is a dict that starts empty and is filled in by callers;
    the keys and values are whatever the caller stores in it.
    """

    def __init__(self, name):
        self.name = name
        self.proteins = {}

    @property
    def nproteins(self):
        """Number of entries currently stored in ``proteins``."""
        return len(self.proteins)

    def __repr__(self):
        return "%s(name=%r, nproteins=%d)" % (
            self.__class__.__name__, self.name, self.nproteins)
class Species:
    """Container for every gene of one species.

    One species has several genes, and one gene has several proteins.
    ``genes`` maps a gene name to its Gene object.
    """

    def __init__(self, name):
        self.name = name  # name of the species
        self.genes = {}

    def addProtein(self, gene, protname, len):
        """Record a protein under ``gene``, creating the gene on first use.

        A protein already stored under the same ``protname`` for that gene
        is replaced.

        Args:
            gene: name of the gene the protein belongs to.
            protname: name of the protein; used as the key in the gene's
                ``proteins`` dict.
            len: length stored on the new Protein.
        """
        if gene not in self.genes:
            self.genes[gene] = Gene(gene)
        self.genes[gene].proteins[protname] = Protein(protname, len)

    @property
    def ngenes(self):
        """Number of genes currently stored."""
        return len(self.genes)
def import_gene(specie, filename):
    """Read a tab-separated file and build a Species from its rows.

    Each usable row supplies the gene name (column 0), the protein name
    (column 1) and the protein length (column 2, converted with ``int``).
    Rows with fewer than three columns, or whose third column is not an
    integer, are skipped.

    Args:
        specie: name given to the Species that is created.
        filename: path of the tab-delimited input file.

    Returns:
        The populated Species instance.
    """
    species = Species(specie)
    # The context manager closes the file even if reading fails part-way.
    with open(filename) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            try:
                gene, protname, length = row[0], row[1], int(row[2])
            except (IndexError, ValueError):
                # Malformed row: too few columns or a non-numeric length.
                continue
            species.addProtein(gene, protname, length)
    return species
def homo_load(gene_list, folder):
    """Load ``<folder>/homologs.txt`` into a dict keyed by first column.

    The file is read as tab-separated text. Each non-empty row is stored
    whole (as a list of strings) under its first field, and that first
    field is also appended to ``gene_list``. A first field that occurs more
    than once keeps only its last row in the dict but is appended to
    ``gene_list`` every time. Blank lines are skipped.

    Args:
        gene_list: list that is extended in place with the first field of
            every row read.
        folder: directory containing ``homologs.txt``.

    Returns:
        A ``(homologs, gene_list)`` tuple, where ``gene_list`` is the same
        list object that was passed in.
    """
    homologs = {}
    # The context manager closes the file even if reading fails part-way.
    with open(folder + '/homologs.txt') as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row:
                # csv.reader yields [] for a blank line; nothing to index.
                continue
            homologs[row[0]] = row
            gene_list.append(row[0])
    return homologs, gene_list