-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdnastring.py
37 lines (32 loc) · 1.04 KB
/
dnastring.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#gc_content
#base_count('G')
#reverse_complement
#G to C
#T to A
# This code calculates the GC content of a sequence taken from a given FASTA file.
class NucleotideString:
    """A nucleotide sequence with base counting, GC content and reverse complement.

    Subclasses select the alphabet by overriding ``base_complement``.
    """

    # Maps each base to its complementary base (DNA alphabet by default).
    base_complement = {'G': 'C', 'C': 'G',
                       'A': 'T', 'T': 'A'}

    def __init__(self, sequence):
        """Store *sequence* and start with an empty per-base count cache."""
        self.sequence = sequence
        # Cache of base -> number of occurrences, filled lazily by base_count().
        self.bases = {}

    def base_count(self, base):
        """Return how many times *base* occurs in the sequence.

        The result is cached in ``self.bases`` so repeated queries for the
        same base do not rescan the sequence.
        """
        try:
            return self.bases[base]
        except KeyError:
            count = self.bases[base] = self.sequence.count(base)
            return count

    def gc_content(self):
        """Return the fraction of bases that are G or C, as a float.

        An empty sequence yields 0.0 rather than raising ZeroDivisionError.
        """
        total = len(self.sequence)
        if not total:
            return 0.0
        g = self.base_count('G')
        c = self.base_count('C')
        # float() keeps true division even under Python 2 semantics.
        return float(g + c) / total

    def reverse_complement(self):
        """Return the reverse complement of the sequence as a string.

        Raises:
            KeyError: if the sequence contains a base that is not a key of
                ``base_complement``.
        """
        complement = self.base_complement
        # A single join avoids the quadratic cost of repeated string prepending.
        return ''.join(complement[base] for base in reversed(self.sequence))
class DNAString(NucleotideString):
    """DNA sequence; adds nothing to NucleotideString and uses its complement table."""
class RNAString(NucleotideString):
    """RNA sequence; overrides the complement table so that A pairs with U."""

    # Maps each RNA base to its complementary base.
    base_complement = {
        'G': 'C',
        'C': 'G',
        'A': 'U',
        'U': 'A',
    }