-
Notifications
You must be signed in to change notification settings - Fork 0
/
Exercise07_function.py
74 lines (50 loc) · 1.59 KB
/
Exercise07_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def GCcontent(dna):
    """Return the GC fraction of ``dna`` together with its total length.

    All 'N' characters are removed before counting, so they contribute to
    neither the numerator nor the denominator of the fraction. The returned
    length is that of the original string, 'N's included.

    NOTE(review): this module defines ``GCcontent`` twice; the later
    definition (which returns only the fraction) replaces this one when the
    file is executed.

    Args:
        dna: Sequence string. Only upper-case 'G', 'C' and 'N' are matched.

    Returns:
        A tuple ``(gc_fraction, len(dna))``. ``gc_fraction`` is 0.0 when no
        characters remain after removing the 'N's, instead of raising
        ZeroDivisionError.
    """
    dna_noN = dna.replace('N', "")
    if not dna_noN:
        # Empty or all-'N' input: there is nothing to divide by.
        return 0.0, len(dna)
    Gs = dna_noN.count('G')
    Cs = dna_noN.count('C')
    # float() keeps this a true division on Python 2 as well.
    return (Gs + Cs) / float(len(dna_noN)), len(dna)
def GCcontent(dna):
    """Return the fraction of G and C characters in ``dna``, ignoring 'N's.

    All 'N' characters are removed first, so they count toward neither the
    numerator nor the denominator.

    Args:
        dna: Sequence string. Only upper-case 'G', 'C' and 'N' are matched.

    Returns:
        ``(G + C) / len(dna without N)`` as a float, or 0.0 when no characters
        remain after removing the 'N's (instead of raising
        ZeroDivisionError).
    """
    dna_noN = dna.replace('N', "")
    if not dna_noN:
        # Empty or all-'N' input: there is nothing to divide by.
        return 0.0
    Gs = dna_noN.count('G')
    Cs = dna_noN.count('C')
    # float() keeps this a true division on Python 2 as well.
    return (Gs + Cs) / float(len(dna_noN))
def Ncontent(dna):
    """Return the fraction of characters in ``dna`` that are 'N'.

    Args:
        dna: Sequence string. Only upper-case 'N' is matched.

    Returns:
        ``count('N') / len(dna)`` as a float, or 0.0 for an empty string
        (instead of raising ZeroDivisionError).
    """
    if not dna:
        # Nothing to divide by for an empty sequence.
        return 0.0
    # float() keeps this a true division on Python 2 as well.
    return dna.count('N') / float(len(dna))
def sum_two_numbers(first_number, second_number):
    """Return ``first_number + second_number``.

    Args:
        first_number: Left operand.
        second_number: Right operand.

    Returns:
        The result of applying ``+`` to the two arguments.
    """
    # The original returned the undefined name 'results' (NameError).
    result = first_number + second_number
    return result
# Demonstrate sum_two_numbers. The parenthesised single-argument form of
# print behaves the same on Python 2 and Python 3.
print(sum_two_numbers(4, 9))
dna = 'AGCTNNCTC'
# The GCcontent definition in effect at this point returns only the GC
# fraction, so the length is taken from the string directly instead of being
# unpacked from the call (unpacking a float raises TypeError).
gc = GCcontent(dna)
length_dna = len(dna)
def change_list(a_list, to_change='I changed this!'):
    """Overwrite the first element of ``a_list`` in place with ``to_change``.

    Args:
        a_list: Mutable sequence with at least one element; it is modified
            in place, so the caller sees the change.
        to_change: Value stored at index 0.

    Returns:
        None.

    Raises:
        IndexError: If ``a_list`` is an empty list.
    """
    a_list[0] = to_change
# change_list receives a copy made by list(a), so 'a' itself is left
# untouched and the original four numbers are printed.
a = [1, 2, 3, 4]
change_list(list(a))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(a)
#Exercise 2
#Sequence 1
#ATGGGGGTGTGTGNNNNNNTGA
#Sequence 2
#ATGCCCGCGCGCGCTGA
#Sequence 3
#GGGTGGTGTGTGACAAAAAAAA
#Given a string 'filename', write a function which opens the file, iterates over all sequences, and writes a few statistics about each sequence:
# - Name of each sequence ('Sequence 1', ...)
# - Count of Ns
# - GC-content
#Print the number of sequences in that file.
# File name of the example input. Note that give_stats has a parameter with
# the same name, which shadows this global inside that function.
example_fasta = 'Fasta_examples.txt'
def give_stats(example_fasta):
    """Collect per-sequence statistics from a FASTA-style text file.

    A line starting with '>' names a sequence; each following non-header line
    is treated as that sequence's data. For every sequence the result stores
    ``[GCcontent(sequence), Ncontent(sequence)]`` under the sequence name.

    NOTE(review): as in the original loop, each data line overwrites the
    entry for the current name, so a sequence wrapped over several lines
    keeps only the statistics of its last line.

    Args:
        example_fasta: Path of the file to read.

    Returns:
        A dict mapping each sequence name to its
        ``[GCcontent(...), Ncontent(...)]`` list; ``len()`` of the dict is
        the number of sequences found.

    Raises:
        ValueError: If sequence data appears before any '>' header line.
    """
    my_dict = {}
    name = None
    # 'with' guarantees the file is closed even if a line fails to parse.
    with open(example_fasta, 'r') as reader:
        for raw_line in reader:
            # Drop the line terminator so it is not counted as a sequence
            # character by the statistics functions.
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # Blank lines carry neither a name nor sequence data.
                continue
            if line.startswith('>'):
                name = line.lstrip('>')
            elif name is None:
                raise ValueError(
                    "sequence data found before any '>' header line")
            else:
                my_dict[name] = [GCcontent(line), Ncontent(line)]
    return my_dict