-
Notifications
You must be signed in to change notification settings - Fork 120
Expand file tree
/
Copy pathTextbook_01A.py
More file actions
28 lines (22 loc) · 867 Bytes
/
Textbook_01A.py
File metadata and controls
28 lines (22 loc) · 867 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python
'''
A solution to a code challenge that accompanies Bioinformatics Algorithms: An Active-Learning Approach by Phillip Compeau & Pavel Pevzner.
The textbook is hosted on Stepic and the problem is listed on ROSALIND under the Textbook track.
Problem Title: Frequent Words Problem
Chapter #: 01
Problem ID: A
URL: http://rosalind.info/problems/1a/
'''
def most_frequent_kmers(dna, k):
    """Return every k-mer that occurs most often in ``dna``.

    Overlapping occurrences are counted, i.e. every window
    ``dna[i:i + k]`` for ``i`` in ``0 .. len(dna) - k`` contributes one
    count to its k-mer.

    Args:
        dna: The text to scan.
        k: Window length of the k-mers.

    Returns:
        A list of all k-mers sharing the highest count, in the iteration
        order of the counting dictionary.  The list is empty when ``dna``
        contains no window of length ``k`` (for example ``k > len(dna)``).
    """
    counts = {}
    # ``range`` (rather than ``xrange``) keeps this runnable on Python 2 and 3.
    for start in range(len(dna) - k + 1):
        kmer = dna[start:start + k]
        counts[kmer] = counts.get(kmer, 0) + 1

    # ``max`` raises ValueError on an empty sequence, so bail out early.
    if not counts:
        return []

    # Compute the maximum once instead of once per dictionary entry.
    highest = max(counts.values())
    return [kmer for kmer, count in counts.items() if count == highest]


def main():
    """Read the input file, print the most frequent k-mers and save them."""
    with open('data/textbook/rosalind_1a.txt') as input_data:
        # Skip blank lines so a trailing newline does not break the unpacking.
        dna, k = [line.strip() for line in input_data if line.strip()]

    kmers = most_frequent_kmers(dna, int(k))
    answer = ' '.join(kmers)

    # Single-argument ``print(...)`` behaves the same on Python 2 and 3.
    print(answer)
    with open('output/textbook/Textbook_01A.txt', 'w') as output_data:
        output_data.write(answer)


if __name__ == '__main__':
    main()