import screed

# Input reads file (FASTQ, gzip-compressed, judging by the .fq.gz extension).
filename = '/Users/t/dev/2012-scripps/python/25k.fq.gz'

# Peek at the first record only: show its three fields, then stop iterating.
# The path comes from `filename` instead of being spelled out a second time,
# so the two cannot drift apart.  Single-argument print(...) produces the
# same output under Python 2 and Python 3.
for record in screed.open(filename):
    print(record.name)
    print(record.sequence)
    print(record.accuracy)
    break
895:1:1:1246:14654/1 CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT ][aaX__aa[`ZUZ[NONNFNNNNNO_____^RQ_
# Look up a single read by name.
#
# record.name carries no leading '@' (the records printed in this session
# show bare names), so the target must be written without it.  With the '@'
# the comparison never succeeded, the loop ran to the end of the file, and
# the record printed was simply the last one read.
# NOTE: the captured output that follows this cell in the transcript predates
# this fix and shows that fall-through record.
target_name = '895:1:4:1596:8538/2'

found = None  # stays None when no record has the requested name
for record in screed.open(filename):
    if record.name == target_name:
        found = record
        break
print(found)
{'annotations': '', 'sequence': 'TACGGTCTTTTGAACTCATTGATGGCCCTGCGAAACGCCGCCTCAAGCTGTGCTCCGTAA', 'name': '895:1:4:1596:6003/2', 'accuracy': '_____aMUV]VQFJZZVZ]VZZMPZP[Q\\X]]S]S````X__X_WUZZMU________P_'}
# Names of the reads to pull out of the file.
list_of_names = ['895:1:4:1596:8538/2', '895:1:4:1596:6003/2']

# One pass over the file, keeping every record whose name is in the list;
# matches come out in the order they occur in the file.
list_of_records = [
    record
    for record in screed.open(filename)
    if record.name in list_of_names
]
list_of_records
[{'annotations': '', 'sequence': 'TCGCTCAGTTTCTTCGGCATATTGATCTGAACTTCGACGTAAAGATCGCCACGCTTTCCTGAGACTCCC', 'name': '895:1:4:1596:8538/2', 'accuracy': '_________X```]ZZZX]]KXXXY____X________________________X___X______``P`'}, {'annotations': '', 'sequence': 'TACGGTCTTTTGAACTCATTGATGGCCCTGCGAAACGCCGCCTCAAGCTGTGCTCCGTAA', 'name': '895:1:4:1596:6003/2', 'accuracy': '_____aMUV]VQFJZZVZ]VZZMPZP[Q\\X]]S]S````X__X_WUZZMU________P_'}]
%%file list-of-seqs.txt
895:1:4:1596:8538/2
895:1:4:1596:6003/2
Writing list-of-seqs.txt
# Read the wanted names, one per line, stripping surrounding whitespace
# (including the trailing newline).  The `with` block closes the file as
# soon as it has been read instead of leaving the handle open.
with open('list-of-seqs.txt') as names_file:
    x = [line.strip() for line in names_file]
x
['895:1:4:1596:8538/2', '895:1:4:1596:6003/2']
list_of_names = x

# The names come from a file and may be numerous, and membership is tested
# once per record, so build a hash-based lookup once up front rather than
# scanning the list for every record.
wanted_names = frozenset(list_of_names)

# One pass over the file; matches are kept in file order.
list_of_records = [
    record
    for record in screed.open(filename)
    if record.name in wanted_names
]
list_of_records
[{'annotations': '', 'sequence': 'TCGCTCAGTTTCTTCGGCATATTGATCTGAACTTCGACGTAAAGATCGCCACGCTTTCCTGAGACTCCC', 'name': '895:1:4:1596:8538/2', 'accuracy': '_________X```]ZZZX]]KXXXY____X________________________X___X______``P`'}, {'annotations': '', 'sequence': 'TACGGTCTTTTGAACTCATTGATGGCCCTGCGAAACGCCGCCTCAAGCTGTGCTCCGTAA', 'name': '895:1:4:1596:6003/2', 'accuracy': '_____aMUV]VQFJZZVZ]VZZMPZP[Q\\X]]S]S````X__X_WUZZMU________P_'}]
# Compute the G/C fraction over all bases that are not 'N'.
#
# The running totals are floats so that m / n is true division even under
# Python 2, where int / int truncates.
n = 0.0  # bases that are not 'N'
m = 0.0  # bases that are 'G' or 'C'
for record in screed.open(filename):
    sequence = record.sequence
    n += len(sequence) - sequence.count('N')
    m += sequence.count('G') + sequence.count('C')

if n:
    gc_fraction = m / n
    # The same figure reported three ways.
    print('%g G/C content from %d and %d' % (gc_fraction, m, n))  # string interpolation in Python
    print('%g G/C content' % (gc_fraction,))  # string interpolation in Python
    # '%s' applies str() to the float, as the print statement did.
    print('%s G/C content' % (gc_fraction,))
else:
    # No non-N bases at all (empty input, or nothing but N): the ratio is
    # undefined, so report that instead of dividing by zero.
    print('no non-N bases found; G/C content is undefined')
0.597727 G/C content from 1030420 and 1723896
0.597727 G/C content
0.597727473119 G/C content
# Write every record as a two-line FASTA entry: '>' + name, then the sequence.
# The `with` block closes the output file when the loop finishes, so all
# buffered data is flushed to disk; previously the handle was left open.
with open('/tmp/out.fa', 'w') as outfp:
    for record in screed.open(filename):
        outfp.write('>%s\n%s\n' % (record.name, record.sequence))
pwd
u'/Users/t/dev/2012-scripps/python'