import screed

# Path to the reads file opened by every cell below.
filename = '../data/25k.fq.gz'

# Preview the first 10 records: name, sequence, then the `accuracy` field.
# `n` is the 1-based count of records read so far; it is pre-set so that it
# stays defined (as 0) even when the file yields no records.
n = 0
for n, record in enumerate(screed.open(filename), 1):
    if n > 10:
        break
    # Single-argument print(...) behaves the same on Python 2 and Python 3,
    # unlike the Python-2-only `print x` statement.
    print(record.name)
    print(record.sequence)
    print(record.accuracy)
895:1:1:1246:14654/1 CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT ][aaX__aa[`ZUZ[NONNFNNNNNO_____^RQ_ 895:1:1:1248:9583/1 ACTGGGCGTAGACGGTGTCCTCATCGGCACCAGC \UJUWSSV[JQQWNP]]SZ]ZWU^]ZX][^TXR` 895:1:1:1252:19493/1 CCGGCGTGGTTGGTGAGGTCACTGAGCTTCATGTC OOOKONNNNN__`R]O[TGTRSY[IUZ]]]__X__ 895:1:1:1255:18861/1 ACGACGAGAAGCTGATCTACCGCGCCGAGCGCATC bb_^^bb_XTbbbbbb_bab]KTITZQTZ]ZYT^^ 895:1:1:1264:15854/1 CGTGATGATGTGCTTGCGGCCGGAGGGCCTGTTGCCCAGG ````W__ZZ`R__ZSOJNNNQWSQZ\^X\W_______J__ 895:1:1:1265:2265/1 TATAGCGTGAGGCGATGACGTTGCTGTCCTTGGCGCGGC `bbbbbbbbbbbbbUbbbbb]X_QXUQ[QWOPTTZ__X_ 895:1:1:1273:17782/1 TCGAAAATCACGTGGGAGATGCACTATCACGCGGTCGGTGAGGAAGTGACCGACCACACCGAGCTCGC OOOOONNNOO^^X^`]TVa]WWLSSKNNONI]S[Q]SWXZX_\\^ZF___Y\V]]_____]KTRV^X^ 895:1:1:1274:18571/1 AGCAGGCGAACAGCACGCCGAACAATACTGTCTTCATGCCAAACTGCTGAAAGCCGAGCACAGCAGAAATGCTCCAGAG VMJPVUIPHRUaZRZ\___XUZ[X[]]`]X]]]]]_______________XHXX]]S]]Z``X]`]VX]]_____ZZMa 895:1:1:1276:16426/1 GCAGGTATTGGTTTGCCTAACGTTGAAATTGCAGGATTAACG [[Z[[Z`\`\^[^`^U]ZZROFONOOQYPY]ZU]URQYQV`` 895:1:1:1283:17864/1 ATTCGTCAACCCGCGGCTCGAGCTGCGCATCC `Q\`[Z______`_\Q`\[MTYWTW_T_TTX`
import calc_gc

# One calc_gc value per record in the file, in file order.
gc_list = [calc_gc.calc_gc(read.sequence) for read in screed.open(filename)]

# Histogram of those values: 100 equal-width bins spanning 0 to 1.
hist(gc_list, bins=100, range=(0, 1))
(array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 1, 1, 1, 3, 0, 5, 1, 3, 2, 3, 4, 11, 7, 11, 9, 10, 17, 38, 32, 40, 57, 65, 38, 98, 111, 136, 168, 212, 252, 329, 361, 462, 336, 766, 651, 781, 787, 808, 909, 960, 972, 1049, 687, 1246, 969, 1124, 886, 1037, 1040, 918, 892, 885, 853, 533, 581, 583, 437, 324, 416, 251, 233, 167, 94, 113, 71, 50, 28, 27, 16, 6, 8, 5, 1, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0]), array([ 0. , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1. ]), <a list of 100 Patch objects>)
import calc_gc

# Number of leading bases of each read that are examined.
PREFIX_LEN = 10

# calc_gc value of the first PREFIX_LEN bases of every record, in file order.
gc_list = []
for record in screed.open(filename):
    seq = record.sequence[:PREFIX_LEN]
    gc = calc_gc.calc_gc(seq)
    gc_list.append(gc)

# A 10-base prefix can only produce the 11 values 0.0, 0.1, ..., 1.0
# (assuming calc_gc returns a fraction and every read has at least
# PREFIX_LEN bases -- TODO confirm). With bins=10 over (0, 1) each of those
# values sits exactly on a bin edge, so float rounding of the edges decides
# which bin it lands in and 1.0 is merged into the last bin; the result is
# spurious empty bins next to double-counted ones. Use one bin per possible
# value, centred on that value, so no value is near an edge.
half_step = 0.5 / PREFIX_LEN
hist(gc_list, bins=PREFIX_LEN + 1, range=(-half_step, 1 + half_step))
(array([ 12, 82, 1581, 0, 2846, 10712, 5313, 0, 3114, 1340]), array([ 0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]), <a list of 10 Patch objects>)