# import fadapa
from fadapa import Fadapa
# load the file by specifying its path
f = Fadapa('demo/fastqc_data.txt')
# to print the file name
print(f.file_name)
demo/fastqc_data.txt
# to print all the contents of file
print(f.content())
##FastQC 0.10.1
>>Basic Statistics pass
#Measure Value
Filename sample1.fastq
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1571332
Filtered Sequences 0
Sequence length 29
%GC 53
>>END_MODULE
>>Per base sequence quality pass
#Base Mean Median Lower Quartile Upper Quartile 10th Percentile 90th Percentile
1 39.851088121415465 40.0 40.0 40.0 40.0 40.0
2 39.823216863145404 40.0 40.0 40.0 40.0 40.0
3 39.806648754050705 40.0 40.0 40.0 40.0 40.0
4 39.7966120463403 40.0 40.0 40.0 40.0 40.0
5 39.48240982809489 40.0 40.0 40.0 40.0 40.0
........................................................................................................................
# print a summary of all modules
for data in f.summary():
    print(data)
['Module Name', 'Status']
['Basic Statistics', 'pass']
['Per base sequence quality', 'pass']
['Per sequence quality scores', 'pass']
['Per base sequence content', 'pass']
['Per base GC content', 'pass']
['Per sequence GC content', 'pass']
['Per base N content', 'pass']
['Sequence Length Distribution', 'pass']
['Sequence Duplication Levels', 'pass']
['Overrepresented sequences', 'warn']
['Kmer Content', 'fail']
# print raw data for a given module
for data in f.raw_data('Basic Statistics'):
    print(data)
>>Basic Statistics pass
#Measure Value
Filename sample1.fastq
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1571332
Filtered Sequences 0
Sequence length 29
%GC 53
>>END_MODULE
# print parsed data for a given module
for data in f.clean_data('Basic Statistics'):
    print(data)
['Measure', 'Value']
['Filename', 'sample1.fastq']
['File type', 'Conventional base calls']
['Encoding', 'Sanger / Illumina 1.9']
['Total Sequences', '1571332']
['Filtered Sequences', '0']
['Sequence length', '29']
['%GC', '53']
for data in f.raw_data('Kmer Content'):
    print(data)
>>Kmer Content fail
#Sequence Count Obs/Exp Overall Obs/Exp Max Max Obs/Exp Position
AAAAA 247430 10.240459 11.52296 3
TTTTT 91310 3.2113638 5.350794 25
>>END_MODULE
for data in f.clean_data('Kmer Content'):
    print(data)
['Sequence', 'Count', 'Obs/Exp Overall', 'Obs/Exp Max', 'Max Obs/Exp Position']
['AAAAA', '247430', '10.240459', '11.52296', '3']
['TTTTT', '91310', '3.2113638', '5.350794', '25']