Using this table from the short-read-tax-assignment repo, Jenya noticed that summarize_taxa.py gives different results from BIOM 1.x.x's collapseObservationsByMetadata. I previously confirmed that, and I now also observe the issue with BIOM 2.0.1.
from biom import load_table
t1 = load_table('table.biom')
# Number of leading taxonomy entries kept when building a collapse key.
level = 6


def collapse_f(id_, md):
    """Return the collapse key for one observation.

    The key is the first ``level`` entries of the observation's
    ``taxonomy`` metadata, joined with ';'. ``level`` is read from the
    module-level variable at call time.

    ``id_`` is not used; it is presumably required by the callback
    signature that ``Table.collapse`` expects.
    """
    return ';'.join(md['taxonomy'][:level])
# Collapse observations that share the same taxonomy key from collapse_f.
# NOTE(review): min_group_size is left at its default here; the notes further
# down find that min_group_size=1 is needed to match summarize_taxa.py.
collapsed_t1 = t1.collapse(collapse_f, axis='observation')
# Number of observations left after collapsing.
len(collapsed_t1.observation_ids)
38
# Run summarize_taxa.py on the same table (IPython shell escape).
!summarize_taxa.py -i table.biom -o summarize_taxa_out/
# Load the L6 table from its output directory for comparison with the
# collapsed table.
t2 = load_table('summarize_taxa_out/table_L6.biom')
len(t2.observation_ids)
50
It turns out that you need to pass min_group_size=1 to get this to work as expected.
# Number of leading taxonomy entries kept when building a collapse key.
level = 6


def collapse_f(id_, md):
    """Return the collapse key for one observation.

    The key is the first ``level`` entries of the observation's
    ``taxonomy`` metadata, joined with ';'. ``level`` is read from the
    module-level variable at call time.

    ``id_`` is not used; it is presumably required by the callback
    signature that ``Table.collapse`` expects.
    """
    return ';'.join(md['taxonomy'][:level])
# Same collapse as before, but with min_group_size=1.
# NOTE(review): presumably this keeps groups that contain only a single
# observation - confirm against the biom Table.collapse documentation.
collapsed_t1 = t1.collapse(collapse_f, axis='observation', min_group_size=1)
len(collapsed_t1.observation_ids)
50
Everything below here is a record of the experiments I ran while trying to figure this out...
from os.path import exists
if not exists('table.from_biom_w_taxonomy.txt'):
!biom convert -i table.biom -o table.from_biom_w_taxonomy.txt --to-tsv --header-key taxonomy
import pandas as pd
# Read the TSV export, indexed by OTU id. The first two lines of the file are
# skipped and the column names are supplied explicitly.
# NOTE(review): presumably the skipped lines are the "# Constructed from biom
# file" comment and the "#OTU ID" header row - confirm against the file.
t3 = pd.read_csv('table.from_biom_w_taxonomy.txt', sep='\t', skiprows=2, index_col="otu-id", names=['otu-id', 'count', 'taxonomy'])
# All counts are greater than zero
len(t3[t3["count"] == 0])
0
# Truncate every taxonomy string from the TSV export to its first ``level``
# ';'-separated fields, stripping whitespace around each field, then count how
# many distinct truncated lineages there are.
t3_taxa = []
for e in t3['taxonomy']:
    t = [x.strip() for x in e.split(';')]
    t3_taxa.append(';'.join(t[:level]))
print(len(set(t3_taxa)))
50
# Lineages present in the TSV-derived set but absent from the
# summarize_taxa.py table...
print set(t3_taxa) - set(t2.observation_ids)
# ...and the reverse direction.
print set(t2.observation_ids) - set(t3_taxa)
set(['No blast hit']) set(['No blast hit;Other;Other;Other;Other;Other'])
# Ids present in the BIOM-collapsed table but absent from the
# summarize_taxa.py table...
print set(collapsed_t1.observation_ids) - set(t2.observation_ids)
# ...and the reverse direction.
print set(t2.observation_ids) - set(collapsed_t1.observation_ids)
set([u'No blast hit']) set(['No blast hit;Other;Other;Other;Other;Other'])
BIOM 2.0.1's collapseObservationsByMetadata is dropping some low-abundance observations, while summarize_taxa.py keeps them. (Note: this behavior was the same in BIOM 1.x.x; the data are not shown, but I ran this same analysis a couple of weeks ago with the previous BIOM version.)
# Ids that appear only in the BIOM-collapsed table.
set(collapsed_t1.observation_ids) - set(t2.observation_ids)
{u'No blast hit'}
# Ids that appear only in the summarize_taxa.py table, i.e. those missing
# from the BIOM-collapsed table.
missing_ids = set(t2.observation_ids) - set(collapsed_t1.observation_ids)
missing_ids
{'No blast hit;Other;Other;Other;Other;Other'}
When taxa show up only one time, they are not collapsed correctly:
from collections import defaultdict
# Group OTU ids by their genus-level lineage, i.e. everything before the
# ';s__' field of the taxonomy string.
tax_to_ids = defaultdict(list)
for e in t3.index:
    # NOTE(review): all spaces are removed, including spaces inside names
    # ("Candidatus Ancillula" becomes "CandidatusAncillula"), so these keys do
    # not always match the observation ids of the collapsed tables exactly.
    tax = t3['taxonomy'][e].replace(' ', '')
    try:
        s_index = tax.index(';s__')
    except ValueError:
        # No species field in this lineage: leave the OTU out of the grouping.
        continue
    tax = tax[:s_index]
    tax_to_ids[tax].append(e)

# Lineages that are represented by exactly one OTU.
singles = [tax for tax, ids in tax_to_ids.items() if len(ids) == 1]

print(set(singles) - set(missing_ids))
print(len(singles))
print(tax_to_ids['k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Anaerostipes'])
# Sanity check that the OTU ids in the index are unique.
print(len(t3.index))
print(len(set(t3.index)))
set(['k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Anaerostipes', 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__', 'k__Bacteria;p__Bacteroidetes;c__Flavobacteriia;o__Flavobacteriales;f__[Weeksellaceae];g__Cloacibacterium', 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__', 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Alicycliphilus', 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Thiotrichales;f__Piscirickettsiaceae;g__', 'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__CandidatusAncillula', 'k__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio', 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__;f__;g__', 'k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Bacillaceae;g__Bacillus', 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Chromatiales;f__;g__']) 11 ['3537197'] 3398 3398
print t2
# Constructed from biom file #OTU ID MockMiSeq.even No blast hit;Other;Other;Other;Other;Other 2.94801388093e-05 k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Candidatus Ancillula 8.42289680267e-06 k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__ 0.000412721943331 k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__Bifidobacterium 0.0482505643341 k__Bacteria;p__Actinobacteria;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella 0.00266163538964 k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides 0.279273777838 k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Porphyromonadaceae;g__Parabacteroides 0.0227123412284 k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__ 0.00701206158822 k__Bacteria;p__Bacteroidetes;c__Flavobacteriia;o__Flavobacteriales;f__[Weeksellaceae];g__Cloacibacterium 4.21144840133e-05 k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Bacillaceae;g__Bacillus 1.2634345204e-05 k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus 0.00548330581854 k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus 0.0823296047977 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__;g__ 0.0012634345204 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium 0.033443111755 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__ 0.0415627842728 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Anaerostipes 8.8440416428e-05 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Blautia 0.0210740878003 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Coprococcus 0.0343612075065 
k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Dorea 0.0707818132812 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Lachnospira 0.0266963714161 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Roseburia 0.00368080590277 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__[Ruminococcus] 0.0559238233213 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__ 0.045353087834 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Anaerotruncus 0.00510427546242 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Faecalibacterium 0.00918938041171 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Ruminococcus 0.00645615039925 k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__[Tissierellaceae];g__Anaerococcus 0.0261783632627 k__Bacteria;p__Firmicutes;c__Erysipelotrichi;o__Erysipelotrichales;f__Erysipelotrichaceae;g__[Eubacterium] 0.00578653010343 k__Bacteria;p__Lentisphaerae;c__[Lentisphaeria];o__Victivallales;f__Victivallaceae;g__ 0.000400087598127 k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__;f__;g__ 4.21144840133e-05 k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__;g__ 0.0003158586301 k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__ 8.42289680267e-06 k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Alicycliphilus 3.36915872107e-05 k__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio 5.89602776187e-05 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__;f__;g__ 8.42289680267e-06 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Aeromonadales;f__Aeromonadaceae;g__ 0.0184250867558 
k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Chromatiales;f__;g__ 8.42289680267e-06 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__ 0.100699942724 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Edwardsiella 5.0537380816e-05 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Enterobacter 4.21144840133e-05 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Erwinia 0.000181092281257 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Klebsiella 1.68457936053e-05 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Proteus 0.000534853946969 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Providencia 0.000113709106836 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Salmonella 0.000265321249284 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Serratia 0.00012634345204 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__ 8.42289680267e-06 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__Acinetobacter 0.000644351605404 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Thiotrichales;f__Piscirickettsiaceae;g__ 8.42289680267e-06 k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae;g__Akkermansia 0.04283464169
# For each id missing from the BIOM-collapsed table, look up its value for
# the 'MockMiSeq.even' sample in the summarize_taxa.py table, then print the
# (value, id) pairs in ascending order.
r = []
for m in missing_ids:
    obs_idx = t2.index(m, axis='observation')
    r.append((t2.get_value_by_ids(m, 'MockMiSeq.even'), t2.observation_ids[obs_idx]))
r.sort()
for e in r:
    print(e)
(2.9480138809339307e-05, 'No blast hit;Other;Other;Other;Other;Other')
# Print (value, id) for every observation of the summarize_taxa.py table in
# ascending order of value. e[0][0] is the first entry of the observation's
# data vector and e[1] is the observation id.
# NOTE(review): presumably the first entry is the only one, since the printed
# table has a single sample column - confirm.
r = []
for e in t2.iter(axis='observation'):
    r.append((e[0][0], e[1]))
r.sort()
for e in r:
    print(e)
(8.4228968026683745e-06, 'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Actinomycetales;f__Actinomycetaceae;g__Candidatus Ancillula') (8.4228968026683745e-06, 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__') (8.4228968026683745e-06, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__;f__;g__') (8.4228968026683745e-06, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Chromatiales;f__;g__') (8.4228968026683745e-06, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__') (8.4228968026683745e-06, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Thiotrichales;f__Piscirickettsiaceae;g__') (1.2634345204002561e-05, 'k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Bacillaceae;g__Bacillus') (1.6845793605336749e-05, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Klebsiella') (2.9480138809339307e-05, 'No blast hit;Other;Other;Other;Other;Other') (3.3691587210673498e-05, 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__Comamonadaceae;g__Alicycliphilus') (4.2114484013341868e-05, 'k__Bacteria;p__Bacteroidetes;c__Flavobacteriia;o__Flavobacteriales;f__[Weeksellaceae];g__Cloacibacterium') (4.2114484013341868e-05, 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__;f__;g__') (4.2114484013341868e-05, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Enterobacter') (5.0537380816010244e-05, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Edwardsiella') (5.8960277618678613e-05, 'k__Bacteria;p__Proteobacteria;c__Deltaproteobacteria;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio') (8.8440416428017923e-05, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Anaerostipes') (0.00011370910683602305, 
'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Providencia') (0.00012634345204002562, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Serratia') (0.00018109228125737006, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Erwinia') (0.0002653212492840538, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Salmonella') (0.00031585863010006405, 'k__Bacteria;p__Proteobacteria;c__Betaproteobacteria;o__Burkholderiales;f__;g__') (0.00040008759812674776, 'k__Bacteria;p__Lentisphaerae;c__[Lentisphaeria];o__Victivallales;f__Victivallaceae;g__') (0.00041272194333075033, 'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__') (0.00053485394696944173, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Proteus') (0.00064435160540413062, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Pseudomonadales;f__Moraxellaceae;g__Acinetobacter') (0.001263434520400256, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__;g__') (0.0026616353896432039, 'k__Bacteria;p__Actinobacteria;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella') (0.003680805902766079, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Roseburia') (0.0051042754624170388, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Anaerotruncus') (0.0054833058185371156, 'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus') (0.0057865301034331759, 'k__Bacteria;p__Firmicutes;c__Erysipelotrichi;o__Erysipelotrichales;f__Erysipelotrichaceae;g__[Eubacterium]') (0.0064561503992453121, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Ruminococcus') (0.0070120615882214247, 
'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__') (0.0091893804117111726, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Faecalibacterium') (0.018425086755837046, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Aeromonadales;f__Aeromonadaceae;g__') (0.021074087800276236, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Blautia') (0.022712341228395216, 'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Porphyromonadaceae;g__Parabacteroides') (0.026178363262693221, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__[Tissierellaceae];g__Anaerococcus') (0.026696371416057376, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Lachnospira') (0.033443111754994759, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium') (0.034361207506485601, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Coprococcus') (0.041562784272767002, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__') (0.042834641689969914, 'k__Bacteria;p__Verrucomicrobia;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Verrucomicrobiaceae;g__Akkermansia') (0.045353087833967777, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__') (0.048250564334085679, 'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__Bifidobacterium') (0.055923823321316582, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__[Ruminococcus]') (0.070781813281223557, 'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Dorea') (0.082329604797681713, 'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus') (0.10069994272430152, 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__') 
(0.27927377783768348, 'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides')
!print_qiime_config.py
System information ================== Platform: darwin Python version: 2.7.1 (r271:86832, Aug 30 2012, 10:07:33) [GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2336.11.00)] Python executable: /Users/caporaso/.virtualenvs/qiime/bin/python Dependency versions =================== NumPy version: 1.8.1 SciPy version: 0.14.0 matplotlib version: 1.3.1 biom-format version: 2.0.1 qcli version: 0.1.0 pyqi version: 0.3.2 scikit-bio version: 0.1.3 QIIME library version: 1.8.0-dev, master@0668ba7 QIIME script version: 1.8.0-dev PyNAST version (if installed): 1.2.2 Emperor version: 0.9.3 RDP Classifier version (if installed): rdp_classifier-2.2.jar Java version (if installed): 1.6.0_65 QIIME config values =================== blastmat_dir: /Applications/blast-2.2.22/data/ sc_queue: all.q template_alignment_lanemask_fp: /Users/caporaso/data/greengenes_core_sets/lanemask_in_1s_and_0s.txt pynast_template_alignment_fp: /Users/caporaso/data/greengenes_core_sets/core_set_aligned_imputed.fasta_11_8_07.no_dots seconds_to_sleep: 1 pynast_template_alignment_blastdb: None assign_taxonomy_reference_seqs_fp: /Users/caporaso/data/gg_13_8_otus/rep_set/97_otus.fasta torque_queue: friendlyq topiaryexplorer_project_dir: /Users/caporaso/code/TopiaryExplorer-0.9.1/ jobs_to_start: 4 denoiser_min_per_core: 50 cluster_jobs_fp: start_parallel_jobs.py assign_taxonomy_id_to_taxonomy_fp: /Users/caporaso/data/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt temp_dir: /Users/caporaso/temp blastall_fp: blastall
# Write a small hand-made BIOM 1.0.0 (JSON) table to test.biom. Four of its
# five observations have a six-entry taxonomy: three share
# f__Lachnospiraceae and one is the only member of f__OnlyOnce. The fifth
# (GG_OTU_2) has only two entries. The ``with`` block closes the file even if
# the write raises.
with open('test.biom', 'w') as f:
    f.write("""{
"columns": [
{
"id": "Sample1",
"metadata": {
"BarcodeSequence": "AGCACGAGCCTA",
"DOB": 20060805
}
},
{
"id": "Sample2",
"metadata": {
"BarcodeSequence": "AACTCGTCGATG",
"DOB": 20060216
}
},
{
"id": "Sample3",
"metadata": {
"BarcodeSequence": "ACAGACCACTCA",
"DOB": 20060109
}
},
{
"id": "Sample4",
"metadata": {
"BarcodeSequence": "ACCAGCGACTAG",
"DOB": 20070530
}
},
{
"id": "Sample5",
"metadata": {
"BarcodeSequence": "AGCAGCACTTGT",
"DOB": 20070101
}
},
{
"id": "Sample6",
"metadata": {
"BarcodeSequence": "AGCAGCACAACT",
"DOB": 20070716
}
}
],
"data": [
[0, 2, 1.0],
[1, 0, 5.0],
[1, 1, 1.0],
[1, 3, 2.0],
[1, 4, 3.0],
[1, 5, 1.0],
[2, 2, 1.0],
[2, 3, 4.0],
[2, 5, 2.0],
[3, 0, 2.0],
[3, 1, 1.0],
[3, 2, 1.0],
[3, 5, 1.0],
[4, 1, 1.0],
[4, 2, 1.0]
],
"date": "2012-12-11T07:30:29.870689",
"format": "Biological Observation Matrix 1.0.0",
"format_url": "http://biom-format.org",
"generated_by": "some software package",
"id": null,
"matrix_element_type": "float",
"matrix_type": "sparse",
"rows": [
{
"id": "GG_OTU_1",
"metadata": {
"confidence": 0.665,
"taxonomy": ["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Clostridiales", "f__Lachnospiraceae"]
}
},
{
"id": "GG_OTU_2",
"metadata": {
"confidence": 0.98,
"taxonomy": ["Root", "k__Bacteria"]
}
},
{
"id": "GG_OTU_3",
"metadata": {
"confidence": 1.0,
"taxonomy": ["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Clostridiales", "f__Lachnospiraceae"]
}
},
{
"id": "GG_OTU_4",
"metadata": {
"confidence": 0.842,
"taxonomy": ["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Clostridiales", "f__Lachnospiraceae"]
}
},
{
"id": "GG_OTU_5",
"metadata": {
"confidence": 1.0,
"taxonomy": ["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", "o__Clostridiales", "f__OnlyOnce"]
}
}
],
"shape": [5, 6],
"type": "OTU table"
}""")
# Load the hand-made table and print each observation's taxonomy list to
# confirm that the metadata was parsed.
t4 = load_table('test.biom')
for e in t4.observation_metadata:
    print(e['taxonomy'])
[u'Root', u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia', u'o__Clostridiales', u'f__Lachnospiraceae'] [u'Root', u'k__Bacteria'] [u'Root', u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia', u'o__Clostridiales', u'f__Lachnospiraceae'] [u'Root', u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia', u'o__Clostridiales', u'f__Lachnospiraceae'] [u'Root', u'k__Bacteria', u'p__Firmicutes', u'c__Clostridia', u'o__Clostridiales', u'f__OnlyOnce']
def collapse_on_family(id_, md):
    """Return the family-level collapse key for one observation.

    The taxonomy lists in test.biom start with a "Root" entry, so the family
    rank is the sixth element. The slice therefore keeps six entries (Root,
    kingdom, phylum, class, order, family); a slice of five would stop at
    the order rank.

    ``id_`` is not used; it is presumably required by the callback
    signature that ``Table.collapse`` expects.
    """
    return ';'.join(md['taxonomy'][:6])
# NOTE(review): this passes collapse_f (which slices by the global `level`),
# not the collapse_on_family function defined immediately above - confirm
# which one was intended.
# min_group_size is not given here, so the library default applies.
collapsed_t4 = t4.collapse(collapse_f, axis='observation')
print collapsed_t4
# Constructed from biom file #OTU ID Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Root;k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae 0.666666666667 0.333333333333 1.0 1.33333333333 0.0 1.0