from trnlib.omeORM import *
from pandas import *
from numpy import array
from sqlalchemy import func
from IPython.core.display import HTML

# Database session used by every query in this script.
ome = Session()

# Each category list holds [operon/gene name, b-number] pairs.
category_1_fnr_paper = [
    ['pyrD', 'b0945'],
    ['ynfEFGH-dmsD', 'b1587'],
    ['pykA', 'b1854'],
    ['nikABCDE', 'b3746'],
    ['focA-pflB', 'b0904'],
    ['yfiD', 'b2579'],
    ['dmsABC', 'b0894'],
    ['narGHJI', 'b1224'],
    ['ydfZ', 'b1541'],
    ['ynjE', 'b1757'],
    ['nirBDC-cysG', 'b3365'],
    ['nrfABCDEFG', 'b4070'],
    ['caiF', 'b0034'],
    ['narK', 'b1223'],
    ['bssR', 'b0836'],
    ['ydhYVWXUT', 'b1674'],
    ['fnrS', 'b4699'],
    ['frdABCD', 'b4154'],
    ['pitA', 'b3493'],
    ['ydjXYZ-ynjABCD', 'b1750'],
    ['hcp-hcr', 'b0873'],
    ['ackA-pta', 'b2296'],
    ['dcuB-fumB', 'b4123'],
    ['ynfK', 'b1593'],
    ['yfcC', 'b2298'],
    ['yhbUV', 'b3158'],
    ['yhcC', 'b3211'],
    ['yhiD', 'b3508'],
    ['yjiML', 'b4335'],
    ['yjjI', 'b4380'],
    ['dcuC', 'b0621'],
    ['pepE', 'b4021'],
]

category_2_fnr_paper = [
    ['fnr', 'b1334'],
    ['can', 'b0126'],
    ['hisLGDC', 'b2018'],
    ['ndh', 'b1109'],
    ['fbaB', 'b2097'],
    ['yccA', 'b0970'],
    ['ygjG', 'b3073'],
    ['rmf', 'b0953'],
    ['lysC', 'b4024'],
    ['yecR', 'b1904'],
    ['iraP', 'b0382'],
    ['msrB', 'b1778'],
    ['nrdAB', 'b2234'],
    ['gadE', 'b3512'],
    ['hdeD', 'b3511'],
    ['pdhR-aceEF-lpdA', 'b0113'],
    ['cydAB', 'b0733'],
    ['hdeAB-yhiD', 'b3510'],
    ['ompW', 'b1256'],
    ['ompC', 'b2215'],
    ['hycABCDEFGHI', 'b2725'],
]

category_3_fnr_paper = [
    ['moaA', 'b0781'],
    ['pepT', 'b1127'],
    ['fdnG', 'b1474'],
    ['napF', 'b2208'],
    ['upp', 'b2494'],
    ['feoA', 'b3408'],
    ['katG', 'b3942'],
    ['cadC', 'b4133'],
    ['nrdD', 'b4238'],
]

category_4_fnr_paper = [
    ['ompA', 'b0957'],
    ['ptsG', 'b1101'],
    ['uxaB', 'b1521'],
    ['ptsH', 'b2415'],
    ['guaB', 'b2508'],
    ['fadH', 'b3081'],
    ['uxaC', 'b3092'],
    ['dusB', 'b3260'],
    ['ppiA', 'b3363'],
    ['gntT', 'b3415'],
    ['aspA', 'b4139'],
]

# Free-text notes keyed by category 1/2 operon name.
category_1_2_discrepancies = {
    'fnrS': 'significant peak but sRNA not reliably measured on microarray',
    'ydjXYZ-ynjABCD': 'significant peak, diff exp NO3, not measured anaerobically',
    'hcp-hcr': 'significant peak, diff exp NO3, not measured anaerobically',
    'dcuB-fumB': 'weak, insignificant peak, diff expressed',
    'yfcC': 'significant peak, diff exp NO3, not measured anaerobically',
    'yjiML': 'weak, insignificant peak, diff expressed NO3',
    'yjjI': 'weak, insignificant peak, diff expressed',
    'dcuC': 'weak, insignificant peak, diff expressed',
    'pepE': 'no signal, diff expressed',
    'can': 'no signal, diff expressed',
    'hisLGDC': 'no signal, diff expressed',
    'fbaB': 'weak, insignificant peak, diff expressed',
    'yccA': 'weak, insignificant peak, diff expressed',
    'ygjG': 'no signal, diff expressed',
    'lysC': 'weak, insignificant peak, diff expressed',
    'yecR': 'weak, insignificant peak',
    'ompC': 'significant peak, diff exp NO3',
    'hycABCDEFGHI': 'one significant replicate, diff expressed',
}

# Free-text notes for category 1/2 operons attributed to mapping,
# peak-calling or processing errors.
category_1_2_mapping_errors = {
    'yhcC': 'significant peak, arcB is second gene in operon and is diff expressed',
    'yhiD': 'peak calling error, is now a peak and diff expressed',
    'ndh': 'processing error, definite peak and diff expressed',
    'rmf': 'peak calling error, is now a peak and diff expressed',
    'gadE': 'peak calling error, is now a peak and diff expressed',
    'hdeD': 'peak calling error, is now a peak and diff expressed',
    'hdeAB-yhiD': 'peak calling error, is now a peak and diff expressed, also a repeat',
}

# Notebook-style inspections: the values are not stored anywhere.
# The literals above give 32 + 21 = 53 category 1/2 entries and
# 18 discrepancy notes, hence the 18/53 ratio.
len(category_1_fnr_paper + category_2_fnr_paper)
18 / 53.
# Notebook-style inspection: the value is not stored anywhere.
len(category_1_2_discrepancies)

# Short aliases for the ORM classes queried below. Both are bound before
# their first use; previously `cpge` was assigned only after the first
# query that referenced it, which fails with NameError when this file is
# run top to bottom.
aa = AllAnalysis
cpge = ChipPeakGeneExpression

# Ad-hoc lookup: AllAnalysis rows comparing 'delta-fnr' with 'wt' where
# both sides share the same electron acceptor, fdr < 1, and the gene name
# matches 'hycC%' (case-insensitive). The result is not stored.
ome.query(aa).filter(and_(
    or_(aa.target1 == 'delta-fnr', aa.target2 == 'delta-fnr'),
    or_(aa.target1 == 'wt', aa.target2 == 'wt'),
    aa.fdr < 1,
    aa.gene_name.ilike('hycC%'),
    or_(and_(aa.eacceptor1 == 'anaerobic', aa.eacceptor2 == 'anaerobic'),
        and_(aa.eacceptor1 == 'NO3', aa.eacceptor2 == 'NO3'),
        and_(aa.eacceptor1 == 'O2', aa.eacceptor2 == 'O2')))).all()

# Ad-hoc lookup: ChipPeakGeneExpression rows with target 'Fnr' and a gene
# name matching 'gadE%'. The result is not stored.
ome.query(cpge).filter(and_(cpge.target == 'Fnr',
                            cpge.gene_name.ilike('gadE%'))).all()

# Print every category 1/2 operon that has no note in either annotation
# dict and no ChipPeakGeneExpression row for target 'Fnr' under the
# 'anaerobic' electron acceptor.
for gene, bnum in category_1_fnr_paper + category_2_fnr_paper:
    if gene in category_1_2_discrepancies or gene in category_1_2_mapping_errors:
        continue
    anaerobic_rows = ome.query(cpge).filter(and_(cpge.target == 'Fnr',
                                                 cpge.eacceptor == 'anaerobic',
                                                 cpge.bnum == bnum)).all()
    if not anaerobic_rows:
        # Single-argument call form prints identically under Python 2
        # and also parses under Python 3.
        print(gene)

# Load the category 6 table, indexed by its 'bnum' column, and render it
# as HTML.
cat_6 = read_table('Fnr_category_6.txt', index_col='bnum')
HTML(cat_6.to_html())