# Cross-check the operons listed in arcA_ChIP_expression.txt against the ArcA
# ChIP-peak / gene-expression records reachable through the trnlib ORM session,
# and record the operons that were reviewed by hand.
#
# NOTE(review): this looks like an IPython notebook export. Bare expressions
# such as HTML(...) and the trailing .all() queries only display a result when
# they are the last expression of a notebook cell; run as a plain script their
# values are discarded.
from trnlib.omeORM import *
from pandas import *
from numpy import array
from sqlalchemy import func
from IPython.core.display import HTML

ome = Session()

# Table of operons under consideration; the first column is used as the index
# and is iterated below as the operon name.
direct_reg = read_table('arcA_ChIP_expression.txt', index_col=0)
HTML(direct_reg.to_html())

# Operons already reviewed by hand. Both dictionaries must exist before the
# loop below, which skips every operon listed in either of them.
discrepancies = {
    'gcd': 'very weak, insignificant peak, diff expressed',
    'tesB': '3 significant out of 12 same as ybaY, diff expressed in NO3',
    'ybaY': '3 significant out of 12 same as tesB, diff expressed in NO3',
    'ylaC': 'no signal, diff expressed',
    'csgD': 'two significant anaerobic peaks, diff expressed',
    'ycgZ': 'two significant anaerobic peaks, diff expressed',
    'ydcI': 'very weak, insignicant signal, diff expressed',
    'maeB': 'very weak, insignicant signal, diff expressed',
    'hcaR': 'peak and diff exp for NO3 but no measurement anaerobically',
    'trxC': 'peak and diff exp for NO3 but no measurement anaerobically',
    'ycgZ-ymgA-ariR-ymgC': 'two significant anaerobic peaks, diff expressed',
    'kefGB-yheV': 'two significant anaerobic peaks, diff expressed',
    'dctA': 'two significant anaerobic peaks, diff expressed',
    'nepI': 'peak and diff exp for NO3 but no measurement anaerobically',
    'yifK': 'very weak, insignificant signal',
    'ybcW': 'two significant anaerobic peaks, prophage related, diff expressed',
    'appY': 'peak and diff exp for NO3 but no measurement anaerobically',
    'hyaABCDEF': 'very weak, insignicant signal, diff expressed',
    'slp-dctR': 'two significant anaerobic peaks, diff expressed just above cutoff',
    'adiC': 'peak and diff exp for NO3 but no measurement anaerobically',
    'fnrS': 'no signal, sRNA',
}

# Summary notes (kept from the original analysis):
#   direct_reg: 21/85 discrepancies
#     5/21 have peak and NO3 diff expression but not expressed anaerobically
#     9/21 have peak in one replicate and diff exp
#     5/21 have very weak, insignificant, peak and diff exp
#     1/21 has no signal and diff expression
#     1/21 has no signal and is an sRNA which cannot be reliably measured
#          with microarrays

mapping_errors = {
    'pdhR-aceEF-lpdA': 'aceEF-lpdA is repressed but not pdhR',
    'cstA': 'peak calling error, now FP',
    'feaR': 'peak calling error, now FP',
    'yehDCBA': 'peak calling error, now FP',
    'rsd': 'peak is called for thiABCDEF',
    'appY': ' peak calling error, now FP',
}

# Print every operon that is not already explained above and for which no
# ArcA record with fdr < .05 exists whose gene name starts (case-insensitively)
# with the first three characters of the operon name.
cpge = ChipPeakTUGeneExpression
for operon in direct_reg.index:
    if operon in discrepancies or operon in mapping_errors:
        continue

    gene_prefix = operon[0:3] + '%'
    significant_hits = (
        ome.query(cpge)
        .filter_by(target='ArcA')
        .filter(and_(cpge.gene_name.ilike(gene_prefix), cpge.fdr < .05))
        .all()
    )
    if not significant_hits:
        # Single-argument call form prints identically on Python 2 and 3.
        print(operon)

# Ad-hoc lookup: delta-arcA vs. wt comparisons for genes matching 'fnrS%',
# restricted to rows where both sides share the same electron acceptor.
aa = AllAnalysis
ome.query(aa).filter(and_(
    or_(aa.target1 == 'delta-arcA', aa.target2 == 'delta-arcA'),
    or_(aa.target1 == 'wt', aa.target2 == 'wt'),
    aa.fdr < 1,
    aa.gene_name.ilike('fnrS%'),
    or_(
        and_(aa.eacceptor1 == 'anaerobic', aa.eacceptor2 == 'anaerobic'),
        and_(aa.eacceptor1 == 'NO3', aa.eacceptor2 == 'NO3'),
        and_(aa.eacceptor1 == 'O2', aa.eacceptor2 == 'O2'),
    ),
)).all()

# Ad-hoc lookup: significant records for genes matching 'app%'.
# Note that cpge is rebound here to a different mapped class than the one used
# in the loop above.
cpge = ChipPeakGeneExpression
ome.query(cpge).filter(and_(cpge.fdr < .05, cpge.gene_name.ilike('app%'))).all()