from trnlib.omeORM import *
from pandas import *
from numpy import array
from sqlalchemy import func
from IPython.core.display import HTML
# Open a database session. Session is not defined in this file; presumably it
# is exported by the star import from trnlib.omeORM -- confirm.
ome = Session()

# Load the peak table from 'arcA_ChIP_seq_only.txt', using its 'gene' column
# as the row index.
most_peaks = read_table('arcA_ChIP_seq_only.txt', index_col='gene')

# Render the table as HTML, ordered by the gene index. sort_index() replaces
# the argument-less DataFrame.sort(), which ordered rows by index in old
# pandas releases but was deprecated and later removed from the library.
HTML(most_peaks.sort_index().to_html())
gene | Replicate 1 position | Replicate 1 enrichment | Replicate 2 position | Replicate 2 enrichment | Location of ArcA box | Strand | Ri (bits) |
---|---|---|---|---|---|---|---|
CO343 | 1407214 | 4.6 | 1407252 | 4.0 | NaN | NaN | NaN |
acnB | 131400 | 3.8 | 131463 | 3.7 | 131465 | + | 17.0 |
acs | 4285384 | 11.9 | 4285404 | 11.8 | 4285418 | + | 18.3 |
aldA | 1486187 | 7.8 | 1486202 | 5.6 | 1486221 | - | 10.7 |
appY | 582513 | 3.8 | 582538 | 3.9 | 582518 | + | 15.4 |
argT | 2425894 | 6.6 | 2425921 | 6.3 | 2425934 | + | 13.4 |
astC | 1830008 | 15.4 | 1830017 | 13.6 | 1830032 | + | 19.1 |
atpI | 3920795 | 3.2 | 3920826 | 3.3 | 3920874 | - | 13.0 |
avtA | 3737499 | 2.4 | 3737631 | 3.2 | 3737679 | + | 13.6 |
betT/betI | 328597 | 4.5 | 328632 | 6.3 | 328630 | + | 14.0 |
bssR/rimO | 877347 | 2.5 | 877383 | 2.8 | 877374 | + | 10.2 |
cadB | 4358301 | 5.3 | 4358333 | 4.6 | 4358302 | + | 15.8 |
caiT | 41949 | 8.6 | 41981 | 6.9 | 41980 | + | 17.3 |
cirA | 2244973 | 3.0 | 2244987 | 3.0 | NaN | NaN | NaN |
csgB | 1103086 | 2.6 | 1103088 | 2.6 | 1103096 | - | 13.2 |
csgD | 1102574 | 2.7 | 1102613 | 3.4 | 1102593 | + | 10.0 |
csiD | 2786818 | 6.1 | 2786839 | 7.6 | 2786863 | + | 19.4 |
cspA1 | 3717947 | 4.8 | 3717921 | 5.2 | 3717865 | + | 13.1 |
csrD_intragenic | 3399820 | 6.8 | 3399827 | 5.7 | 3399851 | - | 15.0 |
cycA | 4427654 | 7.8 | 4427673 | 6.4 | 4427748 | - | 18.9 |
cydA1 | 770043 | 8.4 | 770066 | 8.9 | 770094 | + | 19.8 |
cydA2 | 770253 | 5.8 | 770271 | 6.2 | 770277 | - | 12.7 |
cydA3 | 770462 | 3.1 | 770494 | 3.7 | 770543 | - | 15.3 |
cydD | 930264 | 27.6 | 930288 | 27.6 | 930292 | + | 20.0 |
cyoA1 | 450852 | 9.1 | 450857 | 6.8 | 450898 | + | 13.4 |
cyoA2 | 450990 | 11.5 | 451010 | 14.4 | NaN | NaN | NaN |
cyoA3 | 451069 | 19.7 | 451088 | 16.5 | 451094 | - | 18.9 |
dctA | 3681512 | 4.5 | 3681557 | 4.0 | 3681576 | + | 13.2 |
dcuA | 4364813 | 18.7 | 4364810 | 21.1 | 4364837 | + | 17.5 |
dcuB | 4346931 | 4.7 | 4346943 | 5.8 | 4347011 | + | 13.8 |
ddpX | 1561199 | 3.7 | 1561200 | 3.9 | 1561217 | - | 16.7 |
dhaR/dhaK | 1250169 | 9.0 | 1250183 | 14.4 | NaN | NaN | NaN |
exuT/uxaC | 3242801 | 3.4 | 3242819 | 3.4 | NaN | NaN | NaN |
fadB | 4029013 | 5.3 | 4029043 | 5.0 | 4029059 | + | 12.4 |
fadD | 1887807 | 14.3 | 1887816 | 13.6 | 1887844 | - | 17.4 |
fadE | 243344 | 2.8 | 243375 | 4.4 | 243400 | - | 18.0 |
fadH | 3229633 | 3.4 | 3229636 | 2.4 | 3229670 | - | 19.5 |
fadI | 2458502 | 9.1 | 2458529 | 11.2 | 2458556 | - | 18.9 |
fadL | 2459101 | 3.6 | 2459138 | 4.1 | 2459195 | - | 11.5 |
feaB/feaR | 1445316 | 3.9 | 1445299 | 2.1 | 1445343 | + | 13.3 |
feoA | 3537845 | 4.2 | 3537882 | 3.3 | 3537905 | - | 17.6 |
fiu | 840921 | 14.8 | 840928 | 9.9 | NaN | NaN | NaN |
fnrS | 1407082 | 4.6 | 1407110 | 5.0 | 1407023 | + | 9.1 |
focA1 | 953477 | 2.2 | 953615 | 2.9 | NaN | NaN | NaN |
focA2 | 953962 | 4.1 | 953967 | 3.8 | 953988 | - | 16.7 |
fumA | 1686433 | 4.4 | 1686468 | 5.7 | 1686496 | - | 16.6 |
fumC | 1684662 | 8.0 | 1684681 | 8.7 | 1684699 | - | 16.2 |
gadE | 3656148 | 2.5 | 3656141 | 3.3 | 3656188 | - | 12.7 |
gadX | 3664058 | 7.6 | 3664072 | 7.3 | 3664074 | - | 15.6 |
gapA | 1860780 | 2.8 | 1860752 | 2.3 | 1860771 | - | 10.5 |
gcd | 141230 | 7.6 | 141251 | 8.0 | 141262 | - | 12.3 |
gcvB/gcvA | 2940582 | 13.6 | 2940613 | 12.9 | 2940632 | - | 13.2 |
glcC/glcD | 3126158 | 12.9 | 3126179 | 9.1 | 3126191 | + | 17.7 |
glpD/glpE | 3559932 | 3.7 | 3559956 | 5.8 | 3559947 | + | 15.9 |
gltA | 753985 | 7.5 | 754015 | 8.8 | 754030 | + | 15.8 |
hcaE/hcaR | 2666920 | 3.2 | 2666984 | 3.2 | 2666984 | - | 15.9 |
hyaA | 1031271 | 3.6 | 1031264 | 2.1 | 1031206 | - | 13.9 |
icd | 1194217 | 5.7 | 1194224 | 6.8 | 1194249 | - | 18.8 |
intF | 296397 | 5.0 | 296437 | 6.3 | 296426 | - | 15.4 |
intF_intergenic | 295252 | 2.8 | 295257 | 2.1 | 295293 | - | 18.8 |
intergenic | 1620836 | 2.8 | 1620824 | 2.0 | 1620802 | + | 11.7 |
intergenic | 1640360 | 3.9 | 1640373 | 4.4 | 1640421 | - | 12.1 |
intergenic | 2563347 | 3.0 | 2563326 | 3.9 | NaN | NaN | NaN |
kgtP1 | 2723801 | 7.8 | 2723815 | 7.5 | 2723830 | + | 12.2 |
kgtP2 | 2724010 | 5.7 | 2723996 | 3.5 | NaN | NaN | NaN |
lldP | 3775233 | 10.9 | 3775232 | 13.2 | 3775302 | - | 17.8 |
lon | 457929 | 3.5 | 457923 | 4.8 | 457950 | - | 10.3 |
lpd | 127646 | 3.4 | 127683 | 3.8 | 127682 | + | 21.0 |
mdh | 3382502 | 3.7 | 3382529 | 3.8 | 3382553 | - | 17.1 |
mdtL | 3889503 | 2.5 | 3889536 | 2.9 | NaN | NaN | NaN |
mglB | 2238549 | 20.6 | 2238577 | 18.3 | 2238613 | + | 15.0 |
mhpA/mhpR | 367720 | 18.9 | 367746 | 19.3 | 367772 | - | 20.0 |
mqo | 2304831 | 5.3 | 2304821 | 5.8 | 2304850 | - | 20.5 |
msrB | 1860459 | 6.0 | 1860472 | 6.4 | 1860513 | - | 14.6 |
ndh | 1165227 | 11.9 | 1165239 | 11.0 | 1165277 | - | 18.7 |
ndk | 2642953 | 11.8 | 2642972 | 11.3 | 2642995 | - | 18.7 |
nepI | 3839764 | 2.8 | 3839819 | 2.2 | 3839842 | - | 15.3 |
nuoA1 | 2403217 | 6.1 | 2403245 | 8.0 | 2403298 | - | 15.8 |
nuoA2 | 2403439 | 4.3 | 2403443 | 3.0 | 2403520 | - | 10.5 |
nupC | 2511118 | 2.6 | 2511163 | 1.9 | 2511176 | + | 17.2 |
ompC | 2310725 | 2.4 | 2310766 | 1.9 | NaN | NaN | NaN |
ompW | 1311945 | 3.9 | 1311978 | 3.1 | 1312024 | - | 14.1 |
oppA1 | 1298696 | 6.1 | 1298685 | 5.0 | 1298736 | + | 15.1 |
oppA2 | 1298985 | 3.2 | 1298848 | 2.8 | 1298896 | - | 16.0 |
paaA | 1451838 | 4.7 | 1451847 | 5.1 | 1451882 | - | 14.7 |
pdhR | 121950 | 9.6 | 121923 | 5.4 | 121871 | + | 15.1 |
phnC | 4323209 | 5.2 | 4323222 | 4.2 | 4323245 | + | 15.8 |
phoH | 1084064 | 7.5 | 1084083 | 5.8 | 1084108 | + | 16.6 |
potF1 | 892675 | 11.1 | 892688 | 12.3 | 892735 | - | 11.2 |
proP | 4328283 | 3.4 | 4328306 | 2.6 | 4328272 | - | 10.6 |
ptsH | 2531443 | 3.3 | 2531456 | 2.8 | 2531413 | + | 18.0 |
putA | 1078132 | 7.1 | 1078166 | 6.1 | 1078166 | + | 10.2 |
putP | 1078326 | 4.5 | 1078367 | 3.5 | 1078379 | + | 15.3 |
puuD/puuA | 1359011 | 9.8 | 1359020 | 8.5 | 1359019 | + | 14.0 |
rcsD | 2311224 | 7.5 | 2311254 | 10.0 | 2311279 | - | 21.4 |
rsd | 4194840 | 3.3 | 4194871 | 3.1 | 4194878 | + | 17.1 |
rstA/ydgC | 1680030 | 3.9 | 1680050 | 4.3 | 1680078 | - | 17.8 |
rutR/rutA | 1073279 | 36.6 | 1073290 | 36.0 | 1073297 | + | 19.1 |
ryeA/pphA | 1920997 | 2.4 | 1921059 | 2.3 | 1921032 | - | 16.2 |
sdhC | 754094 | 16.0 | 754117 | 13.1 | 754146 | + | 14.6 |
secE | 4175225 | 3.2 | 4175239 | 3.7 | NaN | NaN | NaN |
spf1 | 4047953 | 6.8 | 4047886 | 3.4 | NaN | NaN | NaN |
sstT | 3237784 | 3.9 | 3237801 | 3.1 | 3237787 | + | 13.5 |
sthA | 4158803 | 6.2 | 4158830 | 6.1 | 4158858 | - | 12.6 |
thiC2 | 4194328 | 13.4 | 4194389 | 5.3 | 4194329 | + | 15.1 |
trpD | 1317927 | 3.4 | 1317949 | 2.0 | 1317981 | + | 16.8 |
trxC/yfiF | 2716648 | 3.6 | 2716665 | 3.5 | 2716706 | - | 18.9 |
ubiC | 4250399 | 7.6 | 4250416 | 8.1 | 4250427 | + | 12.8 |
ugpB | 3590405 | 7.8 | 3590427 | 6.4 | 3590448 | - | 16.4 |
ung/yfiD | 2714537 | 3.5 | 2714570 | 3.2 | 2714527 | + | 10.5 |
uspC | 1977521 | 15.5 | 1977537 | 18.4 | 1977548 | + | 17.3 |
uxaB1 | 1608750 | 6.0 | 1608715 | 4.9 | NaN | NaN | NaN |
yaiZ/yaiY | 398660 | 5.1 | 398696 | 6.2 | 398716 | - | 11.2 |
ybaY/tesB | 474480 | 3.0 | 474519 | 2.7 | 474531 | + | 18.3 |
ybcV_intragenic | 578611 | 4.0 | 578634 | 3.0 | 578708 | + | 12.5 |
ybdJ | 605400 | 3.2 | 605428 | 3.0 | 605430 | + | 17.3 |
ybdN | 1451687 | 3.7 | 1451713 | 4.4 | 1451753 | - | 15.7 |
ybfA/kdpF | 728168 | 4.4 | 728182 | 4.2 | 728198 | - | 15.0 |
ybiU | 858305 | 16.3 | 858323 | 15.4 | 858343 | + | 17.1 |
yceJ | 1118283 | 3.3 | 1118305 | 2.0 | 1118341 | - | 15.8 |
ycgV | 1255346 | 2.4 | 1255320 | 2.1 | 1255353 | - | 19.2 |
ycjG/tpx | 1386819 | 2.6 | 1386801 | 2.7 | 1386864 | - | 17.0 |
ydcJ/ydcI | 1493129 | 2.7 | 1493151 | 3.6 | 1493175 | - | 15.6 |
yecR/isrB_or_azuC | 1986029 | 5.4 | 1986030 | 3.9 | 1986099 | + | 17.0 |
yegE/udk | 2141127 | 2.9 | 2141185 | 2.7 | NaN | NaN | NaN |
yehB_intragenic | 2188596 | 3.6 | 2188623 | 3.0 | 2188606 | + | 15.4 |
yeiQ1 | 2264169 | 12.1 | 2264107 | 3.5 | 2264237 | + | 20.5 |
yejG1 | 2276269 | 8.2 | 2276279 | 8.5 | 2276322 | - | 18.8 |
yfbM | 2384795 | 4.5 | 2384816 | 4.5 | 2384825 | + | 14.5 |
yffQ | 2561375 | 5.9 | 2561415 | 6.3 | NaN | NaN | NaN |
ygbE | 2871290 | 2.7 | 2871334 | 5.3 | 2871332 | + | 15.4 |
ygjG | 3217347 | 12.5 | 3217372 | 12.0 | NaN | NaN | NaN |
yhiS_2_intragenic | 3651471 | 3.6 | 3651471 | 3.5 | 3651490 | + | 13.5 |
yhjE | 3672558 | 76.9 | 3672585 | 83.0 | 3672621 | - | 17.7 |
yhjX | 3710211 | 12.4 | 3710222 | 15.2 | 3710228 | + | 14.4 |
yifK | 3978765 | 4.9 | 3978811 | 4.9 | NaN | NaN | NaN |
yigG | 4001166 | 4.0 | 4001194 | 3.6 | 4001224 | - | 19.2 |
yjiJ | 4560669 | 2.8 | 4560714 | 2.8 | 4560740 | - | 20.3 |
yjiS/yjiR | 4569683 | 3.0 | 4569668 | 2.4 | 4569691 | + | 17.4 |
yjtD | 4638845 | 4.1 | 4638852 | 4.9 | 4638882 | + | 16.6 |
ymgJ/ymjI | 1222398 | 5.0 | 1222442 | 3.1 | 1222426 | + | 10.0 |
ynaJ | 1395308 | 10.8 | 1395329 | 13.3 | 1395358 | - | 16.0 |
yncE/yncD | 1521168 | 3.6 | 1521200 | 3.8 | 1521129 | + | 14.3 |
yohO | 2213656 | 2.5 | 2213637 | 1.9 | 2213630 | - | 14.2 |
ypdA/yfdZ | 2496459 | 2.9 | 2496499 | 3.3 | 2496526 | - | 14.7 |
ytfF | 4431123 | 4.0 | 4431150 | 3.3 | 4431197 | - | 19.2 |
# Fetch every ChipPeakGene record whose gene_name is 'caiT'.
(
    ome.query(ChipPeakGene)
    .filter_by(gene_name='caiT')
    .all()
)
[ChipPeakGene (ArcA): (Gene: caiT (b0040), Condition: C:glucose, N:NH4Cl, e:NO3, BindingSite: (+)41664-42181), ChipPeakGene (ArcA): (Gene: caiT (b0040), Condition: C:glucose, N:NH4Cl, e:anaerobic, BindingSite: (+)41673-42043)]
# Free-text notes keyed by gene label, one entry per peak flagged as a
# discrepancy.
# NOTE(review): keys appear intended to match the 'gene' index labels of the
# peak table loaded earlier (e.g. 'spf1', 'intergenic') -- confirm before
# using them for index lookups. 'intergenic' occurs several times in that
# table, so a single note covers all of those rows.
kiley_discrepancies = {
    'pdhR': 'very weak, insignificant peak',
    'gcd': 'very weak, insignificant peak',
    'intF_intergenic': 'variable, phage related peak',
    'intF': 'variable, phage related peak',
    'yaiZ/yaiY': 'weak peak, fell just below threshold',
    'lon': 'variable, potential sraA small RNA contamination',
    'ybcV_intragenic': 'weak, variable, phage proximal',
    'ybdJ': 'very weak, essentially no signal',
    'ybaY/tesB': 'three significant out of 12',
    'hyaA': 'two significant nitrate peaks',
    'csgD': 'two significant anaerobic peaks',
    'csgB': 'two significant anaerobic peaks',
    'ymgJ/ymjI': 'no signal',
    # Spelling fixed ('signficant') so this note matches the other
    # 'two significant anaerobic peaks' entries exactly.
    'ycgV': 'two significant anaerobic peaks',
    'ompW': 'two significant anaerobic peaks',
    'fnrS': 'no signal',
    'CO343': 'phage',
    'ybdN': 'two significant anaerobic peaks',
    'ydcJ/ydcI': 'very weak, insignificant peaks',
    'intergenic': 'all weak and insignificant, 2563400 is phage',
    'ryeA/pphA': 'three weak insignificant peaks',
    'yegE/udk': 'no signal',
    'yohO': 'weak, insignificant signal, possibly small RNA contamination',
    'ung/yfiD': 'no signal',
    'yhiS_2_intragenic': 'two significant anaerobic peaks',
    'gadE': 'two weak peaks, one significant',
    'dctA': 'two significant anaerobic peaks',
    'mdtL': 'no signal',
    'yifK': 'no upstream signal',
    # Key is 'spf1' (digit one), matching the peak-table label; it was
    # previously written with a lowercase letter L.
    'spf1': 'weak signal, possibly small RNA contamination',
    'secE': 'ribosomal contamination',
    'proP': 'weak, insignificant nitrate peaks',
    'cadB': 'two significant anaerobic peaks',
}
# Number of annotated discrepancy entries (a dict's length is its key count).
len(kiley_discrepancies)
33