In [1]:

from genda.formats import Genotype as G
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:

# Load the saved example data.
# In this example, case and control are just data frames with data.
# pd.load was removed from pandas; pd.read_pickle is its replacement for
# reading pickled objects.
# NOTE(review): unpickling can execute arbitrary code -- only load files you trust.
control = pd.read_pickle('tests/data/control_example')
case = pd.read_pickle('tests/data/case_example')
encoder = pd.read_pickle('tests/data/encoder_example')

In [3]:

# Apply the encoder.
# If your data has already had an encoder applied to it, skip this step.
# If your data is a SNP_array object, SNP_array.apply_encoder simplifies this process.
from genda.formats.Snp_array import _single_column_allele

# Select the rows named by the encoder's index, in the encoder's order.
# The .ix indexer was removed from pandas; .loc is the label-based replacement.
# In current pandas, .loc raises KeyError if any encoder label is absent from the frame.
case = case.loc[encoder.index, :]
control = control.loc[encoder.index, :]

# Encode each row (axis=1) with the project helper and keep the result on `.geno`,
# which the later cells read.
# NOTE(review): if `geno` is not an existing column this is stored as a plain
# attribute on the DataFrame object, not as a column -- confirm that is intended.
case.geno = case.apply(_single_column_allele, encoder=encoder, axis=1)
control.geno = control.apply(_single_column_allele, encoder=encoder, axis=1)

In [4]:

# Remove rows so that the two indexes are identical and ready to analyze
aligned_case, aligned_control = G.comparable(case.geno, control.geno)
case.geno = aligned_case
control.geno = aligned_control

In [5]:

# Run the association test, passing the control genotypes first and the case genotypes second
control_genotypes = control.geno
case_genotypes = case.geno
p = G.chi2_association(control_genotypes, case_genotypes)

In [6]:

# Scatter-plot -log10 of every entry in p[1] against its position in the sequence
# (presumably p[1] holds the per-SNP p-values -- verify against G.chi2_association)
plotted_values = p[1]
positions = range(len(plotted_values))
plt.scatter(positions, -np.log10(plotted_values))

Out[6]:

<matplotlib.collections.PathCollection at 0x8794590>

In [7]:

# Look up the value stored in p[0] for one particular SNP, keyed by its rs ID
snp_id = 'rs12913832'
p[0][snp_id]

Out[7]:

9.581070982428592e-09