import os
os.environ['R_HOME'] = '/home/cfriedline/lib64/R'
import rpy2.robjects
import random
import string
import tempfile
import dendropy
import scipy
from multiprocessing import Pool
from IPython.parallel import Client
# Connect to the IPython.parallel cluster running under the 'sge' profile.
rc = Client(profile='sge')
# View addressing all engines of the client; used below to push names and to
# run setup on every engine.
dview = rc[:]
# Load-balanced view; get_taxa_trees submits its cluster tasks through it.
lview = rc.load_balanced_view()
# Notebook display expression: how many engines the client currently sees.
len(rc)
10
with dview.sync_imports():
import os
import rpy2
import rpy2.robjects
import random
import string
import tempfile
import dendropy
import socket
import scipy
from multiprocessing import Pool
def setup_cluster_engines():
    """
    prepares the calling process for R work: sets R_HOME and loads the ape
    library through rpy2, then reports where it ran
    @return: hostname, process id, R_HOME value and rpy2 version of the
    process that executed the call
    @rtype: tuple
    """
    r_home = '/home/cfriedline/lib64/R'
    os.environ['R_HOME'] = r_home
    # only the side effect of loading ape matters; the return value is unused
    rpy2.robjects.r('library(ape)')
    host = socket.gethostname()
    pid = os.getpid()
    return host, pid, os.environ['R_HOME'], rpy2.__version__
# Publish the function under the same name in the namespace of every engine
# addressed by dview.
dview['setup_cluster_engines'] = setup_cluster_engines
# Run it on all engines and wait for the results: one
# (hostname, pid, R_HOME, rpy2 version) tuple per engine.
dview.apply(setup_cluster_engines).get()
importing os on engine(s) importing rpy2 on engine(s) importing rpy2.robjects on engine(s) importing random on engine(s) importing string on engine(s) importing tempfile on engine(s) importing dendropy on engine(s) importing socket on engine(s) importing scipy on engine(s) importing Pool from multiprocessing on engine(s)
[('godel199', 17947, '/home/cfriedline/lib64/R', '2.4.0'), ('godel97', 28753, '/home/cfriedline/lib64/R', '2.4.0'), ('godel97', 28756, '/home/cfriedline/lib64/R', '2.4.0'), ('godel97', 28781, '/home/cfriedline/lib64/R', '2.4.0'), ('godel97', 28779, '/home/cfriedline/lib64/R', '2.4.0'), ('godel97', 28752, '/home/cfriedline/lib64/R', '2.4.0'), ('godel199', 18050, '/home/cfriedline/lib64/R', '2.4.0'), ('godel199', 18095, '/home/cfriedline/lib64/R', '2.4.0'), ('godel199', 18083, '/home/cfriedline/lib64/R', '2.4.0'), ('godel199', 18077, '/home/cfriedline/lib64/R', '2.4.0')]
# Client-side handle on the embedded R interpreter (the engines get their own
# in setup_cluster_engines).
r = rpy2.robjects.r
# Load ape in the local R session too; presumably it provides the rtree and
# write.nexus functions used by create_tree / ape_to_dendropy -- confirm.
ape = r('library(ape)')
def create_tree(num_tips, type):
    """
    creates the taxa tree in R
    The tree is generated by R's rtree under a randomly named scratch variable
    in the R global environment, converted to dendropy, and the scratch
    variable is then removed so repeated calls do not pile up trees inside R.
    @param num_tips: number of taxa to create
    @param type: type for naming (e.g., 'taxa'); used as the tip label prefix.
    The tree is generated rooted only when this is exactly "T", unrooted
    otherwise
    @return: a dendropy Tree
    @rtype: dendropy.Tree
    """
    r = rpy2.robjects.r
    # the R command below reads its inputs from these two R globals
    rpy2.robjects.globalenv['numtips'] = num_tips
    rpy2.robjects.globalenv['treetype'] = type
    # random scratch name for the R-side variable that holds the tree
    name = _get_random_string(20)
    if type == "T":
        r("%s = rtree(numtips, rooted=T, tip.label=paste(treetype, seq(1:(numtips)), sep=''))" % name)
    else:
        r("%s = rtree(numtips, rooted=F, tip.label=paste(treetype, seq(1:(numtips)), sep=''))" % name)
    try:
        return ape_to_dendropy(r[name])
    finally:
        # nothing else knows the random name, so the binding would otherwise
        # stay in R's global environment for the life of the session
        r("rm(%s)" % name)
def ape_to_dendropy(phylo):
    """
    converts an ape tree to dendropy tree
    The tree is round-tripped through a temporary NEXUS file: R's write.nexus
    writes it to the file's path and dendropy reads it back from that path.
    @param phylo: ape instance from rpy2
    @return: a dendropy tree
    @rtype: dendropy.Tree
    """
    # the with-block closes the temporary file even if R or dendropy raises,
    # instead of leaking the open handle as a bare close() at the end would
    with tempfile.NamedTemporaryFile() as f:
        rpy2.robjects.r['write.nexus'](phylo, file=f.name)
        return dendropy.Tree.get_from_path(f.name, "nexus")
def _get_random_string(length):
    """
    gets a random string of letters/numbers, ensuring that it does not start with a
    number
    The first character is drawn from the ASCII letters only and the remaining
    ones from ASCII letters and digits, so no retry is needed.
    @param length: length of the string (must be at least 1)
    @return: the random string
    @rtype: string
    @raise ValueError: if length is less than 1
    """
    # Local import so the function also works when pushed to a cluster engine.
    # numpy.random is the module the scipy.random alias pointed at, so draws
    # still come from the same global generator that seed() reseeds.
    import numpy

    if length < 1:
        raise ValueError("length must be at least 1 (got %r)" % (length,))

    # ascii_letters rather than the locale-dependent string.letters: the
    # result is used as an R variable name by create_tree
    letters = string.ascii_letters
    first = numpy.random.choice(list(letters))
    rest = numpy.random.choice(list(letters + string.digits), length - 1)
    return str(first) + ''.join(rest)
# Smoke test in the local process: build one 100-tip tree labelled T1..T100.
tree = create_tree(100, "T")
# Notebook display expression: first 80 characters of the tree's Newick string.
tree.as_newick_string()[0:80]
'((((T88:0.2252453833,T52:0.4084565411):0.2996090709,((((T68:0.1808569834,T67:0.2'
def reseed(args):
    """
    reseeds numpy's global random generator in the calling process and reports
    which process that was
    @param args: ignored; present so the function can be used with pool.map
    @return: pid of the process that executed the reseed
    @rtype: int
    """
    # Local import keeps the function self-contained; numpy.random is the
    # module the scipy.random alias pointed at, so this reseeds the same
    # generator as before.
    import numpy

    # seed() without an argument picks a fresh seed
    numpy.random.seed()
    return os.getpid()
def pool_reseed(pool, jobs, max_attempts=1000):
    """
    runs reseed through the pool, repeating the round until every job was
    handled by a different process
    @param pool: pool whose map() is used to run reseed
    @param jobs: number of reseed jobs to submit per round; a round succeeds
    only when its results contain that many distinct pids
    @param max_attempts: number of rounds to try before giving up
    @return: True and the list of pids from the successful round
    @rtype: tuple
    @raise RuntimeError: if no round succeeded within max_attempts
    """
    # bounded loop instead of unbounded recursion: a pool that can never
    # produce `jobs` distinct pids now fails with a clear message rather than
    # a recursion-depth error
    for _ in range(max_attempts):
        pids = pool.map(reseed, range(jobs))
        if len(set(pids)) == jobs:
            return True, pids
    raise RuntimeError(
        "could not reseed %d distinct worker processes in %d attempts"
        % (jobs, max_attempts))
def create_tree_mp(num_taxa):
    """
    single-argument wrapper around create_tree for use as a parallel task
    @param num_taxa: number of tips in the tree
    @return: the tree built by create_tree with type "T"
    @rtype: dendropy.Tree
    """
    return create_tree(num_taxa, "T")
def get_taxa_trees(num_trees, num_taxa):
    """
    builds num_trees trees in three different ways so the results can be
    compared: directly in this process, through a multiprocessing Pool, and
    through the load-balanced cluster view (lview)
    @param num_trees: number of trees to build per method; also the pool size
    @param num_taxa: number of tips per tree
    @return: three lists of trees, in this order: in-process, Pool, cluster
    @rtype: list
    """
    workers = Pool(num_trees)
    # single parenthesised argument: prints the same text as the print statement
    print(pool_reseed(workers, num_trees))

    local_trees = []
    pool_handles = []
    cluster_handles = []
    # each iteration submits one Pool task, builds one tree in-process and
    # submits one cluster task, in that order
    for _ in range(num_trees):
        pool_handles.append(workers.apply_async(create_tree_mp, (num_taxa,)))
        local_trees.append(create_tree_mp(num_taxa))
        cluster_handles.append(lview.apply_async(create_tree_mp, num_taxa))

    # wait for the Pool workers to finish before collecting any results
    workers.close()
    workers.join()

    pool_trees = [handle.get() for handle in pool_handles]
    cluster_trees = [handle.get() for handle in cluster_handles]
    return [local_trees, pool_trees, cluster_trees]
# Publish the tree-building helpers by name on every engine addressed by
# dview, so tasks that call create_tree_mp can resolve them remotely.
# NOTE(review): get_taxa_trees is pushed as well, but it also references
# pool_reseed and lview, which are not pushed here -- confirm it is never
# meant to be called on an engine.
dview['create_tree'] = create_tree
dview['_get_random_string'] = _get_random_string
dview['ape_to_dendropy'] = ape_to_dendropy
dview['create_tree_mp'] = create_tree_mp
dview['get_taxa_trees'] = get_taxa_trees
# jobs = 10
# pool = Pool(jobs)
# pool_reseed(pool, jobs)
# if pool_reseed:
# for i in xrange(10):
# print pool.apply_async(_get_random_string, (20,)).get()
# pool.close()
# pool.join()
# Build 5 trees of 10 taxa with each of the three methods. The call also
# prints the pool_reseed result (success flag and worker pids).
trees = get_taxa_trees(5, 10)
(True, [3499, 3500, 3501, 3503, 3502])
# trees[0]: trees built directly in this process; show the first 80
# characters of each Newick string.
for t in trees[0]:
    print t.as_newick_string()[0:80]
((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846 (((T7:0.1649202022,(((T6:0.432419851,T1:0.670651369):0.9676914262,(T9:0.25170810 (((T9:0.1202223536,((T10:0.1974260821,T2:0.9239173683):0.3418610748,T8:0.3907029 (((T6:0.3341352192,T8:0.7529280938):0.5023088937,(((T1:0.2147031985,T10:0.728587 (T5:0.870002911,(((T10:0.2627973619,T7:0.5444249122):0.1685904923,T3:0.799313113
# trees[1]: trees built in the multiprocessing Pool. In the captured output
# all five strings are identical.
# NOTE(review): presumably the forked workers share R's random state, which
# reseed does not touch -- confirm.
for t in trees[1]:
    print t.as_newick_string()[0:80]
((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846 ((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846 ((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846 ((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846 ((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846
# trees[2]: trees built through the load-balanced cluster view; show the
# first 80 characters of each Newick string.
for t in trees[2]:
    print t.as_newick_string()[0:80]
(((T10:0.5095300437,T2:0.8708823652):0.1151310115,T8:0.6142042589):0.9769569514, (((T5:0.1514588273,((T1:0.4826061821,T7:0.1636368397):0.9322419968,(T6:0.8749330 ((T10:0.9323699088,(T1:0.2327637093,(((T2:0.8742392994,T4:0.8461208662):0.681684 (((T4:0.946286211,T8:0.6548262595):0.8693261528,(T5:0.911844848,T1:0.6759150678) ((((T2:0.7475027808,T10:0.4808094592):0.7282650121,(T9:0.5261673003,T5:0.9762509