from gdsctools import elastic_net
reload(elastic_net)
%pylab inline
rcParams['figure.figsize'] = 10,5
Populating the interactive namespace from numpy and matplotlib
from gdsctools import IC50, gdsctools_data, GenomicFeatures
ic50 = IC50(gdsctools_data("IC50_v5.csv.gz"))
gf = GenomicFeatures(gdsctools_data("genomic_features_v5.csv.gz"))
gd = elastic_net.ElasticNet(ic50, gf)
ElasticNet requires 2 parameters called alpha and l1_ratio note that this is different from glmnet terminology
sklearn | |
---|---|
l1_ratio | alpha |
alpha | lambda |
elatic_net takes the drugname as input + 11_ratio set to 0.5 by dfault and an alpha parameter. If we naively try 0.01 for alpha, we get a set of scores and plot showing the predicted versus test sets
scores = gd.elastic_net(1047, alpha=0.1, n_folds=10, show=True)
_ = hist(scores,bins=20); xlabel('scores for the N folds ')
<matplotlib.text.Text at 0x611b590>
We see here that scores are pretty bad (zero or even less)
If we try another value (0.01), we get better results
scores = gd.elastic_net(1047, alpha=0.01, n_folds=10, show=True)
_ = hist(scores,bins=20); xlabel('scores for the N folds ')
<matplotlib.text.Text at 0x74fcc50>
Now the question is how to get the alpha value?
alphas, all_scores, maximum, alpha_best = gd.tune_alpha(1047, alphas=logspace(-6,0,40), n_folds=10)
alpha_best
0.0070170382867038297
log(alpha_best)
-4.9594140464487131
This is not instateneous but can be speed up by preventing the plotting, or playing on the number of alphas or the number of N_folds
res = gd.tune_alpha(1047, alphas=logspace(-6,0,40), n_folds=10, show=False)
res = gd.tune_alpha(1047, alphas=logspace(-6,0,20), n_folds=4, show=False)
alphas, all_scores, maximum, best_alpha = gd.tune_alpha(1047, alphas=logspace(-6,0,20), n_folds=4, show=True)
Using only 4 folds and 20 alphas decrease the computing time to 286ms instead of 1.34s
Another method to identify the best alpha is to use the method based on concordance index. Although it is much longer to compute, it may be interesting for further detailled analysis
CI_best_alpha = gd.plot_cindex(1047, pylab.logspace(-6,0,20))
[-----------------95%---------------- ] 19 of 20 complete in 271.5 sec
log(CI_best_alpha)
-5.8170570770375898
This is much longer that tuning_alpha method. The other point is that the 2 values found differe slightly from -4.9 to -5.8 In this case, the CI values from -14 to -6 are pretty flat and may not be so precise as compare to the other method.
Coming back on the previous value of 0.000701, let re-run the elastic net with this value:
scores = gd.elastic_net(1047, alpha=0.00701,n_folds=10, show=True)
We can also now look at the weights for that model:
weights = gd.plot_weight(1047, alpha=0.00701)
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj) 337 pass 338 else: --> 339 return printer(obj) 340 # Finally look for special method names 341 method = _safe_get_formatter_method(obj, self.print_method) /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in <lambda>(fig) 224 225 if 'png' in formats: --> 226 png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs)) 227 if 'retina' in formats or 'png2x' in formats: 228 png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs)) /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in print_figure(fig, fmt, bbox_inches, **kwargs) 115 116 bytes_io = BytesIO() --> 117 fig.canvas.print_figure(bytes_io, **kw) 118 data = bytes_io.getvalue() 119 if fmt == 'svg': /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs) 2178 orientation=orientation, 2179 dryrun=True, -> 2180 **kwargs) 2181 renderer = self.figure._cachedRenderer 2182 bbox_inches = self.figure.get_tightbbox(renderer) /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs) 525 526 def print_png(self, filename_or_obj, *args, **kwargs): --> 527 FigureCanvasAgg.draw(self) 528 renderer = self.get_renderer() 529 original_dpi = renderer.dpi /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in draw(self) 472 473 try: --> 474 self.figure.draw(self.renderer) 475 finally: 476 RendererAgg.lock.release() /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/figure.pyc in draw(self, renderer) 1157 dsu.sort(key=itemgetter(0)) 1158 for zorder, a, func, args in dsu: -> 1159 func(*args) 1160 1161 renderer.close_group('figure') /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axes/_base.pyc in draw(self, renderer, inframe) 2322 2323 for zorder, a in dsu: -> 2324 a.draw(renderer) 2325 2326 renderer.close_group('axes') /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axis.pyc in draw(self, renderer, *args, **kwargs) 1109 1110 for tick in ticks_to_draw: -> 1111 tick.draw(renderer) 1112 1113 # scale up the axis label box to also find the neighbors, not /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axis.pyc in draw(self, renderer) 250 251 if self.label1On: --> 252 self.label1.draw(renderer) 253 if self.label2On: 254 self.label2.draw(renderer) /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs) 59 def draw_wrapper(artist, renderer, *args, **kwargs): 60 before(artist, renderer) ---> 61 draw(artist, renderer, *args, **kwargs) 62 after(artist, renderer) 63 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/text.pyc in draw(self, renderer) 790 textrenderer.draw_tex(gc, x, y, clean_line, 791 textobj._fontproperties, angle, --> 792 mtext=mtext) 793 else: 794 textrenderer.draw_text(gc, x, y, clean_line, /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in draw_tex(self, gc, x, y, s, prop, angle, ismath, mtext) 251 texmanager = self.get_texmanager() 252 --> 253 Z = texmanager.get_grey(s, size, self.dpi) 254 Z = np.array(Z * 255.0, np.uint8) 255 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/texmanager.pyc in get_grey(self, tex, fontsize, dpi) 581 582 if alpha is None: --> 583 pngfile = self.make_png(tex, fontsize, dpi) 584 X = read_png(os.path.join(self.texcache, pngfile)) 585 /home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/texmanager.pyc in make_png(self, tex, fontsize, dpi) 522 'dvipng was not able to process the following ' 523 'file:\n%s\nHere is the full report generated by ' --> 524 'dvipng: \n\n' % dvifile + report) 525 else: 526 mpl.verbose.report(report, 'debug') RuntimeError: dvipng was not able to process the following file: /home/cokelaer/.cache/matplotlib/tex.cache/1b72844dd7a94ad6e211af28a517e002.dvi Here is the full report generated by dvipng:
<matplotlib.figure.Figure at 0x8a8f9d0>
Traceback (most recent call last): File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 175, in do_execute shell.run_cell(code, store_history=store_history, silent=silent) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2729, in run_cell self.events.trigger('post_execute') File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/events.py", line 74, in trigger func(*args, **kwargs) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/ipykernel/pylab/backend_inline.py", line 113, in flush_figures return show(True) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/ipykernel/pylab/backend_inline.py", line 36, in show display(figure_manager.canvas.figure) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/display.py", line 158, in display format_dict, md_dict = format(obj, include=include, exclude=exclude) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/formatters.py", line 177, in format data = formatter(obj) File "<decorator-gen-9>", line 2, in __call__ File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/formatters.py", line 222, in catch_format_error r = method(self, *args, **kwargs) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/formatters.py", line 339, in __call__ return printer(obj) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/pylabtools.py", line 226, in <lambda> png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs)) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/IPython/core/pylabtools.py", line 117, in print_figure fig.canvas.print_figure(bytes_io, **kw) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backend_bases.py", line 2182, in print_figure bbox_inches = self.figure.get_tightbbox(renderer) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/figure.py", line 1708, in get_tightbbox bb.append(ax.get_tightbbox(renderer)) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axes/_base.py", line 3679, in get_tightbbox bb_xaxis = self.xaxis.get_tightbbox(renderer) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axis.py", line 1075, in get_tightbbox renderer) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/axis.py", line 1058, in _get_tick_bboxes extent = tick.label1.get_window_extent(renderer) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/text.py", line 961, in get_window_extent bbox, info, descent = self._get_layout(self._renderer) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/text.py", line 352, in _get_layout ismath=False) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/backends/backend_agg.py", line 229, in get_text_width_height_descent renderer=self) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/texmanager.py", line 678, in get_text_width_height_descent page = next(iter(dvi)) File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/dviread.py", line 89, in __iter__ have_page = self._read() File "/home/cokelaer/Work/virtualenv/lib/python2.7/site-packages/matplotlib/dviread.py", line 149, in _read byte = ord(self.file.read(1)[0]) KeyboardInterrupt
# Here are the largest weigths (in absolute value)
weights.tail()
drugs = gd.ic50.drugIds
res = gd.tune_alpha(drugs[0], alphas=logspace(-6,0,20), n_folds=4, plot=False)
res[3]
0.012742749857031322