# Parse the equations in formulas.txt with regular expressions, load the
# coefficients into pandas objects, and evaluate
#
#     H_intercept + sum_i H_coeff[Hi] * tanh(0.5 * X_i)
#
# where X_i is the i-th parsed equation's linear combination of the inputs
# plus that equation's intercept.
#
# NOTE: bare expressions such as `equations` or `eq_coeff.T` only display a
# value in an interactive session; run as a plain script they are evaluated
# and the result is discarded.
from __future__ import print_function

import re

import numpy as np
import pandas as pd

# Read the whole file up front; the context manager closes the handle.
with open('formulas.txt') as formula_file:
    origtext = formula_file.read()
print(origtext)

# replace newlines with spaces for easier regex matching
text = origtext.replace('\n', ' ')

# 1. match two groups (key-val pairs); for example, "H1" and the equation
#    excluding "Tanh(0.5 *"
equations = dict(re.findall(r'(H\d): +TanH.0.5[^(]+([^M]+M\)*)', text))
equations

# construct the regexes and make sure they work on an equation
# (raw strings keep the backslashes intact without invalid-escape warnings)
matchfloat = r'-? *\d*\.\d*'

# match the intercept value
matchintercept = r'({matchfloat}) *\)? *\+'.format(matchfloat=matchfloat)

# match both the coefficient value and the variable name [A-M]
matchvariable = r'({matchfloat}) *\* *([A-M])'.format(matchfloat=matchfloat)

print('Intercept:')
print(re.findall(matchintercept, equations['H1']))
print('Coefficients:')
re.findall(matchvariable, equations['H1'])


def tofloat(sval):
    """Convert a matched number such as ``'- 0.5'`` to a float.

    ``matchfloat`` allows spaces between the sign and the digits, so every
    space is stripped before handing the text to ``float``.
    """
    return float(sval.replace(' ', ''))


def parsevars(eqtext):
    """Parse one equation into a ``{name: float}`` dict.

    Keys are the variable letters captured by ``matchvariable`` plus
    ``'intercept'``; every value is converted with ``tofloat``.

    Raises:
        AssertionError: unless exactly 12 distinct variables and exactly one
            intercept are found in ``eqtext``.
    """
    # findall yields (coefficient, variable) pairs; key the dict by variable
    d = {name: value for value, name in re.findall(matchvariable, eqtext)}
    assert len(d) == 12, 'expected 12 variables, found %d' % len(d)
    intercept = re.findall(matchintercept, eqtext)
    assert len(intercept) == 1, (
        'expected exactly 1 intercept, found %d' % len(intercept))
    d['intercept'] = intercept[0]
    return {key: tofloat(val) for key, val in d.items()}


def dotcoeff(coeff, data):
    """Perform the dot product of coeff and data, and add coeff['intercept'].

    Use the index (or columns) of the input data Series (or DataFrame).
    The transposes make ``data.T.index`` name the variables in both cases.
    """
    return coeff[data.T.index].dot(data.T).T + coeff['intercept']


def docalc(data):
    """Evaluate the full formula for ``data`` (a Series or a DataFrame).

    Reads the module-level ``eq_coeff`` and ``H_coeff`` tables.
    """
    X = dotcoeff(eq_coeff, data)
    TANH_X = np.tanh(0.5 * X)
    return dotcoeff(H_coeff, TANH_X)


parsevars(equations['H1'])

# One row per equation, ordered by equation name.  Lists (not map objects or
# dict views) are passed so the construction does not depend on Python 2
# semantics; DataFrame.sort() was removed from pandas, sort_index() replaces it.
eq_coeff = pd.DataFrame(
    [parsevars(eqtext) for eqtext in equations.values()],
    index=list(equations.keys())
).sort_index()
eq_coeff.T  # too wide for the screen, so take the transpose

text[:text.index('H1:')]

coeff = re.findall(
    r'({matchfloat}){plus}'
    r'({matchfloat}){times}H1{plus}'
    r'({matchfloat}){times}H2{plus}'
    r'({matchfloat}){times}H3{plus}'
    r'({matchfloat}){times}H4'
    .format(matchfloat=matchfloat, plus=r' *\+ *', times=r' *\* *'),
    text)
coeff

H_coeff = pd.Series([tofloat(sval) for sval in coeff[0]],
                    index=['intercept', 'H1', 'H2', 'H3', 'H4'])
H_coeff

# sample inputs 0.1, 0.2, ..., 1.2 -- one per variable, excluding 'intercept'
input_series = 0.1 + 0.1 * pd.Series(
    range(12), index=eq_coeff.columns.drop('intercept'))
input_series

# calculate the value within the "0.5*()" parentheses
X = eq_coeff[input_series.index].dot(input_series) + eq_coeff['intercept']
X

# for sanity, let's look at the individual contributions of each factor
debug = eq_coeff[input_series.index] * input_series
debug['intercept'] = eq_coeff['intercept']
debug.T

# for kicks, let's use this debug DataFrame to perform a consistency check
X.equals(debug.sum(axis=1))

# continuing with the calculation...
TANH_X = np.tanh(0.5 * X)
TANH_X

# and take the dot product again just as we did above
result = H_coeff[TANH_X.index].dot(TANH_X) + H_coeff['intercept']
result

# Make sure we still get the same result with Series input
docalc(input_series)

# Now try DataFrame input
input_df = pd.DataFrame([input_series, 2*input_series])
input_df

docalc(input_df)