"""PyMADlib demo script.

Connects to a database through ``DBConnect`` and then walks through:

* linear regression on the wine data set (numeric features),
* linear regression on the auto-mpg data set (numeric + categorical features),
* logistic regression on the wine data set, followed by an ROC plot,
* the K-means demo shipped in ``pymadlib.example``.

Connection settings default to the values originally hard-coded in this
script and can be overridden with the ``PYMADLIB_HOST``, ``PYMADLIB_PORT``,
``PYMADLIB_DATABASE``, ``PYMADLIB_USER`` and ``PYMADLIB_PASSWORD`` environment
variables (e.g. ``PYMADLIB_HOST=localhost``).
"""
import os

# The wildcard imports are kept as-is: DBConnect, LinearRegression,
# LogisticRegression and ROCPlot are used below without qualification and
# reach this namespace through them.
from pymadlib.pymadlib import *
from pymadlib import example
from pymadlib.example import *

# Imported after the wildcard imports (as in the original) so that these
# bindings win should the wildcards export the same names.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    # Older pandas releases only provide the legacy module path.
    from pandas.tools.plotting import scatter_matrix
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Database connection
# ---------------------------------------------------------------------------
conn_str_template = '''host='{hostname}' port ='{port}' dbname='{database}' user='{username}' password='{password}' '''
conn_str = conn_str_template.format(
    hostname=os.environ.get('PYMADLIB_HOST', '192.168.241.161'),
    database=os.environ.get('PYMADLIB_DATABASE', 'pivotal_test'),
    port=os.environ.get('PYMADLIB_PORT', '5433'),
    username=os.environ.get('PYMADLIB_USER', 'gpadmin'),
    password=os.environ.get('PYMADLIB_PASSWORD', 'gpadmin'),
)

# PyMADlib is compatible with only MADlib v0.5, so we need to explicitly
# specify the MADlib schema.
# We have installed both MADlib 1.3 and MADlib 0.5 in this VM.
conn = DBConnect(conn_str=conn_str, madlib_schema='madlib_v05')

# ---------------------------------------------------------------------------
# Linear regression
# ---------------------------------------------------------------------------
# View documentation
mdl = LinearRegression(conn)
print(mdl.train.__doc__)

# Train model and score
lreg = LinearRegression(conn)
mdl_dict, mdl_params = lreg.train(
    'public.wine_training_set',
    ['1', 'alcohol', 'proline', 'hue', 'color_intensity', 'flavanoids'],
    'quality',
)

# Show model params (printed explicitly: a bare expression is only echoed in
# an interactive session, not when this file runs as a script).
print(mdl_params)

# Now do prediction
predictions = lreg.predict('public.wine_test_set', 'quality')

# Show prediction results
print(predictions.head())

# Show scatter matrix of actual vs. predicted
smat = scatter_matrix(predictions.get(['quality', 'prediction']), diagonal='kde')

# Train linear regression model on a mixture of numeric and categorical
# variables
mdl_dict, mdl_params = lreg.train(
    'public.auto_mpg_train',
    ['1', 'height', 'width', 'length', 'highway_mpg', 'engine_size',
     'make', 'fuel_type', 'fuel_system'],
    'price',
)
predictions = lreg.predict('public.auto_mpg_test', 'price')

# Show sample predictions
print(predictions.head())

# Display scatter plot of actual vs. predicted values
smat = scatter_matrix(predictions.get(['price', 'prediction']), diagonal='kde')

# ---------------------------------------------------------------------------
# Logistic regression
# ---------------------------------------------------------------------------
# 1) Logistic regression with numeric variables alone
log_reg = LogisticRegression(conn)

# Train model
mdl_dict, mdl_params = log_reg.train('public.wine_bool_training_set', 'indep', 'quality_label')

# Show model parameters
print(mdl_params.head())

# 2) Logistic regression prediction
# NOTE(review): unlike every other table in this script, this table name is
# not qualified with the 'public.' schema -- confirm that is intended.
predictions = log_reg.predict('wine_bool_test_set', '', None)
print(predictions.head())

# Display ROC curve
actual = predictions.get('quality_label')
predicted = predictions.get('prediction')
ROCPlot('ROC curve Logistic Reg. on Continuous Features ', ['Logistic Regression'], actual, predicted)

# ---------------------------------------------------------------------------
# K-means
# ---------------------------------------------------------------------------
# Demonstrate K-means
example.kmeansDemo(conn)