# Example: recover linear-model coefficients with stochastic gradient descent
# (SGDOptim.jl), then report the relative squared error of the estimate.
using SGDOptim

w = [3.0, -4.0, 5.0]            # the underlying model coefficients
n = 10000                       # number of samples
X = randn(3, n)                 # generate 10000 sample features (3 x n)
sig = 0.1                       # noise level
y = vec(w'X) + sig * randn(n)   # generate the responses, adding some noise

rmodel = riskmodel(
    LinearPred(3),   # use linear prediction x -> w'x, 3 is the input dimension
    SqrLoss())       # use squared loss: loss(u, y) = (u - y)^2/2

w_e = sgd(rmodel,
    zeros(3),                       # the initial guess
    minibatch_seq(X, y, 10),        # supply the data in mini-batches, each with 10 samples
    reg = SqrL2Reg(1.0e-4),         # add squared L2 regularization with coefficient 1.0e-4
    lrate = t -> 1.0 / (100.0 + t), # set the rule of learning rate
    cbinterval = 100,               # invoke the callback every 100 iterations
    callback = simple_trace)        # print the optimization trace in callback

# Relative squared error of the estimate. `sum(abs2, x)` replaces `sumabs2(x)`,
# which was deprecated in Julia 0.5 and removed in 0.6; the replacement works
# on all Julia versions, so this stays compatible with the original's era.
sum(abs2, w_e - w) / sum(abs2, w)