%% Tutorial for Elastic Basis Pursuit

% This folder contains demonstration code for applying EBP to arbitrary mixture problems
% This tutorial provides a simple example of how to use the code to fit a 2D Gaussian mixture
% Step I.   We specify the kernel function by writing an anonymous function
%           f(x,theta). A kernel function is the assumed function that allows us to
%           predict the measurements from individual components of the
%           model.
% Step II.  As an example, we specify the parameter space by constructing a cubic grid
%           of possible parameters.
% Step III. We specify a design matrix X = x_1,...,x_n, that defines
%           the points at which measurements were taken.
% Step IV.  We specify the true parameters: K0, thetas0, w0, and noise
% Step V.   We simulate a signal y, by y_i = sum_j w0_j f(x_i, thetas0_j) + error_i
%            and the noiseless version y0_i = sum_j w0_j f(x_i, thetas0_j)
% Step VI.  Specify an oracle function tau:
%            We provide a generic oracle function which uses gradient
%            descent, given a cache of starting points w/ precomputed
%            kernels
%           Create function handle vfunc for evaluating prediction error
% Step VII. Fit the model using EBP!
%            EBP will fit the model y_i ~ sum_j w_j f(x_i, thetas_j)
%            to estimate the parameters (thetas) and weights (w)
%           We specify an L1 regularization, so that EBP solves
%            minimize ||y - sum_j w_j F(x_i,thetas_j)||^2 + lambda * (sum_j w_j)
%Step VIII. Show the quality of the estimated parameters 

%% I. Specification of kernel function: 2D rotated square of side length 1 (half-width 0.5)
%      parameters: (mu1, mu2) - center; angle - angle of rotation, in units of pi radians
%      theta is a 3x1 vector with components (mu1,mu2,angle), and x is a 2x1 vector
% We write the kernel function f(x,theta) which we use to generate the data

% Express a point in the square's own frame: subtract the centre theta(1:2),
% then apply the orthogonal matrix built from the angle theta(3)*pi.
rot_coords = @(pt, th) ...
    [sin(pi*th(3)),  cos(pi*th(3)); ...
     cos(pi*th(3)), -sin(pi*th(3))] * (pt - th(1:2));

% L-infinity distance from the centre, measured in that frame.
rot_l_inf = @(pt, th) max(abs(rot_coords(pt, th)));

% Scalar soft-threshold: 0 when |val| <= lim, otherwise val shrunk towards 0 by lim.
% (Uses matrix '*', so it is only meant for scalar arguments.)
soft_thres = @(val, lim) (abs(val) > lim) * sign(val) * (abs(val) - lim);

% Kernel: exactly 1 while the L-infinity distance is at most 0.5, and
% exp(-10*d^2) for an excess distance d beyond 0.5.
f = @(pt, th) exp(-10 * soft_thres(rot_l_inf(pt, th), 0.5)^2);

% Outline of the square described by theta (centre theta(1:2), angle theta(3)*pi),
% returned as a 2x6 matrix of points for plotting. The unit-square offsets
% (+/-0.5) are listed column-wise; the first two corners are repeated at the
% end so the drawn polyline closes.
sqcorners = @(theta) ...
    [sin(pi*theta(3)),  cos(pi*theta(3)); ...
     cos(pi*theta(3)), -sin(pi*theta(3))] ...
    * [-0.5, -0.5, 0.5,  0.5, -0.5, -0.5; ...
       -0.5,  0.5, 0.5, -0.5, -0.5,  0.5] ...
    + repmat(theta(1:2), 1, 6);

%% II. Specification of parameter space
% We take [-1,1]^2 x [0,0.5] as the parameter space
% Specifying this parameter space involves constructing a grid within the parameter space

% Full-factorial grid with one candidate parameter per column: 21 x 21 x 6
% levels, rescaled from 0-based level indices to steps of 1/20.
theta_space = (1/20) .* transpose(fullfact([21 21 6]) - 1);

% Centre coordinates: map [0,1] onto [-1,1] (first level maps to +1, last to -1).
theta_space(1:2,:) = 1 - 2 .* theta_space(1:2,:);

% Angle coordinate: stretch [0,0.25] to [0,0.5].
theta_space(3,:) = theta_space(3,:) .* 2;

% Grid-related scale passed on to sample_parameters and generic_oracle.
delta = 0.1;

% Sanity checks: the grid must span exactly [-1,1]^2 x [0,0.5].
assert(isequal(min(theta_space, [], 2), [-1; -1; 0]))
assert(isequal(max(theta_space, [], 2), [1; 1; 0.5]))

%% III. Specification of design matrix X
% X = x_1,...,x_n are the points at which we observe y_i = sum_j w_j f(x_i, theta_j) + error_i
% Note our convention of taking X (p x n) even though y is n x 1
% X_val is the design matrix for an independent data set used for evaluating
% error

% Training design: 41 x 41 lattice over [-2,2]^2 with spacing 2/20, one point per column.
X = transpose(fullfact([41 41]) - 21) .* (2/20);
[~, n] = size(X);

% Validation design: coarser 21 x 21 lattice over [-2,2]^2 with spacing 2/10.
X_val = transpose(fullfact([21 21]) - 11) .* (2/10);
[~, n_val] = size(X_val);

%% IV. Specification of model parameters
% The parameters of the model (and their assigned values) are
%  - K0, the number of components, in this case 3
%  - thetas0, a 3xK0 matrix of the parameters, each column drawn randomly
%     from the parameter space
%  - w0, a K0x1 vector of weights, in this case all weights = 1/K0
%  - noise, standard deviation of noise
%  - noise_type, Gaussian noise

% Ground truth used to simulate the data.
K0 = 3;                        % number of mixture components
noise_type = 'gaussian';       % noise family passed to generate_signal
noise = 0.01;                  % noise level passed to generate_signal
w0 = ones(K0,1) ./ K0;         % equal weights summing to one

% Draw the K0 true parameter vectors (one per column) from the grid.
thetas0 = sample_parameters(theta_space, delta, K0);

% Plot the parameters

figure;

% Mark each true centre with a black dot, then keep the axes for the outlines.
scatter(thetas0(1,:), thetas0(2,:), 'k.');
hold on;

% Outline every true square; the stroke width is proportional to its weight.
for comp = 1:K0
    sqc = sqcorners(thetas0(:,comp));
    plot(sqc(1,:), sqc(2,:), 'Color', 'k', 'LineWidth', 10*w0(comp));
end

title('Model parameters');

%% V. Generation of data
% We generate a noiseless signal y0 and a noisy signal y using design matrix X and the true parameters of the model

% Training data on design X: y is the noisy signal and y0 its noiseless
% counterpart (per the tutorial header, y0_i = sum_j w0_j f(x_i, thetas0_j)).
% NOTE(review): generate_signal presumably draws from the global random stream,
% so the order of these two calls affects the simulated noise - confirm.
[y,y0] = generate_signal(X, thetas0, w0, f, noise_type, noise);
% Same model evaluated on the validation design X_val; y_val and y0_val feed
% the validation functions vfunc and vfunc0 defined in section VI.
[y_val,y0_val] = generate_signal(X_val, thetas0, w0, f, noise_type, noise);

% Plot the data

% Overlay the observed signal on the true model: black squares are the true
% components, bubbles are the measurements (area proportional to |y|).
figure;
scatter(thetas0(1,:),thetas0(2,:),'k.'); hold on;
for comp = 1:K0
    % outline of the comp-th true square, stroke width proportional to its weight
    sqc = sqcorners(thetas0(:,comp));
    plot(sqc(1,:),sqc(2,:),'k','LineWidth',10*w0(comp));
end

% Positive measurements in blue, negative ones in red. Both masks are strict:
% a measurement that is exactly zero has no drawable bubble, and scatter
% requires positive marker areas, so such points are skipped instead of being
% passed with size 0.
pos = (y > 0);
neg = (y < 0);
scatter(X(1,pos),X(2,pos),100.*y(pos)','b');
scatter(X(1,neg),X(2,neg),-100.*y(neg)','r');

% Single title for the figure (an earlier 'Model parameters' title was
% immediately overwritten by this one and has been dropped).
title('Data');

%% VI. Specify the oracle tau and prediction error function
% An oracle is a function which takes a residual (n x 1 vector) as input
% and fits a single-component model, f_theta, to the residual; it then
% returns the parameter theta. We provide a generic oracle builder
% (generic_oracle.m). We use the generic_oracle function to create a
% function handle tau, which we will pass to EBP; it uses gradient
% descent w/ numerical derivatives on random starting points.
% 
% Our EBP method also accepts a prediction error function, vfunc. It uses
% vfunc to estimate the prediction error of its current model. Internally,
% vfunc uses an independent set of validation data to estimate the
% prediction error of a model. We provide a validation function builder
% (validation_function.m) which can be used to build a validation function,
% using a validation data set.

% Regularisation settings shared by the oracle and the fits.
regtype = 'L1';
lambda  = 0.01;

% Gradient-descent settings for the oracle.
grad_eps   = [1e-5 0.9];   % [numerical-derivative step size, line-search multiplier]
Ngradsteps = [3 10];       % [gradient steps, maximum line-search steps]
Nrestarts  = 10;           % number of random restarts

% Oracle handle. The settings above are captured when the handle is created,
% so it must be rebuilt after changing any of them.
tau = @(r,cache) generic_oracle(r, X, f, theta_space, cache, delta, ...
    regtype, lambda, grad_eps, Ngradsteps, Nrestarts);

% Prediction error measured against the noisy validation signal ...
vfunc = @(thetas, w) validation_function(thetas, w, X_val, f, y_val);
% ... and the TRUE prediction error, measured against the noiseless one.
vfunc0 = @(thetas, w) validation_function(thetas, w, X_val, f, y0_val);


%% VII. Use NNLS and elastic basis pursuit to fit the model
%  Fit the model using EBP!
%  - EBP will fit the model y_i ~ sum_j w_j f(x_i, thetas_j)
%    to estimate the parameters (thetas) and weights (w)
%  - We specify an L1 regularization, so that EBP solves
%    minimize ||y - sum_j w_j F(x_i,thetas_j)||^2 + lambda * (sum_j w_j)

%  fit NNLS
% Start from an empty kernel cache; nnls_fit returns the filled cache, which
% is handed to the EBP fit afterwards.
cache = [];
[thetas_n, w_n, yh_n, psse_n, sse_n, cache] = ...
    nnls_fit(y, X, cache, theta_space, f, regtype, lambda);

K_n = size(thetas_n, 2);          % number of components returned by NNLS
r_n = y - yh_n;                   % training residual of the NNLS fit
mse_n = vfunc(thetas_n, w_n);     % error against the noisy validation signal
% error against the noiseless validation signal; left unsuppressed so the
% value is printed to the console
mse0_n = vfunc0(thetas_n, w_n)

% show  NNLSfit results

figure;

% True components in black; stroke width is proportional to the true weight.
for comp = 1:K0
    sqc = sqcorners(thetas0(:,comp));
    plot(sqc(1,:), sqc(2,:), 'Color', 'k', 'LineWidth', 50*w0(comp));
    hold on;
end

% NNLS components in red; stroke width is proportional to the fitted weight.
for comp = 1:K_n
    sqc = sqcorners(thetas_n(:,comp));
    plot(sqc(1,:), sqc(2,:), 'Color', 'r', 'LineWidth', 50*w_n(comp));
    hold on;
end

title('True fits (black) vs NNLS fits (red)');


%% fit EBP
% Iteration count passed to elastic_basis_pursuit.
nits = 10;

% Rebuild the oracle so it captures the current settings, which keeps this
% cell runnable on its own after tweaking lambda, grad_eps, etc.
tau = @(r,cache) generic_oracle(r, X, f, theta_space, cache, delta, ...
    regtype, lambda, grad_eps, Ngradsteps, Nrestarts);

% Run EBP, reusing the kernel cache returned by the NNLS fit.
[thetas_e, w_e, yh_e] = elastic_basis_pursuit([], y, X, cache, f, ...
    regtype, lambda, tau, vfunc, nits, true);

K_e = size(thetas_e, 2);          % number of components returned by EBP
% error against the noiseless validation signal; left unsuppressed so the
% value is printed to the console
mse0_e = vfunc0(thetas_e, w_e)


% show  EBPfit results

figure;

% True components in black; stroke width is proportional to the true weight.
for comp = 1:K0
    sqc = sqcorners(thetas0(:,comp));
    plot(sqc(1,:), sqc(2,:), 'Color', 'k', 'LineWidth', 50*w0(comp));
    hold on;
end

% EBP components in red; stroke width is proportional to the fitted weight.
for comp = 1:K_e
    sqc = sqcorners(thetas_e(:,comp));
    plot(sqc(1,:), sqc(2,:), 'Color', 'r', 'LineWidth', 50*w_e(comp));
    hold on;
end

title('True fits (black) vs EBP fits (red)');
