function [thetas, w, yh, psse, sse, cache] = nnls_fit(y, X, cache, theta_space, f, regtype, lambda)
% [thetas, w, yh, psse, sse, cache] = nnls_fit(y, X, cache, theta_space, f, regtype, lambda)
%  Fits regularized non-negative least squares (NNLS) to estimate thetas, w.
%  I.e. solves
%   minimize psse = sum_i (y_i - sum_j w_j f(x_i,theta_j))^2 + lambda * P(w)
%  subject to w >= 0, using lsqnonneg on an (optionally augmented) system.
%
% Inputs:
%  - y, n x 1 signal (a row vector is accepted and treated as a column)
%  - X, p x n design matrix X = (x_1,...,x_n)
%  - cache: can be left blank [], or a struct with the following fields
%   * thetas, D x K1 matrix: previously cached parameter points
%   * inds, 1 x K1 matrix: the theta_space cell index for cached thetas
%   * F, n x K1 matrix: previously cached signal matrix
%   * nms, 1 x K1 matrix: column norms of F
%    When empty, the cache is built from theta_space via predictor_matrix.
%    NOTE(review): F is built by predictor_matrix with the given regtype
%    and lambda, so a cache is presumably only valid for reuse with the
%    same regtype/lambda -- confirm against predictor_matrix.
%  - theta_space , D x K0 grid of parameters = (theta_1,...,theta_K0)
%  - f, function handle of the form f(x,theta): specifies kernel family
%  - regtype, 'none', 'L1', or 'unweighted': regularization type
%  - lambda, scalar: penalization constant
%
% Outputs:
%  - thetas, D x K matrix of estimated parameters: the columns of
%    cache.thetas whose fitted weight is strictly positive
%  - w, K x 1 vector of estimated (strictly positive) weights
%  - yh, n x 1 predicted signal
%  - psse, penalized sum of squares error (residual of the augmented system)
%  - sse, sum of squares error for regression (first n rows only)
%  - cache: a struct with the following fields
%   * thetas, D x K1 matrix: cached parameter points
%   * inds, 1 x K1 matrix: the theta_space cell index for cached thetas
%   * F, n x K1 matrix: cached signal matrix
%   * nms, 1 x K1 matrix: column norms of F
%    A non-empty input cache is returned unchanged.
%
% Example usage:
%  theta_space = .5 .* (fullfact([5 5])-3)'; % grid on [-1,1]^2
%  delta = .5;
%  thetas = sample_parameters(theta_space,delta,10); % will sample points from [-1.25,1.25]^2
%  w = rand(10,1);
%  f = @(x,theta) exp(-norm(x-theta)^2); % gaussian kernel
%  X = randn(2,10);
%  regtype = 'L1'; lambda = 0.1;
%  noise_type = 'gaussian'; noise=0.1; % specifies gaussian noise N(0, 0.01)
%  [y,y0] = generate_signal(X, thetas, w, f, noise_type, noise);
%  [thetas_n, w_n, yh_n, psse_n, sse_n] = nnls_fit(y,X,[],theta_space,f,regtype,lambda);

K0 = size(theta_space,2);

% Build the cache from the full grid when the caller did not supply one.
if isempty(cache)
  cached_thetas = theta_space;
  cached_inds = 1:K0;
  cached_F = predictor_matrix(X, theta_space, f, regtype, lambda);
  % Explicit dim=1 so a single-row F still yields one norm per column.
  cached_nms = sqrt(sum(cached_F.^2, 1));
  cache = struct('thetas',cached_thetas,'inds',cached_inds,'F',cached_F,'nms',cached_nms);
end

F = cache.F;

% Work with a column vector so the row count and augmentation are well defined.
y = y(:);
n = size(y,1);

% Regularized fits append one extra target entry to y; F is presumably
% delivered by predictor_matrix with the matching penalty row already
% appended -- TODO confirm against predictor_matrix.
if strcmp(regtype,'L1')
  y = [y; 0];
elseif strcmp(regtype,'unweighted')
  y = [y; sqrt(lambda)];
end

betaa = lsqnonneg(F, y);
yh = F * betaa;

% Residual over the full (augmented) system vs. over the n data rows only.
psse = norm(yh - y)^2;
sse = norm(yh(1:n)-y(1:n))^2;

% betaa has one entry per cached column, so the active parameters must be
% read from cache.thetas (identical to theta_space when the cache was just
% built above, but not when a caller supplies a different cached set).
active = betaa > 0;
w = betaa(active);
thetas = cache.thetas(:, active);
yh = yh(1:n);

end

