% Copyright (C) 2009  Arno Onken
%
% This program is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program.  If not, see <http://www.gnu.org/licenses/>.

% -------------------------------------------------------------------------
% Test for linear dependence structure between x and y, assuming that x and
% y are non-negative integer vectors. See Onken, Gruenewaelder, and
% Obermayer 2009, Advances in Neural Information Processing Systems 22.
%
% Arguments:
%  x         - Vector of integer random values
%  y         - Vector of integer random values of the same size as x
%  alpha     - Significance level (default alpha = 0.05)
%  mef       - Minimum expected frequency (default mef = 1)
%
% Returns:
%  h         - 1 indicates rejection of the linear dependence hypothesis at
%              the specified significance level; 0 otherwise
%  thresh    - chi-square threshold value
%  test_stat - Test statistic
% -------------------------------------------------------------------------
function [h, thresh, test_stat] = chi2franktest (x, y, alpha, mef)

    % Check arguments
    if (nargin < 2)
        error ('chi2franktest: usage [h, thresh, test_stat] = chi2franktest (x, y [, alpha, mef])');
    end

    if (~isvector (x) || ~isvector (y) || length (x) ~= length (y))
        error ('chi2franktest: x and y must be vectors of equal lengths');
    end
    x = x(:);
    y = y(:);

    % Default minimum expected frequency
    if (nargin < 4)
        mef = 1;
    end
    
    % Default significance level
    if (nargin < 3 || isempty (alpha))
        alpha = 0.05;
    end
    
    if (~isscalar (alpha) || alpha < 0 || alpha > 1)
        error ('chi2franktest: alpha must be a scalar in [0, 1]');
    end

    %% Generate contingency table
    cont = zeros (max (x) + 1, max (y) + 1);
    for i = 1:length (x)
        cont(x(i)+1, y(i)+1) = cont(x(i)+1, y(i)+1) + 1;
    end

    %% Copula fit
    % Apply a semiparametric model with empirical marginals
    marginfit = @discempfit;
    margincdf = @discempcdf;

    fit = marginfit ([x y]);
    % Parameterized margin
    margincdf_par = @(x)margincdf (x, fit);

    % Hack for deviating definition in GNU Octave
    if (exist ('kendall'))
        tau = kendall (x, y);
    else
        tau = corr (x, y, 'type', 'Kendall');
    end
    theta = frankparam (tau);

    %% Expected counts
    [x2, y2] = meshgrid (0:(size (cont, 1) - 1), 0:(size (cont, 2) - 1));
    econt = reshape (dmfrankpdf ([x2(:) y2(:)], margincdf_par, theta)', size (cont'))' .* length (x);

    % Apply ordered expected-frequencies procedure (see Loukas and Kemp 1986, The Statistician)
    [econt_array, ordering] = sort (econt(:), 'descend');
    cont_array = cont(ordering);
    % Group according to minimum expected frequency (MEF)
    econt_group = zeros (length (econt_array), 1);
    cont_group = econt_group;
    ig = 1;
    for i = 1:length (econt_array)
        econt_group(ig) = econt_group(ig) + econt_array(i);
        cont_group(ig) = cont_group(ig) + cont_array(i);
        if (econt_group(ig) >= mef)
            ig = ig + 1;
        end
    end

    econt_group = econt_group (1:ig);
    cont_group = cont_group (1:ig);
    
    % Compute test statistic for data
    test_stat = sum (sum (((cont_group - econt_group).^2) ./ econt_group));
    % Degrees of freedom; -1 for copula parameter
    df = (size (cont, 1) - 1) .* (size (cont, 2) - 1) - 1;
    thresh = chi2inv (alpha, df);
    h = test_stat < thresh;

end
