function CVerr = ar_cvglmnet(x,y,nfolds,foldid,type,family,options,verbous)
% AR_CVGLMNET  Cross-validate a glmnet model along its lambda path.
%
% A glmnet model is first fitted to all observations; the lambda sequence
% of that fit is then reused for every fold so that errors are comparable
% across folds. Parameter tuning should be done for one fixed alpha at a
% time: lambda values need not correspond across alpha values, since each
% cross-validation curve should be inspected separately. For example, to
% tune both parameters, call this function once per alpha in 0:0.1:1; the
% lambda sequence is then chosen for that specific alpha.
%
% Call: CVerr = ar_cvglmnet(x,y,nfolds,foldid,type,family,options,verbous)
%
% Example:
%   x  = randn(100,2000);
%   y  = randn(100,1);
%   g2 = randsample(2,100,true);
%   CVerr = ar_cvglmnet(x,y,100,[],'response','gaussian',glmnetSet,1);
%   CVerr = ar_cvglmnet(x,g2,100,[],'response','binomial',glmnetSet,1);
%
% Inputs:
%   x         : Covariates, one observation per row.
%   y         : Response. For family 'binomial' the loss below treats the
%               two classes as the values 1 and 2.
%   nfolds    : Number of cross-validation folds (nfolds = size(x,1) gives
%               leave-one-out cross-validation).
%   foldid    : Fold label per observation, or [] to draw a random balanced
%               assignment. Labels 1..nfolds are cross-validation folds;
%               any labels above nfolds are treated as held-out test folds.
%   type      : Passed to glmnetPredict. The loss is defined for any type
%               with family 'gaussian', and for 'response' or 'class' with
%               family 'binomial'.
%   family    : Passed to glmnet. 'gaussian' and 'binomial' are supported;
%               'multinomial' is not implemented.
%   options   : glmnet options struct, see glmnetSet().
%   verbous   : If true, report progress per fold and plot the result with
%               cvglmnetPlot.
%
% Output struct CVerr:
%   cvfit         : glmnet fit of every fold (test folds appended last)
%   test_idx      : logical mask of test-fold observations; all true when
%                   no test folds were supplied
%   nfolds, foldid: fold count and the fold assignment actually used
%   predmat       : out-of-fold predictions, observations x lambdas
%   beta_stats    : across-fold coefficient statistics (get_beta_stats)
%   R2            : squared correlation between y and the predictions per
%                   lambda (test observations only when test folds exist)
%   cvm, stderr   : mean loss per lambda and its standard error
%   cvlo, cvup    : cvm -/+ stderr
%   lambda_min    : largest lambda attaining the minimum cvm
%   lambda_1se    : largest lambda whose cvm is below cvup at lambda_min
%   glmnetOptions : options including the lambda sequence that was used
%   glmnet_object : the fit on all observations
%
% Written by Bjørn Skovlund Dissing (27-02-2010)
%
% Modified Andrew Reid 2011:
% Accumulate beta statistics across folds
%

% Fit on all observations to fix the lambda sequence used by every fold.
glmnet_object = glmnet(x, y, family, options);
options.lambda = glmnet_object.lambda;
options.nlambda = length(options.lambda);
N = size(x,1);
N_lambda = length(options.lambda);

nfolds_test = 0;
if isempty(foldid)
    % Balanced random assignment of the N observations to nfolds folds.
    foldid = randsample([repmat(1:nfolds,1,floor(N/nfolds)) 1:mod(N,nfolds)],N);
elseif nfolds < max(foldid)
    % Fold labels above nfolds form the test set.
    nfolds_test = max(foldid) - nfolds;
end
% A single test fold counts as a test set too. The previous "> 1" check
% silently ignored it and left in-sample predictions in its rows.
has_test = nfolds_test > 0;

% Allocate predmat with the right shape; rows are overwritten fold by fold.
predmat = glmnetPredict(glmnet_object, type, x, options.lambda);

B = size(glmnet_object.beta,2);
M = size(x,2);
% NOTE(review): assumes every fold fit returns coefficients for all B
% lambdas; confirm glmnet never truncates a user-supplied path.
betas = zeros(nfolds,M,B);

for i = 1:nfolds
    which = foldid==i;
    if verbous, disp(['Fitting fold # ' num2str(i) ' of ' num2str(nfolds)]);end
    cvfit = glmnet(x(~which,:), y(~which), family, options);
    CVerr.cvfit(i) = cvfit;
    betas(i,:,:) = cvfit.beta;
    predmat(which,:) = glmnetPredict(cvfit, type, x(which,:), options.lambda);
end

% Test folds: predicted out-of-fold in the same way, but their coefficients
% are not included in the beta statistics.
if has_test
    test_idx = false(N,1);
    for i = 1:nfolds_test
        which = foldid==i+nfolds;
        cvfit = glmnet(x(~which,:), y(~which), family, options);
        CVerr.cvfit(i+nfolds) = cvfit;
        predmat(which,:) = glmnetPredict(cvfit, type, x(which,:), options.lambda);
        test_idx(which) = true;
    end
else
    test_idx = true(N,1);
end
CVerr.test_idx = test_idx;

CVerr.nfolds = nfolds;
CVerr.foldid = foldid;
CVerr.predmat = predmat;
CVerr.beta_stats = get_beta_stats(betas);

% Squared correlation per lambda. test_idx selects the test observations
% when they exist and every observation otherwise.
CVerr.R2 = zeros(N_lambda,1);
for l = 1:N_lambda
    r = corrcoef(y(test_idx), predmat(test_idx,l));
    CVerr.R2(l) = r(1,2)^2;
end

% Per-observation loss for every lambda.
yy = repmat(y,1,N_lambda);
if strcmp(family,'gaussian')
    cvraw = (yy-predmat).^2;
elseif strcmp(family,'binomial')
    if strcmp(type,'response')
        % Binomial deviance with classes coded as 1 and 2.
        cvraw = -2*((yy==2).*log(predmat)+(yy==1).*log(1-predmat));
    elseif strcmp(type,'class')
        % Misclassification indicator.
        cvraw = double(yy~=predmat);
    else
        error('Unsupported type ''%s'' for family ''binomial''; use ''response'' or ''class''.', type);
    end
elseif strcmp(family,'multinomial')
    error('Not implemented yet')
else
    error('Unsupported family ''%s''; use ''gaussian'' or ''binomial''.', family);
end

CVerr.cvm = mean(cvraw);
CVerr.stderr = sqrt(var(cvraw)/N);
CVerr.cvlo = CVerr.cvm-CVerr.stderr;
CVerr.cvup = CVerr.cvm+CVerr.stderr;
% If there are several minima, choose the largest lambda of the smallest cvm.
CVerr.lambda_min = max(options.lambda(CVerr.cvm<=min(CVerr.cvm)));
% Upper error bound (cvm + stderr) at lambda_min.
semin = CVerr.cvup(options.lambda==CVerr.lambda_min);
% Largest lambda whose mean error is still below that bound. Any model in
% this interval is indistinguishable from the minimum given its
% uncertainty, so this picks the most regularised of them.
CVerr.lambda_1se = max(options.lambda(CVerr.cvm<semin));

CVerr.glmnetOptions = options;
CVerr.glmnet_object = glmnet_object;
if verbous, cvglmnetPlot(CVerr);end
end


function [ stats ] = get_beta_stats( betas )
% GET_BETA_STATS  Summarise coefficient estimates across folds.
%
% Input:
%   betas : nfolds x M x B array (fold, coefficient, lambda).
%
% Output struct, every field M x B:
%   mean : mean of each coefficient over the folds
%   std  : sample standard deviation (N-1 normalisation) over the folds
%   h    : ttest decision for "mean differs from zero"
%   p    : p-value of that test
%   t    : t statistic of that test
% h, p and t stay 0 wherever ttest returns NaN for h.

    n_coef   = size(betas,2);
    n_lambda = size(betas,3);

    % Statistics are taken across the first dimension (folds). reshape is
    % used instead of squeeze so the result is M x B even when M or B is 1.
    stats.mean = reshape(mean(betas,1), n_coef, n_lambda);
    % std's second argument is the normalisation flag, not the dimension,
    % so the dimension has to be given as the third argument.
    stats.std  = reshape(std(betas,0,1), n_coef, n_lambda);

    stats.h = zeros(n_coef,n_lambda);
    stats.t = zeros(n_coef,n_lambda);
    stats.p = zeros(n_coef,n_lambda);
    for i = 1 : n_coef
        for j = 1 : n_lambda
            x = betas(:,i,j);
            [h,p,~,st] = ttest(x);
            if ~isnan(h)
                stats.h(i,j) = h;
                stats.p(i,j) = p;
                stats.t(i,j) = st.tstat;
            end
        end
    end
end