%% Load actual data

% Data map:
% [A1r1]	Positive
% [A1r2]	Negative
% [A1r3]	Good
% [A1r4]	Bad
% [A1r5]	Pleasant
% [A1r6]	Unpleasant
% [A1r7]	Happy
% [A1r8]	Sad
% [A1r9]	Afraid
% [A1r10]	Joyful
% [A1r11]	Angry
% [A1r12]	Contented

close all;
data_file = 'formr.csv';

% Import the CSV, reading the listed columns as single-precision numbers.
opts = detectImportOptions(data_file);
opts.VariableTypes([4:6 8:38 40:end]) = {'single'};
T_gameplay = readtable(data_file, opts);

% Keep only rows with a positive spane_acnh_positive score, and only the
% column ranges used below.
idx_acnh = T_gameplay.spane_acnh_positive > 0;
T_gameplay = T_gameplay(idx_acnh,[3:20 40:41 44:end]);

% Play time in hours (hours + minutes/60), then halved.
% NOTE(review): the division by 2 presumably converts a two-week total to
% hours per week -- confirm against the survey definition.
X_gp = T_gameplay.active_play_hours + T_gameplay.active_play_minutes / 60.0;
X_gp = X_gp / 2;

% Flag play-time outliers more than 6 SD from the mean. zscore() returns
% NaN for every element as soon as one input is NaN, which would silently
% disable this filter, so the mean and SD are computed ignoring NaNs.
% Rows whose play time is NaN are still kept, exactly as before.
mu_gp = mean(X_gp, 'omitnan');
sd_gp = std(X_gp, 'omitnan');
X_gpz = abs((X_gp - mu_gp) ./ sd_gp);
idx_rm = X_gpz > 6;

% pos_hdr = [{'GT4r1'},{'GT4r3'},{'GT4r5'},{'GT4r7'},{'GT4r10'},{'GT4r12'}];
% neg_hdr = [{'GT4r2'},{'GT4r3'},{'GT4r6'},{'GT4r8'},{'GT4r9'},{'GT4r11'}];
% pos_hdr = [{'spane_positive'},{'spane_good'},{'spane_pleasant'}, ...
%            {'spane_happy'},{'spane_joyful'},{'spane_contented'}];
% neg_hdr = [{'spane_negative'},{'spane_bad'},{'spane_unpleasant'}, ...
%            {'spane_sad'},{'spane_afraid'},{'spane_angry'}];
pos_hdr = {'spane_acnh_positive','spane_acnh_good','spane_acnh_pleasant', ...
           'spane_acnh_happy','spane_acnh_joyful','spane_acnh_contented'};
neg_hdr = {'spane_acnh_negative','spane_acnh_bad','spane_acnh_unpleasant', ...
           'spane_acnh_sad','spane_acnh_afraid','spane_acnh_angry'};

% Item scores as double matrices, one column per header.
Y_pos = double(T_gameplay{:, pos_hdr});
Y_neg = double(T_gameplay{:, neg_hdr});

% Drop any respondent with a NaN or zero-valued item on either scale.
idx_rm = idx_rm | any(isnan(Y_pos) | Y_pos == 0, 2) ...
                | any(isnan(Y_neg) | Y_neg == 0, 2);
Y_pos = Y_pos(~idx_rm,:);
Y_neg = Y_neg(~idx_rm,:);
X_gp = X_gp(~idx_rm);

% Score used below: mean positive item minus mean negative item.
Y_gp = mean(Y_pos,2) - mean(Y_neg,2);

% Doesn't match histograms/scatterplots in preprint?
figure, histogram(Y_gp, -6:.5:6);
figure, scatter(X_gp, Y_gp);

%% Demonstrate the influence of skewness on correlation/regression

close all;

% See: https://twitter.com/ShuhBillSkee/status/1328251696468926465?s=20

% Simulation sizes: N points in the main cluster plus a sparse group of
% Ns = pct_dp * N points (5%). outlier_threshold is the X value used when
% reporting the fraction of "large X" points.
pct_dp = 0.05;
N = 6011;
Ns = round(N * pct_dp);
outlier_threshold = 20;

% Main cluster: Gaussian with mX = 5, mY = 1.5, clamped to X >= 0 and
% Y in [-6, 6] (out-of-range draws are set to the bound, not redrawn).
mX = 5;
mY = 1.5;
sX = 7;
sY = 1.8;

X = max(mX + sX * randn(N, 1), 0);
Y = min(max(mY + sY * randn(N, 1), -6), 6);

% Sparse group with the same Y mean/SD as the main cluster but a wider,
% shifted X distribution.
mXs = 25;
sXs = 20;
mYs = mY;
sYs = sY;

% NOTE(review): draws above 100 are set to 0, not 100 -- confirm that this
% is intended rather than a clamp at the upper plot limit.
Xs = mXs + sXs * randn(Ns, 1);
Xs(Xs < 0 | Xs > 100) = 0;
Ys = min(max(mYs + sYs * randn(Ns, 1), -6), 6);

% Combined "no mean offset" data set
X0 = vertcat(X, Xs);
Y0 = vertcat(Y, Ys);

% Fraction of combined points whose X exceeds the threshold
pct_0 = mean(X0 > outlier_threshold);

% Figure that holds the 2x3 grid of regression panels
h = figure('Color', 'w');
h.Position(3:4) = [1200 700];

% Pearson correlation of the zero-offset data set
corrXY = corr(X0, Y0);

% Panel 1: regression on the raw X scale
subplot(2,3,1);
[hh, lms] = plot_regression(X0, Y0, 'Hours', 'Wellbeing', [.8 .8 .8], 50, [0 100], false);
ylim([-6 6]);
ax = gca;
set(ax, 'FontSize', 14);
fit_stats = lms{1};   % first fitted model returned by plot_regression
fprintf('Correlation: %1.3f, R^2=%1.3f\n', corrXY, fit_stats.Rsquared.Ordinary);
hh = title('No mean offset');
set(hh, 'FontSize', 16);
hold on;

% White, borderless patch drawn underneath the R^2 / p annotation
hh = fill([60 60 90 90], [-3.5 -5.5 -5.5 -3.5], 'w');
set(hh, 'LineStyle', 'none');
hh = text(70, -4, sprintf('R^2=%1.3f', fit_stats.Rsquared.Ordinary));
set(hh, 'FontSize', 13, 'FontWeight', 'bold');
hh = text(73, -5.1, sprintf('p=%1.3f', fit_stats.anova.pValue(1)));
set(hh, 'FontSize', 13, 'FontWeight', 'bold');

% Panel 4: same data with log-transformed X. log(0) = -Inf and logs of
% values below 1 are negative; both are set to 0.
X0_log = log(X0);
X0_log(X0_log<0) = 0;
corrXYlog = corr(X0_log, Y0);
subplot(2,3,4);
[hh, lms] = plot_regression(X0_log, Y0, 'Hours', 'Wellbeing', [.8 .8 .8], 50, [0 log(100)], false);
ax = gca;
ax.FontSize = 14;
ax.XLabel.String = 'log(Hours)';   % replace the 'Hours' label passed above
ylim([-6 6]);
% Report the correlation of the log-transformed data (corrXYlog), not the
% raw-scale corrXY.
fprintf('Correlation (log X): %1.3f, R^2=%1.3f\n', corrXYlog, lms{1}.Rsquared.Ordinary);
hh = title('Log transformed');
hh.FontSize = 16;
hold on;
% White, borderless patch drawn underneath the R^2 / p annotation
hh = fill([2.8 2.8 4.2 4.2], [-3.5 -5.5 -5.5 -3.5], 'w');
hh.LineStyle = 'none';
hh = text(3, -4, sprintf('R^2=%1.3f', lms{1}.Rsquared.Ordinary));
hh.FontSize = 13;
hh.FontWeight = 'bold';
hh = text(3.15, -5.1, sprintf('p=%1.3f', lms{1}.anova.pValue(1)));
hh.FontSize = 13;
hh.FontWeight = 'bold';

% Sparse group whose Y mean (mYup = 3) lies above the main cluster's mY
mXup = 25;
sXup = 25;
mYup = 3;
sYup = sY;

% NOTE(review): draws above 100 are set to 0, not 100 -- confirm intent.
Xup = mXup + sXup * randn(Ns, 1);
Xup(Xup < 0 | Xup > 100) = 0;
Yup = min(max(mYup + sYup * randn(Ns, 1), -6), 6);

X1 = vertcat(X, Xup);
Y1 = vertcat(Y, Yup);

% Fraction of combined points whose X exceeds the threshold
pct_up = mean(X1 > outlier_threshold);

% Panel 2: correlation and regression on the raw X scale
corrXYup = corr(X1, Y1);
subplot(2,3,2);
[hh, lms] = plot_regression(X1, Y1, 'Hours', 'Wellbeing', [.9 .2 .2], 50, [0 100], false);
lm_up = lms{1};   % kept for the heteroscedasticity section
ax = gca;
set(ax, 'FontSize', 14);
ylim([-6 6]);
fprintf('Correlation: %1.3f, R^2=%1.3f\n', corrXYup, lm_up.Rsquared.Ordinary);
h.Color = 'w';
hh = title(sprintf('?Y: %1.1f | %1.1f%% of points with X>%1.0f', mYup-mY, pct_up*100, outlier_threshold));
set(hh, 'FontSize', 16);
hold on;

% White, borderless patch drawn underneath the R^2 / p annotation
hh = fill([60 60 90 90], [-3.5 -5.5 -5.5 -3.5], 'w');
set(hh, 'LineStyle', 'none');
hh = text(70, -4, sprintf('R^2=%1.3f', lm_up.Rsquared.Ordinary));
set(hh, 'Color', 'k', 'FontSize', 13, 'FontWeight', 'bold');
hh = text(73, -5.1, sprintf('p=%1.3f', lm_up.anova.pValue(1)));
set(hh, 'FontSize', 13, 'FontWeight', 'bold');

% Panel 5: offset data with log-transformed X; -Inf and negative logs
% (X < 1) are replaced by 0.
X1_log = log(X1);
X1_log(X1_log < 0) = 0;
corrXYup_log = corr(X1_log, Y1);

subplot(2,3,5);
[hh, lms] = plot_regression(X1_log, Y1, 'Hours', 'Wellbeing', [.9 .2 .2], 50, [0 log(100)], false);
lm_uplog = lms{1};   % kept for the heteroscedasticity section
ax = gca;
set(ax, 'FontSize', 14);
ax.XLabel.String = 'log(Hours)';   % replace the 'Hours' label passed above
ylim([-6 6]);
fprintf('Correlation (log X): %1.3f, R^2=%1.3f\n', corrXYup_log, lm_uplog.Rsquared.Ordinary);
h.Color = 'w';
hh = title('Log transformed');
set(hh, 'FontSize', 16);
hold on;

% White, borderless patch drawn underneath the R^2 / p annotation
hh = fill([2.8 2.8 4.2 4.2], [-3.5 -5.5 -5.5 -3.5], 'w');
set(hh, 'LineStyle', 'none');
hh = text(3, -4, sprintf('R^2=%1.3f', lm_uplog.Rsquared.Ordinary));
set(hh, 'FontSize', 13, 'FontWeight', 'bold');
hh = text(3.15, -5.1, sprintf('p=%1.3f', lm_uplog.anova.pValue(1)));
set(hh, 'FontSize', 13, 'FontWeight', 'bold');

% Add sparse points below mY
% mXdown = 25;
% sXdown = 25;
% Xdown = mXdown + sXdown * randn(Ns, 1);
% Xdown(Xdown<0) = 0; Xdown(Xdown>100) = 00;
% mYdown = 0;
% sYdown = sY;
% Ydown = mYdown + sYdown * randn(Ns, 1);
% Ydown(Ydown<-6) = -6; Ydown(Ydown>6) = 6;
% 
% X2 = [X; Xdown];
% Y2 = [Y; Ydown];
% 
% pct_down = sum(X1 > outlier_threshold) / length(X1);
% 
% % Correlation
% corrXYdown = corr(X2, Y2);
% subplot(2,3,3);
% [hh, lms] = plot_regression(X2, Y2, 'Hours', 'Wellbeing', [.2 .2 .9], 50, [0 100], false);
% ylim([-6 6]);
% ax = gca;
% ax.FontSize = 14;
% fprintf('Correlation: %1.3f, R^2=%1.3f\n', corrXYdown, lms{1}.Rsquared.Ordinary);
% h.Color = 'w';
% hh = title(sprintf('?Y: %1.1f | %1.1f%% of points with X>%1.0f', mYdown-mY, pct_down*100, outlier_threshold));
% hh.FontSize = 16;
% hold on;
% hh = fill([60 60 90 90], [-3.5 -5.5 -5.5 -3.5], 'w');
% hh.LineStyle = 'none';
% hh = text(70, -4, sprintf('R^2=%1.3f', lms{1}.Rsquared.Ordinary));
% hh.FontSize = 13;
% hh.FontWeight = 'bold';
% hh = text(73, -5.1, sprintf('p=%1.3f', lms{1}.anova.pValue(1)));
% hh.FontSize = 13;
% hh.FontWeight = 'bold';
% 
% X2_log = log(X2);
% X2_log(X2_log<0) = 0;
% corrXYdown_log = corr(X2_log, Y1);
% subplot(2,3,6);
% [hh, lms] = plot_regression(X2_log, Y2, 'Hours', 'Wellbeing', [.2 .2 .9], 50, [0 log(100)], false);
% ax = gca;
% ax.FontSize = 14;
% ax.XLabel.String = 'log(Hours)';
% ylim([-6 6]);
% fprintf('Correlation (log X): %1.3f, R^2=%1.3f\n', corrXYdown_log, lms{1}.Rsquared.Ordinary);
% h.Color = 'w';
% hh = title('Log transformed');
% hh.FontSize = 16;
% hold on;
% hh = fill([2.8 2.8 4.2 4.2], [-3.5 -5.5 -5.5 -3.5], 'w');
% hh.LineStyle = 'none';
% hh = text(3, -4, sprintf('R^2=%1.3f', lms{1}.Rsquared.Ordinary));
% hh.FontSize = 13;
% hh.FontWeight = 'bold';
% hh = text(3.15, -5.1, sprintf('p=%1.3f', lms{1}.anova.pValue(1)));
% hh.FontSize = 13;
% hh.FontWeight = 'bold';

% Write the regression-panel figure to disk as SVG and PNG.
% (A dangling, incomplete `X_gp = ` assignment used to sit here; it was a
% syntax error that prevented the whole script from parsing.)
saveas(h, 'log_transformed.svg');
saveas(h, 'log_transformed.png');

%% Schematic version

% Colours for the main cluster and the sparse (skewing) group, and the
% marker size used for the group means (mrk_size is also read by the
% heteroscedasticity section).
clr_main = [.2 .2 .9];
clr_skew = [.1 .6 .1];
mrk_size = 100;

h = figure('Color', 'w');
h.Position(3:4) = [700 500];

% Raw points of both groups, drawn in lightened colours
hh = scatter(X, Y, 50, lighten_colour(clr_main,0.5), 'filled');
hold on;
hh = scatter(Xup, Yup, 50, lighten_colour(clr_skew,0.5), 'filled');

% Ellipses centred on each group's mean with extents of twice the SDs
hh = plot_ellipse(mX, mY, sX*2, sY*2, clr_main, 0.4);
set(hh, 'LineWidth', 2);

hh = plot_ellipse(mXup, mYup, sXup*2, sYup*2, clr_skew, 0.4);
set(hh, 'LineWidth', 2);

% Group means as solid markers
hh = scatter(mX, mY, mrk_size, clr_main, 'filled');
hh = scatter(mXup, mYup, mrk_size, clr_skew, 'filled');

xlim([0 100]);
ylim([-6 6]);

hh = title(sprintf('Simulating skewed data (%1.0f%%)', pct_dp*100));
set(hh, 'FontSize', 20);

ax = gca;
set(ax, 'FontSize', 14);

hh = xlabel('Hours');
set(hh, 'FontSize', 16);

hh = ylabel('Wellbeing');
set(hh, 'FontSize', 16);

box on;

% Export as SVG and PNG
saveas(h, 'schematic.svg');
saveas(h, 'schematic.png');

%% Heteroscedasticity
% For the raw-scale model (lm_up) and the log-X model (lm_uplog), plot the
% standardized residuals against the z-scored fitted values (top row) and
% a 500-point moving variance of those residuals, ordered by fitted value
% (bottom row). Column 1 is the raw model, column 2 the log model.

h = figure;
h.Position(3:4) = [1000 750];
h.Color = 'w';

panel_models = {lm_up, lm_uplog};
panel_titles = {sprintf('?Y: %1.1f | %1.1f%% of points with X>%1.0f', mYup-mY, pct_up*100, outlier_threshold), ...
                'Log-transformed'};
panel_title_sizes = [17 16];

for col = 1 : 2

    mdl = panel_models{col};
    Xlm = zscore(mdl.predict);
    Ylm = mdl.Residuals.Standardized;

    % Top row: residuals vs. fitted values
    subplot(2,2,col);
    scatter(Xlm, Ylm, mrk_size, [.9 .2 .2]);
    ax = gca;
    set(ax, 'FontSize', 13);
    hh = xlabel('Predicted Well-being');
    set(hh, 'FontSize', 15);
    hh = ylabel('Residual error');
    set(hh, 'FontSize', 15);
    hh = title(panel_titles{col});
    set(hh, 'FontSize', panel_title_sizes(col));
    box on;

    % Bottom row: residual variance in a sliding window of 500 points,
    % after sorting the residuals by fitted value
    [Xlm, idx] = sort(Xlm);
    Ylm = Ylm(idx);
    Slm = movvar(Ylm,500);

    subplot(2,2,col+2);
    hh = plot(Xlm, Slm);
    ylim([0.7 1.3]);
    set(hh, 'LineWidth', 2);
    ax = gca;
    set(ax, 'FontSize', 13);
    hh = xlabel('Predicted Well-being');
    set(hh, 'FontSize', 15);
    hh = ylabel('Residual Variability');
    set(hh, 'FontSize', 15);
    hh = title('Variability vs. Predicted');
    set(hh, 'FontSize', 17);
    box on;

end

% Export as SVG and PNG
saveas(h, 'homoscedasticity.svg');
saveas(h, 'homoscedasticity.png');


%% Test influence of percentage
% For each sparse-group size (0-30% of N, in 1% steps) draw N_itr fresh
% sparse groups, append them to the fixed main cluster (X, Y) and record
% correlation, R^2 and p-value for the raw and the log-transformed X.

N_itr = 50;

pct = 0.0:0.01:0.3;
n_pct = length(pct);

[corrs, R2s, Pval] = deal(zeros(n_pct, N_itr));
[corrs_log, R2s_log, Pval_log] = deal(zeros(n_pct, N_itr));

% Fraction of points with X above outlier_threshold, per run
pct_gt30 = zeros(n_pct, N_itr);

% Settings that do not change between runs
mXup = 25;
sXup = 25;
mYup = 3;
sYup = sY;
% NOTE(review): in Wilkinson notation the response is on the left, so this
% fits X on Y. R^2 and the p-value of a one-predictor fit are the same in
% either direction.
model = 'X~Y';

for ip = 1 : n_pct

    Ns = round(N * pct(ip));

    for it = 1 : N_itr

        % Fresh sparse group (X draw first, then Y draw)
        Xup = mXup + sXup * randn(Ns, 1);
        Xup(Xup < 0 | Xup > 100) = 0;
        Yup = min(max(mYup + sYup * randn(Ns, 1), -6), 6);

        X1 = vertcat(X, Xup);
        Y1 = vertcat(Y, Yup);

        pct_gt30(ip,it) = mean(X1 > outlier_threshold);

        % Raw scale
        data = array2table([X1,Y1],'VariableNames',{'X','Y'});
        lm = fitlm(data, model);

        corrs(ip,it) = corr(X1,Y1);
        R2s(ip,it) = lm.Rsquared.Ordinary;
        Pval(ip,it) = lm.anova.pValue(1);

        % Log transformed (X1 is overwritten; -Inf/negative logs become 0)
        X1 = log(X1);
        X1(X1 < 0) = 0;
        data = array2table([X1,Y1],'VariableNames',{'X','Y'});
        lm = fitlm(data, model);

        corrs_log(ip,it) = corr(X1,Y1);
        R2s_log(ip,it) = lm.Rsquared.Ordinary;
        Pval_log(ip,it) = lm.anova.pValue(1);

    end

end

%% Plot results
% Mean R^2 (right axis, solid) and mean p-value (left axis, dashed) across
% runs, against the mean percentage of points above outlier_threshold.
% Left panel: raw X; right panel: log-transformed X.

h = figure;
h.Color = 'w';
h.Position(3:4) = [900 400];

pct100 = mean(pct_gt30,2)*100;

panel_R2 = {R2s, R2s_log};
panel_P = {Pval, Pval_log};
panel_names = {'Original data', 'Log-transformed data'};

for k = 1 : 2

    subplot(1,2,k);

    yyaxis right;
    hh = plot(pct100, mean(panel_R2{k},2));
    set(hh, 'LineWidth', 2);
    ylim([0 0.05]);

    hold on;
    yyaxis left;
    hh = plot(pct100, mean(panel_P{k},2), '--');
    set(hh, 'LineWidth', 2);
    ylim([0 0.2]);

    hh = legend({'P','R^2'});
    set(hh, 'Location', 'northeast');

    ax = gca;
    set(ax, 'FontSize', 14);

    hh = xlabel(sprintf('%% Data Points > %1.0f', outlier_threshold));
    set(hh, 'FontSize', 15);

    hh = title(panel_names{k});
    set(hh, 'FontSize', 16);

end

hh = suptitle('Associations driven by % of sparse data points');
set(hh, 'FontSize', 20);

% Export as SVG and PNG
saveas(h, 'pct_influence.svg');
saveas(h, 'pct_influence.png');

%% Test influence of Y offset
% For each Y offset of the sparse group (0 to 2 in steps of 0.1) draw
% N_itr fresh sparse groups of size pct_dp * N, append them to the fixed
% main cluster (X, Y) and record correlation, R^2 and p-value for the raw
% and the log-transformed X.

N_itr = 50;

offset = 0:0.1:2;
n_off = length(offset);

corrs = zeros(n_off,N_itr);
R2s = zeros(n_off,N_itr);
Pval = zeros(n_off,N_itr);

corrs_log = zeros(n_off,N_itr);
R2s_log = zeros(n_off,N_itr);
Pval_log = zeros(n_off,N_itr);

% One row per offset level. Sizing this by length(pct) left ten all-zero
% rows that were averaged into pct_mean in the plotting section.
pct_gt30 = zeros(n_off,N_itr);

% Settings that do not change between runs
Ns = round(N * pct_dp);
mXup = 25;
sXup = 25;
sYup = sY;
% NOTE(review): in Wilkinson notation the response is on the left, so this
% fits X on Y. R^2 and the p-value of a one-predictor fit are the same in
% either direction.
model = 'X~Y';

for i = 1 : n_off

    mYup = mY + offset(i);

    for j = 1 : N_itr

        % Fresh sparse group (X draw first, then Y draw)
        Xup = mXup + sXup * randn(Ns, 1);
        Xup(Xup<0) = 0; Xup(Xup>100) = 0;
        Yup = mYup + sYup * randn(Ns, 1);
        Yup(Yup<-6) = -6; Yup(Yup>6) = 6;

        X1 = [X; Xup];
        Y1 = [Y; Yup];

        pct_gt30(i,j) = sum(X1 > outlier_threshold) / length(X1);

        % Raw scale
        data = array2table([X1,Y1],'VariableNames',{'X','Y'});
        lm = fitlm(data, model);

        corrs(i,j) = corr(X1,Y1);
        R2s(i,j) = lm.Rsquared.Ordinary;
        Pval(i,j) = lm.anova.pValue(1);

        % Log transformed (X1 is overwritten; -Inf/negative logs become 0)
        X1 = log(X1);
        X1(X1<0)=0;
        data = array2table([X1,Y1],'VariableNames',{'X','Y'});
        lm = fitlm(data, model);

        corrs_log(i,j) = corr(X1,Y1);
        R2s_log(i,j) = lm.Rsquared.Ordinary;
        Pval_log(i,j) = lm.anova.pValue(1);

    end

end

%% Plot results
% Mean R^2 (right axis, solid) and mean p-value (left axis, dashed) across
% runs, against the Y offset of the sparse group.
% Left panel: raw X; right panel: log-transformed X.

h = figure;
h.Color = 'w';
h.Position(3:4) = [900 400];

% Overall mean fraction of points above outlier_threshold (not plotted)
pct_mean = mean(pct_gt30(:));

panel_R2 = {R2s, R2s_log};
panel_P = {Pval, Pval_log};
panel_names = {'Original data', 'Log-transformed data'};

for k = 1 : 2

    subplot(1,2,k);

    yyaxis right;
    hh = plot(offset, mean(panel_R2{k},2));
    set(hh, 'LineWidth', 2);
    ylim([0 0.05]);

    hold on;
    yyaxis left;
    hh = plot(offset, mean(panel_P{k},2), '--');
    set(hh, 'LineWidth', 2);
    ylim([0 0.2]);

    hh = legend({'P','R^2'});
    set(hh, 'Location', 'northeast');

    ax = gca;
    set(ax, 'FontSize', 14);

    hh = xlabel('Offset (Y)');
    set(hh, 'FontSize', 15);

    hh = title(panel_names{k});
    set(hh, 'FontSize', 16);

end

hh = suptitle(sprintf('Associations driven by offset of %1.0f%% of data points', pct_dp*100));
set(hh, 'FontSize', 20);

% Export as SVG and PNG
saveas(h, 'offset_influence.svg');
saveas(h, 'offset_influence.png');


%% Cook's distance
% Leave-one-out influence: for N_itr simulated data sets, refit the model
% with each point removed and record (a) the absolute change in R^2 and
% (b) the summed squared change in fitted values scaled by the residual
% variance, for both the raw and the log-transformed X.
%
% NOTE(review): 'X~Y' makes X the response (Wilkinson notation is
% response ~ predictors), so lm.predict returns fitted X values even
% though the results are stored in deltaYpred -- confirm the intended
% direction.
% NOTE(review): textbook Cook's distance sums over all n fitted values and
% divides by p * RSS/(n-p); here the left-out point is excluded from the
% sum, there is no division by p, and the variance uses n-1. LinearModel
% also exposes Diagnostics.CooksDistance without refitting.

N_itr = 10;
Ns = round(N * pct_dp);
n_pts = N + Ns;   % rows in X1/Y1; does not rely on a stale X1 in the workspace

delta_R2 = zeros(n_pts,N_itr);
delta_R2_log = zeros(n_pts,N_itr);
deltaYpred = zeros(n_pts,N_itr);
deltaYpred_log = zeros(n_pts,N_itr);
Xs = zeros(n_pts,N_itr);
Xs_log = zeros(n_pts,N_itr);

% Settings that do not change between runs
mXup = 25;
sXup = 25;
mYup = mY + 1.5;
sYup = sY;
model = 'X~Y';

for j = 1 : N_itr

    % Fresh sparse group (X draw first, then Y draw)
    Xup = mXup + sXup * randn(Ns, 1);
    Xup(Xup<0) = 0; Xup(Xup>100) = 0;
    Yup = mYup + sYup * randn(Ns, 1);
    Yup(Yup<-6) = -6; Yup(Yup>6) = 6;

    X1 = [X; Xup];
    Y1 = [Y; Yup];

    % Full-data fit on the raw scale
    data = array2table([X1,Y1],'VariableNames',{'X','Y'});
    lm = fitlm(data, model);
    R2 = lm.Rsquared.Ordinary;
    si = lm.Residuals.Raw' * lm.Residuals.Raw;
    si = si / (length(X1)-1);
    Ypred_full = lm.predict;          % hoisted: identical for every left-out point

    % Full-data fit on the log scale (-Inf/negative logs become 0)
    X1_log = log(X1);
    X1_log(X1_log<0)=0;
    data = array2table([X1_log,Y1],'VariableNames',{'X','Y'});
    lm_log = fitlm(data, model);
    R2_log = lm_log.Rsquared.Ordinary;
    si_log = lm_log.Residuals.Raw' * lm_log.Residuals.Raw;
    si_log = si_log / (length(X1)-1);
    Ypred_full_log = lm_log.predict;  % hoisted: identical for every left-out point

    % X coordinate of every point in this run, for plotting
    Xs(:,j) = X1;
    Xs_log(:,j) = X1_log;

    for i = 1 : n_pts

        % Data set with point i removed
        X1i = X1;
        X1i(i)=[];
        Y1i = Y1;
        Y1i(i)=[];

        data = array2table([X1i,Y1i],'VariableNames',{'X','Y'});
        lmi = fitlm(data, model);

        delta_R2(i,j) = abs(R2 - lmi.Rsquared.Ordinary);
        Ypred = Ypred_full;
        Ypred(i) = [];
        deltaYpred(i,j) = sum((Ypred - lmi.predict).^2) / si;

        % Log transformed
        X1i_log = X1_log;
        X1i_log(i) = [];
        data = array2table([X1i_log,Y1i],'VariableNames',{'X','Y'});
        lmi = fitlm(data, model);

        delta_R2_log(i,j) = abs(R2_log - lmi.Rsquared.Ordinary);
        Ypred = Ypred_full_log;
        Ypred(i) = [];
        deltaYpred_log(i,j) = sum((Ypred - lmi.predict).^2) / si_log;

    end

end

%% Plot Cook's distances
% 2x2 grid of per-point influence against raw X (Xs). Left column: raw
% model; right column: log-X model. Top row: change in R^2; bottom row:
% scaled change in fitted values.
% NOTE(review): the log-model panels are also plotted against raw Xs (and
% labelled 'Hours'); Xs_log is not used here -- confirm this is intended.

h = figure;
h.Color = 'w';
h.Position(3:4) = [1000 800];

clr_orig = [0.2 0.2 0.9];
clr_log = [0.9 0.2 0.2];

% Columns: subplot index, data, colour, y-limits, title, y-label.
% Rows are in drawing order (1, 3, 2, 4).
panels = { ...
    1, delta_R2,       clr_orig, [0 6e-4], 'Original data: ?R^2',               '?R^2'; ...
    3, deltaYpred,     clr_orig, [0 0.07], 'Original data: Cook''s Distance',   'Cook''s Distance (D)'; ...
    2, delta_R2_log,   clr_log,  [0 6e-4], 'Log-transformed: ?R^2',             '?R^2'; ...
    4, deltaYpred_log, clr_log,  [0 0.07], 'Log-transformed: Cook''s Distance', 'Cook''s Distance (D)'};

Xck = Xs(:);

for k = 1 : size(panels,1)

    subplot(2,2,panels{k,1});
    Yck = panels{k,2}(:);
    hh = scatter(Xck, Yck, 50, panels{k,3});
    ylim(panels{k,4});
    hh = title(panels{k,5});
    set(hh, 'FontSize', 16);
    ax = gca;
    set(ax, 'FontSize', 14);
    hh = xlabel('Hours');
    set(hh, 'FontSize', 16);
    hh = ylabel(panels{k,6});
    set(hh, 'FontSize', 16);
    box on;

end


hh = suptitle('Influence by data point (Leave-one out)');
set(hh, 'FontSize', 20);

% Export as SVG and PNG
saveas(h, 'pointwise_influence.svg');
saveas(h, 'pointwise_influence.png');
