%% Classification of data set #1 and data set #3
% This example is from Section IV.C of Tabassum and Ollila (2019). Running
% this code reproduces the results for data set #1 (Isolet vowels)
% and data set #3 (Khan et al.) given in Table 1 of the paper referenced below.
%
% *Reference*:
%
% [1] M.N. Tabassum and E. Ollila (2019), "A Compressive Classification 
%   Framework for High-Dimensional Data," _Preprint_, Submitted for 
%   publication, Oct. 2019.  
%
% (c) E. Ollila and M.N. Tabassum, CompressiveRDA MATLAB toolbox.

%% Initialize 
clear; clc;

% Experiment settings
L = 10;                    % number of Monte Carlo splits into training / test sets
Q = 3;                     % number of CRDA approaches compared
cntrX             = true;  % forwarded to crda_create_data
use_uniform_prior = true;  % if true, the prior is replaced by a uniform one
print_info        = true;  % forwarded to crda_check_data; also enables per-split printing

% Per-split results: one row per split, one column per CRDA approach
TER = zeros(L,Q); % test error rate
FSR = zeros(L,Q); % feature selection rate
CT  = zeros(L,Q); % computation times

%% Load and check the data
dsname = 'IsoletVowels';
pt = 1/3; % fraction of observations used for training

% Uncomment these to compute the results for data set #3 
%dsname = 'khan2001';
%pt = 0.6; 

% The data file <dsname>.mat must provide the variables Xo and yo
load([dsname '.mat'], 'Xo', 'yo');
Xo = crda_check_data(Xo,yo,print_info);

yo = double(yo);
p  = size(Xo,1);  % number of rows of Xo (presumably the number of features - TODO confirm)
G  = max(yo);     % largest label value, used below as the number of classes

%% Start the simulations
rng('default'); % for reproducibility

% CRDA variants, listed in the column order of CT, TER and FSR:
%   crda1 : Ell1-RSCM, {K,q} = CV
%   crda2 : Ell2-RSCM, {K,q} = CV
%   crda3 : PSCM, K = Kub, q = CV
crda_methods = {'crda1','crda2','crda3'};

for mc = 1:L  % Simulation with L splits of data for training-test sets

    % New random split of the data; yt is compared against the predictions
    % below, so (yt,Xt) is presumably the test part and (y,X) the training
    % part - TODO confirm against crda_create_data.
    [yt,Xt,y,X,mu,prior] = crda_create_data(Xo,yo,pt,cntrX);

    if use_uniform_prior
        prior = (1/G)*ones(1,G);
    end
    Nt = length(yt);

    fprintf('Data-split# %d ...\n', mc);
    scurr = mc*1e3;  % split-specific seed shared by all methods

    for algo = 1:numel(crda_methods)
        % Reseed before every method so that all of them start from the
        % same random number generator state within a split.
        rng(scurr);
        tic;
        [yhat,~,~,K,~] = CRDA(Xt,X,y,'method',crda_methods{algo}, ...
                              'prior',prior,'mu',mu);
        CT(mc,algo)  = toc;
        % (:) on both sides keeps the comparison element-wise even if the
        % predicted and true labels differ in row/column orientation.
        TER(mc,algo) = nnz(yhat(:) ~= yt(:))/Nt;
        FSR(mc,algo) = K/p;

        if print_info
            fprintf('\tCRDA%d : {TER, FSR} = {%5.2f, %5.2f} | CT = %.2f\n', ...
                algo, 100*TER(mc,algo),100*FSR(mc,algo), CT(mc,algo));
        end
    end

end
    
%% Calculate the naive TER
% Error rate (in percent) obtained by always predicting the most frequent
% label of y, evaluated against yt.
% NOTE: y and yt are the variables left in the workspace by the last data
% split, so despite its name this value is not averaged over the splits.
% mode() returns the most frequent label itself (the smallest one on ties),
% so the result does not depend on how a histogram would bin the labels.
majority_label = mode(y(:));
avgNaiveTER = 100 * ( nnz(yt(:) ~= majority_label) / length(yt) );
    
%% Make a table 
% Average each column (one column per CRDA approach) over the splits.
% The dimension is given explicitly so that a single split (L = 1) still
% yields one value per approach instead of collapsing the row to a scalar.
avgFSR = 100*mean(FSR,1)';  % feature selection rate, in percent
avgTER = 100*mean(TER,1)';  % test error rate, in percent
avgCT  = mean(CT,1)';       % computation time as measured by tic/toc

fprintf('\nResults for %s dataset\n',dsname)
% No trailing semicolon: the table is meant to be displayed
table({'CRDA1','CRDA2','CRDA3'}',round(avgTER,2),round(avgFSR,2), ...
      round(avgCT,2), 'VariableNames',{'method','TER','FSR','CT'})
    
%% Make a bar plot
% Two panels in figure 1: average test error rate (left) and average
% feature selection rate (right), one bar per CRDA approach.
figure(1); clf;
names = categorical({'CRDA1','CRDA2','CRDA3'});

subplot(1,2,1)
bar(names,avgTER);
title('Test error rate');  % avgTER is the test error rate, not a training error
grid on;
set(gca,'FontSize',16,'LineWidth',1.3)

subplot(1,2,2);
bar(names,avgFSR);
title('Feature selection rate','FontSize',18);
grid on;
set(gca,'FontSize',16,'LineWidth',1.3)

