function [yt,Xt,y,X,mu,prior,trIdx,tstIdx] = crda_create_data(Xo,yo,pt,cntrX)
% CRDA_CREATE_DATA  Randomly split a labelled data set into training and
% test sets, class by class.
% CRDA MATLAB toolbox,  (c) E. Ollila and M.N Tabassum
%
% Inputs:
%   Xo     p x Nf data matrix; each column is one observation.
%   yo     vector of Nf class labels; used as row indices into
%          eye(max(yo)), so labels must be positive integers 1..G.
%   pt     proportion of each class that goes to the training set
%          (class-wise training size is round(nf*pt)).
%   cntrX  (optional, default false) if true, X and Xt are centered by
%          the grand mean of the training set and mu is recomputed from
%          the centered training data.
%
% Outputs:
%   yt     Nt x 1 class labels of the test set (grouped by class).
%   Xt     p x Nt test data, NaNs replaced by the class-wise mean of the
%          test observations.
%   y      N x 1 class labels of the training set (grouped by class).
%   X      p x N training data, NaNs replaced by the class-wise mean of
%          the training observations.
%   mu     p x G matrix of training class means.
%   prior  1 x G empirical class proportions in the training set.
%   trIdx  column indices of Xo used for training (same order as X).
%   tstIdx column indices of Xo used for testing (same order as Xt).
%
% An error is raised if any class would get zero training observations.

if nargin < 4
    cntrX = false;
end

G       = max(yo);              % number of classes
I       = eye(G);
% Yf is an Nf x G indicator matrix: Yf(i,j) = 1 iff sample i is in class j
Yf      = logical(I(yo, :));

[p,Nf]  = size(Xo);     % dimension and full sample length
% Sum along dim 1 explicitly so nf is always a 1 x G row of class counts
% (plain sum() would collapse a single-row Yf to a scalar).
nf      = sum(Yf,1);    % full sample length in each class
n       = round(nf*pt); % sample lengths in training sets

if any(n == 0)
    error('ERROR, not enough training data!!!');
end

N     = sum(n);
Nind  = [0 cumsum(n)];  % column offsets of each class block inside X
prior = n / N;          % empirical prior (probability) of classes

%% Generate class label vectors y and yt

ntst    = nf - n;                % sample lengths in test sets
Nt      = sum(ntst);
Nind2   = [0 cumsum(ntst)];      % column offsets of each class block in Xt

yt = zeros(Nt,1);
y  = zeros(N,1);
for ii=1:G
    yt((Nind2(ii)+1):Nind2(ii+1))=ii; % class labels for test set
    y((Nind(ii)+1):Nind(ii+1))=ii;    % class labels of training set
end

%% Generate training data set X and test set Xt

X  = zeros(p,N);
Xt = zeros(p,Nt);
mu = zeros(p,G);
onetoNf = 1:Nf;
trIdx  = cell(1,G);
tstIdx = cell(1,G);

for ii=1:G
    idx = onetoNf(Yf(:,ii));     % column indices of class ii in Xo
    % Sample POSITIONS within idx rather than passing idx to randsample
    % as the population: randsample(s,k) with a scalar s draws from 1:s,
    % so a class with a single member would otherwise yield an index
    % that may belong to a different class.
    i_tr  = idx(randsample(numel(idx),n(ii))); % indices (training)
    i_tst = setdiff(idx,i_tr);                 % indices (testing)
    trIdx{ii}  = i_tr;
    tstIdx{ii} = i_tst;

    %% Create the training set
    % Note: replace nan's in the training set with the mean values
    Xii = Xo(:,i_tr);
    ind = isnan(Xii);
    mu(:,ii)  = nanmean(Xii,2);              % class means
    tmp = repmat(mu(:,ii),1,numel(i_tr));
    Xii(ind) = tmp(ind);
    X(:,(Nind(ii)+1):Nind(ii+1)) = Xii;
    %% Create test data
    % replace nan's with corresponding mean values in the test set
    Xii = Xo(:,i_tst);
    ind = isnan(Xii);
    muii  = nanmean(Xii,2);              % class means (test part)
    tmp = repmat(muii,1,numel(i_tst));
    Xii(ind) = tmp(ind);
    Xt(:,(Nind2(ii)+1):Nind2(ii+1)) = Xii;% test data set
    %%
end

trIdx  = cell2mat(trIdx);
tstIdx = cell2mat(tstIdx);

if cntrX
    muX =  mean(X,2);   % Grand-mean of training set
    X = bsxfun(@minus, X, muX); % center the training set by its grand-mean
    Xt = bsxfun(@minus, Xt, muX); % center the test set by grand-mean of training set
    mu = X*logical(I(y,:)) ./ repmat(n,p,1); % p x G matrix of group centroids
end
