function PCGMat=gen_PCGdata
%GEN_PCGDATA Assign every member one primary condition group (PCG) per year.
%
%   PCGMat = gen_PCGdata loads heritage_data_new.mat, collects every member
%   ID that appears in the first column of any of the seven tables, and for
%   each member and each year t = 1..3 selects the PCG code that occurs most
%   often among that member's claims in year t.
%
%   Rules:
%     * NaN PCG codes in the claims table are recoded to 46 before counting.
%     * If several codes are equally frequent, the smallest code is chosen.
%     * A member without any claim in year t receives max(PCG code) + 1.
%
%   Output:
%     PCGMat - N-by-4 double matrix [MemberID, PCG_Y1, PCG_Y2, PCG_Y3],
%              one row per member ID, sorted ascending by member ID.
%
%   Side effect:
%     Writes the variable PCGMat to PCGdata.mat in the current folder.

%column layout of the claims table
memberCol = 1;      %member ID (first column of every table)
yearCol   = 5;      %year of the claim, coded 1..3
claimscol = 11;     %PrimaryConditionGroup

missingPCG = 46;    %replacement code for NaN PrimaryConditionGroup entries
nYears     = 3;
nTables    = 7;

%load data (only the variable that is actually needed)
S = load('heritage_data_new.mat','heritage_new_matrix');
tables = S.heritage_new_matrix.tabledata;

%table 1 holds the claims
Claims_table = tables{1}.coldata;

%member IDs from all tables (unique sorts ascending and removes duplicates)
idParts = cell(nTables,1);
for k=1:nTables
    idParts{k} = tables{k}.coldata(:,memberCol);
end
ALL_Members = unique(vertcat(idParts{:}));
N_ALLMemb = length(ALL_Members);

%PCG code of every claim, NaN recoded to missingPCG
pcgCodes = Claims_table(:,claimscol);
pcgCodes(isnan(pcgCodes)) = missingPCG;

%code given to members without claims in a year: one above the largest code
noClaimsPCG = max(pcgCodes)+1;

%map every claim to the row of its member in ALL_Members, done once instead
%of rescanning the whole claims table for each member; claims whose member
%ID is NaN match nothing and are left out
[isKnown,memberRow] = ismember(Claims_table(:,memberCol),ALL_Members);

%-------------------------- PCG per member and year -----------------------
disp('-------------------------- Loop over Members -----------------------------')
tic
PCGMat = zeros(N_ALLMemb,nYears+1);
PCGMat(:,1) = ALL_Members;
for t=1:nYears
    %claims that belong to year t
    inYear = isKnown & (Claims_table(:,yearCol)==t);
    %most frequent PCG code per member; mode returns the smallest code on
    %ties, members without claims in year t get noClaimsPCG as fill value
    PCGMat(:,t+1) = accumarray(memberRow(inYear),pcgCodes(inYear), ...
                               [N_ALLMemb 1],@mode,noClaimsPCG);
end%for t=1:nYears
toc

%save group data (only PCGMat is stored)
save('PCGdata.mat','PCGMat');