%% data92rfp.m
%Loads the data in data92rfp.csv, creates labeled variables from the columns, and saves the resulting data set.

%% Preliminaries
% In this section, we open the file, read the variable names from the
% header row, use the first data row to determine which variables are
% quoted strings, count the observations (data rows), and preallocate one
% workspace variable per column.
%
% Left in the workspace for the following sections:
%   f1        - file identifier of the open CSV file
%   nvars     - number of columns
%   varnames  - nvars-by-1 cell array of column names
%   datastart - file position of the first data row
%   stringvar - nvars-by-1 vector, 1 where the column holds quoted strings
%   nrows     - number of data rows

f1=fopen('data92rfp.csv','r');
if f1<0
    error('data92rfp:openFailed','Could not open data92rfp.csv for reading.');
end

%Read variable names from the first row. Every comma in the header is taken
%to be a delimiter, so the number of variables is the comma count plus one.
tline=fgetl(f1);
if ~ischar(tline)
    fclose(f1);
    error('data92rfp:emptyFile','data92rfp.csv contains no header row.');
end
commas=regexp(tline,',');
nvars=length(commas)+1;
namebounds=[0 commas length(tline)+1]; %Sentinels let the first and last names share the loop.
varnames=cell(nvars,1);
for i=1:nvars
    varnames{i}=tline(namebounds(i)+1:namebounds(i+1)-1);
end

%Record the current file position for later use.
datastart=ftell(f1);

%Read a sample line and use it to determine which of the variables are
%strings. A field counts as a string when it begins with a double quote.
%Note, not all commas are delimiters: a comma is a delimiter only when an
%even number of double quotes precedes it, i.e. when it lies outside a
%quoted string.
tline=fgetl(f1);
if ~ischar(tline)
    fclose(f1);
    error('data92rfp:noData','data92rfp.csv contains no data rows.');
end
outsidequotes=mod(cumsum(double(tline=='"')),2)==0;
commas=find(tline==',' & outsidequotes);
if length(commas)+1<nvars
    fclose(f1);
    error('data92rfp:shortSample', ...
        'The first data row has %d fields, but the header names %d variables.', ...
        length(commas)+1,nvars);
end
fieldstart=[1 commas+1];
stringvar=zeros(nvars,1);
for i=1:nvars
    %An empty final field starts past the end of the line; it is not a string.
    stringvar(i)=fieldstart(i)<=length(tline) && tline(fieldstart(i))=='"';
end

%Keep reading the lines to count observations. The sample line is the first
%observation. Blank lines at the very end of the file are not counted;
%nrows is the index of the last non-blank line.
nrows=0;
linesseen=0;
while ischar(tline)
    linesseen=linesseen+1;
    if ~isempty(strtrim(tline))
        nrows=linesseen;
    end
    tline=fgetl(f1);
end

%Create storage space for data: a cell column for each string variable and
%a numeric column for every other variable.
%NOTE(review): the column names come straight from the CSV header and are
%passed to eval, so the header must contain only valid MATLAB variable
%names that do not clash with the names used by this script.
for i=1:nvars
    if stringvar(i)
        eval(sprintf('%s = cell(%d,1);',varnames{i},nrows));
    else
        eval(sprintf('%s = zeros(%d,1);',varnames{i},nrows));
    end
end

%% Read the data.
% Each record is split into fields at the commas that lie outside double
% quotes, so commas inside quoted strings are kept as data. Field k of a
% record is then stored in row i of the variable named varnames{k}: as text
% for string variables, and as a number (NaN when missing or unparseable)
% for all other variables.
%
% Uses f1, datastart, nrows, nvars, varnames and stringvar from the
% workspace. Closes the file when done.

%Rewind the file back to the start of the data.
fseek(f1,datastart,'bof');

%The role of each column does not change from row to row, so find the
%string and numeric columns once, before the loop.
stringpos=find(stringvar);
numpos=find(1-stringvar);

for i=1:nrows
    tline=fgetl(f1);
    if ~ischar(tline)
        %The file ended before nrows lines were read; the remaining rows
        %keep their preallocated values.
        warning('data92rfp:unexpectedEOF', ...
            'End of file reached at row %d; expected %d rows.',i,nrows);
        break
    end

    %Locate the delimiting commas. A comma is a delimiter only when an even
    %number of double quotes precedes it (it is outside a quoted string).
    insidequotes=mod(cumsum(double(tline=='"')),2)==1;
    delims=find(tline==',' & ~insidequotes);
    fieldstart=[1 delims+1];
    fieldstop=[delims-1 length(tline)];
    nfields=length(fieldstart);
    if nfields<nvars
        warning('data92rfp:shortRow', ...
            'Row %d has %d fields; expected %d. Missing fields are treated as empty.', ...
            i,nfields,nvars);
    end

    %First, collect the string variables.
    for j=1:length(stringpos)
        colidx=stringpos(j);
        if colidx<=nfields
            strval=strtrim(tline(fieldstart(colidx):fieldstop(colidx)));
        else
            strval='';
        end
        if length(strval)>=2 && strval(1)=='"' && strval(end)=='"'
            %Drop the enclosing quotes and undo CSV quote doubling.
            strval=strrep(strval(2:end-1),'""','"');
        end
        eval([varnames{colidx} '{i} = strval;']);
    end

    %Assign each numerical variable its value from the current record.
    for j=1:length(numpos)
        colidx=numpos(j);
        x=[];
        if colidx<=nfields
            x=sscanf(tline(fieldstart(colidx):fieldstop(colidx)),'%f',1);
        end
        if isempty(x) %Empty field or text that does not parse as a number.
            x=NaN;
        end
        eval([varnames{colidx} '(i) = x;']);
    end
end

%All records have been read; release the file handle.
fclose(f1);

%% Save the variables constructed into a .mat file
%Write the variables named in varnames to data92rfp.mat. The names are
%passed to save as separate arguments (function syntax), which is
%equivalent to the command form "save data92rfp name1 name2 ..." without
%having to assemble a command string for eval.
savenames=varnames(~cellfun('isempty',varnames)); %Drop empty name slots, if any.
savenames=strtrim(savenames); %The command form ignores surrounding blanks; match that.
save('data92rfp',savenames{:});

%Clear the workspace and exit MATLAB.
clear
quit
