Home > database > ensemble_vals2vars.m

ensemble_vals2vars

PURPOSE ^

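Rearranges an ensemble data struct such that unique combinations of values in one or more source variables (params.src_vars) become their own variables in the output structure.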

SYNOPSIS ^

function [out_st, newVars] = ensemble_vals2vars(data_st, params)

DESCRIPTION ^

 Rearranges an ensemble data struct such that unique combinations of
 values in one or more variables specified in params.src_vars become their
 own variables in the output structure.

 Additional parameters that govern how this function works are:
 params.by_var - Unique values of this variable are looped over when
                 creating the rearranged matrix. This variable should have
                 exactly one instance of each unique combination of the
                 values of the source variables, i.e. new variable. If
                 this field is not specified, the first variable in the
                 data structure variables is used. 

 params.value_var - name of the variable that contains the actual values
                    that will be placed in the new variables. If this is
                    not specified, 'value' is used. 
 params.carryover_vars - list of variable names from original data
                  structure to carry over to the new structure.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

function [out_st, newVars] = ensemble_vals2vars(data_st, params)
% Rearranges an ensemble data struct such that unique combinations of
% values in one or more variables specified in params.src_vars become their
% own variables in the output structure.
%
% [out_st, newVars] = ensemble_vals2vars(data_st, params)
%
% INPUTS
% data_st - data structure with variable names in data_st.vars and the
%           corresponding data columns in data_st.data
% params.src_vars - (required) name, or cell array of names, of the
%                 variables whose value combinations become new variables
%
% Additional parameters that govern how this function works are:
% params.by_var - Unique values of this variable are looped over when
%                 creating the rearranged matrix. This variable should have
%                 exactly one instance of each unique combination of the
%                 values of the source variables, i.e. new variable. If
%                 this field is not specified, the first variable in the
%                 data structure variables is used.
%
% params.value_var - name of the variable that contains the actual values
%                    that will be placed in the new variables. If this is
%                    not specified, 'value' is used.
% params.carryover_vars - list of variable names from original data
%                  structure to carry over to the new structure. If this
%                  is not specified, all variables other than the source
%                  variables are carried over.
%
% OUTPUTS
% out_st - rearranged data structure with one row per unique value of the
%          looping variable. The value variable is removed from it via
%          ensemble_remove_vars_from_datastruct before returning.
% newVars - column cell array of the new variable names. Each name is
%           produced by cell2str(combination,'_') and then has every '.'
%           replaced by 'p'.
%
% Missing data: a new variable with no matching row for a given looping
% value receives NaN (numeric value variable) or '' (otherwise). A carryover
% variable whose values for a given looping value are all NaN or all empty
% receives NaN or '' respectively, so that every output column stays
% aligned with the rows of the looping variable.

% 09Nov2011 Petr Janata
% 22Aug2014 PJ - fixed handling of non-cell by_var; added carryover_vars
%                option
% 15Aug2015 PJ - fixed handling of missing numeric input when copying data
%                to output

if ~isfield(params,'src_vars')
    error('%s: Variables whose values to transform not specified in params.src_vars', mfilename)
end

xfmVars = params.src_vars;
if ~iscell(xfmVars)
    xfmVars = {xfmVars};
end
nvars = length(xfmVars);

srcVars = data_st.vars;
srcCols = set_var_col_const(data_st.vars);
[varMask,srcIdxs] = ismember(xfmVars, srcVars);
if ~all(varMask)
    error('%s: Desired variables are not all present', mfilename);
end

% Figure out which variable we are going to use for looping when we
% rearrange the data
if isfield(params,'by_var')
    byVar = params.by_var;
    % Accept a name wrapped in a single-element cell
    if iscell(byVar) && numel(byVar) == 1
        byVar = byVar{1};
    end
else
    byVar = srcVars{1};
end
if ~ischar(byVar) || ~ismember(byVar, srcVars)
    error('%s: Looping variable (params.by_var) must be the name of a single variable in the data structure', mfilename)
end
fprintf('Will loop using variable: %s\n', byVar);

% Figure out which variable we are pulling data from
if isfield(params,'value_var')
    valVar = params.value_var;
else
    valVar = 'value';
end
if ~ismember(valVar, srcVars)
    error('Variable from which values will be rearranged (%s) could not be found', valVar)
end

% Initialize the output data structure
out_st = data_st;
out_st.vars = {};
out_st.data = {};

% Extract the desired variables
vardata = data_st.data(srcIdxs);

% Convert any numeric data to strings
for ivar = 1:nvars
    % If it is already in numeric format, but not in a cell, place in cell so
    % that it can be converted properly to a string without whitespace
    if isnumeric(vardata{ivar})
        vardata{ivar} = num2cell(vardata{ivar});
    end
    % iscellstr (rather than inspecting only the first element) also copes
    % with empty columns; num2str leaves elements that are already char
    % untouched
    if ~iscellstr(vardata{ivar})
        fprintf('Converting numeric values to cell array of strings for variable %s\n', xfmVars{ivar});
        vardata{ivar} = cellfun(@num2str,vardata{ivar},'UniformOutput',false);
    end
end

% Reformat vardata: one column per source variable
vardata = cat(2,vardata{:});

% Get the unique combinations that are going to form the new variables
fprintf('Finding unique combinations of values of source variables\n');
[newVarMask, uniqueCombos] = make_mask_mtx(vardata);
nNewVars = size(uniqueCombos,1);
newVars = cell(nNewVars,1);
for inew = 1:nNewVars
    newVars{inew,1} = cell2str(uniqueCombos(inew,:),'_');
end

% Sanitize new variable names
newVars = strrep(newVars,'.','p');

%
% Set the new variable list for the output structure
%

% Figure out which variables we are carrying over
if isfield(params,'carryover_vars')
    carryoverVars = params.carryover_vars;
    % A single name may be given as a plain string
    if isempty(carryoverVars)
        carryoverVars = {};
    elseif ~iscell(carryoverVars)
        carryoverVars = {carryoverVars};
    end
else
    carryoverVars = setdiff(srcVars, xfmVars);
end
% Force a row so that the horizontal concatenation below always works
carryoverVars = carryoverVars(:)';
if ~all(ismember(carryoverVars, srcVars))
    error('%s: Carryover variables are not all present', mfilename);
end
nCarryover = length(carryoverVars);

% Set the variable list
out_st.vars = [carryoverVars newVars'];
outcols = set_var_col_const(out_st.vars);
out_st.data = cell(1,length(out_st.vars));

% Output column of each new variable (loop invariant)
newColIdx = zeros(nNewVars,1);
for inew = 1:nNewVars
    newColIdx(inew) = outcols.(newVars{inew});
end

% Source columns that are consulted on every iteration
byData = data_st.data{srcCols.(byVar)};
valData = data_st.data{srcCols.(valVar)};
valIsNumeric = isnumeric(valData);

% Loop using unique values of the looping variable
uniqueVals = unique(byData);
nvals = length(uniqueVals);

fprintf('Looping over %d values for variable %s\n', nvals, byVar);
for ival = 1:nvals
    % Progress indicator
    if mod(ival,20) == 0
        fprintf('%d', ival);
    else
        fprintf('.');
    end

    if iscell(uniqueVals)
        currVal = uniqueVals{ival};
    else
        currVal = uniqueVals(ival);
    end

    % String form of the current value for use in error messages
    if ischar(currVal)
        currValStr = currVal;
    else
        currValStr = num2str(currVal);
    end

    % Rows belonging to the current value of the looping variable
    valMask = ismember(byData, currVal);
    valMask = valMask(:);

    % Copy carryover variables
    for icarry = 1:nCarryover
        currVar = carryoverVars{icarry};

        % The value variable is redistributed into the new variables and
        % removed from the output at the end
        if strcmp(currVar,valVar)
            continue
        end

        outCol = outcols.(currVar);
        oldVals = data_st.data{srcCols.(currVar)}(valMask);

        % Nothing but empty values: store an empty string so that this row
        % is still present in the output column
        if iscell(oldVals) && all(cellfun('isempty',oldVals))
            out_st.data{outCol}{ival,1} = '';
            continue
        end

        uniqueOldVal = unique(oldVals);

        % unique() treats every NaN as distinct, so collapse an all-NaN (or
        % empty) numeric result to a single NaN. Storing the NaN, rather
        % than skipping the row, prevents the gap from being zero-filled
        % when the column grows on a later iteration.
        if isnumeric(uniqueOldVal)
            if isempty(uniqueOldVal)
                uniqueOldVal = NaN;
            elseif all(isnan(uniqueOldVal))
                uniqueOldVal = uniqueOldVal(1);
            end
        end

        if length(uniqueOldVal) > 1
            % Format arguments are supplied so that error() converts the \n
            error('\nMore than 1 unique old value of variable %s for %s = %s', currVar, byVar, currValStr)
        end

        if iscell(uniqueOldVal)
            uniqueOldVal = uniqueOldVal{1};
        end
        if isnumeric(uniqueOldVal)
            out_st.data{outCol}(ival,1) = uniqueOldVal;
        else
            out_st.data{outCol}{ival,1} = uniqueOldVal;
        end
    end % end of carryover variables

    % Now, loop over all the new variables and pull the relevant data
    for inew = 1:nNewVars
        % Intersection of the current new-variable mask and the mask for
        % the current value of the looping variable
        compositeMask = valMask & newVarMask(:,inew);
        if nnz(compositeMask) > 1
            error('\nToo many values for instance of variable %s and variable %s', currValStr, newVars{inew})
        end

        % Assign the value
        tmpval = valData(compositeMask);
        if isempty(tmpval)
            % 15Aug2015 PJ - the type of the missing-value placeholder is
            % determined by the type of the source data
            if valIsNumeric
                tmpval = NaN;
            else
                tmpval = '';
            end
        end

        if iscell(tmpval)
            tmpval = tmpval{1};
        end
        if isnumeric(tmpval)
            out_st.data{newColIdx(inew)}(ival,1) = tmpval;
        else
            out_st.data{newColIdx(inew)}{ival,1} = tmpval;
        end
    end
end % for ival

fprintf('\n')

% Remove the value variable
out_st = ensemble_remove_vars_from_datastruct(out_st, {valVar});

Generated on Wed 20-Sep-2023 04:00:50 by m2html © 2003