function [out_st, newVars] = ensemble_vals2vars(data_st, params)
% ENSEMBLE_VALS2VARS  Turn values of one or more variables into new variables.
%
%   [out_st, newVars] = ensemble_vals2vars(data_st, params)
%
% Every unique combination of values found in the variables listed in
% params.src_vars becomes a new output variable. For each unique value of
% the "by" variable, one output row is produced; the new variables in that
% row are filled with the entry of the "value" variable that corresponds
% to the given combination (NaN or '' when no such entry exists).
%
% INPUTS
%   data_st - struct with at least the fields
%               .vars  cell array of variable names
%               .data  cell array of data columns, one per name in .vars
%   params  - struct with fields
%               .src_vars        (required) name, or cell array of names, of
%                                the variables whose values become new
%                                variables
%               .by_var          variable whose unique values define the
%                                output rows (default: data_st.vars{1})
%               .value_var       variable whose entries populate the new
%                                variables (default: 'value')
%               .carryover_vars  variables copied to the output; each must
%                                have a single unique value per by_var value
%                                (default: all variables not in src_vars)
%
% OUTPUTS
%   out_st  - copy of data_st with .vars/.data replaced by the carryover
%             variables followed by the new variables; the value variable
%             is removed via ensemble_remove_vars_from_datastruct
%   newVars - column cell array of new variable names (value combinations
%             joined with '_', with '.' replaced by 'p')
%
% ERRORS
%   Raised when src_vars is not specified, when any requested variable is
%   missing from data_st.vars, when a carryover variable has more than one
%   unique value for a by_var value, or when more than one value entry
%   matches a (by_var value, new variable) pair.
%
% NOTE(review): set_var_col_const is used here as if it returns a struct
% mapping each variable name to its column index, and make_mask_mtx as if
% it returns a [rows x combos] logical mask plus the unique combinations;
% both are defined elsewhere - confirm. New variable names are used as
% dynamic field names, so combinations containing characters other than
% '.' that are illegal in field names will presumably fail.

if ~isfield(params,'src_vars')
    error('%s: Variables whose values to transform not specified in params.src_vars', mfilename)
end

xfmVars = params.src_vars;
if ~iscell(xfmVars)
    xfmVars = {xfmVars};
end
nvars = length(xfmVars);

srcVars = data_st.vars;
srcCols = set_var_col_const(data_st.vars);
[varMask,srcIdxs] = ismember(xfmVars, srcVars);
if ~all(varMask)
    error('%s: Desired variables are not all present', mfilename);
end

% Variable whose unique values define the output rows
if isfield(params,'by_var')
    byVar = params.by_var;
else
    byVar = srcVars{1};
end
if ~ismember(byVar, srcVars)
    error('%s: Variable to loop over (%s) could not be found', mfilename, byVar)
end
fprintf('Will loop using variable: %s\n', byVar);

% Variable holding the values that get distributed across the new variables
if isfield(params,'value_var')
    valVar = params.value_var;
else
    valVar = 'value';
end
if ~ismember(valVar, srcVars)
    error('Variable from which values will be rearranged (%s) could not be found', valVar)
end

% Start from a copy so that any additional fields of data_st are retained
out_st = data_st;
out_st.vars = {};
out_st.data = {};

% Convert every source variable to a cell array of strings so that the
% columns can be concatenated and compared as strings
vardata = data_st.data(srcIdxs);
for ivar = 1:nvars
    if isnumeric(vardata{ivar}) || islogical(vardata{ivar})
        vardata{ivar} = num2cell(vardata{ivar});
    end
    if ~isempty(vardata{ivar}) && ~ischar(vardata{ivar}{1})
        fprintf('Converting numeric values to cell array of strings for variable %s\n', xfmVars{ivar});
        vardata{ivar} = cellfun(@num2str,vardata{ivar},'UniformOutput',false);
    end
end

% One column per source variable
vardata = cat(2,vardata{:});

% Each unique combination of source-variable values becomes a new variable
fprintf('Finding unique combinations of values of source variables\n');
[newVarMask, uniqueCombos] = make_mask_mtx(vardata);
nNewVars = size(uniqueCombos,1);
newVars = cell(nNewVars,1);
for inew = 1:nNewVars
    newVars{inew,1} = cell2str(uniqueCombos(inew,:),'_');
end

% Periods are not legal in field names; replace them
newVars = strrep(newVars,'.','p');

% Determine which variables are carried over unchanged
if isfield(params,'carryover_vars')
    carryoverVars = params.carryover_vars;
else
    carryoverVars = setdiff(srcVars, xfmVars);
end
if ~iscell(carryoverVars)
    carryoverVars = {carryoverVars};
end
% Force a row so the horizontal concatenation below works regardless of
% the orientation returned by setdiff or supplied by the caller
carryoverVars = carryoverVars(:)';
if ~all(ismember(carryoverVars, srcVars))
    error('%s: Carryover variables are not all present', mfilename)
end
nCarryover = length(carryoverVars);

% Initialize the output variables
out_st.vars = [carryoverVars newVars(:)'];
outcols = set_var_col_const(out_st.vars);
out_st.data = cell(1,length(out_st.vars));

% Loop-invariant lookups
byData = data_st.data{srcCols.(byVar)};
valData = data_st.data{srcCols.(valVar)};
valIsNumeric = isnumeric(valData);

uniqueVals = unique(byData);
nvals = length(uniqueVals);

fprintf('Looping over %d values for variable %s\n', nvals, byVar);
for ival = 1:nvals
    % Progress indicator
    if mod(ival,20) == 0
        fprintf('%d', ival);
    else
        fprintf('.');
    end

    if iscell(uniqueVals)
        currVal = uniqueVals{ival};
    else
        currVal = uniqueVals(ival);
    end
    % String form of the current value, for error messages
    if ischar(currVal)
        currValStr = currVal;
    else
        currValStr = num2str(currVal);
    end
    valMask = ismember(byData, currVal);

    % Copy the carryover variables
    for icarry = 1:nCarryover
        currVar = carryoverVars{icarry};

        % The value variable is removed at the end; do not bother copying
        if strcmp(currVar,valVar)
            continue
        end

        oldVals = data_st.data{srcCols.(currVar)}(valMask);
        outCol = outcols.(currVar);

        % Nothing but empty entries: store an explicit empty so that the
        % output column still gets a row for this value
        if iscell(oldVals) && all(cellfun('isempty',oldVals))
            out_st.data{outCol}{ival,1} = [];
            continue
        end

        uniqueOldVal = unique(oldVals);
        if length(uniqueOldVal) > 1
            % unique() treats every NaN as distinct, so several NaNs are
            % not a genuine conflict; store a single NaN for this row
            if ~iscell(uniqueOldVal) && all(isnan(uniqueOldVal))
                out_st.data{outCol}(ival,1) = NaN;
                continue
            else
                error('\n%s: More than 1 unique old value for variable %s where %s = %s', ...
                    mfilename, currVar, byVar, currValStr)
            end
        end
        if iscell(uniqueOldVal)
            uniqueOldVal = uniqueOldVal{1};
        end
        if isnumeric(uniqueOldVal)
            out_st.data{outCol}(ival,1) = uniqueOldVal;
        else
            out_st.data{outCol}{ival,1} = uniqueOldVal;
        end
    end

    % Populate the new variables
    for inew = 1:nNewVars
        % Rows matching both the current by-value and the current combination
        compositeMask = valMask & newVarMask(:,inew);
        if sum(compositeMask) > 1
            error('\nToo many values for instance of variable %s and variable %s', ...
                currValStr, newVars{inew})
        end

        tmpval = valData(compositeMask);
        if isempty(tmpval)
            % No entry for this combination: use a missing-value marker
            % matching the type of the value variable
            if valIsNumeric
                tmpval = NaN;
            else
                tmpval = '';
            end
        end

        if iscell(tmpval)
            tmpval = tmpval{1};
        end
        if isnumeric(tmpval)
            out_st.data{outcols.(newVars{inew})}(ival,1) = tmpval;
        else
            out_st.data{outcols.(newVars{inew})}{ival,1} = tmpval;
        end
    end
end

fprintf('\n')

% The values now live in the new variables; drop the original value variable
out_st = ensemble_remove_vars_from_datastruct(out_st, {valVar});

return