function out_st = ensemble_reshape_data(data_st,params)
% Reshapes an Ensemble data structure from long to wide format.
%
% out_st = ensemble_reshape_data(data_st,params)
%
% Every level of the transform variable (xfmVar) becomes its own variable in
% the output. The output has one row per unique combination of the key
% variables, and each new variable is filled with the value found in the
% (single) value variable that holds data for that level.
%
% INPUTS
%   data_st - data structure with .vars (cell array of variable names) and
%             .data (cell array with one column of data per variable)
%   params  - parameter structure:
%     .ensemble_reshape_data.xfmVar       - (required) name of the variable
%                  whose levels become new output variables
%     .ensemble_reshape_data.keyVars      - (required) names of the variables
%                  that, together with xfmVar, must uniquely identify each
%                  input row. Output rows are the unique combinations of
%                  these variables (xfmVar excluded).
%     .ensemble_reshape_data.valueVars    - (required) names of the variables
%                  that hold the values. For each level of xfmVar exactly one
%                  of them must contain data.
%     .ensemble_reshape_data.copyVars     - (optional) names of variables to
%                  carry over to the output. Each must have a single unique
%                  value within an output row. Key variables appear in the
%                  output only if they are also listed here.
%     .ensemble_reshape_data.var_name_map - (optional) struct mapping a level
%                  name (field) to the output variable name (value). Levels
%                  without an entry keep their own name.
%     .filt    - (optional) filter specification handed to ensemble_filter
%     .verbose - (optional, default 1) handed to check_unique_rows
%
% OUTPUT
%   out_st - structure from ensemble_init_data_struct whose vars are
%            copyVars followed by one variable per level of xfmVar
%
% ERRORS
%   Raised when inputs or required fields are missing, when the key
%   variables do not identify rows uniquely, when a level has data in none or
%   in more than one of the value variables, when a variable has a type that
%   cannot be initialized (only double, logical and cell are handled), or
%   when a copy variable has more than one value within an output row.

% Silence the warning triggered when assignments into the nominal key matrix
% add new levels. NOTE: as before, the warning state is not restored on exit.
warning('off','stats:categorical:subsasgn:NewLevelsAdded')

fname = mfilename;

if nargin < 2
  error('%s: data_st and params inputs required', fname)
end

if ~isfield(params,fname)
  error('params.%s structure is required',fname)
end

requiredVars = {'xfmVar','valueVars','keyVars'};
missingMask = ~ismember(requiredVars,fieldnames(params.(fname)));
if any(missingMask)
  error('Required fields missing from params.%s: %s', fname, cell2str(requiredVars(missingMask),','))
end

if isfield(params,'verbose')
  verbose = params.verbose;
else
  verbose = 1;
end

% Variable whose levels will be turned into output variables
xfmVar = params.(fname).xfmVar;

% The transform variable is appended as the LAST key; code below relies on
% keymtx(:,1:end-1) being the output-row keys.
keyVars = [params.(fname).keyVars xfmVar];
nkeys = length(keyVars);

valueVars = params.(fname).valueVars;
if ischar(valueVars)
  valueVars = {valueVars};
end

if isfield(params.(fname),'copyVars')
  copyVars = params.(fname).copyVars;
  if ischar(copyVars)
    copyVars = {copyVars};
  end
else
  copyVars = {};
end
ncopy = length(copyVars);

% var_name_map is optional: fall back to an empty map so that every level
% simply keeps its own name.
if isfield(params.(fname),'var_name_map')
  varNameMap = params.(fname).var_name_map;
else
  varNameMap = struct();
end

% Column lookup for the input variables
cols = set_var_col_const(data_st.vars);

% Optional filtering of the input data
if isfield(params,'filt')
  data_st = ensemble_filter(data_st, params.filt);
end

% Build the key matrix, one nominal column per key variable
for ikey = 1:nkeys
  keymtx(:,ikey) = nominal(data_st.data{cols.(keyVars{ikey})});
end

% Keys (including xfmVar) must identify each input row uniquely
nonUniqueMask = check_unique_rows(keymtx, verbose);

if any(nonUniqueMask)
  error('Key variables do not return unique values')
end

% Output rows: unique combinations of the keys without the transform variable
outkeymtx = unique(keymtx(:,1:end-1),'rows');
noutRows = size(outkeymtx,1);

% Only value variables that exist in the data are considered. The order of
% valueVars is retained so that valtype{k} always describes valueVars{k}.
presentMask = ismember(valueVars, data_st.vars);
if ~all(presentMask)
  warning('%s: ignoring value variables not present in data_st.vars: %s', ...
    fname, cell2str(valueVars(~presentMask),','))
  valueVars = valueVars(presentMask);
end
nvals = numel(valueVars);

% Data type of each value variable
valtype = cell(1,nvals);
for ival = 1:nvals
  valtype{ival} = class(data_st.data{cols.(valueVars{ival})});
end

% Initialize the output structure
out_st = ensemble_init_data_struct;

% Create empty columns for the variables that are copied over
if ncopy
  out_st.vars = copyVars;
  ocols = set_var_col_const(out_st.vars);
  for icopy = 1:ncopy
    currVar = copyVars{icopy};
    currType = class(data_st.data{cols.(currVar)});
    out_st.data{ocols.(currVar)} = init_out_column(currType, noutRows);
  end
end

% One mask column and one name per level of the transform variable
% (as returned by make_mask_mtx)
[levelMaskMtx, newVars] = make_mask_mtx(data_st.data{cols.(xfmVar)});
numNew = length(newVars);
newSrc = cell(1,numNew);   % value variable that feeds each new variable
outNames = cell(1,numNew); % output variable name for each level

for inew = 1:numNew
  currLevel = newVars{inew};

  % Input rows belonging to this level
  levelMask = levelMaskMtx(:,strcmp(newVars,currLevel));

  % Output name: mapped name if one is provided, otherwise the level itself
  if isfield(varNameMap,currLevel)
    varName = varNameMap.(currLevel);
  else
    varName = currLevel;
  end
  outNames{inew} = varName;

  % Register the new output variable
  out_st.vars{end+1} = varName;
  ocols = set_var_col_const(out_st.vars);

  % Determine which value variable carries data for this level.
  % NOTE: for numeric/logical variables any() is used, so values that are
  % all zero count as "no data".
  haveData = false(1,nvals);
  for ival = 1:nvals
    currData = data_st.data{cols.(valueVars{ival})}(levelMask);
    switch valtype{ival}
      case {'numeric','double','logical'}
        haveData(ival) = any(currData);
      case 'cell'
        haveData(ival) = any(~cellfun('isempty', currData));
    end
  end

  if ~any(haveData)
    error('No data available in any of the value variables')
  end

  if sum(haveData) > 1
    error('More than one value variable has data for level (%s): %s', ...
      currLevel, cell2str(valueVars(haveData),','))
  end

  valVar = valueVars{haveData};
  currType = valtype{haveData};
  newSrc{inew} = valVar;

  % Empty output column matching the type of the source value variable
  out_st.data{ocols.(varName)} = init_out_column(currType, noutRows);
end

% Fill the output, one row per unique key combination
for irow = 1:noutRows
  currKey = outkeymtx(irow,:);
  outkeyMask = ismember(keymtx(:,1:end-1),currKey,'rows');

  for inew = 1:numNew
    currLevel = newVars{inew};
    varName = outNames{inew};

    % Input rows belonging to this level
    levelMask = levelMaskMtx(:,strcmp(newVars,currLevel));

    % Input row for this key combination AND this level (at most one,
    % because the full key was verified to be unique)
    compMask = outkeyMask & levelMask;

    % Nothing recorded for this level in this output row: leave the default
    if ~any(compMask)
      continue
    end

    out_st.data{ocols.(varName)}(irow) = data_st.data{cols.(newSrc{inew})}(compMask);
  end

  % Copy variables: require a single unique value per output row
  for icopy = 1:ncopy
    tmp = data_st.data{cols.(copyVars{icopy})}(outkeyMask);

    % Non-cell data containing NaN is skipped; the initialized value is kept
    if ~iscell(tmp) && any(isnan(tmp))
      continue
    end

    outval = unique(tmp);
    if numel(outval) > 1
      error('Too many (%d) values found for variable we want to copy (%s)', numel(outval), copyVars{icopy})
    else
      out_st.data{ocols.(copyVars{icopy})}(irow) = outval;
    end
  end

end % for irow

end % ensemble_reshape_data

function col = init_out_column(typeName, nrows)
% Returns an nrows-by-1 placeholder column for the given class name:
% NaN for double, false for logical, empty cells for cell.
switch typeName
  case {'numeric','double'}
    col = nan(nrows,1);
  case 'logical'
    col = false(nrows,1);
  case 'cell'
    col = cell(nrows,1);
  otherwise
    error('No initialization for type: %s', typeName)
end
end % init_out_column