Home > database > ensemble_extract_nd_matrix.m

ensemble_extract_nd_matrix

PURPOSE ^

an_st = ensemble_extract_nd_matrix(data_st,params);

SYNOPSIS ^

function an_st = ensemble_extract_nd_matrix(data_st,params)

DESCRIPTION ^

 an_st = ensemble_extract_nd_matrix(data_st,params);

 Given a list of variables, this function creates an N-dimensional (ND)
 matrix from the data in data_st, where N is the number of variables listed
 in params.matrix_dims.

 A couple of rules are followed. If the data are entirely numeric with a
 single value per variable combination, a numeric ND matrix is returned.
 Otherwise an ND cell array is returned.

 params should contain the following fields:
 .matrix_dims - specifies which variables in data_st will be used to construct
                the dimensions of the matrix.
 .dependent_var - specifies which variable provides the values in the matrix.
                  If this variable is numeric then a numeric matrix is
                  returned, otherwise a cell array.

 Optionally, one can specify:
 .post_process_func - a cell-array of strings that specifies further
 transformations on the matrix before the data are returned.
 .mean_multiple_values - if this is set to 1, and a cell is found to have
 multiple values, the mean of those values will be calculated for that
 cell. The default behavior in this case is to convert the entire data
 matrix into a multi-dimensional cell array.
 
 NOTE: Currently does not support dynamic handling of response type (radio,
 checkbox, written).  Only handles radio button enums.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function an_st = ensemble_extract_nd_matrix(data_st,params)
% an_st = ensemble_extract_nd_matrix(data_st,params);
%
% Given a list of variables, this function creates an N-dimensional (ND)
% matrix from the data in data_st, where N is the number of variables listed
% in params.matrix_dims. Each dimension is indexed by the unique values of
% the corresponding variable (NaNs are dropped from numeric dimensions).
%
% A couple of rules are followed. If the data are entirely numeric with a
% single value per variable combination, a numeric ND matrix is returned
% (combinations without data are NaN). Otherwise an ND cell array is returned.
%
% INPUTS
%  data_st - Ensemble data structure with .vars (variable names) and .data
%            (one cell per variable).
%  params  - structure that should contain the following fields:
%   .matrix_dims - specifies which variables in data_st will be used to
%                  construct the dimensions of the matrix (cell array of
%                  strings; a single string is accepted for one dimension).
%   .dependent_var - specifies which variable provides the values in the
%                  matrix. If this variable is numeric then a numeric matrix
%                  is returned, otherwise a cell array. If a cell array of
%                  names is given, only the first one is used.
%
%  Optionally, one can specify:
%   .filt - filtering criteria passed to ensemble_filter() before extraction.
%   .post_process_func - a cell-array of strings (or a single string) naming
%                  functions that further transform the matrix before the
%                  data are returned. Each function is called with the
%                  current matrix and must return the transformed matrix. If
%                  any step fails, the untransformed matrix is returned.
%   .mean_multiple_values - if this is set to 1, and a cell is found to have
%                  multiple values, the mean of those values will be
%                  calculated for that cell. The default behavior in this
%                  case is to convert the entire data matrix into a
%                  multi-dimensional cell array.
%
% OUTPUT
%  an_st - Ensemble data structure with the variables
%   data_matrix - the ND numeric matrix or cell array
%   dim_names   - the names of the variables forming the dimensions
%   dim_values  - cell array with the values along each dimension
%  If required parameters or variables are missing, a message is printed and
%  an_st is returned without its data being filled in.
%
% NOTE: Currently does not support dynamic handling of response type (radio,
% checkbox, written).  Only handles radio button enums.

% 12/10/08 Petr Janata
% 05/06/09 FB - added params.mean_multiple_values

an_st = ensemble_init_data_struct;

an_st.vars{1} = 'data_matrix';
an_st.vars{2} = 'dim_names';
an_st.vars{3} = 'dim_values';
ancols = set_var_col_const(an_st.vars);

%
% Make sure parameters have been specified
%
if ~all(isfield(params,{'matrix_dims','dependent_var'})) || isempty(params.matrix_dims) ...
      || isempty(params.dependent_var)
  fprintf('%s: No matrix dimension or dependent variables specified\n', mfilename);
  return
end

% Accept a single dimension name given as a plain string, and make sure the
% list of names is a row so that it can be concatenated below
if ischar(params.matrix_dims)
  params.matrix_dims = {params.matrix_dims};
end
params.matrix_dims = params.matrix_dims(:)';

if iscell(params.dependent_var)
  params.dependent_var = params.dependent_var{1};
end
all_vars = [params.matrix_dims {params.dependent_var}];

%
% See how many variables we want in our output matrix and make sure they all exist
%
have_var_mask = ismember(all_vars, data_st.vars);
if ~all(have_var_mask)
  fprintf('%s: Could not find %d variables in the datastruct: %s\n', ...
      mfilename, sum(~have_var_mask), ...
      cell2str(all_vars(~have_var_mask),','))
  return
end

num_dims = length(params.matrix_dims);
datacols = set_var_col_const(data_st.vars);

%
% Perform any initial filtering
%
if isfield(params,'filt')
  data_st = ensemble_filter(data_st,params.filt);
end

%
% Based on the dependent variable, determine if the output matrix is likely to be a
% numeric or cell array.
%
make_numeric = isnumeric(data_st.data{datacols.(params.dependent_var)});

%
% Figure out how many unique values there are along each dimension
%
dimension_vals = cell(1,num_dims);
for idim = 1:num_dims
  % Make a copy of the current data
  curr_data = data_st.data{datacols.(params.matrix_dims{idim})};

  % Unique values along dimension
  dimension_vals{idim} = unique(curr_data);

  % Check for and remove NaNs
  if isnumeric(curr_data) && any(isnan(dimension_vals{idim}))
    fprintf('%s: Found NaNs in dimension: %s\n', mfilename, params.matrix_dims{idim});
    dimension_vals{idim}(isnan(dimension_vals{idim})) = [];
  end
end

%
% Initialize the output matrix
%
matrix_size = cellfun('length', dimension_vals);
if num_dims == 1
  % A scalar size argument would create a square matrix, so explicitly
  % request a column vector when there is only a single dimension
  matrix_size = [matrix_size 1];
end

if make_numeric
  data_matrix = NaN(matrix_size);
else
  data_matrix = cell(matrix_size);
end

dim_idxs = repmat({0}, 1, num_dims);
curr_dim = 1;
dim_names = params.matrix_dims;

% Figure out what input data we're going to pass in
indata = data_st.data{datacols.(params.dependent_var)};
curr_mask = true(size(indata));

%
% Now extract the data. This is a bit tricky because the function that does
% this has to be recursive to handle an arbitrary number of dimensions.
%
data_matrix = burrow(curr_mask, dimension_vals, curr_dim, dim_names, dim_idxs, ...
    indata, data_matrix, data_st, params);

%
% See if we want to do any post-processing
%
if isfield(params,'post_process_func')
  func_list = params.post_process_func;
else
  func_list = {};
end
if ischar(func_list)
  func_list = {func_list};
end

if ~isempty(func_list)
  mod_data = data_matrix;
  for ifunc = 1:length(func_list)
    fh = str2func(func_list{ifunc});
    try
      mod_data = fh(mod_data);
    catch
      % Abandon all post-processing if any single step fails
      fprintf(['%s: Post processing step (%s) failed. Returning data to ' ...
        'original state\n'], mfilename, func_list{ifunc});
      mod_data = data_matrix;
      break
    end
  end
  data_matrix = mod_data;
end

% Finalize the output structure
an_st.data{ancols.data_matrix} = data_matrix;
an_st.data{ancols.dim_names} = dim_names;
an_st.data{ancols.dim_values} = dimension_vals;

end % function ensemble_extract_nd_matrix
0153 
function outdata = burrow(curr_mask, dimension_vals, curr_dim, dim_names, ...
      dim_idxs, indata, outdata, data_st, params)
% Recursively walks through every combination of dimension values and copies
% the matching entries of indata into outdata.
%
%  curr_mask      - mask matrix; column k marks the rows of the data that
%                   match the currently selected value of dimension k
%  dimension_vals - cell array with the values along each dimension
%  curr_dim       - index of the dimension handled at this recursion level
%  dim_names      - names of the variables in data_st forming the dimensions
%  dim_idxs       - cell array holding the current index along each dimension
%  indata         - values of the dependent variable
%  outdata        - output matrix (numeric or cell) that is being filled in
%  data_st        - data structure from which the dimension data are read
%  params         - parameter structure; .mean_multiple_values is optional
%
% Returns the updated outdata. A numeric outdata is converted to a cell array
% if a combination has more than one value and params.mean_multiple_values
% is not set.

  datacols = set_var_col_const(data_st.vars);
  curr_data = data_st.data{datacols.(dim_names{curr_dim})};

  % Averaging of multiple values is off unless explicitly requested
  mmv = 0;
  if isfield(params,'mean_multiple_values') && ~isempty(params.mean_multiple_values)
    mmv = params.mean_multiple_values;
  end

  % Get the number of values we have to traverse for the current dimension
  curr_vals = dimension_vals{curr_dim};
  ndim_idxs = length(curr_vals);
  is_last_dim = curr_dim+1 > length(dim_idxs);

  for iidx = 1:ndim_idxs
    dim_idxs{curr_dim} = iidx;  % to keep track of where we are overall

    % Update the mask with the mask for the current dimension's current index
    curr_mask(:,curr_dim) = ismember(curr_data,curr_vals(iidx));

    % Check to see if there is a next dim or whether we have descended all
    % the way down
    if ~is_last_dim
      outdata = burrow(curr_mask, dimension_vals, curr_dim+1, dim_names, ...
          dim_idxs, indata, outdata, data_st, params);
      continue
    end

    % We are at the deepest level, so grab the data for which the masks of
    % all dimensions agree
    composite_mask = all(curr_mask,2);
    num_values = sum(composite_mask);
    if num_values == 0
      continue
    end

    if length(dim_idxs) == 1
      % With a single dimension the subscript already is the linear index
      curr_outdata_idx = dim_idxs{1};
    else
      curr_outdata_idx = sub2ind(size(outdata),dim_idxs{:});
    end

    % If we are building a numeric array, make sure we have only a single
    % value, otherwise convert to a cell array
    if isnumeric(outdata) && num_values > 1 && ~mmv
      fprintf('%s: Have to convert numeric array to cell array because more than one value was found\n', mfilename)
      outdata = num2cell(outdata);
    end

    % Copy the data
    curr_values = indata(composite_mask);
    if ~isnumeric(outdata)
      outdata{curr_outdata_idx} = curr_values;
    elseif length(curr_values) > 1 && mmv
      % Mean that ignores NaNs; yields NaN if all values are NaN
      outdata(curr_outdata_idx) = mean(curr_values(~isnan(curr_values)));
    else
      outdata(curr_outdata_idx) = curr_values;
    end
  end % for iidx = 1:ndim_idxs

end % burrow

Generated on Wed 20-Sep-2023 04:00:50 by m2html © 2003