Description of ensemble_enum_stats

function an_st = ensemble_enum_stats(data_st,params)
% Calculates statistics on responses to enum questions.
%
% an_st = ensemble_enum_stats(data_st,params);
%
% Calculates various descriptive and quantitative statistics on responses to
% questions that are enums.
%
% INPUTS
%   data_st - Ensemble data structure containing the response data. It is
%             passed through ensemble_check_compqid and must provide the
%             compqid, response_enum and subject_id variables.
%   params  - parameter structure. Fields read by this function:
%     .stats              - REQUIRED. Must contain the sub-structures
%                           .by_subject and .across_subjects. The field
%                           names of each sub-structure are the statistics to
%                           compute ('mean','std','min','max' are handled).
%                           If .across_subjects.<stat> is itself a struct
%                           with a field .ttest, a t-test of the subject
%                           means is run against .ttest.mu (default:
%                           'midpoint', i.e. (ncat+1)/2).
%                           NOTE: the across_subjects level uses the
%                           by_subject means as its input, so 'mean' must be
%                           requested in .stats.by_subject.
%     .mysql.conn_id or
%     .ensemble.conn_id   - database connection ID (one is required)
%     .filt               - optional filter passed to ensemble_filter
%     .report.print_tables- optional flag; when absent, reports are printed
%     .report             - passed on to the reporting sub-functions
%
% OUTPUT
%   an_st - data structure of type 'enum_stats_by_compqid'. There is one
%           entry in an_st.vars / an_st.data per enum (non-checkbox)
%           composite question ID. Each entry is a structure of type
%           'enum_basic_stats' whose data holds the 'by_subject' and
%           'across_subjects' result structures. an_st.meta.params holds the
%           (locally modified) params structure.
%
% Note: Currently, the script will not treat the same question appearing on
% different forms as a different instance of the question. If you don't want
% answers to the same question on different forms combined, you must filter the
% data to only process forms with unique question IDs.  This behavior may
% change in future versions.

%
% 02/02/07 Petr Janata - adapted from ensemble_enum_hist
% 10/07/08 PJ - generalized to handle other databases

an_st = ensemble_init_data_struct;
an_st.type = 'enum_stats_by_compqid';

% Make sure that a stats structure has been specified as part of the params structure
if ~isfield(params,'stats')
  fprintf('ensemble_enum_stats: No stats requests specified\n');
  return
end

% Make sure we have a database connection. Look in params.mysql first and
% fall back on params.ensemble.
try
  conn_id = params.mysql.conn_id;
catch
  try
    conn_id = params.ensemble.conn_id;
  catch
    conn_id = [];
  end
end
if isempty(conn_id)
  error('%s: Need to have a valid database connection ID', mfilename)
end

% Make sure we have a compqid variable
data_st = ensemble_check_compqid(data_st);
if isempty(data_st)
  return
end

% Set the column constants
incol = set_var_col_const(data_st.vars);

% Apply any specified filtering to the input data
if isfield(params,'filt')
  fprintf('Applying filtering criteria\n')
  data_st = ensemble_filter(data_st, params.filt);
end

%
% Gather metadata on the questions
%

% Get a list of unique question IDs. The integer part is taken BEFORE calling
% unique so that several composite IDs sharing the same integer part do not
% produce duplicate question IDs in the metadata request.
qids = unique(fix(data_st.data{incol.compqid}));

qinfo = mysql_extract_metadata('conn_id', conn_id, ...
  'table','question',...
  'question_id',qids);

% Figure out which of the questions in the qinfo structure are enums and remove
% those that are not
qinfo_enum_mask = ismember({qinfo.type},'enum');
if sum(~qinfo_enum_mask)
  fprintf('ensemble_enum_stats: Removing %d non-enum qids\n', sum(~qinfo_enum_mask));
  qinfo(~qinfo_enum_mask) = [];
end

% Figure out which of the questions are bitmasks that allow for selection of
% multiple values (checkbox as opposed to radiogroup) and remove these from the
% qinfo array
qinfo_bitmask_mask = ismember({qinfo.html_field_type},'checkbox');
if sum(qinfo_bitmask_mask)
  fprintf('ensemble_enum_stats: Removing %d bitmask qids from list of qids\n', sum(qinfo_bitmask_mask));
  qinfo(qinfo_bitmask_mask) = [];
end

% Restrict the response data to the remaining enum questions
enum_compqids = [qinfo.compqid];
filt.include.any.compqid = enum_compqids;
data_st = ensemble_filter(data_st,filt);

% Copy all of the enum data that are not a bitmask to the data_vect and convert
% to category indices
data_vect = data_st.data{incol.response_enum};
data_vect = enum2data(data_vect);

% Precalculate the subject masks
[sub_mask_mtx, subids] = make_mask_mtx(data_st.data{incol.subject_id});
nsub = length(subids);

% Determine once whether reports should be printed. Reporting is on by
% default when params.report.print_tables has not been specified.
try
  do_report = params.report.print_tables;
catch
  do_report = 1;
end

%
% Loop over all of the unique question/subquestion combinations or compqids
%
nqid = length(qinfo);
for iqid = 1:nqid
  an_st.vars{iqid} = sprintf('compqid %s',num2str(qinfo(iqid).compqid));

  % Copy the question info over to the display parameter structure in case we
  % are going to display some of the data
  params.display.qinfo = qinfo(iqid);

  % Get the enum categories
  enum_values = qinfo(iqid).enum_values;
  ncat = length(enum_values);

  % Make a mask for the data corresponding to this question
  qid_mask = ismember(data_st.data{incol.compqid},qinfo(iqid).compqid);

  an_st_l1 = ensemble_init_data_struct;
  an_st_l1.type = 'enum_basic_stats';
  an_st_l1.vars = {'by_subject','across_subjects'};
  an_st_l1.meta.question = qinfo(iqid);

  nlevel1 = length(an_st_l1.vars);

  tmp_st = {};
  tmp_idx = [];
  for il1 = 1:nlevel1
    id_str = an_st_l1.vars{il1};
    tmp_idx.(id_str) = il1;

    tmp_st{il1} = ensemble_init_data_struct;
    tmp_st{il1}.type = sprintf('enum_stats_%s', id_str);

    % The variables in this analysis are all of the fields within the stats
    % field of the params structure
    stats_list = fieldnames(params.stats.(id_str));
    nstats = length(stats_list);

    switch id_str
      case 'by_subject'
        aux_vars = {'subject_id','nresp'};
      case 'across_subjects'
        aux_vars = {'nsub'};
    end
    tmp_vars = [aux_vars stats_list'];
    tmp_st_cols = set_var_col_const(tmp_vars);
    tmp_st{il1}.vars = tmp_vars;

    % Initialize output variables
    for ia = 1:length(tmp_vars)
      switch tmp_vars{ia}
        case {'subject_id'}
          tmp_st{il1}.data{ia} = subids;
        otherwise
          switch id_str
            case 'by_subject'
              tmp_st{il1}.data{ia} = zeros(nsub,1);
            otherwise
              tmp_st{il1}.data{ia} = [];
          end
      end
    end % for ia=1:length(tmp_vars)

    %
    % Execute some type-specific code. One can imagine additional case
    % statements for 'by_trial' or 'by_attribute'
    %

    switch id_str
      %
      % Deal with the set of by_subject analyses
      %
      case 'by_subject'
        for isub = 1:nsub
          % Rows belonging to both this subject and this question
          resp_mask = sub_mask_mtx(:,isub) & qid_mask;

          % Tally the number of responses the subject made to this question
          nresp = sum(resp_mask);
          tmp_st{il1}.data{tmp_st_cols.nresp}(isub) = nresp;

          no_resps = ~nresp;

          % Now loop over all of the analyses we want to perform
          for istat = 1:nstats
            stat_str = stats_list{istat};

            % Subjects without responses get a NaN for every statistic
            if no_resps
              tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = NaN;
              continue
            end

            switch stat_str
              case {'mean','std','min','max'}
                fh = str2func(stat_str);
                tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = fh(data_vect(resp_mask));
            end % switch stat_str
          end % for istat=
        end % for isub

      case 'across_subjects'
        src_st = tmp_st{tmp_idx.by_subject};
        src_cols = set_var_col_const(src_st.vars);

        % 02/02/07 PJ Currently hard-coded to use subject-level means as input into this
        % level of the analysis. Ultimately, this should really become another
        % level of abstraction which supports different types of source data.
        if ~isfield(src_cols,'mean')
          error('%s: across_subjects statistics require ''mean'' to be requested in params.stats.by_subject', mfilename)
        end
        src_data = src_st.data{src_cols.mean};

        % Remove any data with NaNs
        src_data(any(isnan(src_data),2),:) = [];

        tmp_st{il1}.data{tmp_st_cols.nsub} = size(src_data,1);
        for istat = 1:nstats
          stat_str = stats_list{istat};
          switch stat_str
            case {'mean','std','min','max'}

              % Evaluate the basic function
              fh = str2func(stat_str);
              tmp_st{il1}.data{tmp_st_cols.(stat_str)} = fh(src_data);

              % See if there is additional processing to be done
              if isstruct(params.stats.(id_str).(stat_str))
                proc_list = fieldnames(params.stats.(id_str).(stat_str));
                for iproc = 1:length(proc_list)
                  switch proc_list{iproc}
                    case 'ttest'
                      % Test value defaults to the midpoint of the scale
                      try
                        mu = params.stats.(id_str).(stat_str).ttest.mu;
                      catch
                        mu = 'midpoint';
                      end
                      if ischar(mu) && strcmp(mu,'midpoint')
                        mu = (ncat+1)/2;
                      end

                      tmp_st2 = ensemble_init_data_struct;
                      tmp_st2.type = proc_list{iproc};
                      tmp_st2.vars = {'H','p','ci','stats'};
                      [tmp_st2.data{1:nargout(proc_list{iproc})}] = ttest(src_data, mu);
                    otherwise
                      % Unknown processing request: skip it
                      continue
                  end
                  % Append the result as an extra variable, e.g. 'mean_ttest'
                  tmp_st{il1}.vars{end+1} = sprintf('%s_%s',stat_str,proc_list{iproc});
                  tmp_st_cols = set_var_col_const(tmp_st{il1}.vars);
                  tmp_st{il1}.data{end+1} = tmp_st2;
                end % for iproc
              end % if isstruct(params.stats.(id_str).(stat_str)
          end % switch stat_str
        end % for istat=

      otherwise

    end % switch id_str (by_subject, across_subjects)

    % Register a reporting function and execute it if desired
    tmp_st{il1}.report.fun = str2func(sprintf('report_stats_%s', id_str));

    if do_report
      fprintf('Doing report for %s\n', func2str(tmp_st{il1}.report.fun))
      params.report.question = qinfo(iqid);  % kludge: hand question info to the report function
      tmp_st{il1}.report.fun(tmp_st{il1},params.report);
    end
  end % for il1 = 1:nlevel1
  an_st_l1.data = tmp_st;

  an_st.data{iqid} = an_st_l1;
end % for iqid

an_st.meta.params = params;

end % function ensemble_enum_stats
0283 
0284 %
0285 % START OF VARIOUS SUB-FUNCTIONS
0286 %
0287 
function report_stats_by_subject(data_st,params)
% Reporting hook for the subject-level ('by_subject') statistics.
%
% At present this is a placeholder: it only builds the variable/column lookup
% for data_st.vars via set_var_col_const and produces no output. params is
% accepted so that the signature matches the other report_stats_* functions,
% but it is not used.
  var_cols = set_var_col_const(data_st.vars);

end % report_stats_by_subject(data_st,params)
0292 
function report_stats_across_subjects(data_st,params)
% Prints a one-row summary table of the across-subject statistics.
%
% INPUTS
%   data_st - result structure of the 'across_subjects' analysis. Must
%             contain the variables 'nsub', 'mean' and 'std'; if a
%             'mean_ttest' variable is present its p-value and t-statistic
%             are reported as well, otherwise NaN is printed for both.
%   params  - reporting parameters:
%     .tables   - passed to ensemble_init_fid to obtain the output file ID
%     .question - optional question metadata (question_text, enum_values)
%                 used to label the row
%
% The table columns are: label, N, Mean, SEM, T, prob.
  col = set_var_col_const(data_st.vars);

  % Deal with opening the file ID
  fid = ensemble_init_fid(params.tables);

  % Prepare variables for printing
  nsub = data_st.data{col.nsub};
  m = data_st.data{col.mean};
  sd = data_st.data{col.std};

  % Standard error of the mean. The standard deviation is produced by
  % MATLAB's std, which by default already normalizes by N-1, so the SEM is
  % sd/sqrt(N) (dividing by sqrt(N-1) would overestimate it and yields
  % a division by zero for a single subject).
  sem = sd/sqrt(nsub);

  if isfield(col,'mean_ttest')
    ttest_st = data_st.data{col.mean_ttest};
    ttest_cols = set_var_col_const(ttest_st.vars);
    pvalue = ttest_st.data{ttest_cols.p};
    tvalue = ttest_st.data{ttest_cols.stats}.tstat;
  else
    pvalue = NaN;
    tvalue = NaN;
  end

  % Build the row label from the question text and the labels of the first
  % and last enum categories
  if isfield(params,'question')
    qtxt = params.question.question_text;
    num_enum = length(params.question.enum_values);
    enum_str = sprintf('1=%s, %d=%s', ...
      params.question.enum_values{1}, ...
      num_enum, ...
      params.question.enum_values{num_enum});
    qtxt = sprintf('%s (%s):', qtxt, enum_str);
  else
    qtxt = '';
  end

  fprintf(fid,'%50s\tN\tMean\tSEM\tT\tprob\n','');
  fprintf(fid,'%50s\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n', qtxt, nsub, m, sem, tvalue, pvalue);

  % Only close real files: identifiers 1 and 2 are standard output and
  % standard error and must be left open.
  if fid > 2
    fclose(fid);
  end
end % report_stats_across_subjects(data_st,params)
0334
ensemble_enum_stats

PURPOSE

SYNOPSIS

DESCRIPTION

CROSS-REFERENCE INFORMATION

SUBFUNCTIONS

SOURCE CODE