Home > database > ensemble_enum_stats.m

ensemble_enum_stats

PURPOSE ^

Calculates statistics on responses to enum questions.

SYNOPSIS ^

function an_st = ensemble_enum_stats(data_st,params)

DESCRIPTION ^

 Calculates statistics on responses to enum questions.
 
 outdata = ensemble_enum_stats(data_st,params);

 Calculates various descriptive and quantitative statistics on responses to
 questions that are enums.

 Note: Currently, the script does not treat the same question appearing on
 different forms as separate instances of the question. If you don't want
 answers to the same question on different forms to be combined, you must
 filter the data so that only forms with unique question IDs are processed.
 This behavior may change in future versions.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function an_st = ensemble_enum_stats(data_st,params)
% Calculates statistics on responses to enum questions.
%
% an_st = ensemble_enum_stats(data_st,params);
%
% Calculates various descriptive and quantitative statistics on responses to
% questions that are enums.
%
% INPUTS
%   data_st - Ensemble data structure holding the response data. The columns
%             compqid, response_enum and subject_id are read from it
%             (compqid is ensured by ensemble_check_compqid).
%   params  - structure with the following fields:
%     .stats             - REQUIRED. Must contain the sub-structures
%                          .by_subject and .across_subjects. Each field name
%                          within them is the name of a statistic to compute.
%                          Statistics evaluated here: 'mean','std','min','max'.
%                          Because the across_subjects level uses the
%                          subject-level means as its input, 'mean' must be
%                          requested in .by_subject.
%                          If .across_subjects.<stat> is itself a structure,
%                          its fields name further processing steps. Currently
%                          only 'ttest' is handled; .ttest.mu gives the
%                          comparison value (default 'midpoint', i.e.
%                          (number of enum categories + 1)/2).
%     .mysql.conn_id or
%     .ensemble.conn_id  - database connection ID (one of them is required)
%     .filt              - optional filtering criteria passed to ensemble_filter
%     .report.print_tables - optional flag (default 1) controlling whether the
%                          reporting function of each level is executed
%
% OUTPUT
%   an_st - data structure of type 'enum_stats_by_compqid'. It has one entry
%           per compqid; each entry is a structure of type 'enum_basic_stats'
%           whose data hold the 'by_subject' and 'across_subjects' results.
%
% Note: Currently, the script does not treat the same question appearing on
% different forms as separate instances of the question. If you don't want
% answers to the same question on different forms to be combined, you must
% filter the data so that only forms with unique question IDs are processed.
% This behavior may change in future versions.

%
% 02/02/07 Petr Janata - adapted from ensemble_enum_hist
% 10/07/08 PJ - generalized to handle other databases

an_st = ensemble_init_data_struct;
an_st.type = 'enum_stats_by_compqid';

% Make sure that a stats structure has been specified as part of the params structure
if ~isfield(params,'stats')
  fprintf('ensemble_enum_stats: No stats requests specified\n');
  return
end

% Make sure we have a database connection
try
  conn_id = params.mysql.conn_id;
catch
  try
    conn_id = params.ensemble.conn_id;
  catch
    conn_id = [];
  end
end
if isempty(conn_id)
  error('%s: Need to have a valid database connection ID', mfilename)
end

% Make sure we have a compqid variable
data_st = ensemble_check_compqid(data_st);
if isempty(data_st)
  return
end

% Set the column constants
incol = set_var_col_const(data_st.vars);

% Apply any specified filtering to the input data
if isfield(params,'filt')
  fprintf('Applying filtering criteria\n')
  data_st = ensemble_filter(data_st, params.filt);
end

%
% Gather metadata on the questions
%

% Get the list of unique question IDs. The integer part is taken BEFORE
% calling unique so that several compqids that share the same integer part
% do not produce repeated IDs in the metadata request.
qids = unique(fix(data_st.data{incol.compqid}));

qinfo = mysql_extract_metadata('conn_id', conn_id, ...
  'table','question',...
  'question_id',qids);

% Figure out which of the questions in the qinfo structure are enums and remove
% those that are not
qinfo_enum_mask = ismember({qinfo.type},'enum');
if any(~qinfo_enum_mask)
  fprintf('ensemble_enum_stats: Removing %d non-enum qids\n', sum(~qinfo_enum_mask));
  qinfo(~qinfo_enum_mask) = [];
end

% Figure out which of the questions are bitmasks that allow for selection of
% multiple values (checkbox as opposed to radiogroup) and remove these from the
% qinfo array
qinfo_bitmask_mask = ismember({qinfo.html_field_type},'checkbox');
if any(qinfo_bitmask_mask)
  fprintf('ensemble_enum_stats: Removing %d bitmask qids from list of qids\n', sum(qinfo_bitmask_mask));
  qinfo(qinfo_bitmask_mask) = [];
end

% Restrict the response data to the compqids that survived the checks above
enum_compqids = [qinfo.compqid];
filt.include.any.compqid = enum_compqids;
data_st = ensemble_filter(data_st,filt);

% Copy all of the enum data that are not a bitmask to the data_vect and convert
% to category indices
data_vect = data_st.data{incol.response_enum};
data_vect = enum2data(data_vect);

% Precalculate the subject masks
[sub_mask_mtx, subids] = make_mask_mtx(data_st.data{incol.subject_id});
nsub = length(subids);

% Determine once whether the per-level reports should be executed. The flag
% defaults to 1 when params.report.print_tables has not been specified.
try
  do_report = params.report.print_tables;
catch
  do_report = 1;
end

%
% Loop over all of the unique question/subquestion combinations or compqids
%
nqid = length(qinfo);
for iqid = 1:nqid
  an_st.vars{iqid} = sprintf('compqid %s',num2str(qinfo(iqid).compqid));

  % Copy the question info over to the display parameter structure in case we
  % are going to display some of the data
  params.display.qinfo = qinfo(iqid);

  % Get the enum categories
  enum_values = qinfo(iqid).enum_values;
  ncat = length(enum_values);

  % Make a mask for the data corresponding to this question
  qid_mask = ismember(data_st.data{incol.compqid},qinfo(iqid).compqid);

  an_st_l1 = ensemble_init_data_struct;
  an_st_l1.type = 'enum_basic_stats';
  an_st_l1.vars = {'by_subject','across_subjects'};
  an_st_l1.meta.question = qinfo(iqid);

  nlevel1 = length(an_st_l1.vars);

  tmp_st = {};
  tmp_idx = struct();
  for il1 = 1:nlevel1
    id_str = an_st_l1.vars{il1};
    tmp_idx.(id_str) = il1;

    tmp_st{il1} = ensemble_init_data_struct;
    tmp_st{il1}.type = sprintf('enum_stats_%s', id_str);

    % The variables in this analysis are all of the fields within the stats
    % field of the params structure
    stats_list = fieldnames(params.stats.(id_str));
    nstats = length(stats_list);

    % Each level carries a few bookkeeping variables ahead of the statistics
    switch id_str
      case 'by_subject'
        aux_vars = {'subject_id','nresp'};
      case 'across_subjects'
        aux_vars = {'nsub'};
    end
    tmp_vars = [aux_vars stats_list'];
    tmp_st_cols = set_var_col_const(tmp_vars);
    tmp_st{il1}.vars = tmp_vars;

    % Initialize output variables
    for ia = 1:length(tmp_vars)
      if strcmp(tmp_vars{ia},'subject_id')
        tmp_st{il1}.data{ia} = subids;
      elseif strcmp(id_str,'by_subject')
        tmp_st{il1}.data{ia} = zeros(nsub,1);
      else
        tmp_st{il1}.data{ia} = [];
      end
    end % for ia=1:length(tmp_vars)

    %
    % Execute some type-specific code. One can imagine additional case
    % statements for 'by_trial' or 'by_attribute'
    %
    switch id_str
      %
      % Deal with the set of by_subject analyses
      %
      case 'by_subject'
        for isub = 1:nsub
          % Responses that this subject gave to this question
          resp_mask = sub_mask_mtx(:,isub) & qid_mask;

          % Tally the number of responses the subject made to this question
          nresp = sum(resp_mask);
          tmp_st{il1}.data{tmp_st_cols.nresp}(isub) = nresp;

          % Now loop over all of the analyses we want to perform
          for istat = 1:nstats
            stat_str = stats_list{istat};

            % Subjects without any responses get a NaN for every statistic
            if ~nresp
              tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = NaN;
              continue
            end

            switch stat_str
              case {'mean','std','min','max'}
                fh = str2func(stat_str);
                tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = fh(data_vect(resp_mask));
            end % switch stat_str
          end % for istat=
        end % for isub

      %
      % Deal with the set of across_subjects analyses
      %
      case 'across_subjects'
        src_st = tmp_st{tmp_idx.by_subject};
        src_cols = set_var_col_const(src_st.vars);

        % 02/02/07 PJ Currently hard-coded to use subject-level means as input into this
        % level of the analysis. Ultimately, this should really become another
        % level of abstraction which supports different types of source data.
        if ~isfield(src_cols,'mean')
          error('%s: across_subjects statistics require ''mean'' to be requested in params.stats.by_subject', mfilename)
        end
        src_data = src_st.data{src_cols.mean};

        % Remove any data with NaNs
        src_data(any(isnan(src_data),2),:) = [];

        tmp_st{il1}.data{tmp_st_cols.nsub} = size(src_data,1);
        for istat = 1:nstats
          stat_str = stats_list{istat};
          switch stat_str
            case {'mean','std','min','max'}

              % Evaluate the basic function
              fh = str2func(stat_str);
              tmp_st{il1}.data{tmp_st_cols.(stat_str)} = fh(src_data);

              % See if there is additional processing to be done
              if isstruct(params.stats.(id_str).(stat_str))
                proc_list = fieldnames(params.stats.(id_str).(stat_str));
                for iproc = 1:length(proc_list)
                  switch proc_list{iproc}
                    case 'ttest'
                      % CATCH is kept on its own line: in the single-line
                      % form "catch mu = ..." the identifier following CATCH
                      % can be parsed as the exception variable.
                      try
                        mu = params.stats.(id_str).(stat_str).ttest.mu;
                      catch
                        mu = 'midpoint';
                      end
                      if ischar(mu) && strcmp(mu,'midpoint')
                        mu = (ncat+1)/2;
                      end

                      tmp_st2 = ensemble_init_data_struct;
                      tmp_st2.type = proc_list{iproc};
                      tmp_st2.vars = {'H','p','ci','stats'};
                      [tmp_st2.data{1:nargout(proc_list{iproc})}] = ttest(src_data, mu);
                    otherwise
                      % Unknown processing request: skip without registering
                      % an output variable for it
                      continue
                  end

                  % Append the result as an additional variable, e.g. mean_ttest
                  tmp_st{il1}.vars{end+1} = sprintf('%s_%s',stat_str,proc_list{iproc});
                  tmp_st_cols = set_var_col_const(tmp_st{il1}.vars);
                  tmp_st{il1}.data{end+1} = tmp_st2;
                end % for iproc
              end % if isstruct(params.stats.(id_str).(stat_str)
          end % switch stat_str
        end % for istat=

      otherwise

    end % switch id_str (by_subject, across_subjects)

    % Register a reporting function and execute it if desired
    tmp_st{il1}.report.fun = str2func(sprintf('report_stats_%s', id_str));

    if do_report
      fprintf('Doing report for %s\n', func2str(tmp_st{il1}.report.fun))
      params.report.question = qinfo(iqid);  % kludge: hand the question info to the report function
      tmp_st{il1}.report.fun(tmp_st{il1},params.report);
    end
  end % for il1 = 1:nlevel1
  an_st_l1.data = tmp_st;

  an_st.data{iqid} = an_st_l1;
end % for iqid

an_st.meta.params = params;

end % function ensemble_enum_stats
0283 
0284 %
0285 % START OF VARIOUS SUB-FUNCTIONS
0286 %
0287 
function report_stats_by_subject(data_st,params)
% Reporting hook for the by-subject statistics.
%
% Placeholder: it only resolves the column constants of data_st.vars and
% does not print anything. params is accepted so that the signature matches
% the other report_stats_* functions, but it is not used.
  by_subject_cols = set_var_col_const(data_st.vars); %#ok<NASGU>

end % report_stats_by_subject(an_st,params)
0292 
function report_stats_across_subjects(data_st,params)
% Prints a one-row table summarizing the across-subjects statistics.
%
% data_st - across-subjects result structure. The variables nsub, mean and
%           std must be present; mean_ttest is optional. When it is present,
%           its 'p' entry and the tstat field of its 'stats' entry are printed.
% params  - report parameters:
%   .tables   - passed to ensemble_init_fid to obtain the output file ID
%   .question - optional question info; question_text and enum_values are
%               used to label the row
%
% Columns printed: N, Mean, SEM, T, prob. T and prob are NaN when no t-test
% result is available.
  col = set_var_col_const(data_st.vars);

  % Prepare variables for printing. This is done before the output file is
  % opened so that a missing variable cannot leave a file handle open.
  nsub = data_st.data{col.nsub};
  m = data_st.data{col.mean};
  sd = data_st.data{col.std};

  % std normalizes by N-1 by default, so the standard error of the mean is
  % sd/sqrt(N). This also matches the standard error underlying the t
  % statistic reported in the same row.
  sem = sd/sqrt(nsub);

  if isfield(col,'mean_ttest')
    ttest_st = data_st.data{col.mean_ttest};
    ttest_cols = set_var_col_const(ttest_st.vars);
    pvalue = ttest_st.data{ttest_cols.p};
    tvalue = ttest_st.data{ttest_cols.stats}.tstat;
  else
    pvalue = NaN;
    tvalue = NaN;
  end

  % Build the row label from the question text and the labels of the first
  % and last enum categories
  if isfield(params,'question')
    qtxt = params.question.question_text;
    num_enum = length(params.question.enum_values);
    if num_enum > 0
      enum_str = sprintf('1=%s, %d=%s', ...
        params.question.enum_values{1}, ...
        num_enum, ...
        params.question.enum_values{num_enum});
      qtxt = sprintf('%s (%s):', qtxt, enum_str);
    else
      % No category labels available: label the row with the question only
      qtxt = sprintf('%s:', qtxt);
    end
  else
    qtxt = '';
  end

  % Deal with opening the file ID
  fid = ensemble_init_fid(params.tables);

  fprintf(fid,'%50s\tN\tMean\tSEM\tT\tprob\n','');
  fprintf(fid,'%50s\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n', qtxt, nsub, m, sem, tvalue, pvalue);

  % File IDs 1 and 2 are standard output and standard error; only close
  % real files
  if fid > 2
    fclose(fid);
  end
end % report_stats_across_subjects(an_st,params)
0334

Generated on Wed 20-Sep-2023 04:00:50 by m2html © 2003