function an_st = ensemble_enum_stats(data_st,params)
% ENSEMBLE_ENUM_STATS  Per-question statistics for enum-type responses.
%
%   an_st = ensemble_enum_stats(data_st, params)
%
% For every enum (non-checkbox) question found in data_st, computes the
% requested statistics within each subject and then across subjects.
%
% INPUTS
%   data_st - Ensemble data structure. Its vars must include compqid,
%             response_enum and subject_id columns (these are the columns
%             indexed below).
%   params  - structure with the following fields read by this function:
%     .stats.by_subject       - struct whose field names are the statistics
%                               to compute per subject. 'mean', 'std',
%                               'min' and 'max' are evaluated; other names
%                               get a column that stays at its initial value.
%                               'mean' must be present because the
%                               across-subject pass operates on it.
%     .stats.across_subjects  - struct whose field names are the statistics
%                               to compute on the per-subject means. If a
%                               statistic's value is itself a struct, its
%                               fields name follow-up procedures; only
%                               'ttest' is handled (optional .ttest.mu,
%                               default 'midpoint' = (ncat+1)/2).
%     .mysql.conn_id or .ensemble.conn_id - database connection ID
%                               (required).
%     .filt                   - optional filter passed to ensemble_filter.
%     .report.print_tables    - optional flag (default 1); when true the
%                               report_stats_<level> function is called
%                               with params.report.
%
% OUTPUT
%   an_st - data structure of type 'enum_stats_by_compqid' with one entry
%           per question in vars/data. Each entry is an 'enum_basic_stats'
%           structure holding the 'by_subject' and 'across_subjects'
%           results. The (modified) params are stored in an_st.meta.params.
%           If params.stats is missing, or ensemble_check_compqid returns
%           empty, the structure is returned without results.

an_st = ensemble_init_data_struct;
an_st.type = 'enum_stats_by_compqid';

% Nothing to do without a list of requested statistics
if ~isfield(params,'stats')
  fprintf('ensemble_enum_stats: No stats requests specified\n');
  return
end

% Locate the database connection: params.mysql takes precedence over
% params.ensemble
try
  conn_id = params.mysql.conn_id;
catch
  try
    conn_id = params.ensemble.conn_id;
  catch
    conn_id = [];
  end
end
if isempty(conn_id)
  error('%s: Need to have a valid database connection ID', mfilename)
end

data_st = ensemble_check_compqid(data_st);
if isempty(data_st)
  return
end

% Column indices into the input data
incol = set_var_col_const(data_st.vars);

% Apply any caller-specified filtering
if isfield(params,'filt')
  fprintf('Applying filtering criteria\n')
  data_st = ensemble_filter(data_st, params.filt);
end

% Integer question IDs present in the data. Truncate before taking the
% unique set so that several compqids sharing one question ID do not
% produce duplicate entries in the metadata request.
qids = unique(fix(data_st.data{incol.compqid}));

qinfo = mysql_extract_metadata('conn_id', conn_id, ...
    'table','question',...
    'question_id',qids);

% Keep only enum questions
qinfo_enum_mask = ismember({qinfo.type},'enum');
if any(~qinfo_enum_mask)
  fprintf('ensemble_enum_stats: Removing %d non-enum qids\n', sum(~qinfo_enum_mask));
  qinfo(~qinfo_enum_mask) = [];
end

% Drop checkbox (bitmask) questions
qinfo_bitmask_mask = ismember({qinfo.html_field_type},'checkbox');
if any(qinfo_bitmask_mask)
  fprintf('ensemble_enum_stats: Removing %d bitmask qids from list of qids\n', sum(qinfo_bitmask_mask));
  qinfo(qinfo_bitmask_mask) = [];
end

enum_compqids = [qinfo.compqid];

% Restrict the data to the surviving compqids
filt.include.any.compqid = enum_compqids;
data_st = ensemble_filter(data_st,filt);

% Response values, converted by enum2data
% NOTE(review): presumably maps bit-coded enum responses to category
% numbers -- confirm against enum2data
data_vect = data_st.data{incol.response_enum};
data_vect = enum2data(data_vect);

% One mask column per subject
[sub_mask_mtx, subids] = make_mask_mtx(data_st.data{incol.subject_id});
nsub = length(subids);

% Reporting flag. It depends only on params.report.print_tables, which is
% never modified below, so it is evaluated once here.
try
  do_report = params.report.print_tables;
catch
  do_report = 1;
end

nqid = length(qinfo);
for iqid = 1:nqid
  an_st.vars{iqid} = sprintf('compqid %s',num2str(qinfo(iqid).compqid));

  % Record the current question in the params stored with the output
  params.display.qinfo = qinfo(iqid);

  % Number of response categories for this question
  enum_values = qinfo(iqid).enum_values;
  ncat = length(enum_values);

  % Rows belonging to this question
  qid_mask = ismember(data_st.data{incol.compqid},qinfo(iqid).compqid);

  an_st_l1 = ensemble_init_data_struct;
  an_st_l1.type = 'enum_basic_stats';
  an_st_l1.vars = {'by_subject','across_subjects'};
  an_st_l1.meta.question = qinfo(iqid);

  nlevel1 = length(an_st_l1.vars);

  tmp_st = {};
  tmp_idx = [];
  for il1 = 1:nlevel1
    id_str = an_st_l1.vars{il1};
    tmp_idx.(id_str) = il1;

    tmp_st{il1} = ensemble_init_data_struct;
    tmp_st{il1}.type = sprintf('enum_stats_%s', id_str);

    % Statistics requested for this level
    stats_list = fieldnames(params.stats.(id_str));
    nstats = length(stats_list);

    switch id_str
      case 'by_subject'
        aux_vars = {'subject_id','nresp'};
      case 'across_subjects'
        aux_vars = {'nsub'};
    end
    tmp_vars = [aux_vars stats_list'];
    tmp_st_cols = set_var_col_const(tmp_vars);
    tmp_st{il1}.vars = tmp_vars;

    % Initialize the output columns
    for ia = 1:length(tmp_vars)
      switch tmp_vars{ia}
        case {'subject_id'}
          tmp_st{il1}.data{ia} = subids;
        otherwise
          switch id_str
            case 'by_subject'
              tmp_st{il1}.data{ia} = zeros(nsub,1);
            otherwise
              tmp_st{il1}.data{ia} = [];
          end
      end
    end

    switch id_str
      case 'by_subject'
        for isub = 1:nsub
          % Responses of this subject to this question
          resp_mask = sub_mask_mtx(:,isub) & qid_mask;

          nresp = sum(resp_mask);
          tmp_st{il1}.data{tmp_st_cols.nresp}(isub) = nresp;

          no_resps = (nresp == 0);

          for istat = 1:nstats
            stat_str = stats_list{istat};

            % Subjects without responses get NaN for every statistic
            if no_resps
              tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = NaN;
              continue
            end

            switch stat_str
              case {'mean','std','min','max'}
                fh = str2func(stat_str);
                tmp_st{il1}.data{tmp_st_cols.(stat_str)}(isub) = fh(data_vect(resp_mask));
            end
          end
        end

      case 'across_subjects'
        % Across-subject statistics are computed on the per-subject means
        src_st = tmp_st{tmp_idx.by_subject};
        src_cols = set_var_col_const(src_st.vars);

        if ~isfield(src_cols,'mean')
          error('%s: across_subjects statistics require ''mean'' in params.stats.by_subject', mfilename)
        end
        src_data = src_st.data{src_cols.mean};

        % Drop subjects that had no responses
        src_data(any(isnan(src_data),2),:) = [];

        tmp_st{il1}.data{tmp_st_cols.nsub} = size(src_data,1);
        for istat = 1:nstats
          stat_str = stats_list{istat};
          switch stat_str
            case {'mean','std','min','max'}
              fh = str2func(stat_str);
              tmp_st{il1}.data{tmp_st_cols.(stat_str)} = fh(src_data);

              % Optional follow-up procedures attached to this statistic
              if isstruct(params.stats.(id_str).(stat_str))
                proc_list = fieldnames(params.stats.(id_str).(stat_str));
                for iproc = 1:length(proc_list)
                  switch proc_list{iproc}
                    case 'ttest'
                      % Test value defaults to the scale midpoint
                      try
                        mu = params.stats.(id_str).(stat_str).ttest.mu;
                      catch
                        mu = 'midpoint';
                      end
                      if ischar(mu) && strcmp(mu,'midpoint')
                        mu = (ncat+1)/2;
                      end

                      tmp_st2 = ensemble_init_data_struct;
                      tmp_st2.type = proc_list{iproc};
                      tmp_st2.vars = {'H','p','ci','stats'};
                      [tmp_st2.data{1:nargout(proc_list{iproc})}] = ttest(src_data, mu);
                    otherwise
                      % Unrecognized procedure: nothing to append
                      continue
                  end
                  % Append the result as a new column, e.g. 'mean_ttest'
                  tmp_st{il1}.vars{end+1} = sprintf('%s_%s',stat_str,proc_list{iproc});
                  tmp_st_cols = set_var_col_const(tmp_st{il1}.vars);
                  tmp_st{il1}.data{end+1} = tmp_st2;
                end
              end
          end
        end

      otherwise

    end

    % Attach the reporting function for this level
    tmp_st{il1}.report.fun = str2func(sprintf('report_stats_%s', id_str));

    if do_report
      fprintf('Doing report for %s\n', func2str(tmp_st{il1}.report.fun))
      params.report.question = qinfo(iqid);
      tmp_st{il1}.report.fun(tmp_st{il1},params.report);
    end
  end % for il1
  an_st_l1.data = tmp_st;

  an_st.data{iqid} = an_st_l1;
end % for iqid

an_st.meta.params = params;

end
0283
0284
0285
0286
0287
function report_stats_by_subject(data_st,params)
% REPORT_STATS_BY_SUBJECT  Placeholder reporter for per-subject statistics.
%
% Builds the column-index lookup for data_st.vars and writes no output.
% params is accepted so the signature matches
% report_stats_across_subjects; it is not used here.
var_cols = set_var_col_const(data_st.vars); %#ok<NASGU>

end
0292
function report_stats_across_subjects(data_st,params)
% REPORT_STATS_ACROSS_SUBJECTS  Print a one-row summary table for a question.
%
% data_st - data structure whose vars include 'nsub', 'mean' and 'std',
%           and optionally 'mean_ttest' (a nested structure with 'p' and
%           'stats' variables).
% params  - reporting parameters:
%             .tables   - passed to ensemble_init_fid to obtain the output
%                         file identifier
%             .question - optional question metadata; when present its
%                         question_text and first/last enum_values label
%                         the row
%
% Writes a header line and one line with N, mean, SEM, t and p to the
% file identifier. Identifiers greater than 1 are closed afterwards.

col = set_var_col_const(data_st.vars);

fid = ensemble_init_fid(params.tables);

nsub = data_st.data{col.nsub};
m = data_st.data{col.mean};
sd = data_st.data{col.std};
% Standard error of the mean. sd comes from MATLAB's std, which already
% normalizes by N-1, so the divisor is sqrt(N); this also matches the
% t statistic reported alongside it.
sem = sd/sqrt(nsub);

% t-test results are only available if they were requested
if isfield(col,'mean_ttest')
  ttest_st = data_st.data{col.mean_ttest};
  ttest_cols = set_var_col_const(ttest_st.vars);
  pvalue = ttest_st.data{ttest_cols.p};
  tvalue = ttest_st.data{ttest_cols.stats}.tstat;
else
  pvalue = NaN;
  tvalue = NaN;
end

% Row label: question text plus the labels of the scale endpoints
if isfield(params,'question')
  qtxt = params.question.question_text;
  num_enum = length(params.question.enum_values);
  enum_str = sprintf('1=%s, %d=%s', ...
      params.question.enum_values{1}, ...
      num_enum, ...
      params.question.enum_values{num_enum});
  qtxt = sprintf('%s (%s):', qtxt, enum_str);
else
  qtxt = '';
end

fprintf(fid,'%50s\tN\tMean\tSEM\tT\tprob\n','');
fprintf(fid,'%50s\t%d\t%1.2f\t%1.2f\t%1.2f\t%1.4f\n', qtxt, nsub, m, sem, tvalue, pvalue);

% Leave stdout (fid 1) open
if fid > 1
  fclose(fid);
end
end
0334