function an_st = ensemble_enum_hist(data_st,params)
% ENSEMBLE_ENUM_HIST  Tabulate response distributions for enum-type questions.
%
%   an_st = ensemble_enum_hist(data_st, params)
%
% For every question found in data_st whose metadata type is 'enum', two
% analysis structures are appended to the output cell array:
%
%   'enum_category_by_<item>'     vars {<item_var>,'count','prop','nresp'}
%       Per-item (rows) by category (columns) response counts, the
%       corresponding proportions (count ./ nresp, NaN where nresp is 0)
%       and the number of responses per item.
%   'enum_category_across_<item>' vars {'prop','count'}
%       Each data cell holds a 'distrib_stats' structure with
%       mean/std/min/max (one value per category, computed across items)
%       and nitems.
%
% Text tables are written to the file identifier obtained from
% ensemble_init_fid (standard output if that call fails), and figures are
% generated when the report options request plotting.
%
% INPUTS
%   data_st - structure with fields .vars (column names) and .data (cell
%             array of columns). Columns used here: question_id or compqid,
%             optionally subquestion, response_enum, and the column named by
%             params.item_var.
%   params  - structure; fields read by this function:
%     .ensemble.conn_id | .mysql.conn_id  database connection ID
%     .display | .report    report options ('display' wins if both exist):
%         .figs.plot        generate figures when true (default 0)
%         .tables           passed to ensemble_init_fid
%         .by_item          write the per-item report (default 1)
%         .across_items.an_types  which of {'prop','count'} to plot
%     .filt                 if present, passed to ensemble_filter
%     .item_var             grouping column: 'session_id' (default),
%                           'subject_id', 'stimulus_id', 'none', or another
%                           column name (a warning is printed)
%     .force_cell2double    convert cell-array responses with str2num
%
% OUTPUT
%   an_st   - cell array of analysis structures (empty if no question ID
%             column is found).

an_st = {};
na = 0;

% Database connection ID: prefer params.ensemble, fall back to params.mysql
try
    conn_id = params.ensemble.conn_id;
catch
    conn_id = [];
end

if isempty(conn_id)
    try
        conn_id = params.mysql.conn_id;
    catch
        conn_id = [];
    end
end

% Determine which field holds the report options
if isfield(params,'display') && isfield(params,'report')
    fprintf('%s: both display and report fields specified. Do not know which to use...\n', mfilename);
end
report_string_types = {'display','report'};
repstr = 'report';
for itype = 1:length(report_string_types)
    if isfield(params,report_string_types{itype})
        repstr = report_string_types{itype};
        break
    end
end

% Work on a local copy of the report options. The caller's params are no
% longer modified inside the question loop, so per-plot settings (title,
% ylim) cannot leak from one question into the next.
if isfield(params,repstr) && isstruct(params.(repstr))
    rep_base = params.(repstr);
else
    rep_base = struct();
end

try
    do_plot = rep_base.figs.plot;
catch
    do_plot = 0;
end

% All report options are read from the selected report struct. Previously
% by_item/across_items were read from params.report unconditionally, which
% failed when only params.display was supplied.
reportByItem = 1;
if isfield(rep_base,'by_item')
    reportByItem = rep_base.by_item;
end

across_vars = {'prop','count'};
if isfield(rep_base,'across_items') && isfield(rep_base.across_items,'an_types')
    report_vars = rep_base.across_items.an_types;
else
    report_vars = across_vars;
end

% Apply filtering criteria, if any
if isfield(params,'filt')
    fprintf('Applying filtering criteria\n')
    data_st = ensemble_filter(data_st, params.filt);
end

% Column indices of the input data, keyed by variable name
incol = set_var_col_const(data_st.vars);

% Unique question IDs present in the data
if isfield(incol,'question_id')
    qids = unique(data_st.data{incol.question_id});
elseif isfield(incol,'compqid')
    qids = fix(unique(data_st.data{incol.compqid}));
else
    fprintf('Did not find question_id column in the input data\n');
    return
end

% Question metadata. The fields used below are type, html_field_type,
% compqid, enum_values, question_text and heading.
qinfo = mysql_extract_metadata('table','question', ...
    'question_id',qids, ...
    'conn_id', conn_id);

% Composite question ID for every row of the data
% NOTE(review): if neither compqid nor subquestion is present this stays
% empty and the masks below will not match the data length - confirm
% whether that input is expected.
compqid_vect = [];
if isfield(incol,'compqid')
    compqid_vect = data_st.data{incol.compqid};
elseif isfield(incol,'subquestion')
    compqid_vect = make_compqid(data_st.data{incol.question_id}, ...
        data_st.data{incol.subquestion});
end

% Identify enum questions, and those among them that are not checkboxes
qinfo_enum_mask = ismember({qinfo.type},'enum');
qinfo_enum_idxs = find(qinfo_enum_mask);
nqid = length(qinfo_enum_idxs);

qinfo_bitmask_mask = ismember({qinfo.html_field_type},'checkbox');
qinfo_notbitmask_mask = qinfo_enum_mask & ~qinfo_bitmask_mask;

enum_compqids = [qinfo(qinfo_enum_mask).compqid];
enum_mask = ismember(compqid_vect, enum_compqids);

notbitmask_compqids = [qinfo(qinfo_notbitmask_mask).compqid];
notbitmask_mask = ismember(compqid_vect, notbitmask_compqids);

% Pull the enum responses into a numeric vector
data_vect = zeros(size(compqid_vect));
resp_enum = data_st.data{incol.response_enum}(enum_mask);
if ~iscell(resp_enum)
    data_vect(enum_mask) = resp_enum;
elseif isfield(params,'force_cell2double') && params.force_cell2double
    % NOTE(review): str2num evaluates its input; if these strings can come
    % from an untrusted source consider str2double instead.
    data_vect(enum_mask) = cellfun(@str2num, resp_enum);
else
    error('Response enum data is likely encrypted!')
end

% Non-checkbox responses are passed through enum2data
% (presumably converting the stored enum code to a category index - confirm)
data_vect(notbitmask_mask) = enum2data(data_vect(notbitmask_mask));

% Variable by which responses are grouped
if isfield(params,'item_var')
    item_str = params.item_var;
else
    item_str = 'session_id';
end

switch item_str
    case 'session_id'
        item_type = 'session';
    case 'subject_id'
        item_type = 'subject';
    case 'stimulus_id'
        item_type = 'stimulus';
    case 'none'
        item_type = 'none';
    otherwise
        fprintf('%s: Unknown item variable: %s\n', mfilename, item_str);
        % Previously item_type was left undefined here, which caused an
        % error at the next statement. Use the variable name itself.
        item_type = item_str;
end

if strcmp(item_type,'none')
    item_mask_mtx = ones(length(data_st.data{1}),1);
    itemids = 1;
else
    % One mask column per unique item
    [item_mask_mtx, itemids] = make_mask_mtx(data_st.data{incol.(item_str)});
end
nitems = length(itemids);

% Destination for the text tables; fall back to standard output
try
    fid = ensemble_init_fid(rep_base.tables);
catch
    fid = 1;
end

% Layout of the across-item summary statistics
sa_vars = {'mean','std','min','max','nitems'};
sa_cols = set_var_col_const(sa_vars);

%
% Loop over enum questions
%
for iqid = 1:nqid
    curr_qid_idx = qinfo_enum_idxs(iqid);
    curr_qinfo = qinfo(curr_qid_idx);

    % Report options handed to the plotting functions for this question
    q_params = rep_base;
    q_params.qinfo = curr_qinfo;

    enum_values = curr_qinfo.enum_values;
    ncat = length(enum_values);

    % Rows of the data belonging to this question
    qid_mask = ismember(compqid_vect,curr_qinfo.compqid);

    %
    % Per-item analysis
    %
    na = na+1;

    an_st{na} = init_analysis_struct;
    an_st{na}.type = sprintf('enum_category_by_%s', item_type);
    an_vars = {item_str,'count','prop','nresp'};
    an_st{na}.vars = an_vars;
    an_cols = set_var_col_const(an_vars);
    by_item_an_cols = an_cols;
    by_item_an_idx = na;
    nr = 0;

    % Initialize the output columns
    for ia = 1:length(an_vars)
        switch an_vars{ia}
            case {'count','prop'}
                an_st{na}.data{ia} = zeros(nitems,ncat);
            case item_str
                an_st{na}.data{ia} = itemids;
            case {'nresp'}
                an_st{na}.data{ia} = zeros(nitems,1);
        end
    end

    % Count responses per category for each item
    for iitem = 1:nitems
        curr_mask = item_mask_mtx(:,iitem) & qid_mask;

        nresp = sum(curr_mask);
        an_st{na}.data{an_cols.nresp}(iitem) = nresp;

        if ~nresp
            continue
        end

        switch curr_qinfo.html_field_type
            case {'radiogroup','menu'}
                an_st{na}.data{an_cols.count}(iitem,:) = hist(data_vect(curr_mask),1:ncat)';
            otherwise
                % Multiple categories may be selected per response
                bitmask = data2bitmask(data_vect(curr_mask),ncat);
                if size(bitmask,1) > 1
                    sumvect = sum(bitmask);
                else
                    sumvect = bitmask;
                end
                an_st{na}.data{an_cols.count}(iitem,:) = sumvect;
        end
    end

    % Proportions; items without responses yield NaN rather than 0/0
    nresp_mtx = repmat(an_st{na}.data{an_cols.nresp},1,ncat);
    nresp_mtx(nresp_mtx == 0) = NaN;
    an_st{na}.data{an_cols.prop} = an_st{na}.data{an_cols.count} ./ nresp_mtx;

    an_st{na}.meta.question = curr_qinfo;

    %
    % Per-item report
    %
    if reportByItem
        nr = nr+1;
        an_st{na}.report{nr}.type = 'distrib_by_cat';
        an_st{na}.report{nr}.figfun = @plot_distrib_by_cat;
        if do_plot
            an_st{na}.report{nr}.figs = ...
                plot_distrib_by_cat(an_st{na}.data{an_cols.prop}, q_params);
        end

        fprintf(fid,'\nQuestion (%1.2f): %s\n', ...
            curr_qinfo.compqid, curr_qinfo.question_text);
        if ~isempty(curr_qinfo.heading)
            fprintf(fid,'Subquestion: %s\n', curr_qinfo.heading);
        end
        fprintf(fid,'N = %d\n', nitems);
        fprintf(fid,'%s\t%s\n', item_str, cell2str(enum_values,'\t'));
        for iitem = 1:nitems
            % Choose the label form from the actual container type so that
            % unknown item variables with numeric IDs also work
            if strcmp(item_str,'none')
                itemval_str = 'all';
            elseif iscell(itemids)
                itemval_str = itemids{iitem};
                if ~ischar(itemval_str)
                    itemval_str = num2str(itemval_str);
                end
            else
                itemval_str = sprintf('%d',itemids(iitem));
            end

            enum_str = sprintf('\t%d', an_st{na}.data{an_cols.count}(iitem,:));
            fprintf(fid,'%s%s\n', itemval_str, enum_str);
        end
    end

    %
    % Across-item analysis
    %
    na = na+1;

    an_st{na} = init_analysis_struct;
    an_st{na}.type = sprintf('enum_category_across_%s', item_type);

    an_vars = across_vars;
    an_st{na}.vars = an_vars;
    an_cols = set_var_col_const(an_vars);
    nr = 0;

    for ivar = 1:length(an_vars)
        an_type = an_vars{ivar};
        data = an_st{by_item_an_idx}.data{by_item_an_cols.(an_type)};
        has_nan = any(isnan(data(:)));

        sa_st = init_analysis_struct;
        sa_st.type = 'distrib_stats';
        sa_st.vars = sa_vars;

        % Every statistic is computed down the rows (across items) so that
        % there is one value per category, also when there is one item.
        for jvar = 1:length(sa_vars)
            sa_str = sa_vars{jvar};
            switch sa_str
                case 'mean'
                    if has_nan
                        result = nanmean(data,1);
                    else
                        result = mean(data,1);
                    end
                case 'nitems'
                    result = sum(~isnan(data(:,1)));
                case 'std'
                    % The original call std(data,1) selected normalization
                    % by N and left the dimension implicit. Keep that
                    % normalization, but fix the dimension explicitly.
                    if has_nan
                        result = nanstd(data,1,1);
                    else
                        result = std(data,1,1);
                    end
                case 'min'
                    % min(data,1) would compare each element with 1; the
                    % empty second argument requests a reduction. NaNs are
                    % ignored by min/max.
                    result = min(data,[],1);
                case 'max'
                    result = max(data,[],1);
            end

            sa_st.data{sa_cols.(sa_str)} = result;
        end

        an_st{na}.data{ivar} = sa_st;

        %
        % Across-item report
        %
        nr = nr+1;
        an_st{na}.report{nr}.type = 'distrib_by_cat';
        an_st{na}.report{nr}.figfun = @plot_hist;
        if do_plot && any(strcmp(an_type, report_vars))
            switch an_type
                case 'prop'
                    plot_title = 'Average proportion of responses in each category';
                    y_limits = [0 1];
                case 'count'
                    plot_title = 'Overall counts within in each category';
                    y_limits = [];
            end
            hist_params = q_params;
            hist_params.figs.ylim = y_limits;
            hist_params.figs.title = plot_title;
            an_st{na}.report{nr}.figs = plot_hist(an_st{na}.data{an_cols.(an_type)},hist_params);
        end
    end
    an_st{na}.meta.question = curr_qinfo;

    fprintf(fid,'\nQuestion (%1.2f): %s\n', ...
        curr_qinfo.compqid, curr_qinfo.question_text);
    if ~isempty(curr_qinfo.heading)
        fprintf(fid,'Subquestion: %s\n', curr_qinfo.heading);
    end

    fprintf(fid,'\nN = %d\n', nitems);

    % Mean proportion per category
    for ienum = 1:length(curr_qinfo.enum_values)
        fprintf(fid,'%30s:\t%1.2f\n', curr_qinfo.enum_values{ienum}, ...
            an_st{na}.data{an_cols.prop}.data{sa_cols.mean}(ienum));
    end
end

% Close the report file unless it is standard output
if fid > 1
    fclose(fid);
end

end
0428
function an_st = init_analysis_struct
% INIT_ANALYSIS_STRUCT  Return a blank analysis structure.
%
% The structure has the fields type (empty string), vars and data (empty
% cell arrays) and meta (empty matrix), in that order.

% Cell-valued fields are wrapped in an extra pair of braces so that
% struct() creates a scalar structure holding an empty cell.
an_st = struct( ...
    'type', '', ...
    'vars', {{}}, ...
    'data', {{}}, ...
    'meta', []);

end
0436
function out_pp = plot_hist(data_st,params)
% PLOT_HIST  Bar plot of the per-category means of a distrib_stats structure.
%
%   out_pp = plot_hist(data_st, params)
%
% INPUTS
%   data_st - structure with .vars and .data; the 'mean' and 'nitems'
%             variables are used.
%   params  - report options. Fields read:
%     .figs              plot options, returned (augmented) as out_pp:
%         .start_fignum, .sort, .sort_dir, .ylim, .ylabel, .title,
%         .add_nitems, .pagehdr, .data2file, .figfname, plus the fields
%         used by print_fig
%     .use_fig, .use_axes, .axislabelfontsize, .add_pagehdr
%     .qinfo             question metadata (enum_values, compqid,
%                        question_text)
%
% OUTPUT
%   out_pp  - params.figs with .fignum, .axes and (when a page header is
%             added) .pagehdr.title and .fig.title filled in.

pp = params.figs;

% Figure, axes and font settings, each with a fallback
try use_fig = params.use_fig;
catch
    try use_fig = pp.start_fignum; catch use_fig = []; end
end

try use_axes = params.use_axes; catch use_axes = []; end

try labelfontsize = params.axislabelfontsize; catch labelfontsize = 9; end

col = set_var_col_const(data_st.vars);

ncat = length(params.qinfo.enum_values);

max_chars_per_line = 50;

if isempty(use_fig)
    figure
else
    figure(use_fig), clf
end

nax = 1;
if ~isempty(use_axes)
    axes(use_axes)
end

% Optionally order the categories by their mean value
try plot_sorted = pp.sort; catch plot_sorted = 0; end

mean_vals = data_st.data{col.mean};
if plot_sorted
    try sortdir = pp.sort_dir; catch sortdir = 'descend'; end
    [ignored_vals, sorted_idxs] = sort(mean_vals,sortdir);
else
    sorted_idxs = 1:length(mean_vals);
end

sorted_data = mean_vals(sorted_idxs);
% NOTE(review): this divides the mean, not the standard deviation, by
% sqrt(nitems). Kept as in the original; confirm whether the 'std' variable
% of data_st was intended here.
sorted_stderr = sorted_data/sqrt(data_st.data{col.nitems});

bs = bar(1:ncat, sorted_data);
add_errorbars(bs,sorted_stderr');
set(gca,'xtick',[],'xlim',[0 ncat+1])
set(gca,'activepositionproperty','outerposition')

ax(nax) = gca;

% Category labels, written as rotated text below the bars
label_data = cell(1,ncat);
for icat = 1:ncat
    raw_label = params.qinfo.enum_values{sorted_idxs(icat)};

    % Use a single blank when there is nothing to print
    label_text = ' ';
    if ~isempty(raw_label)
        wrapped = linewrap(raw_label,18);
        if ~isempty(wrapped)
            label_text = cell2str(wrapped,'\n');
        end
    end

    text(icat,0,label_text,'rotation',-90, ...
        'horizontalalign','left', ...
        'verticalalign','middle', ...
        'fontsize', labelfontsize);
    label_data{icat} = label_text;
end

% Y-axis limits: user supplied, otherwise 20% headroom above the largest bar
try y_limits = pp.ylim; catch y_limits = []; end
if isempty(y_limits)
    y_top = 1.2*max(sorted_data);
    % Axis limits must be finite and increasing. All-zero or all-NaN data
    % previously produced [0 0] or [0 NaN], which set() rejects.
    if isempty(y_top) || ~isfinite(y_top) || y_top <= 0
        y_top = 1;
    end
    y_limits = [0 y_top];
end
set(gca,'ylim',y_limits)

try ylabel_str = pp.ylabel; catch ylabel_str = ''; end
ylabel(ylabel_str)

if ~isfield(pp,'title')
    pp.title = '';
end
title(pp.title)

if ~isfield(pp,'add_nitems') || pp.add_nitems
    add_nitems_txt(data_st.data{col.nitems});
end

% Page header with the plot title and the question text
try add_pagehdr = params.add_pagehdr; catch add_pagehdr = 1; end
if add_pagehdr
    pp.pagehdr.title = sprintf(['%s\nQuestion (%1.2f): %s'], ...
        pp.title, params.qinfo.compqid, ...
        cell2str(linewrap(params.qinfo.question_text,max_chars_per_line),'\n'));
    nax = nax+1;
    ax(nax)=add_fighdr(pp.pagehdr);
    pp.fig.title = pp.pagehdr.title;
end

print_fig(pp)

% Optionally write the plotted values to a CSV file next to the figure file
try data2file = pp.data2file; catch data2file = true; end
if data2file && isfield(pp, 'figfname') && ~isempty(pp.figfname)
    [fpath,fname] = fileparts(pp.figfname);
    datafname = fullfile(fpath,[fname '.csv']);

    fid = fopen(datafname,'wt');
    if fid == -1
        fprintf('FAILED to open %s for writing\n', datafname);
    else
        fprintf('Writing figure data to: %s\n', datafname);
        fprintf(fid,'Category,Data,StdErr\n');
        for icat = 1:length(label_data)
            fprintf(fid,'%s,%1.6f,%1.6f\n', label_data{icat}, sorted_data(icat), sorted_stderr(icat));
        end
        fclose(fid);
    end
end

pp.fignum = gcf;
pp.axes = ax;

out_pp = pp;
end
0575
function pp = plot_distrib_by_cat(data_st,params)
% PLOT_DISTRIB_BY_CAT  One histogram per category of per-item proportions.
%
%   pp = plot_distrib_by_cat(data_st, params)
%
% INPUTS
%   data_st - either a structure with .vars/.data containing a 'prop'
%             variable, or the proportion matrix itself (items in rows,
%             categories in columns).
%   params  - report options. Fields read:
%     .figs              plot options, returned (augmented) as pp:
%         .start_fignum, .tick_interval, .add_nitems, .title, .pagehdr,
%         plus the fields used by print_fig
%     .use_fig, .ncol, .ylim
%     .qinfo             question metadata (enum_values, compqid,
%                        question_text)
%
% OUTPUT
%   pp      - params.figs with .fig.fignum, .fig.axes, .fig.title and
%             .pagehdr.title filled in.

pp = params.figs;
max_chars_per_line = 50;

try use_fig = params.use_fig;
catch
    try use_fig = pp.start_fignum; catch use_fig = []; end
end

% Accept a bare matrix of proportions
if ~isstruct(data_st)
    tmp.vars = {'prop'};
    tmp.data = {data_st};
    data_st = tmp;
end
col = set_var_col_const(data_st.vars);
ncat = length(params.qinfo.enum_values);
nitems = size(data_st.data{col.prop},1);

if isempty(use_fig)
    figure
else
    figure(use_fig)
    clf
end

nax = 0;
ax = [];

try ncol = params.ncol; catch ncol = 3; end
try tick_interval = pp.tick_interval; catch tick_interval = 0.1; end
hist_scale = 0:tick_interval:1;

% User-specified y-limits (if any) apply to every subplot
try user_ylim = params.ylim; catch user_ylim = []; end

% Rows needed to hold ncat subplots in ncol columns. The former expression
% fix(ncat/ncol)+rem(ncat,ncol) reserved more rows than necessary
% (e.g. 3 rows instead of 2 for 5 categories in 3 columns).
nrow = ceil(ncat/ncol);

for icat = 1:ncat
    hist_vals = hist(data_st.data{col.prop}(:,icat),hist_scale);

    y_limits = user_ylim;
    if isempty(y_limits)
        y_top = 1.2*max(hist_vals);
        % Axis limits must be finite and increasing; empty bins previously
        % produced [0 0], which set() rejects.
        if isempty(y_top) || ~isfinite(y_top) || y_top <= 0
            y_top = 1;
        end
        y_limits = [0 y_top];
    end

    nax = nax+1;
    ax(nax) = subplot(nrow,ncol,icat);
    bar(hist_scale,hist_vals)
    set(gca,'xtick',0:tick_interval:1,'xlim',[-0.05 1.05])
    set(gca,'ylim',y_limits)
    title_str = params.qinfo.enum_values{icat};
    % The label 'parent' gets a trailing blank appended
    % (presumably so title() does not treat it as a property name - confirm)
    if strcmpi(title_str,'parent')
        title_str = [title_str ' '];
    end
    title(title_str)
    if ~isfield(pp,'add_nitems') || pp.add_nitems
        add_nitems_txt(nitems);
    end
end

pp.fig.fignum = gcf;

if ~isfield(pp,'title')
    pp.title = '';
end

% Page header with the plot title and the question text
pp.pagehdr.title = sprintf(['%s\nQuestion (%1.2f): %s'], pp.title, ...
    params.qinfo.compqid,...
    cell2str(linewrap(params.qinfo.question_text,max_chars_per_line),'\n'));
nax = nax+1;
ax(nax)=add_fighdr(pp.pagehdr);

pp.fig.title = pp.pagehdr.title;
pp.fig.axes = ax;

print_fig(pp);
end
0659
function th = add_nitems_txt(nitems,params)
% ADD_NITEMS_TXT  Write 'N=<nitems>' in the upper right corner of the axes.
%
% The text is placed at (0.95,0.95) in normalized axes units. The second
% input argument is accepted but not used. Returns the text handle.

label_str = sprintf('N=%d', nitems);
text_props = { ...
    'units','norm', ...
    'horizontalalign','right', ...
    'verticalalign', 'top'};

th = text(0.95,0.95,label_str,text_props{:});
end
0666
function print_fig(pp)
% PRINT_FIG  Print the current figure to pp.figfname if pp.write2file is set.
%
% pp.printargs, when present, supplies the arguments passed to print;
% otherwise color PostScript in append mode is used.

% Nothing to do unless writing to file was requested
if ~isfield(pp,'write2file') || ~pp.write2file
    return
end

printargs = {'-dpsc','-append'};
if isfield(pp,'printargs')
    printargs = pp.printargs;
end

fprintf('Printing figure to file: %s\n', pp.figfname);
print(pp.figfname, printargs{:})
end