function as = repack_formdata(as,ed)
% REPACK_FORMDATA  Repack per-form response data into subject-by-question arrays.
%
%   as = repack_formdata(as,ed)
%
%   For every form named in as.forms, the matching entry of ed.form_data is
%   located through ed.form_names and its responses are copied into two
%   parallel output structures: as.num, for questions whose data format type
%   is listed in numeric_types below, and as.txt, for all other questions.
%
%   Fields read from the inputs:
%     as.params.conn_id - connection ID handed to mysql()
%     as.forms          - cell array of form names to process
%     as.num, as.txt    - output structures; .qid and .stimidx are read
%                         before they are written, so they must already exist
%     as.stims.ids      - master list of stimulus IDs (appended to as needed)
%     ed.form_names     - form names, parallel to ed.form_data
%     ed.form_data{k}   - struct with .vars (passed to set_form_col_const to
%                         obtain column indices) and .data (cell array with
%                         one column of values per variable)
%
%   Fields written in as.num and as.txt:
%     .subid               - sorted union of the subject IDs of all forms
%     .qid                 - composite question IDs, question_id+subquestion/10
%     .dfid                - data format IDs (as.num only)
%     .qtxt                - question text, one cell per entry of .qid
%     .niter               - (subject, question) response counts
%     .data                - (subject, question, repetition-or-stimulus) values
%     .datenum.by_question - timestamps of responses not tied to a stimulus
%     .datenum.by_stim     - (subject, presentation) stimulus timestamps
%     .stimidx             - (subject, presentation) indices into as.stims.ids
%
%   NOTE(review): the composite ID question_id+subquestion/10 is only unique
%   while subquestion < 10 (e.g. 1+10/10 equals 2+0/10).

NUMERIC = 1;  % value of itype that selects the numeric pass

% DEBUG == 1 dumps destination indices during the numeric pass;
% DEBUG == 2 reports subject entries that have to be created.
DEBUG = 2;

numeric_types = {'int16','int32','int64','double','enum'};

p = as.params;

nforms = length(as.forms);

% Per-type running count of repetitions already stored for each
% (subject, question) cell. These persist across forms so that responses
% from a later form are appended instead of overwriting earlier ones.
iter_cnt_num = [];
iter_cnt_txt = [];

%
% Pass 1: assemble the master subject list across all forms
%
master_sublist = {};
for ifrm = 1:nforms
  cfname = as.forms{ifrm};
  frm_idx = find(strcmp(cfname,cellstr(ed.form_names)));
  if isempty(frm_idx)
    error('repack_formdata:unknownForm', ...
        'Form "%s" was not found in ed.form_names', cfname);
  end
  fd = ed.form_data{frm_idx(1)};

  % Skip forms that have no entries
  if size(fd.data{1},1) == 0
    continue
  end

  % Column index constants for this form
  FD = set_form_col_const(fd.vars);

  master_sublist = union(master_sublist,fd.data{FD.SUB_ID});
end

% Both output structures share the same subject ordering
as.num.subid = master_sublist;
as.txt.subid = master_sublist;

%
% Pass 2: copy the data of each form into the output structures
%
for ifrm = 1:nforms
  cfname = as.forms{ifrm};
  frm_idx = find(strcmp(cfname,cellstr(ed.form_names)));
  if isempty(frm_idx)
    error('repack_formdata:unknownForm', ...
        'Form "%s" was not found in ed.form_names', cfname);
  end
  fd = ed.form_data{frm_idx(1)};

  if size(fd.data{1},1) == 0
    continue
  end

  FD = set_form_col_const(fd.vars);

  subids = fd.data{FD.SUB_ID};
  unique_subs = unique(subids);
  nsub = length(unique_subs);

  % Composite question ID for every row of the form data
  qid = cat(2,fd.data{[FD.QUEST_ID FD.SUBQUEST_ID]});
  compqid = qid(:,1)+qid(:,2)/10;

  % quest_idxs holds, for each unique question, one row of the form data in
  % which that question occurs (used below to fetch the question text)
  [unique_quest_ids, quest_idxs] = unique(qid,'rows');
  nquest = size(unique_quest_ids,1);

  out_ids = unique_quest_ids(:,1)+unique_quest_ids(:,2)/10;

  % Look up the data format of every unique question. The results are used
  % positionally against unique_quest_ids, which unique() returns sorted by
  % question then subquestion, so the query requests the same ordering
  % instead of relying on whatever order the server happens to return.
  qid_str = sprintf('(question_id=%d AND subquestion=%d) OR ', unique_quest_ids');
  qid_str(end-3:end) = [];  % strip the trailing ' OR '
  mysql_str = sprintf(['SELECT type, data_format_id FROM data_format ' ...
      'RIGHT JOIN question_x_data_format ON' ...
      ' data_format.data_format_id=question_x_data_format.answer_format_id ' ...
      'WHERE (%s) ORDER BY question_id, subquestion;'], qid_str);
  [types, dfid] = mysql(p.conn_id,mysql_str);

  if numel(types) ~= nquest
    error('repack_formdata:formatLookup', ...
        'Form "%s": expected %d data format rows but the query returned %d', ...
        cfname, nquest, numel(types));
  end

  % Rows that are associated with a stimulus
  stim_mask = ~isnan(fd.data{FD.STIM_ID});

  % Classify each unique question as numeric or text
  is_numeric = ismember(types,numeric_types);
  is_numeric = is_numeric(:);

  for itype = 1:2

    % Select the output structure, source column and counters for this pass
    if itype == NUMERIC
      ts = as.num;
      type_mask = is_numeric;
      src_col = FD.RESP_ENUM;
      iter_cnt = iter_cnt_num;
    else
      ts = as.txt;
      type_mask = ~is_numeric;
      src_col = FD.RESP_TXT;
      iter_cnt = iter_cnt_txt;
    end

    % Nothing of this type on this form
    if ~any(type_mask)
      continue
    end

    % Questions of this type that are not yet in the output structure
    proc_qid_idxs = find(~ismember(out_ids,ts.qid) & type_mask);
    nproc = length(proc_qid_idxs);

    if nproc
      % Append the new questions after the existing ones
      insert_idxs = (length(ts.qid)+1):(length(ts.qid)+nproc);
      ts.qid(insert_idxs) = out_ids(proc_qid_idxs);

      if itype == NUMERIC
        ts.dfid(insert_idxs) = dfid(proc_qid_idxs);
      end

      % Question text: when question and subquestion text differ, both are
      % joined by cell2str; otherwise the question text alone is stored.
      % Both texts are read from the same source row of the form data.
      tmp = cat(2,fd.data{[FD.QUEST_TXT FD.SUBQUEST_TXT]});
      for iproc = 1:nproc
        src_row = quest_idxs(proc_qid_idxs(iproc));
        if ~strcmp(tmp{src_row,1},tmp{src_row,2})
          ts.qtxt{insert_idxs(iproc)} = cell2str(tmp(src_row,:),'\n');
        else
          ts.qtxt{insert_idxs(iproc)} = tmp{src_row,1};
        end
      end
    end

    for isub = 1:nsub
      sid = unique_subs{isub};

      % Row of this subject in the output arrays
      row_idx = find(strcmp(ts.subid,sid));
      if isempty(row_idx)
        if DEBUG == 2
          fprintf('Creating Type %d entry for subject: %s\n', itype, sid);
        end
        ts.subid{end+1} = sid;
        row_idx = length(ts.subid);
      end

      % Grow the counters so that (row_idx, last question) is addressable.
      % The two arrays are grown independently so that an existing count in
      % ts.niter is never reset when only iter_cnt needs to grow.
      nqid = length(ts.qid);
      if (size(iter_cnt,1) < row_idx) || (size(iter_cnt,2) < nqid)
        iter_cnt(row_idx,nqid) = 0;
      end
      if ~isfield(ts,'niter') || (size(ts.niter,1) < row_idx) || ...
            (size(ts.niter,2) < nqid)
        ts.niter(row_idx,nqid) = 0;
      end

      % Rows of the form data that belong to this subject
      submask = strcmp(subids,sid);

      % Destination column in ts.qid for each of this subject's rows
      [have_dest,dest_idx] = ismember(compqid(submask),ts.qid);

      if ~any(have_dest)
        continue
      end

      dest_hits = dest_idx(have_dest);

      if (itype == NUMERIC) && (DEBUG == NUMERIC)
        destmask = false(size(submask));
        destmask(submask) = have_dest;
        disp([dest_hits fd.data{src_col}(submask&destmask)])
        disp([sum(submask&destmask) length(dest_hits)])
      end

      % Add the number of responses per destination column to the totals
      [dest_cols,unused_idx,hit_bin] = unique(dest_hits);
      ndest = length(dest_cols);
      hit_counts = accumarray(hit_bin(:),1,[ndest 1]);
      ts.niter(row_idx,dest_cols) = ts.niter(row_idx,dest_cols) + hit_counts';

      for idest = 1:ndest
        col_idx = dest_cols(idest);

        % Rows of this subject that answer this question
        curr_mask = (compqid == ts.qid(col_idx)) & submask;

        % The question is treated as stimulus-bound if any of those rows
        % carries a non-NaN, non-zero stimulus ID
        is_stim = any(fd.data{FD.STIM_ID}(curr_mask&stim_mask));

        if is_stim
          curr_mask = curr_mask&stim_mask;
        end

        nresp = sum(curr_mask);

        if ~is_stim
          % Append the responses after any repetitions already stored
          rep_idxs = iter_cnt(row_idx,col_idx)+1:iter_cnt(row_idx,col_idx)+nresp;
          iter_cnt(row_idx,col_idx) = iter_cnt(row_idx,col_idx)+nresp;

          ts.data(row_idx,col_idx,rep_idxs) = fd.data{src_col}(curr_mask);
          ts.datenum.by_question(row_idx,col_idx,rep_idxs) = ...
              fd.data{FD.DATE_TIME}(curr_mask);
        else
          % Stimulus IDs and timestamps in the order the rows occur
          curr_mask_idxs = find(curr_mask);
          curr_stim_ids = fd.data{FD.STIM_ID}(curr_mask_idxs);
          curr_stim_times = fd.data{FD.DATE_TIME}(curr_mask_idxs);

          nbad = sum(isnan(curr_stim_ids));
          if nbad
            warning('Found %d NaNs among the stim IDs. subid=%s\n', nbad, sid)
          end

          % Consecutive rows with identical stimulus ID and timestamp are
          % treated as duplicate submissions; only the first is kept
          dup_idxs = find((diff(curr_stim_ids)==0) & ...
              (diff(curr_stim_times)==0))+1;

          if ~isempty(dup_idxs)
            warning(['\n%d duplicated responses with same stimulus ' ...
                'ID (%s) and timestamp: subject (%s)\n'], length(dup_idxs), ...
                sprintf('%d,', curr_stim_ids(dup_idxs)), sid)
            curr_stim_ids(dup_idxs) = [];
            curr_stim_times(dup_idxs) = [];
            nresp = nresp-length(dup_idxs);
          end

          num_presented_stims = length(curr_stim_ids);

          if num_presented_stims ~= nresp
            warning('Encountered %d stimuli and %d responses\n', ...
                num_presented_stims, nresp)
          end

          for istim = 1:num_presented_stims
            curr_stim_id = curr_stim_ids(istim);

            % Index of this stimulus in the master list, appending if new
            master_stim_idx = find(as.stims.ids==curr_stim_id);
            if isempty(master_stim_idx)
              master_stim_idx = length(as.stims.ids)+1;
              as.stims.ids(master_stim_idx) = curr_stim_id;
            end

            % Make sure the subject has a row in the stimulus index matrix
            if size(ts.stimidx,1) < row_idx
              ts.stimidx(row_idx,1) = 0;
            end

            ts.stimidx(row_idx,istim) = master_stim_idx;
            ts.datenum.by_stim(row_idx,istim) = curr_stim_times(istim);

            % Rows for this particular presentation of the stimulus
            curr_stimmask = (fd.data{FD.STIM_ID} == curr_stim_id) & ...
                (fd.data{FD.DATE_TIME} == curr_stim_times(istim));

            % If several rows match, only the first response is stored
            tmpdata = fd.data{src_col}(curr_mask&curr_stimmask);
            ts.data(row_idx,col_idx,istim) = tmpdata(1);

            iter_cnt(row_idx,col_idx) = iter_cnt(row_idx,col_idx)+1;
          end
        end
      end % for idest
    end % for isub

    % Store the working copies back for the next form
    if itype == NUMERIC
      as.num = ts;
      iter_cnt_num = iter_cnt;
    else
      as.txt = ts;
      iter_cnt_txt = iter_cnt;
    end

  end % for itype
end % for ifrm