0001 function [dg, subject_vars] = mysql_demographics(params, conn_id)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 CONN_ID = 1;
0023
0024 dg = {};
0025
0026
0027 GENDER_DFID = 77;
0028 ETHNICITY_DFID = 78;
0029 RACE_DFID = 79;
0030
0031 GENDER_QID = 317;
0032 ETHNICITY_QID = 318;
0033 RACE_QID = 319;
0034 DEMO_VAR_LIST = {'gender','ethnicity','race'};
0035
0036 race_mapping = {...
0037 'Caucasian', {'Cauc','Caucasian'}; ...
0038 'AfrAmer', {'Black or African American', 'AfrAm'}; ...
0039 'Asian', {'Asian'}; ...
0040 'NatAmer', {'American Indian','NatAm'}; ...
0041 'Hawaiian', {'Native Hawaiian','NatHaw'}; ...
0042 'Multiple', {'More','More than One Race'}; ...
0043 'Other', {'Other'}; ...
0044 'Unknown', {'Unknown','Unknown or No Answer'}};
0045
0046
0047
0048 if isempty(params.exp_list)
0049 exp_list = {};
0050 INFO_BY_EXP = 0;
0051 else
0052 exp_list = params.exp_list;
0053 INFO_BY_EXP = 1;
0054 end
0055
0056 if isempty(params.irb_id)
0057 irb_list = {};
0058 INFO_BY_IRB = 0;
0059 else
0060 irb_list = params.irb_id;
0061 INFO_BY_IRB = 1;
0062 end
0063
0064 if ~isfield(params,'exclude_subs') || isempty(params.exclude_subs)
0065 exclude_subs = {};
0066 else
0067 exclude_subs = params.exclude_subs;
0068 end
0069
0070 if ~isfield(params,'start_date') || isempty(params.start_date)
0071 start_date = datenum('01-Jul-2004');
0072 else
0073 start_date = params.start_date;
0074 end
0075
0076 if ~isfield(params,'stop_date') || isempty(params.stop_date)
0077 stop_date = datenum(now);
0078 else
0079 stop_date = params.stop_date;
0080 end
0081
0082
0083
0084 subject_vars = { ...
0085 'subject_id'; ...
0086 'gender'; ...
0087 'ethnic_category'; ...
0088 'racial_category'; ...
0089 'dob'; ...
0090 };
0091
0092
0093 if isstr(exp_list)
0094 exp_list = {exp_list};
0095 end
0096
0097 if isstr(irb_list)
0098 irb_list = {irb_list};
0099 end
0100
0101
0102 try conn_id(1);
0103 catch
0104 tmp_conn_id = 1;
0105 mysql_make_conn([],[],CONN_ID);
0106 conn_id = CONN_ID;
0107 end
0108
0109
0110 exp_id = [];
0111 exp_names = {};
0112 resp_tbl_list = {};
0113
0114
0115 if INFO_BY_EXP
0116 sql_str = sprintf(['SELECT experiment_id, experiment_title, response_table FROM experiment' ...
0117 ' WHERE experiment_title IN ("%s");'], cell2str(exp_list,'","'));
0118 [tmp_id,tmp_names,tmp_tbl_list] = mysql(conn_id,sql_str);
0119
0120 fprintf('Found %d/%d experiments matching the experiment list:\n%s\n', length(tmp_names), length(exp_list), cell2str(tmp_names,'\n'));
0121
0122 exp_id = [exp_id, tmp_id];
0123 exp_names = [exp_names, tmp_names];
0124 resp_tbl_list = [resp_tbl_list, tmp_tbl_list];
0125 end
0126
0127 if INFO_BY_IRB
0128 sql_str = sprintf(['SELECT experiment_id, experiment_title, response_table FROM experiment' ...
0129 ' WHERE irb_id IN (%s);'], cell2str(irb_list,','));
0130 [tmp_id,tmp_names,tmp_tbl_list] = mysql(conn_id,sql_str);
0131 nexp = length(tmp_names);
0132
0133 fprintf('Found %d experiments matching the IRB ID list (%s):\n', length(tmp_names), cell2str(irb_list,','));
0134 for iexp = 1:nexp
0135 fprintf('%s\t%d\n',tmp_names{iexp}, tmp_id(iexp));
0136 end
0137
0138 exp_id = [exp_id, tmp_id];
0139 exp_names = [exp_names, tmp_names];
0140 resp_tbl_list = [resp_tbl_list, tmp_tbl_list];
0141 end
0142
0143
0144
0145 [unique_ids, unique_idxs] = unique(exp_id);
0146 exp_id = exp_id(unique_idxs);
0147 exp_names = exp_names(unique_idxs);
0148 resp_tbl_list = resp_tbl_list(unique_idxs);
0149
0150
0151 exp_subs = {};
0152 sub_exp_ids = [];
0153 sess_ids = [];
0154 for iexp = 1:length(exp_names)
0155
0156 [tmp_subid, tmp_sessid] = mysql(conn_id,sprintf('SELECT subject_id, session_id FROM %s WHERE experiment_id=%d;', resp_tbl_list{iexp}, exp_id(iexp)));
0157
0158
0159 [tmp_sessid, tmp_idxs] = unique(tmp_sessid);
0160 sess_ids = [sess_ids; tmp_sessid];
0161 exp_subs = [exp_subs; tmp_subid(tmp_idxs)];
0162 sub_exp_ids = [sub_exp_ids; ones(size(tmp_sessid))*exp_id(iexp)];
0163 end
0164
0165 rm_idxs = [];
0166
0167 rm_idxs = [rm_idxs; strmatch('tmp_',exp_subs)];
0168
0169
0170 if ~isempty(exclude_subs)
0171 [rm_mask] = ismember(exp_subs,exclude_subs);
0172 rm_idxs = [rm_idxs; find(rm_mask)];
0173 end
0174 exp_subs(rm_idxs) = [];
0175 sub_exp_ids(rm_idxs) = [];
0176 sess_ids(rm_idxs) = [];
0177
0178
0179 sub_str = sprintf('''%s'',', exp_subs{:});
0180 sub_str(end) = [];
0181
0182
0183
0184
0185 sql_str = sprintf('SELECT %s FROM subject WHERE subject_id IN (%s)', ...
0186 cell2str(subject_vars,','), sub_str);
0187
0188
0189 [dg{1:length(subject_vars)}] = mysql(conn_id,sql_str);
0190
0191
0192
0193 tmp_sub_ids = dg{:,strcmp(subject_vars,'subject_id')};
0194 tmp_gender = dg{:,strcmp(subject_vars,'gender')};
0195 tmp_ethnicity = dg{:,strcmp(subject_vars,'ethnic_category')};
0196 tmp_race = dg{:,strcmp(subject_vars,'racial_category')};
0197
0198 gender_empty_mask = cellfun('isempty',tmp_gender);
0199 ethnicity_empty_mask = cellfun('isempty', tmp_ethnicity);
0200 race_empty_mask = cellfun('isempty', tmp_race);
0201
0202
0203 sub_tbl_empty_mask = gender_empty_mask & ethnicity_empty_mask & ...
0204 race_empty_mask;
0205
0206 missing_subs_mask = ismember(exp_subs, tmp_sub_ids(sub_tbl_empty_mask));
0207 missing_subs = exp_subs(missing_subs_mask);
0208 missing_subs_str = sprintf('''%s'',', missing_subs{:});
0209 missing_subs_str(end) = [];
0210
0211
0212 for idg = 1:length(dg)
0213 dg{idg}(sub_tbl_empty_mask,:) = [];
0214 end
0215
0216 sub_ids = exp_subs(~missing_subs_mask);
0217 sess_ids = sess_ids(~missing_subs_mask);
0218
0219
0220 new_exp_id_list = unique(sub_exp_ids(missing_subs_mask));
0221 sub_exp_ids = sub_exp_ids(~missing_subs_mask);
0222
0223
0224 tmp_sub_ids = dg{:,strcmp(subject_vars,'subject_id')};
0225
0226 nsub_ids = length(sub_ids);
0227 gender = cell(nsub_ids,1);
0228 ethnicity = cell(nsub_ids,1);
0229 race = cell(nsub_ids,1);
0230 age = zeros(nsub_ids,1);
0231 unique_subids = unique(sub_ids);
0232 nunique = length(unique_subids);
0233 for isub = 1:nunique
0234 curr_subid = unique_subids(isub);
0235 curr_sub_ids_idxs = strmatch(curr_subid,sub_ids,'exact');
0236 curr_dg_idx = strmatch(curr_subid,tmp_sub_ids,'exact');
0237 gender(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'gender')}(curr_dg_idx);
0238 ethnicity(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'ethnic_category')}(curr_dg_idx);
0239 race(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'racial_category')}(curr_dg_idx);
0240 age(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'dob')}(curr_dg_idx);
0241 end
0242
0243
0244 [cat_labels] = mysql_resolve_enum([GENDER_DFID, ETHNICITY_DFID, RACE_DFID], conn_id);
0245
0246
0247
0248 new_sub_ids = {};
0249 new_sess_ids = [];
0250 for iexp = 1:length(new_exp_id_list)
0251 curr_exp_id = new_exp_id_list(iexp);
0252 curr_resp_tbl = resp_tbl_list{find(exp_id == curr_exp_id)};
0253
0254 sql_str = sprintf(['SELECT subject_id, response_enum, question_id, session_id FROM %s ' ...
0255 'WHERE experiment_id=%d AND question_id IN (%d,%d,%d) ' ...
0256 'AND subject_id IN (%s);'], curr_resp_tbl, curr_exp_id, GENDER_QID, ETHNICITY_QID, RACE_QID, missing_subs_str);
0257 [tmp_subid, tmp_resp, tmp_qid, tmp_sessid] = mysql(conn_id,sql_str);
0258
0259 new_sub_ids = [new_sub_ids; tmp_subid(1:3:end)];
0260 new_sess_ids = [new_sess_ids; tmp_sessid(1:3:end)];
0261
0262 for ivar = 1:length(DEMO_VAR_LIST)
0263 curr_var = DEMO_VAR_LIST{ivar};
0264 tmp_mask = [];
0265 switch curr_var
0266 case 'gender'
0267 tmp_mask = (tmp_qid == GENDER_QID);
0268 case 'ethnicity'
0269 tmp_mask = (tmp_qid == ETHNICITY_QID);
0270 case 'race'
0271 tmp_mask = (tmp_qid == RACE_QID);
0272 end
0273
0274
0275 enum_idxs = log2(tmp_resp(tmp_mask))+1;
0276 tmp_vals = cat_labels{ivar}(enum_idxs);
0277 tmp_vals = strrep(tmp_vals,'"','');
0278 eval([curr_var '=[' curr_var '; tmp_vals''];']);
0279
0280 end
0281 end
0282
0283 sub_ids = [sub_ids; new_sub_ids];
0284 sess_ids = [sess_ids; new_sess_ids];
0285
0286
0287 sessid_str = sprintf('%d,', sess_ids);
0288 sessid_str(end) = [];
0289 sql_str = sprintf('SELECT date_time FROM session WHERE session_id IN (%s);', sessid_str);
0290 session_date = mysql(conn_id,sql_str);
0291
0292 date_mask = (session_date >= start_date) & (session_date <= stop_date);
0293 valid_mask = date_mask;
0294
0295
0296
0297
0298 fprintf('\nTotal number of subjects: %d\n', sum(valid_mask));
0299 fprintf('Start date: %s\n', datestr(start_date));
0300 fprintf('Stop date: %s\n\n', datestr(stop_date));
0301
0302 male_mask = ismember(gender,{'M','Male'}) & valid_mask;
0303 female_mask = ismember(gender,{'F','Female'}) & valid_mask;
0304 unknown_gender_mask = ~male_mask & ~female_mask & valid_mask;
0305
0306
0307 hispanic_mask = ismember(ethnicity,{'HL','Hispanic or Latino'}) & valid_mask;
0308 nonhispanic_mask = ismember(ethnicity,{'Not Hispanic or Latino','not_HL'}) & valid_mask;
0309 hispanic_unkwn_mask = ~hispanic_mask & ~nonhispanic_mask & valid_mask;
0310
0311 fprintf('\t%15s\t%15s\t%15s\n','Hispanic','Non-Hispanic','Unknown');
0312 fprintf('Male:\t%15d\t%15d\t%15d\n', ...
0313 sum(male_mask & hispanic_mask), ...
0314 sum(male_mask & nonhispanic_mask), ...
0315 sum(male_mask & hispanic_unkwn_mask));
0316 fprintf('Female:\t%15d\t%15d\t%15d\n', ...
0317 sum(female_mask & hispanic_mask), ...
0318 sum(female_mask & nonhispanic_mask), ...
0319 sum(female_mask & hispanic_unkwn_mask));
0320 fprintf('Unknown:%15d\t%15d\t%15d\n', ...
0321 sum(unknown_gender_mask & hispanic_mask), ...
0322 sum(unknown_gender_mask & nonhispanic_mask), ...
0323 sum(unknown_gender_mask & hispanic_unkwn_mask));
0324 fprintf('Total:\t%15d\t%15d\t%15d\n', sum(hispanic_mask), sum(nonhispanic_mask), sum(hispanic_unkwn_mask));
0325
0326
0327 race_cat = race_mapping(:,1);
0328 header = sprintf('\n\n\t');
0329 for ir = 1:length(race_cat)
0330 header = sprintf('%s\t%8s', header,race_cat{ir});
0331 race_mask = ismember(race,race_mapping{ir,2});
0332 males_by_race(ir) = sum(race_mask&male_mask & valid_mask);
0333 females_by_race(ir) = sum(race_mask&female_mask & valid_mask);
0334 unknown_by_race(ir) = sum(race_mask&unknown_gender_mask & valid_mask);
0335
0336 hispanic_males_by_race(ir) = sum(race_mask & male_mask & valid_mask & hispanic_mask);
0337 hispanic_females_by_race(ir) = sum(race_mask&female_mask & valid_mask & hispanic_mask);
0338 hispanic_unknown_by_race(ir) = sum(race_mask&unknown_gender_mask & valid_mask & hispanic_mask);
0339 end
0340 header = sprintf('%s\t%8s', header, 'Total');
0341 males_by_race(end+1) = sum(males_by_race);
0342 females_by_race(end+1) = sum(females_by_race);
0343 unknown_by_race(end+1) = sum(unknown_by_race);
0344 hispanic_males_by_race(end+1) = sum(hispanic_males_by_race);
0345 hispanic_females_by_race(end+1) = sum(hispanic_females_by_race);
0346 hispanic_unknown_by_race(end+1) = sum(hispanic_unknown_by_race);
0347
0348 fprintf('%s\n', header);
0349 fprintf('Male: %s\n', sprintf('\t%8d',males_by_race));
0350 fprintf('Female: %s\n', sprintf('\t%8d',females_by_race));
0351 fprintf('Unknown:%s\n', sprintf('\t%8d',unknown_by_race));
0352 fprintf('Total: %s\n', sprintf('\t%8d',males_by_race+females_by_race+unknown_by_race))
0353
0354
0355 fprintf('\n\nRacial breakdown for hispanics')
0356 fprintf('%s\n', header);
0357 fprintf('Male: %s\n', sprintf('\t%8d',hispanic_males_by_race));
0358 fprintf('Female: %s\n', sprintf('\t%8d',hispanic_females_by_race));
0359 fprintf('Unknown:%s\n', sprintf('\t%8d',hispanic_unknown_by_race));
0360 fprintf('Total: %s\n', sprintf('\t%8d',hispanic_males_by_race+hispanic_females_by_race+hispanic_unknown_by_race))
0361
0362
0363 if exist('tmp_conn_id','var')
0364 mysql('close');
0365 end