Home > database > examples > mysql_demographics.m

mysql_demographics

PURPOSE ^

[dg, subject_vars] = mysql_demographics(params, conn_id);

SYNOPSIS ^

function [dg, subject_vars] = mysql_demographics(params, conn_id)

DESCRIPTION ^

 [dg, subject_vars] = mysql_demographics(params, conn_id);

 Gets subject demographic information based on the information request
 specified in the params structure

 params
    .exp_list -- cell string of experiments to pull data for
    .irb_id -- protocol numbers
    .extract_vars -- list of variables to extract
    .exclude_subs -- list of subject IDs to exclude from the summaries,
                     e.g. test subject IDs, etc.
    .start_date -- start of reporting period (must be in datenum format)
    .stop_date -- end of reporting period (must be in datenum format)

 conn_id -- mysql connection ID

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [dg, subject_vars] = mysql_demographics(params, conn_id)
0002 % [dg, subject_vars] = mysql_demographics(params, conn_id);
0003 %
0004 % Gets subject demographic information based on the information request
0005 % specified in the params structure
0006 %
0007 % params
0008 %    .exp_list -- cell string of experiments to pull data for
0009 %    .irb_id -- protocol numbers
0010 %    .extract_vars -- list of variables to extract
0011 %    .exclude_subs -- list of subject IDs to exclude from the summaries,
0012 %                     e.g. test subject IDs, etc.
0013 %    .start_date -- start of reporting period (must be in datenum format)
0014 %    .stop_date -- end of reporting period (must be in datenum format)
0015 %
0016 % conn_id -- mysql connection ID
0017 %
0018 
0019 % 01/25/05 Petr Janata
0020 % 06/26/05 PJ -- cleaned up conn_id handling
0021 
0022 CONN_ID = 1;
0023 
0024 dg = {};
0025 
0026 % Data format table identifiers for enums containing the different category labels
0027 GENDER_DFID = 77;
0028 ETHNICITY_DFID = 78;
0029 RACE_DFID = 79;
0030 
0031 GENDER_QID = 317;
0032 ETHNICITY_QID = 318;
0033 RACE_QID = 319;
0034 DEMO_VAR_LIST = {'gender','ethnicity','race'};
0035 
0036 race_mapping = {...
0037       'Caucasian', {'Cauc','Caucasian'}; ...
0038       'AfrAmer', {'Black or African American', 'AfrAm'}; ...
0039       'Asian', {'Asian'}; ...
0040       'NatAmer', {'American Indian','NatAm'}; ...
0041       'Hawaiian', {'Native Hawaiian','NatHaw'}; ...
0042       'Multiple', {'More','More than One Race'}; ...
0043       'Other', {'Other'}; ...
0044       'Unknown', {'Unknown','Unknown or No Answer'}};
0045 
0046 
0047 % Check the input arguments
0048 if isempty(params.exp_list)
0049   exp_list = {};
0050   INFO_BY_EXP = 0;
0051 else
0052   exp_list = params.exp_list;
0053   INFO_BY_EXP = 1;
0054 end
0055 
0056 if isempty(params.irb_id)
0057   irb_list = {};
0058   INFO_BY_IRB = 0;
0059 else
0060   irb_list = params.irb_id;
0061   INFO_BY_IRB = 1;
0062 end
0063 
0064 if ~isfield(params,'exclude_subs') || isempty(params.exclude_subs)
0065   exclude_subs = {};
0066 else
0067   exclude_subs = params.exclude_subs;
0068 end
0069 
0070 if ~isfield(params,'start_date') || isempty(params.start_date)
0071   start_date = datenum('01-Jul-2004');
0072 else
0073   start_date = params.start_date;
0074 end
0075 
0076 if ~isfield(params,'stop_date') || isempty(params.stop_date)
0077   stop_date = datenum(now);
0078 else
0079   stop_date = params.stop_date;
0080 end
0081 
0082 
0083 % List of subject variables to extract
0084 subject_vars = { ...
0085       'subject_id'; ...
0086       'gender'; ...
0087       'ethnic_category'; ...
0088       'racial_category'; ...
0089       'dob'; ...
0090       };
0091 
0092 % Do some additional variable checking
0093 if isstr(exp_list)
0094   exp_list = {exp_list};
0095 end
0096 
0097 if isstr(irb_list)
0098   irb_list = {irb_list};
0099 end
0100 
0101 % Connect to default host and database if no connection ID is specified
0102 try conn_id(1);
0103 catch 
0104   tmp_conn_id = 1;
0105   mysql_make_conn([],[],CONN_ID);
0106   conn_id = CONN_ID;
0107 end
0108 
0109 % Initialize some variables
0110 exp_id = [];
0111 exp_names = {};
0112 resp_tbl_list = {};
0113 
0114 % Get a list of the response tables we need to deal with
0115 if INFO_BY_EXP
0116   sql_str = sprintf(['SELECT experiment_id, experiment_title, response_table FROM experiment' ...
0117     ' WHERE experiment_title IN ("%s");'], cell2str(exp_list,'","'));
0118   [tmp_id,tmp_names,tmp_tbl_list] = mysql(conn_id,sql_str);
0119 
0120   fprintf('Found %d/%d experiments matching the experiment list:\n%s\n', length(tmp_names), length(exp_list), cell2str(tmp_names,'\n'));
0121   
0122   exp_id = [exp_id, tmp_id];
0123   exp_names = [exp_names, tmp_names];
0124   resp_tbl_list = [resp_tbl_list, tmp_tbl_list];
0125 end % if INFO_BY_EXP
0126 
0127 if INFO_BY_IRB
0128   sql_str = sprintf(['SELECT experiment_id, experiment_title, response_table FROM experiment' ...
0129     ' WHERE irb_id IN (%s);'], cell2str(irb_list,','));
0130   [tmp_id,tmp_names,tmp_tbl_list] = mysql(conn_id,sql_str);
0131   nexp = length(tmp_names);
0132 
0133   fprintf('Found %d experiments matching the IRB ID list (%s):\n', length(tmp_names), cell2str(irb_list,','));
0134   for iexp = 1:nexp
0135     fprintf('%s\t%d\n',tmp_names{iexp}, tmp_id(iexp));
0136   end
0137   
0138   exp_id = [exp_id, tmp_id];
0139   exp_names = [exp_names, tmp_names];
0140   resp_tbl_list = [resp_tbl_list, tmp_tbl_list];
0141 end % if INFO_BY_IRB
0142 
0143 % In case we were selecting by both experiment title and IRB protocol ID, prune
0144 % the list for only the unique experiments
0145 [unique_ids, unique_idxs] = unique(exp_id);
0146 exp_id = exp_id(unique_idxs);
0147 exp_names = exp_names(unique_idxs);
0148 resp_tbl_list = resp_tbl_list(unique_idxs);
0149 
0150 % Now generate a list of subject IDs
0151 exp_subs = {};
0152 sub_exp_ids = [];
0153 sess_ids = [];
0154 for iexp = 1:length(exp_names)
0155   % Get all the subject IDs and session IDs in this response table
0156   [tmp_subid, tmp_sessid] = mysql(conn_id,sprintf('SELECT subject_id, session_id FROM %s WHERE experiment_id=%d;', resp_tbl_list{iexp}, exp_id(iexp)));
0157   
0158   % Match on unique session_id
0159   [tmp_sessid, tmp_idxs] = unique(tmp_sessid);
0160   sess_ids = [sess_ids; tmp_sessid];
0161   exp_subs = [exp_subs; tmp_subid(tmp_idxs)];
0162   sub_exp_ids = [sub_exp_ids; ones(size(tmp_sessid))*exp_id(iexp)];
0163 end
0164 
0165 rm_idxs = [];
0166 % Strip out any temporary subject IDs
0167 rm_idxs = [rm_idxs; strmatch('tmp_',exp_subs)];
0168 
0169 % Remove any unwanted subject IDs
0170 if ~isempty(exclude_subs)
0171   [rm_mask] = ismember(exp_subs,exclude_subs);
0172   rm_idxs = [rm_idxs; find(rm_mask)];
0173 end
0174 exp_subs(rm_idxs) = [];
0175 sub_exp_ids(rm_idxs) = [];
0176 sess_ids(rm_idxs) = [];
0177 
0178 % Generate a comma-separated list of subject IDs
0179 sub_str = sprintf('''%s'',', exp_subs{:});
0180 sub_str(end) = [];
0181 
0182 % Extract information from the subject table. Note, following the revision of
0183 % the handling of demographics forms, demographic information is stored in the
0184 % response tables, so we really have to look in two place to get all the info.
0185 sql_str = sprintf('SELECT %s FROM subject WHERE subject_id IN (%s)', ...
0186     cell2str(subject_vars,','), sub_str);
0187 
0188 % Get the data from the database
0189 [dg{1:length(subject_vars)}] = mysql(conn_id,sql_str);
0190 
0191 % Figure out which of the subjects we need to look into the response tables for
0192 % the demographic info
0193 tmp_sub_ids = dg{:,strcmp(subject_vars,'subject_id')};
0194 tmp_gender = dg{:,strcmp(subject_vars,'gender')};
0195 tmp_ethnicity = dg{:,strcmp(subject_vars,'ethnic_category')};
0196 tmp_race = dg{:,strcmp(subject_vars,'racial_category')};
0197 
0198 gender_empty_mask = cellfun('isempty',tmp_gender);
0199 ethnicity_empty_mask = cellfun('isempty', tmp_ethnicity);
0200 race_empty_mask = cellfun('isempty', tmp_race);
0201 
0202 % create the missing data mask
0203 sub_tbl_empty_mask = gender_empty_mask & ethnicity_empty_mask & ...
0204     race_empty_mask;
0205 
0206 missing_subs_mask = ismember(exp_subs, tmp_sub_ids(sub_tbl_empty_mask));
0207 missing_subs = exp_subs(missing_subs_mask);
0208 missing_subs_str = sprintf('''%s'',', missing_subs{:});
0209 missing_subs_str(end) = [];
0210 
0211 % Remove the entries for these subjects from the current data
0212 for idg = 1:length(dg)
0213   dg{idg}(sub_tbl_empty_mask,:) = [];
0214 end
0215 
0216 sub_ids = exp_subs(~missing_subs_mask);
0217 sess_ids = sess_ids(~missing_subs_mask);
0218 
0219 % get list of experiments whose response tables we need to check
0220 new_exp_id_list = unique(sub_exp_ids(missing_subs_mask));
0221 sub_exp_ids = sub_exp_ids(~missing_subs_mask);
0222 
0223 % Now extract variables from the data structure for existing subjects
0224 tmp_sub_ids = dg{:,strcmp(subject_vars,'subject_id')};
0225 
0226 nsub_ids = length(sub_ids);
0227 gender = cell(nsub_ids,1);
0228 ethnicity = cell(nsub_ids,1);
0229 race = cell(nsub_ids,1);
0230 age = zeros(nsub_ids,1);
0231 unique_subids = unique(sub_ids);
0232 nunique = length(unique_subids);
0233 for isub = 1:nunique
0234   curr_subid = unique_subids(isub);
0235   curr_sub_ids_idxs = strmatch(curr_subid,sub_ids,'exact');
0236   curr_dg_idx = strmatch(curr_subid,tmp_sub_ids,'exact');
0237   gender(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'gender')}(curr_dg_idx);
0238   ethnicity(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'ethnic_category')}(curr_dg_idx);
0239   race(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'racial_category')}(curr_dg_idx);
0240   age(curr_sub_ids_idxs) = dg{:,strcmp(subject_vars,'dob')}(curr_dg_idx);
0241 end
0242 
0243 % Retrieve the category labels
0244 [cat_labels] = mysql_resolve_enum([GENDER_DFID, ETHNICITY_DFID, RACE_DFID], conn_id);
0245 
0246 % Now loop over the experiments that we need to consult in order to pull in the
0247 % demographic information from the response tables
0248 new_sub_ids = {};
0249 new_sess_ids = [];
0250 for iexp = 1:length(new_exp_id_list)
0251   curr_exp_id = new_exp_id_list(iexp);
0252   curr_resp_tbl = resp_tbl_list{find(exp_id == curr_exp_id)};
0253   
0254   sql_str = sprintf(['SELECT subject_id, response_enum, question_id, session_id FROM %s ' ...
0255     'WHERE experiment_id=%d AND question_id IN (%d,%d,%d) ' ...
0256     'AND subject_id IN (%s);'], curr_resp_tbl, curr_exp_id, GENDER_QID, ETHNICITY_QID, RACE_QID, missing_subs_str);
0257   [tmp_subid, tmp_resp, tmp_qid, tmp_sessid] = mysql(conn_id,sql_str);
0258   
0259   new_sub_ids = [new_sub_ids; tmp_subid(1:3:end)];
0260   new_sess_ids = [new_sess_ids; tmp_sessid(1:3:end)];
0261   
0262   for ivar = 1:length(DEMO_VAR_LIST)
0263     curr_var = DEMO_VAR_LIST{ivar};
0264     tmp_mask = [];
0265     switch curr_var
0266       case 'gender'
0267     tmp_mask = (tmp_qid == GENDER_QID);
0268       case 'ethnicity'
0269     tmp_mask = (tmp_qid == ETHNICITY_QID);
0270       case 'race'
0271     tmp_mask = (tmp_qid == RACE_QID);
0272     end
0273     
0274     % Populate the gender, ethnicity, and race variables
0275     enum_idxs = log2(tmp_resp(tmp_mask))+1;
0276     tmp_vals = cat_labels{ivar}(enum_idxs);
0277     tmp_vals = strrep(tmp_vals,'"','');
0278     eval([curr_var '=[' curr_var '; tmp_vals''];']);
0279     
0280   end % for ivar=
0281 end % for iexp = 1:length(new_exp_id_list)
0282 
0283 sub_ids = [sub_ids; new_sub_ids];
0284 sess_ids = [sess_ids; new_sess_ids];
0285 
0286 % Get the date of participation from the session table
0287 sessid_str = sprintf('%d,', sess_ids);
0288 sessid_str(end) = [];
0289 sql_str = sprintf('SELECT date_time FROM session WHERE session_id IN (%s);', sessid_str);
0290 session_date = mysql(conn_id,sql_str);
0291 
0292 date_mask = (session_date >= start_date) & (session_date <= stop_date);
0293 valid_mask = date_mask;
0294 
0295 %
0296 % Print some summary info
0297 %
0298 fprintf('\nTotal number of subjects: %d\n', sum(valid_mask));
0299 fprintf('Start date: %s\n', datestr(start_date));
0300 fprintf('Stop date: %s\n\n', datestr(stop_date));
0301 
0302 male_mask = ismember(gender,{'M','Male'}) & valid_mask;
0303 female_mask = ismember(gender,{'F','Female'}) & valid_mask;
0304 unknown_gender_mask = ~male_mask & ~female_mask & valid_mask;
0305 
0306 % Hispanic breakdown
0307 hispanic_mask = ismember(ethnicity,{'HL','Hispanic or Latino'}) & valid_mask;
0308 nonhispanic_mask = ismember(ethnicity,{'Not Hispanic or Latino','not_HL'}) & valid_mask;
0309 hispanic_unkwn_mask = ~hispanic_mask & ~nonhispanic_mask & valid_mask;
0310 
0311 fprintf('\t%15s\t%15s\t%15s\n','Hispanic','Non-Hispanic','Unknown');
0312 fprintf('Male:\t%15d\t%15d\t%15d\n', ...
0313     sum(male_mask & hispanic_mask), ...
0314     sum(male_mask & nonhispanic_mask), ...
0315     sum(male_mask & hispanic_unkwn_mask));
0316 fprintf('Female:\t%15d\t%15d\t%15d\n', ...
0317     sum(female_mask & hispanic_mask), ...
0318     sum(female_mask & nonhispanic_mask), ...
0319     sum(female_mask & hispanic_unkwn_mask));
0320 fprintf('Unknown:%15d\t%15d\t%15d\n', ...
0321     sum(unknown_gender_mask & hispanic_mask), ...
0322     sum(unknown_gender_mask & nonhispanic_mask), ...
0323     sum(unknown_gender_mask & hispanic_unkwn_mask));
0324 fprintf('Total:\t%15d\t%15d\t%15d\n', sum(hispanic_mask), sum(nonhispanic_mask), sum(hispanic_unkwn_mask));
0325 
0326 % Do overall racial breakdown
0327 race_cat = race_mapping(:,1);
0328 header = sprintf('\n\n\t');
0329 for ir = 1:length(race_cat)
0330   header = sprintf('%s\t%8s', header,race_cat{ir});
0331   race_mask = ismember(race,race_mapping{ir,2});
0332   males_by_race(ir) = sum(race_mask&male_mask & valid_mask);
0333   females_by_race(ir) = sum(race_mask&female_mask & valid_mask);
0334   unknown_by_race(ir) = sum(race_mask&unknown_gender_mask & valid_mask);
0335   
0336   hispanic_males_by_race(ir) = sum(race_mask & male_mask & valid_mask & hispanic_mask);
0337   hispanic_females_by_race(ir) = sum(race_mask&female_mask & valid_mask & hispanic_mask);
0338   hispanic_unknown_by_race(ir) = sum(race_mask&unknown_gender_mask & valid_mask & hispanic_mask);
0339 end
0340 header = sprintf('%s\t%8s', header, 'Total');
0341 males_by_race(end+1) = sum(males_by_race);
0342 females_by_race(end+1) = sum(females_by_race);
0343 unknown_by_race(end+1) = sum(unknown_by_race);
0344 hispanic_males_by_race(end+1) = sum(hispanic_males_by_race);
0345 hispanic_females_by_race(end+1) = sum(hispanic_females_by_race);
0346 hispanic_unknown_by_race(end+1) = sum(hispanic_unknown_by_race);
0347 
0348 fprintf('%s\n', header);
0349 fprintf('Male:   %s\n', sprintf('\t%8d',males_by_race));
0350 fprintf('Female: %s\n', sprintf('\t%8d',females_by_race));
0351 fprintf('Unknown:%s\n', sprintf('\t%8d',unknown_by_race));
0352 fprintf('Total:  %s\n', sprintf('\t%8d',males_by_race+females_by_race+unknown_by_race))
0353 
0354 % Do hispanic racial breakdown
0355 fprintf('\n\nRacial breakdown for hispanics')
0356 fprintf('%s\n', header);
0357 fprintf('Male:   %s\n', sprintf('\t%8d',hispanic_males_by_race));
0358 fprintf('Female: %s\n', sprintf('\t%8d',hispanic_females_by_race));
0359 fprintf('Unknown:%s\n', sprintf('\t%8d',hispanic_unknown_by_race));
0360 fprintf('Total:  %s\n', sprintf('\t%8d',hispanic_males_by_race+hispanic_females_by_race+hispanic_unknown_by_race))
0361 
0362 % Close the mysql connection
0363 if exist('tmp_conn_id','var')
0364   mysql('close');
0365 end

Generated on Wed 20-Sep-2023 04:00:50 by m2html © 2003