/********* Teacher Program ***********/

/*
   Purpose
   -------
   1. Read the site teacher file and keep records with a positive analysis
      weight.
   2. Run SUDAAN PROC DESCRIPT to get weighted percents and standard errors
      for Q1 (five categories) and Q2_1 - Q2_7 by grade level, and save the
      estimates to a SAS data set.
   3. Attach the finite population correction factor (ts_fpc) and the counts
      needed for the degrees of freedom, then compute asymmetric
      (logit-transformed) 95 percent confidence intervals.
   4. Print the results.

   "xx" in every data set name is the site code as it appears on the file
   name on the CD ROM and must be replaced before running.
*/

* Assign the locations for the SAS data and format library;
libname tdata "c:\data\profiles2010";                 /* tells SAS where the data are */
options fmtsearch=(tdata.xx2010_formats) nofmterr;    /* tells SAS where the formats are */

* Create a temporary dataset from the teacher SAS data set on the CD ROM
  retaining only records that have teacher analysis weights greater than zero;
data teacher;
   set tdata.xxt2010;
   if finalwt > 0;
run;

* Sort the data by stratum - this is a SUDAAN requirement;
proc sort data=teacher;
   by tsstrat;
run;

* Run the SUDAAN descript procedure for computing percent estimates and
  standard errors for Q1 and Q2 by grade level. Save SUDAAN output to a data
  set for use in subsequent processing steps;
proc descript data=teacher filetype=sas design=strwr atlevel1=1;
   weight finalwt;
   nest tsstrat;
   subgroup f_grdlvl2;
   levels 3;
   /* q1 is listed once per category; each entry is paired positionally with
      the catlevel value below, giving VARIABLE = 1..12 in the output file */
   var q1 q1 q1 q1 q1
       q2_1 q2_2 q2_3 q2_4 q2_5 q2_6 q2_7;
   catlevel /* q1 */ 1 2 3 4 5
            /* q2 */ 1 1 1 1 1 1 1;
   setenv colspce=2 rowwidth=13 colwidth=17 leftmgn=17;
   output nsum wsum percent sepercent
          / filename=tdata.xx2010_teacher_estimate filetype=sas tablecell=default replace;
   title "2010 school health profiles - teacher survey";
   print nsum wsum percent sepercent atlev1
         / wsumfmt=f8.2 percentfmt=f8.4 sepercentfmt=f8.5;
run;

/* ---- Asymmetric 95 percent confidence intervals with finite population
        correction ---- */

/* Extract the finite population correction factor (ts_fpc).
   The filtered TEACHER data set is used (not the raw file) so that a record
   excluded from the estimates cannot supply the factor.
   NODUPKEY leaves one record per distinct ts_fpc value; only the first one is
   read later.
   NOTE(review): no KEEP= is applied, so every variable of that one teacher
   record is carried into the downstream data sets - confirm this is wanted. */
proc sort data=teacher out=ts_fpc nodupkey;
   by ts_fpc;
run;

/* Count participating teachers (nobs_t = N of tsstrat) and strata
   (ntsstrat = MAX of tsstrat); both feed the degrees of freedom (df_t).
   The filtered TEACHER data set is used so the counts describe the same
   records that produced the estimates.
   NOTE(review): MAX(tsstrat) equals the number of strata only when strata are
   coded 1..K with no gaps - confirm against the site file. */
proc means data=teacher;
   var tsstrat;
   output out=out_t n=nobs_t max=ntsstrat;
run;

* Create a dataset that contains ts_fpc, nobs_t, ntsstrat, percent, sepercent;
data tdata.xx2010_allvar;
   /* one-row look-ups: read once, values are retained for every estimate row */
   if _n_ = 1 then set ts_fpc;
   if _n_ = 1 then set out_t (DROP=_TYPE_ _FREQ_);
   set tdata.xx2010_teacher_estimate;
   format _all_;   /* remove all formats so raw values are shown */
run;

* Compute 95% asymmetric confidence intervals with fpc adjustment;
data tdata.teacher_ci;
   set tdata.xx2010_allvar;

   se_s1  = sepercent;
   wpct_s = percent/100;
   se_s2  = se_s1*(sqrt(ts_fpc));

   /* limits stay missing unless every condition below is met */
   lower = .;
   upper = .;

   /* The logit is defined only for proportions strictly between 0 and 1
      (a missing proportion also fails this test). */
   if 0 < wpct_s < 1 then do;
      wpct_s_2 = 1 - wpct_s;
      ln_t     = log(wpct_s) - log(wpct_s_2);
      /* standard error on the logit scale; the divisor cannot be zero here */
      se_t     = (se_s2/100)/(wpct_s*(wpct_s_2));
      df_t     = nobs_t - ntsstrat;

      /* TINV needs positive degrees of freedom; otherwise leave the limits
         missing instead of generating invalid-argument notes */
      if df_t > 0 then do;
         _tcrit = tinv(0.975, df_t);
         lf     = ln_t - _tcrit*se_t;
         uf     = ln_t + _tcrit*se_t;
         /* back-transform from the logit scale to percent */
         lower  = 100*(exp(lf)/(1+exp(lf)));
         upper  = 100*(exp(uf)/(1+exp(uf)));
      end;
   end;
   drop _tcrit;   /* work variable only */

   * Create VAR_NAME to map the actual variable name to SUDAAN variable identifier;
   length var_name $12;
   select (variable);
      when (1, 2, 3, 4, 5) var_name = 'q1';
      when (6)             var_name = 'q2_1';
      when (7)             var_name = 'q2_2';
      when (8)             var_name = 'q2_3';
      when (9)             var_name = 'q2_4';
      when (10)            var_name = 'q2_5';
      when (11)            var_name = 'q2_6';
      when (12)            var_name = 'q2_7';
      otherwise;           /* any other identifier leaves var_name blank */
   end;

   label
      ts_fpc    = "teacher finite population correction factor"
      nobs_t    = "number of participating teachers"
      ntsstrat  = "number of unique strata in the teacher dataset"
      procnum   = "sudaan output variable: procedure number"
      tableno   = "sudaan output variable: table number"
      variable  = "sudaan output variable: variable"
      _c1       = "sudaan output variable: codes for categorical variable f_grdlvl2"
      nsum      = "sudaan output variable: sample size"
      wsum      = "sudaan output variable: weighted size"
      percent   = "sudaan output variable: percent"
      sepercent = "sudaan output variable: se percent"
      total     = "sudaan output variable: total"
      lowtotal  = "sudaan output variable: lower 95% limit total"
      uptotal   = "sudaan output variable: upper 95% limit total"
      lowpct    = "sudaan output variable: lower 95% limit percent"
      uppct     = "sudaan output variable: upper 95% limit percent"
      atlev1    = "sudaan output variable: count at level 1"
      se_s1     = "same as sepercent from sudaan"
      wpct_s    = "proportion : percent from sudaan divided by 100"
      se_s2     = "standard error of weighted percent with finite population correction"
      wpct_s_2  = "1 minus wpct_s"
      ln_t      = "logit transformation of proportion"
      se_t      = "standard error of logit-transformed proportion"
      df_t      = "degrees of freedom in the principal dataset"
      lf        = "lower bound of 95% asymmetric confidence interval in logit scale"
      uf        = "upper bound of 95% asymmetric confidence interval in logit scale"
      lower     = "lower bound of 95% asymmetric confidence interval"
      upper     = "upper bound of 95% asymmetric confidence interval"
      var_name  = "variable name"
   ;
run;

* print the final results;
/* NOTE(review): f_higrade, f_lowgrade, finalwt, tsstrat and tsunit are not
   created in this program; they presumably come from the single teacher
   record read from ts_fpc and so repeat on every row - confirm. */
proc print data=tdata.teacher_ci;
   var var_name variable f_grdlvl2 f_higrade f_lowgrade finalwt tsstrat tsunit
       ts_fpc nobs_t ntsstrat
       nsum wsum percent sepercent
       total lowtotal uptotal lowpct uppct
       se_s1 wpct_s se_s2 wpct_s_2 ln_t se_t df_t lf uf lower upper;
run;