/********* Principal Program **********/

/*
 * Purpose: compute weighted percent estimates and standard errors for Q1 and
 * Q4 by grade level with SUDAAN PROC DESCRIPT, then derive asymmetric
 * (logit-based) 95% confidence intervals with a finite population correction
 * and print the results.
 *
 * Inputs : pdata.xxp2010                  principal data set from the CD ROM
 * Outputs: pdata.xx2010_principal_estimate SUDAAN estimates
 *          pdata.xx2010_allvar             estimates + ps_fpc, nobs_p, npsstrat
 *          pdata.principal_ci              estimates + confidence limits
 *
 * "xx" is the site code as it appears on the file name on the CD ROM.
 */

/* Assign the locations for the SAS data and format library. */
libname pdata "c:\data\profiles2010";               /* tells SAS where the data are */
options fmtsearch=(pdata.xx2010_formats) nofmterr;  /* tells SAS where the formats are */

/* Create a temporary dataset from the principal SAS data set on the CD ROM,
   retaining only records that have principal analysis weights greater than
   zero (subsetting IF: records with finalwt <= 0 or missing are dropped). */
data principal;
   set pdata.xxp2010;
   if finalwt > 0;
run;

/* Sort the data by stratum - this is a SUDAAN requirement. */
proc sort data=principal;
   by psstrat;
run;

/* Run the SUDAAN descript procedure for computing percent estimates and
   standard errors for Q1 and Q4 by grade level. Save SUDAAN output to a data
   set for use in subsequent processing steps.
   The var list has seven entries and catlevel supplies one level per entry;
   q4 is listed twice so that it is paired with levels 1 and 2. */
proc descript data=principal filetype=sas design=strwr atlevel1=1;
   weight finalwt;
   nest psstrat;
   subgroup f_grdlvl2;
   levels 3;
   var q1_1 q1_2 q1_3 q1_4 q1_5 q4 q4;
   catlevel /* q1 */ 1 1 1 1 1
            /* q4 */ 1 2;
   setenv colspce=2 rowwidth=13 colwidth=17 leftmgn=17;
   output nsum wsum percent sepercent
          / filename=pdata.xx2010_principal_estimate filetype=sas
            tablecell=default replace;
   title "2010 school health profiles - principal survey";
   print nsum wsum percent sepercent atlev1
         / wsumfmt=f8.2 percentfmt=f8.4 sepercentfmt=f8.5;
run;

/* Compute asymmetric 95% confidence intervals for principal percent
   estimates with finite population correction. */

/* Extract finite population correction factor (ps_fpc) from the principal
   SAS dataset on the CD ROM. NODUPKEY keeps one observation per distinct
   ps_fpc value; all variables of those observations are kept.
   NOTE(review): this reads the unfiltered file, not the finalwt > 0 subset,
   and a missing ps_fpc would sort first - confirm ps_fpc is a single
   non-missing constant per site. */
proc sort data=pdata.xxp2010 out=ps_fpc nodupkey;
   by ps_fpc;
run;

/* Run Proc Means for computing the number of participating principals
   (nobs_p) and the number of unique strata in the principal dataset
   (npsstrat) - these variables are used for calculating the degrees of
   freedom (df_p).
   nobs_p is the count of non-missing psstrat values; npsstrat is the MAXIMUM
   psstrat value.
   NOTE(review): the maximum equals the number of strata only if strata are
   coded 1..K without gaps, and the counts come from the unfiltered file -
   confirm both against the data documentation. */
proc means data=pdata.xxp2010;
   var psstrat;
   output out=out_p n=nobs_p max=npsstrat;
run;

/* Create a dataset that contains ps_fpc, nobs_p, npsstrat, percent,
   sepercent. The two one-row SET statements run only on the first iteration;
   their values are retained and so repeat on every estimate row. Only the
   first observation of ps_fpc is used.
   NOTE(review): that first observation carries every variable of one source
   record (not just ps_fpc) into the output - verify this is intended.
   FORMAT _ALL_ removes all format associations from the output variables. */
data pdata.xx2010_allvar;
   if _n_ = 1 then set ps_fpc;
   if _n_ = 1 then set out_p (DROP=_TYPE_ _FREQ_);
   set pdata.xx2010_principal_estimate;
   format _all_;
run;

/* Compute 95% asymmetric confidence intervals with fpc adjustment.
   The interval is built on the logit scale and transformed back to percent.
   Limits are left missing when the proportion is not strictly between 0 and
   1 (the logit is undefined) or when the degrees of freedom are not
   positive (TINV has no valid critical value). */
data pdata.principal_ci;
   set pdata.xx2010_allvar;

   se_s1  = sepercent;
   wpct_s = percent/100;
   se_s2  = se_s1*(sqrt(ps_fpc));

   if 0 < wpct_s < 1 then do;
      wpct_s_2 = 1 - wpct_s;
      ln_p     = log(wpct_s) - log(wpct_s_2);
      /* wpct_s and wpct_s_2 are both strictly positive here, so the
         denominator cannot be zero and no zero check is needed. */
      se_p     = (se_s2/100)/(wpct_s*(wpct_s_2));
      df_p     = nobs_p - npsstrat;

      if df_p > 0 then do;
         /* Evaluate the t critical value once and reuse it for both limits. */
         _tcrit975 = tinv(0.975, df_p);
         lf    = ln_p - _tcrit975*se_p;
         uf    = ln_p + _tcrit975*se_p;
         lower = 100*(exp(lf)/(1+exp(lf)));
         upper = 100*(exp(uf)/(1+exp(uf)));
      end;
      else do;
         lower = . ;
         upper = . ;
      end;
   end;
   else do;
      lower = . ;
      upper = . ;
   end;

   /* Create VAR_NAME to map the actual variable name to SUDAAN variable
      identifier (position in the DESCRIPT var list). Identifiers 6 and 7
      are the two q4 entries. */
   length var_name $12 ;
   select (variable);
      when (1)    var_name = 'q1_1';
      when (2)    var_name = 'q1_2';
      when (3)    var_name = 'q1_3';
      when (4)    var_name = 'q1_4';
      when (5)    var_name = 'q1_5';
      when (6, 7) var_name = 'q4';
      otherwise;
   end;

   label
      ps_fpc    = "principal finite population correction factor"
      nobs_p    = "number of participating principals "
      npsstrat  = "number of unique strata in the principal dataset "
      procnum   = "sudaan output variable: procedure number "
      tableno   = "sudaan output variable: table number"
      variable  = "sudaan output variable: variable"
      _c1       = "sudaan output variable: codes for categorical variable f_grdlvl2"
      nsum      = "sudaan output variable: sample size"
      wsum      = "sudaan output variable: weighted size"
      percent   = "sudaan output variable: percent"
      sepercent = "sudaan output variable: se percent"
      total     = "sudaan output variable: total"
      lowtotal  = "sudaan output variable: lower 95% limit total"
      uptotal   = "sudaan output variable: upper 95% limit total"
      lowpct    = "sudaan output variable: lower 95% limit percent"
      uppct     = "sudaan output variable: upper 95% limit percent"
      atlev1    = "sudaan output variable: count at level 1"
      se_s1     = "same as sepercent from sudaan"
      wpct_s    = "proportion : percent from sudaan divided by 100"
      se_s2     = "standard error of weighted percent with finite population correction"
      wpct_s_2  = "1 minus wpct_s"
      ln_p      = "logit transformation of proportion"
      se_p      = "standard error of logit-transformed proportion"
      df_p      = "degrees of freedom in the principal dataset"
      lf        = "lower bound of 95% asymmetric confidence interval in logit scale"
      uf        = "upper bound of 95% asymmetric confidence interval in logit scale"
      lower     = "lower bound of 95% asymmetric confidence interval"
      upper     = "upper bound of 95% asymmetric confidence interval"
      var_name  = "variable name"
   ;

   /* Temporary holder for the t critical value; not part of the output. */
   drop _tcrit975;
run;

/* Print the final results. */
proc print data=pdata.principal_ci;
   var var_name variable f_grdlvl2 f_higrade f_lowgrade finalwt psstrat psunit
       ps_fpc nobs_p npsstrat
       nsum wsum percent sepercent total lowtotal uptotal lowpct uppct
       se_s1 wpct_s se_s2 wpct_s_2 ln_p se_p df_p lf uf lower upper;
run;