* Macro definition *.
* COMPAREROC: bootstrap comparison of the AUCs of two test variables measured on the same cases.
* Arguments of the macro call.
*  - Positional 1 (two tokens): names of the two test variables.
*  - Positional 2 (text before the opening parenthesis): name of the state variable.
*  - Positional 3 (text inside the parentheses): state value used to dichotomize the state variable.
*  - testpos (default LARGE): LARGE recodes the state as (state NE value), any other word as (state EQ value).
*  - k (default 20000): number of bootstrap samples (also used for SET MXLOOPS).
*  - seed (default RANDOM): value handed to SET MTINDEX.
* The macro works on a hidden copy of the active dataset and reactivates the original one at the end.
* The bootstrapped AUC differences are written to C:\Temp\BootStrappedAUC.sav.
* NOTE(review): that folder is presumably expected to exist and be writable - confirm before running.
DEFINE COMPAREROC(!POSITIONAL=!TOKENS(2)/
                  !POSITIONAL !CHAREND('(')/
                  !POSITIONAL !CHAREND(')')/
                  testpos= !DEFAULT(LARGE) !TOKENS(1)/
                  k= !DEFAULT (20000)!TOKENS(1)/
                  seed= !DEFAULT(RANDOM)!TOKENS(1)).
DATASET NAME OriginalData.
DATASET COPY WorkingData WINDOW=HIDDEN.
DATASET ACTIVATE WorkingData.
* Preparing data for MATRIX (recoding state variable to 0&1 and sorting by it) *.
!IF (!UPCASE(!testpos) !EQ 'LARGE') !THEN.
.  COMPUTE !2=(!2 NE !3).
!ELSE.
.  COMPUTE !2=(!2 EQ !3).
!IFEND.
SORT CASES BY !2 (A).
PRESERVE.
* Initialize seed and fix mxloops to number of bootstrap samples *.
SET RNG=MT.
SET MTINDEX=!seed.
SET MXLOOPS=!k.
DO IF $casenum EQ 1.
.  PRINT.
.  !IF (!UPCASE(!seed) !EQ 'RANDOM') !THEN.
.  PRINT /'RANDOM seed was used'.
.  !ELSE.
.  PRINT /'Seed value: ' !QUOTE(!seed).
.  !IFEND.
END IF.
MATRIX.
PRINT /TITLE='*** BOOTSTRAPPING 95%CI FOR DIFFERENCE IN TWO AUC - ROC (PAIRED DATA) ***'.
* Read sorted data (column 1 = recoded state, columns 2 and 3 = test variables) *.
GET data /VAR=!2 !1 /NAMES=vnames /MISSING=OMIT.
COMPUTE vname={vnames(2:3),'Diff'}.
* Sample sizes (n2 = cases with recoded state 1, n1 = cases with recoded state 0) *.
COMPUTE totaln=NROW(data).
COMPUTE n2=CSUM(data(:,1)).
COMPUTE n1=totaln-n2.
* Split data in two samples (rows are sorted by state, so the first n1 rows are state 0) *.
COMPUTE group1.1=data(1:n1,2).
COMPUTE group1.2=data((n1+1):totaln,2).
COMPUTE group2.1=data(1:n1,3).
COMPUTE group2.2=data((n1+1):totaln,3).
* Sample AUCs (from the rank sum of the state 1 group) *.
COMPUTE Ranks=RNKORDER({group1.1;group1.2}).
COMPUTE R2=CSUM(Ranks&*data(:,1)).
COMPUTE U2=n1*n2+n2*(n2+1)/2-R2.
COMPUTE AUC1=U2/(n1*n2).
COMPUTE Ranks=RNKORDER({group2.1;group2.2}).
COMPUTE R2=CSUM(Ranks&*data(:,1)).
COMPUTE U2=n1*n2+n2*(n2+1)/2-R2.
COMPUTE AUC2=U2/(n1*n2).
COMPUTE Diff=AUC1-AUC2.
PRINT {AUC1,AUC2,Diff}
  /FORMAT='F8.3'
  /CNAMES=vname
  /TITLE='SAMPLE AUC FOR BOTH VARIABLES'.
*** BOOTSTRAPPING ***.
COMPUTE k=!k. /* Number of bootsamples *.
* bootAUC columns: 1 = AUC difference, 2 = AUC1, 3 = AUC2 *.
COMPUTE bootAUC =MAKE(k,3,0).
COMPUTE BtGrp1.1=MAKE(n1,1,0).
COMPUTE BtGrp1.2=MAKE(n2,1,0).
COMPUTE BtGrp2.1=MAKE(n1,1,0).
COMPUTE BtGrp2.2=MAKE(n2,1,0).
LOOP i=1 TO k.
/* Extracting k bootstrap samples from both groups (paired) *.
/* The same resampled row numbers are applied to both test variables *.
-  COMPUTE flipcoin=1+TRUNC(n1*UNIFORM(n1,1)).
-  COMPUTE BtGrp1.1=group1.1(flipcoin,:).
-  COMPUTE BtGrp2.1=group2.1(flipcoin,:).
-  COMPUTE flipcoin=1+TRUNC(n2*UNIFORM(n2,1)).
-  COMPUTE BtGrp1.2=group1.2(flipcoin,:).
-  COMPUTE BtGrp2.2=group2.2(flipcoin,:).
* Boot AUC1 *.
-  COMPUTE Ranks=RNKORDER({BtGrp1.1;BtGrp1.2}).
-  COMPUTE R2=CSUM(Ranks&*data(:,1)).
-  COMPUTE U2=n1*n2+n2*(n2+1)/2-R2.
-  COMPUTE AUC1=U2/(n1*n2).
* Boot AUC2 *.
-  COMPUTE Ranks=RNKORDER({BtGrp2.1;BtGrp2.2}).
-  COMPUTE R2=CSUM(Ranks&*data(:,1)).
-  COMPUTE U2=n1*n2+n2*(n2+1)/2-R2.
-  COMPUTE AUC2=U2/(n1*n2).
-  COMPUTE bootAUC(i,1)=AUC1-AUC2.
-  COMPUTE bootAUC(i,2)=AUC1.
-  COMPUTE bootAUC(i,3)=AUC2.
END LOOP.
* Grand mean of bootstrapped AUC diff. & values *.
COMPUTE mean=CSUM(bootAUC)/k.
* Bootstrap estimator of the standard error of the AUC diff. *.
COMPUTE BootSEM=SQRT((CSSQ(bootAUC)-k&*(mean&**2))/(k-1)).
PRINT {T(mean(2:3)),T(BootSEM(2:3))}
  /FORMAT='F8.3'
  /CLABEL='AUC','SE(AUC)'
  /RNAME=vname
  /TITLE='Bootstrapped Statistics for AUC1&AUC2'.
* Bootstrap estimator of the standard error of AUC1&AUC2 diff. assuming independent samples *.
COMPUTE BootSEMD=SQRT(BootSEM(2)**2+BootSEM(3)**2).
* Correlation recovered from the gap between the independent and the paired variance of the difference *.
COMPUTE RAUC=(BootSEMD**2-BootSEM(1)**2)/(2*BootSEM(2)*BootSEM(3)).
PRINT RAUC
  /FORMAT='F8.2'
  /RLABEL='R(AUC)='
  /TITLE='Correlation between bootstrapped AUC1&AUC2'.
* Test for paired samples *.
COMPUTE Zval=mean(1)/BootSEM(1).
COMPUTE pvalue=1-CDFNORM(ABS(Zval)).
PRINT {mean(1),BootSEM(1),Zval,pvalue,2*pvalue}
  /FORMAT='F8.3'
  /CLABEL='Dif(AUC)','SE(Dif)*','Z value','1-tail p','2-tail p'
  /TITLE='Bootstrapped Statistics for AUC difference (used for BV1&BV2)'.
PRINT/TITLE='(*) Std. Deviation of bootstrapped AUC differences'.
* NP confidence interval *.
* Ordered array: sorting algorithm by R Ristow & J Peck *.
COMPUTE sortedbm=bootAUC(:,1).
COMPUTE sortedbm(GRADE(bootAUC(:,1)))=bootAUC(:,1).
* Percentile positions are rounded and clamped to 1..k, so any k yields valid integer subscripts *.
* (for k a multiple of 40 these are exactly the positions k*0.025 and 1+k*0.975) *.
COMPUTE lowidx=MMAX({1,RND(k*0.025)}).
COMPUTE uppidx=MMIN({k,1+RND(k*0.975)}).
COMPUTE lower1=sortedbm(lowidx).
COMPUTE upper1=sortedbm(uppidx).
* Parametric confidence intervals (BV1&BV2)*.
COMPUTE z = 1.959964.
COMPUTE lower2=mean(1)-z*BootSEM(1).
COMPUTE upper2=mean(1)+z*BootSEM(1).
COMPUTE lower3=Diff-z*BootSEM(1).
COMPUTE upper3=Diff+z*BootSEM(1).
PRINT {lower1,upper1;lower2,upper2;lower3,upper3}
  /FORMAT='F8.3'
  /CLABEL='Lower CL','Upper CL'
  /RLABEL='BP','BV1','BV2'
  /TITLE='95%CI: Non parametric (Pctiles. 2.5 & 97.5) & Parametric (Z based) BV1&BV2 (+)'.
PRINT /TITLE='(+) BV1 -> mean Diff(AUC) & SE(Diff); BV2 -> Sample Diff. & SE(Diff).'.
* Export data for histogram *.
COMPUTE vname={'DiffAUC'}.
SAVE bootAUC(:,1) /OUTFILE='C:\Temp\BootStrappedAUC.sav' /NAMES=vname.
PRINT k
  /FORMAT='F8'
  /RLABEL='K='
  /TITLE='K bootsampled AUC dif. saved to C:\Temp\BootStrappedAUC.sav'.
END MATRIX.
RESTORE.
GET FILE ='C:\Temp\BootStrappedAUC.sav' .
DATASET NAME BootstrappedAUC.
FREQUENCIES VARIABLES=ALL
  /FORMAT=NOTABLE
  /HISTOGRAM NORMAL
  /STATISTICS=SKEWNESS KURTOSIS.
DATASET ACTIVATE OriginalData.
DATASET CLOSE WorkingData.
DATASET CLOSE BootstrappedAUC.
!ENDDEFINE.

* Sample dataset *.
SET LOCALE=ENGLISH.
DATA LIST FREE/Outcome (F8) Hemoglob Bilirrub (2 F8.1).
BEGIN DATA
1 18.7 2.2 1 17.0 1.6 1 15.6 2.0 1 14.3 3.8 1 13.3 1.8 1 10.9 3.5 1 8.7 5.5
1 17.8 2.7 1 16.6 3.6 1 15.6 1.6 1 14.3 4.2 1 12.5 4.5 1 10.9 4.1 1 7.4 3.0
1 17.8 2.5 1 16.3 4.1 1 15.4 4.1 1 14.3 3.3 1 12.3 5.0 1 10.9 1.5 1 5.7 4.6
1 17.6 4.1 1 16.1 2.0 1 15.4 2.2 1 14.1 3.7 1 12.2 3.5 1 10.8 3.3 1 9.7 4.9
1 17.6 3.2 1 16.0 2.6 1 15.3 2.0 1 14.0 5.8 1 12.2 2.4 1 10.6 3.4 1 11.6 3.7
1 17.6 1.0 1 16.0 0.8 1 15.1 3.2 1 13.9 2.9 1 12.0 2.8 1 10.5 6.3 1 13.4 2.3
1 17.5 1.6 1 15.8 3.7 1 14.8 1.8 1 13.8 3.7 1 12.0 3.5 1 10.2 3.3 1 14.6 5.0
1 17.4 1.8 1 15.8 3.0 1 14.7 3.7 1 13.6 2.3 1 11.8 2.3 1 9.9 4.0 1 15.6 1.4
1 17.4 2.4 1 15.8 1.7 1 14.7 3.0 1 13.5 2.1 1 11.8 4.5 1 9.8 4.2 1 17.0 0.4
2 15.8 1.8 2 5.7 6.2 2 7.6 4.7 2 9.2 5.6 2 5.1 5.8 2 6.7 5.9 2 12.3 5.6
2 5.5 4.8 2 7.4 6.8 2 8.8 5.6 2 3.4 3.9 2 9.5 3.6 2 5.3 4.8 2 7.1 5.6
2 9.4 3.8 2 5.3 2.8
END DATA.
VALUE LABELS Outcome
  1 ' Absent'
  2 'Present'.

* Z test comparing the two AUCs as if they came from independent samples *.
* Both ROC tables are captured through OMS into the ROCStats dataset, one row per ROC command *.
PRESERVE.
SET OLANG=ENGLISH.
DATASET NAME OriginalData.
DATASET DECLARE ROCStats.
OMS
  /SELECT TABLES
  /IF COMMANDS = ["ROC Curve"]
      SUBTYPES = ["Area Under the Curve"]
  /DESTINATION FORMAT = SAV
               OUTFILE = ROCStats.
ROC Hemoglob BY Outcome (2)
  /PLOT = CURVE(REFERENCE)
  /PRINT = SE
  /CRITERIA = TESTPOS(SMALL) CI(95).
ROC Bilirrub BY Outcome (2)
  /PLOT = CURVE(REFERENCE)
  /PRINT = SE
  /CRITERIA = TESTPOS(LARGE) CI(95).
OMSEND.
RESTORE.

* The statistics are filled in on the second row only, using LAG to reach the first row *.
DATASET ACTIVATE ROCStats.
DO IF ($casenum = 2).
+  COMPUTE ZVal=ABS(Area-LAG(Area))/SQRT(Std.Error**2+LAG(Std.Error)**2).
+  COMPUTE PValue2=2*(1-CDF.NORMAL(ZVal,0,1)).
+  COMPUTE PValue1=PValue2/2.
ELSE.
/* The explicit system-missing assignments below are needed in SPSS 15 and can be removed in later versions */.
+  COMPUTE ZVal=$SYSMIS.
+  COMPUTE PValue2=$SYSMIS.
+  COMPUTE PValue1=$SYSMIS.
END IF.
FORMATS ZVal PValue1 PValue2 (F8.3).
VARIABLE LABELS
  Var1 'Variables'
  ZVal 'Z statistic'
  PValue2 '2-tailed P'
  PValue1 '1-tailed P'.
SUMMARIZE
  /TABLES=Var1 Area Std.Error ZVal PValue1 PValue2
  /FORMAT=LIST NOCASENUM TOTAL
  /TITLE='Z-test for comparing two AUC'
  /FOOTNOTE 'Independent samples'
  /CELLS=NONE.
DATASET ACTIVATE OriginalData.
DATASET CLOSE ROCStats.

/* Both test variables must act in the same direction (same side) for the paired macro to work, hence the inverse */.
COMPUTE InvHemo=1/Hemoglob.

* Minimum arguments MACRO call *.
COMPAREROC InvHemo Bilirrub Outcome(2).

* Other optional arguments of the macro call.
*  - testpos=SMALL (if larger values are associated with controls in both variables).
*  - k = number of bootstrap samples (more samples, more stable results but more running time).
*  - seed = RANDOM or any number (set it to a fixed number to replicate your results exactly).