/* This program is stored as bliss.sas */

/* This program uses PROC LOGISTIC in SAS to fit models with
   logistic, probit, and complementary log-log link functions
   to the beetle mortality data collected by Chester Bliss */

/* Read dose (Z), number dead (X1) and number alive (X2);
   derive log-dose (ZL) and the number exposed (N). */
DATA SET1;
  INPUT Z X1 X2;
  ZL = LOG(Z);
  N = X1+x2;
  /* Label texts are quoted so that multi-word labels parse. */
  LABEL Z  = 'DOSE'
        ZL = 'LOG(DOSE)'
        X1 = 'NUMBER DEAD'
        X2 = 'NUMBER ALIVE'
        N  = 'NUMBER EXPOSED';
  CARDS;
49.057 6 53
52.991 13 47
56.911 18 44
60.842 28 28
64.759 52 11
68.691 53 6
72.611 61 1
76.542 60 0
;
RUN;

PROC PRINT DATA=SET1;
  TITLE ' LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA';
RUN;

/* Logistic fit of X1 events out of N trials on log-dose.
   COVOUT/OUTEST= write the estimates and their covariance
   matrix to SETP1; OUTPUT writes fitted probabilities and
   95 percent limits to SETR1. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP1;
  MODEL X1/N = ZL / ITPRINT COVB MAXITER=50 CONVERGE=.000001
                    PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR1 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'LOGISTIC REGRESSION MODEL ON LOG(DOSE) FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP1;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR1;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

/* Estimate the LD50 */

/* SET2 keeps the row of parameter estimates from SETP1. */
data set2;
  set setp1;
  if(_type_ = 'PARMS');
  keep intercept zl;
run;

/* SET3 keeps the covariance rows from SETP1. */
data set3;
  set setp1;
  if(_type_ = 'COV');
  keep intercept zl;
run;

proc iml;
  start ld50;
    use set2;
    read all into b;
    use set3;
    read all into v;
    /* log(LD50) = -intercept/slope */
    ld50 = -b[1,1]/b[1,2];
    /* Row vector of partial derivatives of -b1/b2 with respect
       to (b1, b2); || is horizontal concatenation. */
    g = (-1 || b[1,1]/b[1,2])/b[1,2];
    /* Delta-method standard error of log(LD50). */
    s = sqrt(g*v*t(g));
    lower = ld50 - (1.96)*s;
    upper = ld50 + (1.96)*s;
    /* Transform the estimate and the limits back to the dose scale. */
    ld50 = exp(ld50);
    stderr = ld50*s;
    lower=exp(lower);
    upper=exp(upper);
    print ld50 stderr lower upper;
  finish;
  run ld50;
quit;

/* Plot the observed proportions and the fitted curve */

DATA SETR1;
  SET SETR1;
  PROB=X1/(X1+X2);
RUN;

/* Use this to create graphs in Windows */
goptions cback=white colors=(black) device=win target=winprtc
         rotate=portrait;

/* Use this to produce a postscript plot in the VINCENT system */
/* filename graffile pipe 'lpr -Dpostscript';
   goptions gsfmode=replace gsfname=graffile cback=white
            colors=(black) targetdevice=ps300 rotate=landscape; */

SYMBOL1 V=NONE I=SPLINE L=1 W=2 H=2;
SYMBOL2 V=circle H=2;

PROC GPLOT DATA=SETR1;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss)
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL PROB*ZL / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'LOGISTIC REGRESSION ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
/* RUN closes the step here, so a global statement that follows
   (for example a later TITLE) is not absorbed into this graph. */
RUN;

/* FIT A PROBIT MODEL TO THE BLISS DATA */

PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP2;
  MODEL X1/N = ZL / ITPRINT COVB LINK=NORMIT PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR2 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'PROBIT MODEL USING LOG(DOSE) FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP2;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR2;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

DATA SETR2;
  SET SETR2;
  PROB= X1/(X1+X2);
RUN;

PROC GPLOT DATA=SETR2;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss)
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2 F=swiss)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL PROB*ZL / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'PROBIT ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
/* RUN closes the step here, so a global statement that follows
   (for example a later TITLE) is not absorbed into this graph. */
RUN;
/* Fit the complementary log-log model to the Bliss data */

TITLE ' COMPLIMENTARY LOG-LOG ANALYSIS OF THE BLISS DATA';

/* Same model statement as the other fits in this program, with
   LINK=CLOGLOG; estimates go to SETP3, fitted values to SETR3. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP3;
  MODEL X1/N = ZL / ITPRINT COVB LINK=CLOGLOG PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR3 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'COMPLIMENTARY LOG-LOG MODEL FOR THE BLISS DATA';
run;

PROC PRINT DATA=SETP3;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR3;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

/* Add the observed proportion dead for plotting. */
DATA SETR3;
  SET SETR3;
  PROB = X1/(X1+X2);
RUN;

/* Fitted curve uses SYMBOL1, observed proportions use SYMBOL2. */
PROC GPLOT DATA=SETR3;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss )
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2 F=swiss)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'COMPLIMENTARY LOG-LOG ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
/* Final step of the program: RUN makes it execute. */
RUN;

/* Listing that follows the program in the source text
   (title and link function show it is from the logit fit):

   LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA

   Obs      Z     X1    X2      ZL      N
    1    49.057    6    53   3.89298   59
    2    52.991   13    47   3.97012   60
    3    56.911   18    44   4.04149   62
    4    60.842   28    28   4.10828   56
    5    64.759   52    11   4.17067   63
    6    68.691   53     6   4.22962   59
    7    72.611   61     1   4.28512   62
    8    76.542   60     0   4.33784   60

   Response Profile

   Ordered    Binary      Total
   Value      Outcome     Frequency
   1          Event       291
   2          Nonevent    190

   Maximum Likelihood Iteration History

   Iter   Ridge    -2 Log L      Intercept        ZL
    0       0     645.441025      0.426299      0
    1       0     395.941537    -39.615600      9.694171
    2       0     374.092238    -54.667149     13.394642
    3       0     372.484914    -60.122455     14.736966
    4       0     372.470133    -60.711104     14.881848
    5       0     372.470132    -60.717199     14.883348
    6       0     372.470132    -60.717199     14.883348

   Model Information

   Data Set                      WORK.SET1
   Response Variable (Events)    X1
   Response Variable (Trials)    N
   Number of Observations        8
   Link Function                 Logit
   Optimization Technique        Fisher's scoring
*/
Analysis of Maximum Likelihood Estimates

                         Standard
Parameter   DF  Estimate    Error   Chi-Square   Pr > ChiSq
Intercept    1  -60.7172   5.1806     137.3587       <.0001
ZL           1   14.8833   1.2647     138.4904       <.0001

Model Fit Statistics

             Intercept    Intercept and
Criterion    Only         Covariates
AIC          647.441      376.470
SC           651.617      384.822
-2 Log L     645.441      372.470

Odds Ratio Estimates

           Point        95% Wald
Effect     Estimate     Confidence Limits
ZL         >999.999     >999.999   >999.999

Testing Global Null Hypothesis: BETA=0

Test                Chi-Square   DF   Pr > ChiSq
Likelihood Ratio      272.9709    1       <.0001
Score                 227.5810    1       <.0001
Wald                  138.4904    1       <.0001

987

Association of Predicted Probabilities and Observed Responses

Percent Concordant    87.0     Somers' D   0.802
Percent Discordant     6.8     Gamma       0.856
Percent Tied           6.3     Tau-a       0.384
Pairs                55290     c           0.901

988

Estimated Covariance Matrix

Variable     Intercept         ZL
Intercept     26.83906   -6.54992
ZL            -6.54992    1.59949

DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES

Obs   _LINK_   _TYPE_   _STATUS_      _NAME_   Intercept        ZL   _LNLIKE_
 1    LOGIT    PARMS    0 Converged   X1        -60.7172   14.8833   -186.235
 2    LOGIT    COV      0 Converged   Int        26.8391   -6.5499   -186.235
 3    LOGIT    COV      0 Converged   ZL         -6.5499    1.5995   -186.235

ESTIMATES OF MORTALITY RATES

Obs      Z     X1    X2      ZL      N     PHAT     LOWER95    UPPER95
 1    49.057    6    53   3.89298   59   0.05860    0.03425    0.09850
 2    52.991   13    47   3.97012   60   0.16404    0.11605    0.22678
 3    56.911   18    44   4.04149   62   0.36209    0.29841    0.43101
 4    60.842   28    28   4.10828   56   0.60534    0.54224    0.66511
 5    64.759   52    11   4.17067   63   0.79517    0.73862    0.84210
 6    68.691   53     6   4.22962   59   0.90323    0.85972    0.93428
 7    72.611   61     1   4.28512   62   0.95520    0.92576    0.97330
 8    76.542   60     0   4.33784   60   0.97905    0.96046    0.98900

989 990
# This code uses the glm function in Splus to fit models to the
# Bliss beetle data. It is stored in the file bliss.ssc

# First enter the data

dose <- c(49.057, 52.991, 56.911, 60.842, 64.759, 68.691, 72.611, 76.542)
ldose <- log(dose)
ndead <- c(6, 13, 18, 28, 52, 53, 61, 60)
nalive <- c(53, 47, 44, 28, 11, 6, 1, 0)
y <- cbind(ndead, nalive)

# Now fit a logistic regression of mortality rates on log-dose.
# x = TRUE stores the model matrix in the fit (used below as bliss.lg$x).
# The logical flags are spelled TRUE/FALSE: a lower-case t is the
# transpose function, not a logical value.

bliss.lg <- glm(y ~ ldose, x = TRUE, trace = TRUE,
                family = binomial(link = logit))
summary(bliss.lg)

# Define a function to extract the estimated covariance matrix
# from the summary object

f.vcov <- function(obj) {
  so <- summary(obj, corr = FALSE)
  so$dispersion * so$cov.unscaled
}

bliss.lg$vcov <- f.vcov(bliss.lg)
bliss.lg$vcov

# Compute standard errors for the estimated probabilities at
# the dosage levels used in the study: the standard error of the
# linear predictor times p * (1 - p).

sp <- sqrt(diag((bliss.lg$x %*% bliss.lg$vcov %*% t(bliss.lg$x)))) *
  bliss.lg$fitted.values * (1 - bliss.lg$fitted.values)
cbind(bliss.lg$x, bliss.lg$fitted.values, sp)

# Construct approximate one-at-a-time 95 percent confidence
# intervals for the estimated proportions: limits are formed for
# the linear predictor and then mapped through the inverse logit.

slp <- sqrt(diag((bliss.lg$x %*% bliss.lg$vcov %*% t(bliss.lg$x))))
llp <- bliss.lg$x %*% bliss.lg$coefficients - (1.96) * slp
ulp <- bliss.lg$x %*% bliss.lg$coefficients + (1.96) * slp
lower <- exp(llp) / (1 + exp(llp))
upper <- exp(ulp) / (1 + exp(ulp))
cbind(bliss.lg$x, bliss.lg$fitted.values, lower, upper)

# Estimate the LD50 and construct an approximate
# 95 percent confidence interval.
# First make a confidence interval for log(ld50)

b0 <- bliss.lg$coefficients[1]
b1 <- bliss.lg$coefficients[2]
l.ld50 <- -b0 / b1
# g holds the partial derivatives of -b0/b1 with respect to (b0, b1)
g <- cbind(-1, b0 / b1) / b1
sl.ld50 <- sqrt(g %*% bliss.lg$vcov %*% t(g))
l.upper <- l.ld50 + (1.96) * sl.ld50
l.lower <- l.ld50 - (1.96) * sl.ld50
# Transform back to the dose scale (this overwrites lower and upper)
ld50 <- signif(exp(l.ld50))
lower <- signif(exp(l.lower))
upper <- signif(exp(l.upper))
sld50 <- signif(ld50 * sl.ld50)
cat(ld50, lower, upper, sld50)

# Compute p-values for lack-of-fit tests. These results
# may be unreliable for sparse data.

goftests <- function(x, m1, m2, df) {
  # Compute Pearson chi-squared and deviance tests and p-values.
  #
  # In this function
  #   x  = observed counts
  #   m1 = expected counts under H0
  #   m2 = expected counts under HA
  #   df = degrees of freedom
  #
  # The results are printed with cat(); the statistics and p-values
  # are also returned invisibly as a list.

  # A tiny constant is added so that a zero count does not give
  # log(0) or a division by zero.
  m1 <- m1 + .00000000000000001
  m2 <- m2 + .00000000000000001
  x2p <- sum(((m1 - m2)^2) / m1)
  pvalp <- 1 - pchisq(x2p, df)

  # Compute the G^2 statistic
  g2 <- 2 * sum(x * (log(m2 / m1)))
  pvalg <- 1 - pchisq(g2, df)

  cat("\n", " Pearson test = ", round(x2p, 2))
  cat("\n", " df = ", df)
  cat("\n", " p-value = ", round(pvalp, 2))
  cat("\n", " Deviance test = ", round(g2, 2))
  cat("\n", " df = ", df)
  cat("\n", " p-value = ", round(pvalg, 2), "\n")

  invisible(list(pearson = x2p, pearson.p = pvalp,
                 deviance = g2, deviance.p = pvalg, df = df))
}

# Compute estimates of expected counts

nd <- (ndead + nalive) * bliss.lg$fitted.values
na <- (ndead + nalive) - nd
my <- c(nd, na)
# The full component name is used: an abbreviated "$df" is ambiguous
# when the fit also carries df.null, and then yields NULL.
goftests(y, my, y, bliss.lg$df.residual)

# Open a motif window for displaying plots.
# Windows users should not do this, so the call is skipped
# when no motif function is available.

if (exists("motif")) motif()

# Plot the estimated curve for the mortality probabilities
# against the log-dose. Since the plot function uses type="n"
# it only defines and labels the axes and writes a title at the
# top of the plot.
# The points() function plots points on the plot corresponding
# to the observed proportions.
# The lines() function plots the estimated curve.

plot(c(3.8, 4.4), c(0, 1), type = "n", xlab = "log-dose",
     ylab = "mortality rate", main = "bliss Beetle Data")
points(ldose, ndead / (ndead + nalive), pch = 18, mkh = 0, cex = 2.0)
ld <- seq(3.8, 4.4, 0.02)
lines(ld, predict(bliss.lg, data.frame(ldose = ld), type = "response"))

# Plot the estimated curve for the mortality probabilities
# against dose.
# NOTE(review): the source text of this plot()/points() pair was
# garbled; it is rebuilt here on the pattern of the log-dose plot
# above, with dose on the horizontal axis. Confirm against the
# original bliss.ssc.

plot(c(48, 78), c(0, 1), type = "n", xlab = "dose",
     ylab = "mortality rate", main = "bliss Beetle Data")
points(dose, ndead / (ndead + nalive), pch = 18, mkh = 0, cex = 2.0)
ld <- seq(3.8, 4.4, 0.02)
lines(exp(ld), predict(bliss.lg, data.frame(ldose = ld), type = "response"))
Now fit a logistic regression of mortality rates on log-dose

This code uses the glm function in Splus to fit models to the Bliss beetle data. It is stored in the file bliss.ssc

First enter the data

dose <- c(49.057, 52.991, 56.911, 60.842, 64.759, 68.691, 72.611, 76.542)
ldose <- log(dose)
ndead <- c(6, 13, 18, 28, 52, 53, 61, 60)
nalive <- c(53, 47, 44, 28, 11, 6, 1, 0)
y <- cbind(ndead, nalive)

999

bliss.lg <- glm(y ~ ldose, x=T, trace=T, family=binomial(link=logit))
summary(bliss.lg)

GLM linear loop 1: deviance = 11.4476
GLM linear loop 2: deviance = 11.2318
GLM linear loop 3: deviance = 11.2316

Call: glm(formula = y ~ ldose, family = binomial(link = logit), x = T, trace = T)

Deviance Residuals:
       Min         1Q     Median        3Q       Max
 -1.594517  -0.394218  0.8328301  1.259146  1.594018

Coefficients:
                 Value  Std. Error    t value
(Intercept)  -60.71708    5.173451  -11.73628
      ldose   14.88332    1.262945   11.78461

1000

(Dispersion Parameter for Binomial family taken to be 1)

Null Deviance: 284.2024 on 7 degrees of freedom
Residual Deviance: 11.23156 on 6 degrees of freedom
Number of Fisher Scoring Iterations: 3

Correlation of Coefficients:
       (Intercept)
ldose   -0.9996809

f.vcov <- function(obj) {
  so <- summary(obj, corr=F)
  so$dispersion*so$cov.unscaled
}

Print the estimated proportions and the model matrix

cbind(bliss.lg$x, bliss.lg$fit)

  (Intercept)     ldose
1           1  3.892983  0.05860329
2           1  3.970122  0.16403780
3           1  4.041489  0.36208748
4           1  4.108280  0.60534026
5           1  4.170673  0.79516940
6           1  4.229618  0.90323318
7           1  4.285116  0.95519837
8           1  4.337840  0.97904849

1001

bliss.lg$vcov <- f.vcov(bliss.lg)
bliss.lg$vcov

             (Intercept)     ldose
(Intercept)      26.7646  -6.53170
      ldose      -6.5317   1.59503

1002
Compute standard errors for the estimated probabilities at the dosage levels used in the study

sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) * bliss.lg$fit* (1-bliss.lg$fit)
cbind(bliss.lg$x, bliss.lg$fit, sp)

  (Intercept)     ldose                       sp
1           1  3.892983  0.05860329  0.015816590
2           1  3.970122  0.16403780  0.028091274
3           1  4.041489  0.36208748  0.033986913
4           1  4.108280  0.60534026  0.031475466
5           1  4.170673  0.79516940  0.026365653
6           1  4.229618  0.90323318  0.018739648
7           1  4.285116  0.95519837  0.011697048
8           1  4.337840  0.97904849  0.006837601

1003

Construct approximate one-at-a-time 95 percent confidence intervals for the estimated proportions

slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x))))
llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp
ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp
lower <- exp(llp)/(1+exp(llp))
upper <- exp(ulp)/(1+exp(ulp))
cbind(bliss.lg$x, bliss.lg$fit, lower, upper)

  (Intercept)     ldose
1           1  3.892983  0.05860329  0.03427399
2           1  3.970122  0.16403780  0.11608923
3           1  4.041489  0.36208748  0.29844494
4           1  4.108280  0.60534026  0.54228308
5           1  4.170673  0.79516940  0.73867511
6           1  4.229618  0.90323318  0.85977743
7           1  4.285116  0.95519837  0.92580355
8           1  4.337840  0.97904849  0.96049528

(last column of the same table, printed separately because of line wrapping)

1  0.09844225
2  0.22671114
3  0.43096459
4  0.66507568
5  0.84206186
6  0.93425160
7  0.97328374
8  0.98898817

1004

Estimate the LD50 and construct an approximate 95 percent confidence interval

First make a confidence interval for log(ld50)

b0 <- bliss.lg$coef[1]
b1 <- bliss.lg$coef[2]
l.ld50 <- -b0/b1
g <- cbind(-1, b0/b1)/b1
sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g))
l.upper <- l.ld50 + (1.96)*sl.ld50
l.lower <- l.ld50 - (1.96)*sl.ld50
ld50 <- signif(exp(l.ld50))
lower <- signif(exp(l.lower))
upper <- signif(exp(l.upper))
sld50 <- signif(ld50*sl.ld50)
cat(ld50, lower, upper, sld50)

59.1182 58.0984 60.156 0.524874

1005 1006
Compute estimates of expected counts

nd <- (ndead+nalive)*bliss.lg$fit
na <- (ndead+nalive)-nd
my <- c(nd, na)
goftests(y,my,y,bliss.lg$df)

Pearson test = 10.03
Degrees of freedom = 6
p-value = 0.12

Deviance test = 11.23
df = 6
p-value = 0.08

1007