run ld50 /* Plot the observed proportions and the fitted curve */ DATA SETR1 SET SETR1 PROB=X1/(X1+X2) /* Use this to create graphs in Windows */ gopt

Size: px

Start display at page:

Download "run ld50 /* Plot the observed proportions and the fitted curve */ DATA SETR1 SET SETR1 PROB=X1/(X1+X2) /* Use this to create graphs in Windows */ gopt"

Oswin Francis
5 years ago
Views:

1 /* This program is stored as bliss.sas */ /* This program uses PROC LOGISTIC in SAS to fit models with logistic, probit, and complimentary log-log link functions to the beetle mortality data collected by Chester Bliss */ DATA SET1 INPUT Z X1 X2 ZL = LOG(Z) N = X1+x2 LABEL Z = DOSE ZL = LOG(DOSE) X1 = NUMBER DEAD X2 = NUMBER ALIVE N = NUMBER EXPOSED 975 CARDS PROC PRINT DATA=SET1 TITLE ' LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA' PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP1 MODEL X1/N = ZL / ITPRINT COVB MAXITER=50 CONVERGE= PPROB=.5 SELECTION=NONE OUTPUT OUT=SETR1 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05 TITLE 'LOGISTIC REGRESSION MODEL ON LOG(DOSE) FOR THE BLISS DATA' 976 PROC PRINT DATA=SETP1 TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES' PROC PRINT DATA=SETR1 TITLE ' ESTIMATES OF MORTALITY RATES' /* Estimate the LD50 */ data set2 set setp1 if(_type_ = 'PARMS') keep intercept zl run data set3 set setp1 if(_type_ = 'COV') keep intercept zl run 977 proc iml start ld50 use set2 read all into b use set3 read all into v ld50 = -b[1,1]/b[1,2] g = (-1 b[1,1]/b[1,2])/b[1,2] s = sqrt(g*v*t(g)) lower = ld50 - (1.96)*s upper = ld50 + (1.96)*s ld50 = exp(ld50) stderr = ld50*s lower=exp(lower) upper=exp(upper) print ld50 stderr lower upper finish 978

2 run ld50 /* Plot the onserved proportions and the fitted curve */ DATA SETR1 SET SETR1 PROB=X1/(X1+X2) /* Use this to create graphs in Windows */ goptions cback=white colors=(black) device=win target=winprtc rotate=portrait /* Use this to produce a postscript plot in the VINCENT system */ /* filename graffile pipe 'lpr -Dpostscript' goptions gsfmode=replace gsfname=graffile cback=white colors=(black) targetdevice=ps300 rotate=landscape */ SYMBOL1 V=NONE I=SPLINE L=1 W=2 H=2 SYMBOL2 V=circle H=2 PROC GPLOT DATA=SETR1 AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss 'Mortality Rate' ) VALUE=(H=2 F=swiss) LENGTH=5 in ORDER=0.0 TO 1.0 BY 0.2 AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) VALUE=(H=2) LENGTH=6 in ORDER=3.8 TO 4.4 BY 0.1 PLOT PHAT*ZL PROB*ZL/ OVERLAY VAXIS=AXIS1 HAXIS=AXIS2 TITLE H=3. F=swiss 'LOGISTIC REGRESSION ANALYSIS' TITLE2 H=3 F=swiss 'Bliss beetle data' /* FIT A PROBIT MODEL TO THE BLISS DATA */ PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP2 MODEL X1/N = ZL / ITPRINT COVB LINK=NORMIT PPROB=.5 SELECTION=NONE OUTPUT OUT=SETR2 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05 TITLE 'PROBIT MODEL USING LOG(DOSE) FOR THE BLISS DATA' PROC PRINT DATA=SETP2 TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES' PROC PRINT DATA=SETR2 TITLE ' ESTIMATES OF MORTALITY RATES' DATA SETR2 SET SETR2 PROB= X1/(X1+X2) PROC GPLOT DATA=SETR2 AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss 'Mortality Rate' ) VALUE=(H=2 F=swiss) LENGTH=5 in ORDER=0.0 TO 1.0 BY 0.2 AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) VALUE=(H=2 F=swiss) LENGTH=6 in ORDER=3.8 TO 4.4 BY 0.1 PLOT PHAT*ZL PROB*ZL / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2 TITLE H=3. F=swiss 'PROBIT ANALYSIS' TITLE2 H=3 F=swiss 'Bliss beetle data'

3 /* Fit the complimentary log-log model to the Bliss data */ TITLE ' COMPLIMENTARY LOG-LOG ANALYSIS OF THE BLISS DATA' PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP3 MODEL X1/N = ZL / ITPRINT COVB LINK=CLOGLOG PPROB=.5 SELECTION=NONE OUTPUT OUT=SETR3 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05 TITLE 'COMPLIMENTARY LOG-LOG MODEL FOR THE BLISS DATA' run PROC PRINT DATA=SETP3 TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES' PROC PRINT DATA=SETR3 TITLE ' ESTIMATES OF MORTALITY RATES' DATA SETR3 SET SETR3 PROB = X1/(X1+X2) 983 PROC GPLOT DATA=SETR3 AXIS1 LABEL= (H=2.5 R=0 A=90 F=swiss 'Mortality Rate' ) VALUE=(H=2 F=swiss ) LENGTH=5 in ORDER=0.0 TO 1.0 BY 0.2 AXIS2 LABEL= (H=2.5 F=swiss 'Log-dose' ) VALUE=(H=2 F=swiss) LENGTH=6 in ORDER=3.8 TO 4.4 BY 0.1 PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2 TITLE H=3. F=swiss 'COMPLIMENTARY LOG-LOG ANALYSIS' TITLE2 H=3 F=swiss 'Bliss beetle data' 984 LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA Obs Z X1 X2 ZL N Response Profile Ordered Binary Total Value Outcome Frequency 1 Event Nonevent 190 Maximum Likelihood Iteration History Iter Ridge -2 Log L Intercept ZL Model Information Data Set WORK.SET1 Response Variable (Events) X1 Response Variable (Trials) N Number of Observations 8 Link Function Logit Optimization Technique Fisher's scoring

4 Analysis of Maximum Likelihood Estimates Model Fit Statistics Intercept Intercept and Criterion Only Covariates Standard Parameter DF Estimate Error Chi-Square Pr > C Intercept <.0001 ZL <.0001 AIC SC Log L Odds Ratio Estimates Point 95% Wald Effect Estimate Confidence Limits Testing Global Null Hypothesis: BETA=0 ZL > > > Test Chi-Square DF Pr > ChiSq Likelihood Ratio <.0001 Score <.0001 Wald < Association of Predicted Probabilities and Observed Responses Percent Concordant 87.0 Somers' D Percent Discordant 6.8 Gamma Percent Tied 6.3 Tau-a Pairs c Estimated Covariance Matrix Variable Intercept ZL Intercept ZL DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES I _ n _ S t L T _ e N L T A N r L I Y T A c I O N P U M e K b K E S E p Z E s t L _ ESTIMATES OF MORTALITY RATES Obs Z X1 X2 ZL N PHAT LOWER95 UPPER LOGIT PARMS 0 Converged X LOGIT COV 0 Converged Int LOGIT COV 0 Converged ZL

5 This code uses the glm function in Splus to fit models to the Bliss beetle data. It is stored in the file bliss.ssc First enter the data dose <- c(49.057, , , , , , , ) ldose <- log(dose) ndead <- c(6, 13, 18, 28, 52, 53, 61, 60) nalive <- c(53, 47, 44, 28, 11, 6, 1, 0) y <- cbind(ndead, nalive) Now fit a logistic regression of mortality rates on log-dose bliss.lg <- glm(y ~ ldose, x=t, trace=t, family=binomial(link=logit)) summary(bliss.lg) Define a function to extract the estimated covariance matrix from the summary object f.vcov <- function(obj) { so <- summary(obj, corr=f) so$dispersion*so$cov.unscaled } bliss.lg$vcov <- f.vcov(bliss.lg) bliss.lg$vcov Compute standard errors for the estimated probabilities at the dosage levels used in the study sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) * bliss.lg$fit* (1-bliss.lg$fit) cbind(bliss.lg$x, bliss.lg$fit, sp) Estimate the LD50 and construct an approximate 95 percent confidence interval First make a confidence interval for log(ld50) Construct approximate one-at-a-time 95 percent confidence intervals for the estimated proportions slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp lower <- exp(llp)/(1+exp(llp)) upper <- exp(ulp)/(1+exp(ulp)) cbind(bliss.lg$x, bliss.lg$fit, lower, upper) b0 <- bliss.lg$coef[1] b1 <- bliss.lg$coef[2] l.ld50 <- -b0/b1 g <- cbind(-1, b0/b1)/b1 sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g)) l.upper <- l.ld50 + (1.96)*sl.ld50 l.lower <- l.ld50 - (1.96)*sl.ld50 ld50 <- signif(exp(l.ld50)) lower <- signif(exp(l.lower)) upper <- signif(exp(l.upper)) sld50 <- signif(ld50*sl.ld50) cat(ld50, lower, upper, sld50)

6 Compute p-values for lack-of-fit tests. These results may be unreliable for sparse data. goftests <- function(x, m1, m2, df) { Compute Pearson chi-squared and deviance tests and p-values In this function x = observed counts m1 = expected counts under H0 m2 = expected counts under HA df = degrees of freedom k <- length(x) m1 <- m m2 <- m x2p <- sum(((m1-m2)^2)/m1) pvalp <- 1 - pchisq(x2p, df) Compute the G^2 statistic g2 <- 2*sum(x*(log(m2/m1))) pvalg <- 1-pchisq(g2, df) cat("\n", " Pearson test = ", round(x2p,2)) cat("\n", " df = ", df) cat("\n", " p-value = ", round(pvalp,2)) cat("\n", " Deviance test = ", round(g2,2)) cat("\n", " df = ", df) cat("\n", " p-value = ", round(pvalg,2),"\n" } Compute estimates of expected counts nd <- (ndead+nalive)*bliss.lg$fit na <- (ndead+nalive)-nd my <- c(nd, na) goftests(y,my,y,bliss.lg$df) Open a motif window for displaying plots Windows users should not do this motif( ) Plot the estimated curve for the mortality probabilities against the log-dose since the plot function uses type="n" it only defines and labels the axes and writes a title at the top of the plot. the points( ) function plots points on the plot corresponding to the observed proportions the lines( ) function plots the estimated curve plot(c(3.8, 4.4), c(0,1), type="n", xlab="log-dose", ylab="mortality rate", main="bliss Beetle Data") points(ldose, ndead/(ndead+nalive), pch=18, mkh=0, cex=2.0 ) ld <- seq(3.8, 4.4, 0.02) lines(ld, predict(bliss.lg, data.frame(ldose=ld), type="response")) Plot the estimated curve for the mortality probabilities against dose plot(c(48, 78), c(0,1), type="n", xlab="dose", lab="mortality rate", main="bliss Beetle Data mkh=0, cex=2.0 ) ld <- seq(3.8, 4.4, 0.02) lines(exp(ld), predict(bliss.lg, data.frame(ldose=ld), type="response"))

7 Now fit a logistic regression of mortality rates on log-dose This code uses the glm function in Splus to fit models to the Bliss beetle data. It is stored in the file bliss.ssc First enter the data dose <- c(49.057, , , , , , , ) ldose <- log(dose) ndead <- c(6, 13, 18, 28, 52, 53, 61, 60) nalive <- c(53, 47, 44, 28, 11, 6, 1, 0) y <- cbind(ndead, nalive) 999 bliss.lg <- glm(y ~ ldose, x=t, trace=t, family=binomial(link=logit)) summary(bliss.lg) GLM linear loop 1: deviance = GLM linear loop 2: deviance = GLM linear loop 3: deviance = Call: glm(formula = y ~ ldose, family = binomial(link = logit), x = T, trace = T) Deviance Residuals: Min 1Q Median 3Q Max Coefficients: Value Std. Error t value (Intercept) ldose (Dispersion Parameter for Binomial family taken to be 1) Null Deviance: on 7 degrees of freedom Residual Deviance: on 6 degrees of freedom Number of Fisher Scoring Iterations: 3 Correlation of Coefficients: (Intercept) ldose f.vcov <- function(obj) { so <- summary(obj, corr=f) so$dispersion*so$cov.unscaled } Print the estimated proportions and the model matrix cbind(bliss.lg$x, bliss.lg$fit) (Intercept) ldose bliss.lg$vcov <- f.vcov(bliss.lg) bliss.lg$vcov (Intercept) ldose (Intercept) ldose

8 Compute standard errors for the estimated probabilities at the dosage levels used in the study sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) * bliss.lg$fit* (1-bliss.lg$fit) cbind(bliss.lg$x, bliss.lg$fit, sp) (Intercept) ldose sp Construct approximate one-at-a-time 95 percent confidence intervals for the estimated proportions slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp lower <- exp(llp)/(1+exp(llp)) upper <- exp(ulp)/(1+exp(ulp)) cbind(bliss.lg$x, bliss.lg$fit, lower, upper) (Intercept) ldose Estimate the LD50 and construct an approximate 95 percent confidence interval First make a confidence interval for log(ld50) b0 <- bliss.lg$coef[1] b1 <- bliss.lg$coef[2] l.ld50 <- -b0/b1 g <- cbind(-1, b0/b1)/b1 sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g)) l.upper <- l.ld50 + (1.96)*sl.ld50 l.lower <- l.ld50 - (1.96)*sl.ld50 ld50 <- signif(exp(l.ld50)) lower <- signif(exp(l.lower)) upper <- signif(exp(l.upper)) sld50 <- signif(ld50*sl.ld50) cat(ld50, lower, upper, sld50)

9 Compute estimates of expected counts nd <- (ndead+nalive)*bliss.lg$fit na <- (ndead+nalive)-nd my <- c(nd, na) goftests(y,my,y,bliss.lg$df) Pearson test = Degrees of freedom = 6 p-value = 0.12 Deviance test = df = 6 p-value =

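Then Log-odds with respect to a baseline category (e.g., the last category): $\pi_{ji} = \dfrac{\exp(\beta_{0j} + \beta_{1j} X_{1i} + \cdots + \beta_{kj} X_{ki})}{1 + \sum_{\ell=1}^{J-1} \exp(\beta_{0\ell} + \beta_{1\ell} X_{1i} + \cdots + \beta_{k\ell} X_{ki})}$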

Then Log-odds with respect to a baseline category (e.g., the last category): $\pi_{ji} = \dfrac{\exp(\beta_{0j} + \beta_{1j} X_{1i} + \cdots + \beta_{kj} X_{ki})}{1 + \sum_{\ell=1}^{J-1} \exp(\beta_{0\ell} + \beta_{1\ell} X_{1i} + \cdots + \beta_{k\ell} X_{ki})}$ Connections between log-linear and logistic regression models: Any log-linear model can be expressed as a logistic regression model. Logistic regression requires specification of a response variable. Log-linear models