run ld50 /* Plot the observed proportions and the fitted curve */ DATA SETR1 SET SETR1 PROB=X1/(X1+X2) /* Use this to create graphs in Windows */ gopt

Size: px
Start display at page:

Download "run ld50 /* Plot the observed proportions and the fitted curve */ DATA SETR1 SET SETR1 PROB=X1/(X1+X2) /* Use this to create graphs in Windows */ gopt"

Transcription

/* This program is stored as bliss.sas */

/* This program uses PROC LOGISTIC in SAS to fit models with
   logistic, probit, and complementary log-log link functions
   to the beetle mortality data collected by Chester Bliss */

/* Read the data: Z = dose, X1 = number dead, X2 = number alive.
   NOTE(review): the data lines that followed CARDS were lost in the
   transcription.  The counts below are the ndead/nalive vectors of the
   companion S-Plus script; only the first dose (49.057) is visible
   there, the other seven doses are the standard Bliss values and
   should be confirmed against the original file. */
DATA SET1;
  INPUT Z X1 X2;
  ZL = LOG(Z);
  N  = X1 + X2;
  LABEL Z  = 'DOSE'
        ZL = 'LOG(DOSE)'
        X1 = 'NUMBER DEAD'
        X2 = 'NUMBER ALIVE'
        N  = 'NUMBER EXPOSED';
  CARDS;
49.057  6 53
52.991 13 47
56.911 18 44
60.842 28 28
64.759 52 11
68.691 53  6
72.611 61  1
76.542 60  0
;
RUN;

PROC PRINT DATA=SET1;
  TITLE ' LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA';
RUN;

/* Logistic regression of mortality on log(dose).
   COVOUT/OUTEST= write the estimates and their covariance matrix to
   SETP1; OUTPUT writes fitted probabilities and 95% limits to SETR1.
   NOTE(review): the original MODEL statement carried a CONVERGE=
   option whose value was lost in the transcription; an empty
   CONVERGE= is a syntax error, so the default criterion is used. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP1;
  MODEL X1/N = ZL / ITPRINT COVB MAXITER=50
                    PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR1 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'LOGISTIC REGRESSION MODEL ON LOG(DOSE) FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP1;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR1;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

/* Estimate the LD50 */

/* SET2: the row of parameter estimates (intercept, slope) */
data set2;
  set setp1;
  if (_type_ = 'PARMS');
  keep intercept zl;
run;

/* SET3: the 2 x 2 estimated covariance matrix of the estimates */
data set3;
  set setp1;
  if (_type_ = 'COV');
  keep intercept zl;
run;

proc iml;
  start ld50;
    use set2;
    read all into b;
    use set3;
    read all into v;

    /* log(LD50) = -b0/b1 */
    ld50 = -b[1,1]/b[1,2];

    /* Row vector of partial derivatives of -b0/b1 with respect to
       (b0, b1), used for the delta-method standard error.  The
       horizontal concatenation operator was lost in transcription. */
    g = ((-1) || (b[1,1]/b[1,2]))/b[1,2];
    s = sqrt(g*v*t(g));

    /* 95% limits on the log scale, then back-transform to dose */
    lower = ld50 - (1.96)*s;
    upper = ld50 + (1.96)*s;
    ld50 = exp(ld50);
    stderr = ld50*s;
    lower = exp(lower);
    upper = exp(upper);
    print ld50 stderr lower upper;
  finish;

  run ld50;
quit;

/* Plot the observed proportions and the fitted curve */
DATA SETR1;
  SET SETR1;
  PROB = X1/(X1+X2);
RUN;

/* Use this to create graphs in Windows */
goptions cback=white colors=(black)
         device=win target=winprtc rotate=portrait;

/* Use this to produce a postscript plot in the VINCENT system */
/* filename graffile pipe 'lpr -Dpostscript';
   goptions gsfmode=replace gsfname=graffile cback=white
            colors=(black) targetdevice=ps300 rotate=landscape; */

/* SYMBOL1 draws the fitted curve, SYMBOL2 the observed proportions */
SYMBOL1 V=NONE I=SPLINE L=1 W=2 H=2;
SYMBOL2 V=circle H=2;

/* The =1 / =2 suffixes bind each plot request to its SYMBOL
   definition explicitly, as the complementary log-log plot does. */
PROC GPLOT DATA=SETR1;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss)
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2 F=swiss)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'LOGISTIC REGRESSION ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
RUN;

/* FIT A PROBIT MODEL TO THE BLISS DATA */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP2;
  MODEL X1/N = ZL / ITPRINT COVB LINK=NORMIT
                    PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR2 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'PROBIT MODEL USING LOG(DOSE) FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP2;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR2;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

DATA SETR2;
  SET SETR2;
  PROB = X1/(X1+X2);
RUN;

PROC GPLOT DATA=SETR2;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss)
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2 F=swiss)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'PROBIT ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
RUN;

/* Fit the complementary log-log model to the Bliss data */
TITLE ' COMPLIMENTARY LOG-LOG ANALYSIS OF THE BLISS DATA';

/* Same model specification as the logistic and probit fits, with the
   complementary log-log link.  Estimates and their covariance matrix
   go to SETP3; fitted probabilities and 95% limits go to SETR3. */
PROC LOGISTIC DATA=SET1 COVOUT OUTEST=SETP3;
  MODEL X1/N = ZL / ITPRINT COVB LINK=CLOGLOG
                    PPROB=.5 SELECTION=NONE;
  OUTPUT OUT=SETR3 L=LOWER95 P=PHAT U=UPPER95 / ALPHA=.05;
  TITLE 'COMPLIMENTARY LOG-LOG MODEL FOR THE BLISS DATA';
RUN;

PROC PRINT DATA=SETP3;
  TITLE 'DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES';
RUN;

PROC PRINT DATA=SETR3;
  TITLE ' ESTIMATES OF MORTALITY RATES';
RUN;

/* Add the observed mortality proportion for plotting */
DATA SETR3;
  SET SETR3;
  PROB = X1/(X1+X2);
RUN;

/* Fitted curve (=1) overlaid with the observed proportions (=2);
   the =n suffixes refer to SYMBOL definitions that are not set in
   this section and must already be in effect. */
PROC GPLOT DATA=SETR3;
  AXIS1 LABEL=(H=2.5 R=0 A=90 F=swiss 'Mortality Rate')
        VALUE=(H=2 F=swiss)
        LENGTH=5 in
        ORDER=0.0 TO 1.0 BY 0.2;
  AXIS2 LABEL=(H=2.5 F=swiss 'Log-dose')
        VALUE=(H=2 F=swiss)
        LENGTH=6 in
        ORDER=3.8 TO 4.4 BY 0.1;
  PLOT PHAT*ZL=1 PROB*ZL=2 / OVERLAY VAXIS=AXIS1 HAXIS=AXIS2;
  TITLE H=3. F=swiss 'COMPLIMENTARY LOG-LOG ANALYSIS';
  TITLE2 H=3 F=swiss 'Bliss beetle data';
RUN;

/* ------------------------------------------------------------------
   Captured output of the logistic regression run, kept as transcribed
   (the numeric values were lost in the transcription):

   LOGISTIC REGRESSION ANALYSIS OF THE BLISS DATA
   Obs Z X1 X2 ZL N
   Response Profile
   Ordered Binary Total Value Outcome Frequency 1 Event Nonevent 190
   Maximum Likelihood Iteration History
   Iter Ridge -2 Log L Intercept ZL
   Model Information
   Data Set WORK.SET1
   Response Variable (Events) X1
   Response Variable (Trials) N
   Number of Observations 8
   Link Function Logit
   Optimization Technique Fisher's scoring
   ------------------------------------------------------------------ */

4 Analysis of Maximum Likelihood Estimates Model Fit Statistics Intercept Intercept and Criterion Only Covariates Standard Parameter DF Estimate Error Chi-Square Pr > C Intercept <.0001 ZL <.0001 AIC SC Log L Odds Ratio Estimates Point 95% Wald Effect Estimate Confidence Limits Testing Global Null Hypothesis: BETA=0 ZL > > > Test Chi-Square DF Pr > ChiSq Likelihood Ratio <.0001 Score <.0001 Wald < Association of Predicted Probabilities and Observed Responses Percent Concordant 87.0 Somers' D Percent Discordant 6.8 Gamma Percent Tied 6.3 Tau-a Pairs c Estimated Covariance Matrix Variable Intercept ZL Intercept ZL DATA SET CONTAINING INFORMATION ON PARAMETER ESTIMATES I _ n _ S t L T _ e N L T A N r L I Y T A c I O N P U M e K b K E S E p Z E s t L _ ESTIMATES OF MORTALITY RATES Obs Z X1 X2 ZL N PHAT LOWER95 UPPER LOGIT PARMS 0 Converged X LOGIT COV 0 Converged Int LOGIT COV 0 Converged ZL

# This code uses the glm function to fit models to the Bliss beetle
# data.  It is stored in the file bliss.ssc.  It was written for
# S-Plus; S-Plus-only calls are replaced or commented out below so the
# script also runs in R.

# First enter the data.
# NOTE(review): only the first dose (49.057) survived the
# transcription; the other seven are the standard Bliss doses and
# should be confirmed against the original file.
dose <- c(49.057, 52.991, 56.911, 60.842, 64.759, 68.691, 72.611, 76.542)
ldose <- log(dose)
ndead <- c(6, 13, 18, 28, 52, 53, 61, 60)
nalive <- c(53, 47, 44, 28, 11, 6, 1, 0)
y <- cbind(ndead, nalive)

# Now fit a logistic regression of mortality rates on log-dose.
# x = TRUE keeps the model matrix in the fit; trace = TRUE prints the
# deviance at each iteration.
bliss.lg <- glm(y ~ ldose,
  family = binomial(link = "logit"),
  x = TRUE, control = glm.control(trace = TRUE)
)
summary(bliss.lg)

# Extract the estimated covariance matrix of the coefficients from the
# summary object.
#   obj: a fitted glm object.
# Returns the dispersion-scaled covariance matrix.
f.vcov <- function(obj) {
  so <- summary(obj, correlation = FALSE)
  so$dispersion * so$cov.unscaled
}

bliss.lg$vcov <- f.vcov(bliss.lg)
bliss.lg$vcov

# Compute standard errors for the estimated probabilities at the
# dosage levels used in the study (delta method: the standard error
# of the linear predictor times p * (1 - p)).
sp <- sqrt(diag(bliss.lg$x %*% bliss.lg$vcov %*% t(bliss.lg$x))) *
  fitted(bliss.lg) * (1 - fitted(bliss.lg))
cbind(bliss.lg$x, fitted(bliss.lg), sp)

# Construct approximate one-at-a-time 95 percent confidence intervals
# for the estimated proportions: build the interval on the logit scale
# and map the end points back with the inverse logit.
slp <- sqrt(diag(bliss.lg$x %*% bliss.lg$vcov %*% t(bliss.lg$x)))
llp <- bliss.lg$x %*% coef(bliss.lg) - (1.96) * slp
ulp <- bliss.lg$x %*% coef(bliss.lg) + (1.96) * slp
lower <- plogis(llp)
upper <- plogis(ulp)
cbind(bliss.lg$x, fitted(bliss.lg), lower, upper)

# Estimate the LD50 and construct an approximate 95 percent confidence
# interval.  First make a confidence interval for log(ld50).
b0 <- unname(coef(bliss.lg)[1])
b1 <- unname(coef(bliss.lg)[2])
l.ld50 <- -b0 / b1
# Gradient of -b0/b1 with respect to (b0, b1), for the delta method.
g <- cbind(-1, b0 / b1) / b1
sl.ld50 <- drop(sqrt(g %*% bliss.lg$vcov %*% t(g)))
l.upper <- l.ld50 + (1.96) * sl.ld50
l.lower <- l.ld50 - (1.96) * sl.ld50
# Back-transform to the dose scale.  `lower` and `upper` are reused
# here, as in the original script, and now hold the LD50 limits.
ld50 <- signif(exp(l.ld50))
lower <- signif(exp(l.lower))
upper <- signif(exp(l.upper))
sld50 <- signif(ld50 * sl.ld50)
cat(ld50, lower, upper, sld50, "\n")

# Compute p-values for lack-of-fit tests.  These results may be
# unreliable for sparse data.
#
# Computes Pearson chi-squared and deviance tests and p-values.
#   x   = observed counts
#   m1  = expected counts under H0
#   m2  = expected counts under HA
#   df  = degrees of freedom
#   eps = small offset added to m1 and m2 so that cells with a zero
#         count contribute 0 instead of NaN to the G^2 sum.
#         NOTE(review): the offset used by the original code was lost
#         in the transcription; 1e-20 is an assumed value.
# Prints both tests and invisibly returns the statistics and p-values.
goftests <- function(x, m1, m2, df, eps = 1e-20) {
  m1 <- m1 + eps
  m2 <- m2 + eps

  # Pearson chi-squared statistic
  x2p <- sum(((m1 - m2)^2) / m1)
  pvalp <- pchisq(x2p, df, lower.tail = FALSE)

  # Compute the G^2 statistic
  g2 <- 2 * sum(x * log(m2 / m1))
  pvalg <- pchisq(g2, df, lower.tail = FALSE)

  cat("\n", " Pearson test = ", round(x2p, 2))
  cat("\n", " df = ", df)
  cat("\n", " p-value = ", round(pvalp, 2))
  cat("\n", " Deviance test = ", round(g2, 2))
  cat("\n", " df = ", df)
  cat("\n", " p-value = ", round(pvalg, 2), "\n")

  invisible(list(
    pearson = x2p, p.pearson = pvalp,
    deviance = g2, p.deviance = pvalg, df = df
  ))
}

# Compute estimates of expected counts
nd <- (ndead + nalive) * fitted(bliss.lg)
na <- (ndead + nalive) - nd
my <- c(nd, na)

# bliss.lg$df is an ambiguous partial match in R (df.residual and
# df.null) and returns NULL, so ask for the residual df explicitly.
goftests(y, my, y, df.residual(bliss.lg))

# Open a motif window for displaying plots.
# Windows users should not do this.  motif() exists only in S-Plus on
# UNIX; R opens its default graphics device automatically.
# motif()

# Plot the estimated curve for the mortality probabilities against
# the log-dose.  Since the plot function uses type="n" it only defines
# and labels the axes and writes a title at the top of the plot.
# The points() function plots points on the plot corresponding to the
# observed proportions; the lines() function plots the estimated curve.
# (The S-Plus-only graphical parameter mkh=0 is dropped.)
plot(c(3.8, 4.4), c(0, 1),
  type = "n", xlab = "log-dose",
  ylab = "mortality rate", main = "bliss Beetle Data"
)
points(ldose, ndead / (ndead + nalive), pch = 18, cex = 2.0)
ld <- seq(3.8, 4.4, 0.02)
lines(ld, predict(bliss.lg, data.frame(ldose = ld), type = "response"))

# Plot the estimated curve for the mortality probabilities against
# dose.
# NOTE(review): this plot/points pair was fused and partly lost in the
# transcription; it is reconstructed by analogy with the plot above.
plot(c(48, 78), c(0, 1),
  type = "n", xlab = "dose",
  ylab = "mortality rate", main = "bliss Beetle Data"
)
points(dose, ndead / (ndead + nalive), pch = 18, cex = 2.0)
ld <- seq(3.8, 4.4, 0.02)
lines(exp(ld), predict(bliss.lg, data.frame(ldose = ld), type = "response"))

7 Now fit a logistic regression of mortality rates on log-dose This code uses the glm function in Splus to fit models to the Bliss beetle data. It is stored in the file bliss.ssc First enter the data dose <- c(49.057, , , , , , , ) ldose <- log(dose) ndead <- c(6, 13, 18, 28, 52, 53, 61, 60) nalive <- c(53, 47, 44, 28, 11, 6, 1, 0) y <- cbind(ndead, nalive) 999 bliss.lg <- glm(y ~ ldose, x=t, trace=t, family=binomial(link=logit)) summary(bliss.lg) GLM linear loop 1: deviance = GLM linear loop 2: deviance = GLM linear loop 3: deviance = Call: glm(formula = y ~ ldose, family = binomial(link = logit), x = T, trace = T) Deviance Residuals: Min 1Q Median 3Q Max Coefficients: Value Std. Error t value (Intercept) ldose (Dispersion Parameter for Binomial family taken to be 1) Null Deviance: on 7 degrees of freedom Residual Deviance: on 6 degrees of freedom Number of Fisher Scoring Iterations: 3 Correlation of Coefficients: (Intercept) ldose f.vcov <- function(obj) { so <- summary(obj, corr=f) so$dispersion*so$cov.unscaled } Print the estimated proportions and the model matrix cbind(bliss.lg$x, bliss.lg$fit) (Intercept) ldose bliss.lg$vcov <- f.vcov(bliss.lg) bliss.lg$vcov (Intercept) ldose (Intercept) ldose

8 Compute standard errors for the estimated probabilities at the dosage levels used in the study sp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) * bliss.lg$fit* (1-bliss.lg$fit) cbind(bliss.lg$x, bliss.lg$fit, sp) (Intercept) ldose sp Construct approximate one-at-a-time 95 percent confidence intervals for the estimated proportions slp <- sqrt(diag((bliss.lg$x%*%bliss.lg$vcov%*% t(bliss.lg$x)))) llp <- bliss.lg$x%*%bliss.lg$coef - (1.96)*slp ulp <- bliss.lg$x%*%bliss.lg$coef + (1.96)*slp lower <- exp(llp)/(1+exp(llp)) upper <- exp(ulp)/(1+exp(ulp)) cbind(bliss.lg$x, bliss.lg$fit, lower, upper) (Intercept) ldose Estimate the LD50 and construct an approximate 95 percent confidence interval. First make a confidence interval for log(ld50) b0 <- bliss.lg$coef[1] b1 <- bliss.lg$coef[2] l.ld50 <- -b0/b1 g <- cbind(-1, b0/b1)/b1 sl.ld50 <- sqrt(g%*%bliss.lg$vcov%*%t(g)) l.upper <- l.ld50 + (1.96)*sl.ld50 l.lower <- l.ld50 - (1.96)*sl.ld50 ld50 <- signif(exp(l.ld50)) lower <- signif(exp(l.lower)) upper <- signif(exp(l.upper)) sld50 <- signif(ld50*sl.ld50) cat(ld50, lower, upper, sld50)

9 Compute estimates of expected counts nd <- (ndead+nalive)*bliss.lg$fit na <- (ndead+nalive)-nd my <- c(nd, na) goftests(y,my,y,bliss.lg$df) Pearson test = Degrees of freedom = 6 p-value = 0.12 Deviance test = df = 6 p-value =

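Then, for log-odds with respect to a baseline category (e.g., the last category): $\pi_{ji} = \dfrac{\exp(\beta_{0j} + \beta_{1j} X_{1i} + \cdots + \beta_{kj} X_{ki})}{1 + \sum_{\ell=1}^{J-1} \exp(\beta_{0\ell} + \beta_{1\ell} X_{1i} + \cdots + \beta_{k\ell} X_{ki})}$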

Then, for log-odds with respect to a baseline category (e.g., the last category): $\pi_{ji} = \dfrac{\exp(\beta_{0j} + \beta_{1j} X_{1i} + \cdots + \beta_{kj} X_{ki})}{1 + \sum_{\ell=1}^{J-1} \exp(\beta_{0\ell} + \beta_{1\ell} X_{1i} + \cdots + \beta_{k\ell} X_{ki})}$ Connections between log-linear and logistic regression models: Any log-linear model can be expressed as a logistic regression model. Logistic regression requires specification of a response variable. Log-linear models

More information

Stat 5100 Handout #14.a SAS: Logistic Regression

Stat 5100 Handout #14.a SAS: Logistic Regression Stat 5100 Handout #14.a SAS: Logistic Regression Example: (Text Table 14.3) Individuals were randomly sampled within two sectors of a city, and checked for presence of disease (here, spread by mosquitoes).

More information

Binary Regression in S-Plus

Binary Regression in S-Plus Fall 200 STA 216 September 7, 2000 1 Getting Started in UNIX Binary Regression in S-Plus Create a class working directory and.data directory for S-Plus 5.0. If you have used Splus 3.x before, then it is

More information

STENO Introductory R-Workshop: Loading a Data Set Tommi Suvitaival, Steno Diabetes Center June 11, 2015

STENO Introductory R-Workshop: Loading a Data Set Tommi Suvitaival, Steno Diabetes Center June 11, 2015 STENO Introductory R-Workshop: Loading a Data Set Tommi Suvitaival, tsvv@steno.dk, Steno Diabetes Center June 11, 2015 Contents 1 Introduction 1 2 Recap: Variables 2 3 Data Containers 2 3.1 Vectors................................................

More information

CH9.Generalized Additive Model

CH9.Generalized Additive Model CH9.Generalized Additive Model Regression Model For a response variable and predictor variables can be modeled using a mean function as follows: would be a parametric / nonparametric regression or a smoothing

More information

Generalized Additive Models

Generalized Additive Models Generalized Additive Models Statistics 135 Autumn 2005 Copyright c 2005 by Mark E. Irwin Generalized Additive Models GAMs are one approach to non-parametric regression in the multiple predictor setting.

More information

Repeated Measures Part 4: Blood Flow data

Repeated Measures Part 4: Blood Flow data Repeated Measures Part 4: Blood Flow data /* bloodflow.sas */ options linesize=79 pagesize=100 noovp formdlim='_'; title 'Two within-subjecs factors: Blood flow data (NWK p. 1181)'; proc format; value

More information

/* Parametric models: AFT modeling */ /* Data described in Chapter 3 of P. Allison, "Survival Analysis Using the SAS System." */

/* Parametric models: AFT modeling */ /* Data described in Chapter 3 of P. Allison, Survival Analysis Using the SAS System. */ /* Parametric models: AFT modeling */ /* Data described in Chapter 3 of P. Allison, "Survival Analysis Using the SAS System." */ options ls =79; data recidall; input week arrest fin age race wexp mar paro

More information

CH5: CORR & SIMPLE LINEAR REFRESSION =======================================

CH5: CORR & SIMPLE LINEAR REFRESSION ======================================= STAT 430 SAS Examples SAS5 ===================== ssh xyz@glue.umd.edu, tap sas913 (old sas82), sas https://www.statlab.umd.edu/sasdoc/sashtml/onldoc.htm CH5: CORR & SIMPLE LINEAR REFRESSION =======================================

More information

An introduction to SPSS

An introduction to SPSS An introduction to SPSS To open the SPSS software using U of Iowa Virtual Desktop... Go to https://virtualdesktop.uiowa.edu and choose SPSS 24. Contents NOTE: Save data files in a drive that is accessible

More information

Unit 5 Logistic Regression Practice Problems

Unit 5 Logistic Regression Practice Problems Unit 5 Logistic Regression Practice Problems SOLUTIONS R Users Source: Afifi A., Clark VA and May S. Computer Aided Multivariate Analysis, Fourth Edition. Boca Raton: Chapman and Hall, 2004. Exercises

More information

Multinomial Logit Models with R

Multinomial Logit Models with R Multinomial Logit Models with R > rm(list=ls()); options(scipen=999) # To avoid scientific notation > # install.packages("mlogit", dependencies=true) # Only need to do this once > library(mlogit) # Load

More information

This is called a linear basis expansion, and h m is the mth basis function For example if X is one-dimensional: f (X) = β 0 + β 1 X + β 2 X 2, or

This is called a linear basis expansion, and h m is the mth basis function For example if X is one-dimensional: f (X) = β 0 + β 1 X + β 2 X 2, or STA 450/4000 S: February 2 2005 Flexible modelling using basis expansions (Chapter 5) Linear regression: y = Xβ + ɛ, ɛ (0, σ 2 ) Smooth regression: y = f (X) + ɛ: f (X) = E(Y X) to be specified Flexible

More information

Poisson Regression and Model Checking

Poisson Regression and Model Checking Poisson Regression and Model Checking Readings GH Chapter 6-8 September 27, 2017 HIV & Risk Behaviour Study The variables couples and women_alone code the intervention: control - no counselling (both 0)

More information

Logistic Regression. (Dichotomous predicted variable) Tim Frasier

Logistic Regression. (Dichotomous predicted variable) Tim Frasier Logistic Regression (Dichotomous predicted variable) Tim Frasier Copyright Tim Frasier This work is licensed under the Creative Commons Attribution 4.0 International license. Click here for more information.

More information

Generalized Additive Model

Generalized Additive Model Generalized Additive Model by Huimin Liu Department of Mathematics and Statistics University of Minnesota Duluth, Duluth, MN 55812 December 2008 Table of Contents Abstract... 2 Chapter 1 Introduction 1.1

More information

Generalized Least Squares (GLS) and Estimated Generalized Least Squares (EGLS)

Generalized Least Squares (GLS) and Estimated Generalized Least Squares (EGLS) Generalized Least Squares (GLS) and Estimated Generalized Least Squares (EGLS) Linear Model in matrix notation for the population Y = Xβ + Var ( ) = In GLS, the error covariance matrix is known In EGLS

More information

Predictive Checking. Readings GH Chapter 6-8. February 8, 2017

Predictive Checking. Readings GH Chapter 6-8. February 8, 2017 Predictive Checking Readings GH Chapter 6-8 February 8, 2017 Model Choice and Model Checking 2 Questions: 1. Is my Model good enough? (no alternative models in mind) 2. Which Model is best? (comparison

More information

Dynamic Network Regression Using R Package dnr

Dynamic Network Regression Using R Package dnr Dynamic Network Regression Using R Package dnr Abhirup Mallik July 26, 2018 R package dnr enables the user to fit dynamic network regression models for time variate network data available mostly in social

More information

Stat 500 lab notes c Philip M. Dixon, Week 10: Autocorrelated errors

Stat 500 lab notes c Philip M. Dixon, Week 10: Autocorrelated errors Week 10: Autocorrelated errors This week, I have done one possible analysis and provided lots of output for you to consider. Case study: predicting body fat Body fat is an important health measure, but

More information

Discriminant analysis in R QMMA

Discriminant analysis in R QMMA Discriminant analysis in R QMMA Emanuele Taufer file:///c:/users/emanuele.taufer/google%20drive/2%20corsi/5%20qmma%20-%20mim/0%20labs/l4-lda-eng.html#(1) 1/26 Default data Get the data set Default library(islr)

More information

Package glmmml. R topics documented: March 25, Encoding UTF-8 Version Date Title Generalized Linear Models with Clustering

Package glmmml. R topics documented: March 25, Encoding UTF-8 Version Date Title Generalized Linear Models with Clustering Encoding UTF-8 Version 1.0.3 Date 2018-03-25 Title Generalized Linear Models with Clustering Package glmmml March 25, 2018 Binomial and Poisson regression for clustered data, fixed and random effects with

More information

Didacticiel - Études de cas

Didacticiel - Études de cas Subject In some circumstances, the goal of the supervised learning is not to classify examples but rather to organize them in order to point up the most interesting individuals. For instance, in the direct

More information

1 The SAS System 23:01 Friday, November 9, 2012

1 The SAS System 23:01 Friday, November 9, 2012 2101f12HW9chickwts.log Saved: Wednesday, November 14, 2012 6:50:49 PM Page 1 of 3 1 The SAS System 23:01 Friday, November 9, 2012 NOTE: Copyright (c) 2002-2010 by SAS Institute Inc., Cary, NC, USA. NOTE:

More information

Stat 4510/7510 Homework 4

Stat 4510/7510 Homework 4 Stat 45/75 1/7. Stat 45/75 Homework 4 Instructions: Please list your name and student number clearly. In order to receive credit for a problem, your solution must show sufficient details so that the grader

More information

Baruch College STA Senem Acet Coskun

Baruch College STA Senem Acet Coskun Baruch College STA 9750 BOOK BUY A Predictive Mode Senem Acet Coskun Table of Contents Summary 3 Why this topic? 4 Data Sources 6 Variable Definitions 7 Descriptive Statistics 8 Univariate Analysis 9 Two-Sample

More information

NCSS Statistical Software

NCSS Statistical Software Chapter 327 Geometric Regression Introduction Geometric regression is a special case of negative binomial regression in which the dispersion parameter is set to one. It is similar to regular multiple regression

More information

Biostat Methods STAT 5820/6910 Handout #9 Meta-Analysis Examples

Biostat Methods STAT 5820/6910 Handout #9 Meta-Analysis Examples Biostat Methods STAT 5820/6910 Handout #9 Meta-Analysis Examples Example 1 A RCT was conducted to consider whether steroid therapy for expectant mothers affects death rate of premature [less than 37 weeks]

More information

Chapter 10: Extensions to the GLM

Chapter 10: Extensions to the GLM Chapter 10: Extensions to the GLM 10.1 Implement a GAM for the Swedish mortality data, for males, using smooth functions for age and year. Age and year are standardized as described in Section 4.11, for

More information

Correctly Compute Complex Samples Statistics

Correctly Compute Complex Samples Statistics PASW Complex Samples 17.0 Specifications Correctly Compute Complex Samples Statistics When you conduct sample surveys, use a statistics package dedicated to producing correct estimates for complex sample

More information

Module 3: SAS. 3.1 Initial explorative analysis 02429/MIXED LINEAR MODELS PREPARED BY THE STATISTICS GROUPS AT IMM, DTU AND KU-LIFE

Module 3: SAS. 3.1 Initial explorative analysis 02429/MIXED LINEAR MODELS PREPARED BY THE STATISTICS GROUPS AT IMM, DTU AND KU-LIFE St@tmaster 02429/MIXED LINEAR MODELS PREPARED BY THE STATISTICS GROUPS AT IMM, DTU AND KU-LIFE Module 3: SAS 3.1 Initial explorative analysis....................... 1 3.1.1 SAS JMP............................

More information

The linear mixed model: modeling hierarchical and longitudinal data

The linear mixed model: modeling hierarchical and longitudinal data The linear mixed model: modeling hierarchical and longitudinal data Analysis of Experimental Data AED The linear mixed model: modeling hierarchical and longitudinal data 1 of 44 Contents 1 Modeling Hierarchical

More information

The glmmml Package. August 20, 2006

The glmmml Package. August 20, 2006 The glmmml Package August 20, 2006 Version 0.65-1 Date 2006/08/20 Title Generalized linear models with clustering A Maximum Likelihood and bootstrap approach to mixed models. License GPL version 2 or newer.

More information

Package EMLRT. August 7, 2014

Package EMLRT. August 7, 2014 Package EMLRT August 7, 2014 Type Package Title Association Studies with Imputed SNPs Using Expectation-Maximization-Likelihood-Ratio Test LazyData yes Version 1.0 Date 2014-08-01 Author Maintainer

More information

Stat 8053, Fall 2013: Additive Models

Stat 8053, Fall 2013: Additive Models Stat 853, Fall 213: Additive Models We will only use the package mgcv for fitting additive and later generalized additive models. The best reference is S. N. Wood (26), Generalized Additive Models, An

More information

Statistics & Analysis. Fitting Generalized Additive Models with the GAM Procedure in SAS 9.2

Statistics & Analysis. Fitting Generalized Additive Models with the GAM Procedure in SAS 9.2 Fitting Generalized Additive Models with the GAM Procedure in SAS 9.2 Weijie Cai, SAS Institute Inc., Cary NC July 1, 2008 ABSTRACT Generalized additive models are useful in finding predictor-response

More information

Bivariate (Simple) Regression Analysis

Bivariate (Simple) Regression Analysis Revised July 2018 Bivariate (Simple) Regression Analysis This set of notes shows how to use Stata to estimate a simple (two-variable) regression equation. It assumes that you have set Stata up on your

More information

Lecture 24: Generalized Additive Models Stat 704: Data Analysis I, Fall 2010

Lecture 24: Generalized Additive Models Stat 704: Data Analysis I, Fall 2010 Lecture 24: Generalized Additive Models Stat 704: Data Analysis I, Fall 2010 Tim Hanson, Ph.D. University of South Carolina T. Hanson (USC) Stat 704: Data Analysis I, Fall 2010 1 / 26 Additive predictors

More information

STAT Statistical Learning. Predictive Modeling. Statistical Learning. Overview. Predictive Modeling. Classification Methods.

STAT Statistical Learning. Predictive Modeling. Statistical Learning. Overview. Predictive Modeling. Classification Methods. STAT 48 - STAT 48 - December 5, 27 STAT 48 - STAT 48 - Here are a few questions to consider: What does statistical learning mean to you? Is statistical learning different from statistics as a whole? What

More information

Factorial ANOVA. Skipping... Page 1 of 18

Factorial ANOVA. Skipping... Page 1 of 18 Factorial ANOVA The potato data: Batches of potatoes randomly assigned to to be stored at either cool or warm temperature, infected with one of three bacterial types. Then wait a set period. The dependent

More information

Binary IFA-IRT Models in Mplus version 7.11

Binary IFA-IRT Models in Mplus version 7.11 Binary IFA-IRT Models in Mplus version 7.11 Example data: 635 older adults (age 80-100) self-reporting on 7 items assessing the Instrumental Activities of Daily Living (IADL) as follows: 1. Housework (cleaning

More information

Stat 5100 Handout #11.a SAS: Variations on Ordinary Least Squares

Stat 5100 Handout #11.a SAS: Variations on Ordinary Least Squares Stat 5100 Handout #11.a SAS: Variations on Ordinary Least Squares Example 1: (Weighted Least Squares) A health researcher is interested in studying the relationship between diastolic blood pressure (bp)

More information

Correctly Compute Complex Samples Statistics

Correctly Compute Complex Samples Statistics SPSS Complex Samples 15.0 Specifications Correctly Compute Complex Samples Statistics When you conduct sample surveys, use a statistics package dedicated to producing correct estimates for complex sample

More information

Package mcemglm. November 29, 2015

Package mcemglm. November 29, 2015 Type Package Package mcemglm November 29, 2015 Title Maximum Likelihood Estimation for Generalized Linear Mixed Models Version 1.1 Date 2015-11-28 Author Felipe Acosta Archila Maintainer Maximum likelihood

More information

Strategies for Modeling Two Categorical Variables with Multiple Category Choices

Strategies for Modeling Two Categorical Variables with Multiple Category Choices 003 Joint Statistical Meetings - Section on Survey Research Methods Strategies for Modeling Two Categorical Variables with Multiple Category Choices Christopher R. Bilder Department of Statistics, University

More information

Modelling Proportions and Count Data

Modelling Proportions and Count Data Modelling Proportions and Count Data Rick White May 4, 2016 Outline Analysis of Count Data Binary Data Analysis Categorical Data Analysis Generalized Linear Models Questions Types of Data Continuous data:

More information

Set up of the data is similar to the Randomized Block Design situation. A. Chang 1. 1) Setting up the data sheet

Set up of the data is similar to the Randomized Block Design situation. A. Chang 1. 1) Setting up the data sheet Repeated Measure Analysis (Univariate Mixed Effect Model Approach) (Treatment as the Fixed Effect and the Subject as the Random Effect) (This univariate approach can be used for randomized block design

More information

Package ordinalnet. December 5, 2017

Package ordinalnet. December 5, 2017 Type Package Title Penalized Ordinal Regression Version 2.4 Package ordinalnet December 5, 2017 Fits ordinal regression models with elastic net penalty. Supported model families include cumulative probability,

More information

Product Catalog. AcaStat. Software

Product Catalog. AcaStat. Software Product Catalog AcaStat Software AcaStat AcaStat is an inexpensive and easy-to-use data analysis tool. Easily create data files or import data from spreadsheets or delimited text files. Run crosstabulations,

More information

A SAS Macro for Covariate Specification in Linear, Logistic, or Survival Regression

A SAS Macro for Covariate Specification in Linear, Logistic, or Survival Regression Paper 1223-2017 A SAS Macro for Covariate Specification in Linear, Logistic, or Survival Regression Sai Liu and Margaret R. Stedman, Stanford University; ABSTRACT Specifying the functional form of a covariate

More information

Intermediate SAS: Statistics

Intermediate SAS: Statistics Intermediate SAS: Statistics OIT TSS 293-4444 oithelp@mail.wvu.edu oit.wvu.edu/training/classmat/sas/ Table of Contents Procedures... 2 Two-sample t-test:... 2 Paired differences t-test:... 2 Chi Square

More information

Regression Analysis and Linear Regression Models

Regression Analysis and Linear Regression Models Regression Analysis and Linear Regression Models University of Trento - FBK 2 March, 2015 (UNITN-FBK) Regression Analysis and Linear Regression Models 2 March, 2015 1 / 33 Relationship between numerical

More information

Modelling Proportions and Count Data

Modelling Proportions and Count Data Modelling Proportions and Count Data Rick White May 5, 2015 Outline Analysis of Count Data Binary Data Analysis Categorical Data Analysis Generalized Linear Models Questions Types of Data Continuous data:

More information

A New Method of Using Polytomous Independent Variables with Many Levels for the Binary Outcome of Big Data Analysis

A New Method of Using Polytomous Independent Variables with Many Levels for the Binary Outcome of Big Data Analysis Paper 2641-2015 A New Method of Using Polytomous Independent Variables with Many Levels for the Binary Outcome of Big Data Analysis ABSTRACT John Gao, ConstantContact; Jesse Harriott, ConstantContact;

More information

Zero-Inflated Poisson Regression

Zero-Inflated Poisson Regression Chapter 329 Zero-Inflated Poisson Regression Introduction The zero-inflated Poisson (ZIP) regression is used for count data that exhibit overdispersion and excess zeros. The data distribution combines

More information

The Data. Math 158, Spring 2016 Jo Hardin Shrinkage Methods R code Ridge Regression & LASSO

The Data. Math 158, Spring 2016 Jo Hardin Shrinkage Methods R code Ridge Regression & LASSO Math 158, Spring 2016 Jo Hardin Shrinkage Methods R code Ridge Regression & LASSO The Data The following dataset is from Hastie, Tibshirani and Friedman (2009), from a study by Stamey et al. (1989) of prostate

More information

Introduction to the R Statistical Computing Environment R Programming: Exercises

Introduction to the R Statistical Computing Environment R Programming: Exercises Introduction to the R Statistical Computing Environment R Programming: Exercises John Fox (McMaster University) ICPSR Summer Program 2010 1. A challenging problem: Iterated weighted least squares (IWLS)

More information

GxE.scan. October 30, 2018

GxE.scan. October 30, 2018 GxE.scan October 30, 2018 Overview GxE.scan can process a GWAS scan using the snp.logistic, additive.test, snp.score or snp.matched functions, whereas snp.scan.logistic only calls snp.logistic. GxE.scan

More information

The SAS RELRISK9 Macro

The SAS RELRISK9 Macro The SAS RELRISK9 Macro Sally Skinner, Ruifeng Li, Ellen Hertzmark, and Donna Spiegelman November 15, 2012 Abstract The %RELRISK9 macro obtains relative risk estimates using PROC GENMOD with the binomial

More information

USING MACROS TO CREATE PARAMETER DRIVEN PROCEDURES THAT SUMMARIZE AND PRESENT STATISTICAL OUTPUT IN TABULAR FORM

USING MACROS TO CREATE PARAMETER DRIVEN PROCEDURES THAT SUMMARIZE AND PRESENT STATISTICAL OUTPUT IN TABULAR FORM 458 Statistics USING MACROS TO CREATE PARAMETER DRIVEN PROCEDURES THAT SUMMARIZE AND PRESENT STATISTICAL OUTPUT IN TABULAR FORM John A. Wenston National Development and Research Institutes, Inc. INTRODUCTION

More information

Linear Modeling with Bayesian Statistics

Linear Modeling with Bayesian Statistics Linear Modeling with Bayesian Statistics Bayesian Approach I I I I I Estimate probability of a parameter State degree of belief in specific parameter values Evaluate probability of hypothesis given the

More information

Introduction to the R Statistical Computing Environment R Programming: Exercises

Introduction to the R Statistical Computing Environment R Programming: Exercises Introduction to the R Statistical Computing Environment R Programming: Exercises John Fox (McMaster University) ICPSR 2014 1. A straightforward problem: Write an R function for linear least-squares regression.

More information

optimization_machine_probit_bush106.c

optimization_machine_probit_bush106.c optimization_machine_probit_bush106.c. probit ybush black00 south hispanic00 income owner00 dwnom1n dwnom2n Iteration 0: log likelihood = -299.27289 Iteration 1: log likelihood = -154.89847 Iteration 2:

More information

Unit: Quadratic Functions

Unit: Quadratic Functions Unit: Quadratic Functions Learning increases when you have a goal to work towards. Use this checklist as guide to track how well you are grasping the material. In the center column, rate your understand

More information

CHAPTER 7 EXAMPLES: MIXTURE MODELING WITH CROSS-SECTIONAL DATA

CHAPTER 7 EXAMPLES: MIXTURE MODELING WITH CROSS-SECTIONAL DATA Examples: Mixture Modeling With Cross-Sectional Data CHAPTER 7 EXAMPLES: MIXTURE MODELING WITH CROSS-SECTIONAL DATA Mixture modeling refers to modeling with categorical latent variables that represent

More information

Generalized Additive Models

Generalized Additive Models :p Texts in Statistical Science Generalized Additive Models An Introduction with R Simon N. Wood Contents Preface XV 1 Linear Models 1 1.1 A simple linear model 2 Simple least squares estimation 3 1.1.1

More information

SYS 6021 Linear Statistical Models

SYS 6021 Linear Statistical Models SYS 6021 Linear Statistical Models Project 2 Spam Filters Jinghe Zhang Summary The spambase data and time indexed counts of spams and hams are studied to develop accurate spam filters. Static models are

More information

Robust Linear Regression (Passing-Bablok Median-Slope)

Robust Linear Regression (Passing-Bablok Median-Slope) Chapter 314 Robust Linear Regression (Passing-Bablok Median-Slope) Introduction This procedure performs robust linear regression estimation using the Passing-Bablok (1988) median-slope algorithm. Their

More information

Conditional and Unconditional Regression with No Measurement Error

Conditional and Unconditional Regression with No Measurement Error Conditional and with No Measurement Error /* reg2ways.sas */ %include 'readsenic.sas'; title2 ''; proc reg; title3 'Conditional Regression'; model infrisk = stay census; proc calis cov; /* Analyze the

More information

SAS/STAT 14.2 User's Guide. The HPLOGISTIC Procedure

SAS/STAT 14.2 User's Guide. The HPLOGISTIC Procedure SAS/STAT 14.2 User's Guide The HPLOGISTIC Procedure This document is an individual chapter from SAS/STAT 14.2 User's Guide. The correct bibliographic citation for this manual is as follows: SAS Institute

More information

Fathom Dynamic Data TM Version 2 Specifications

Fathom Dynamic Data TM Version 2 Specifications Data Sources Fathom Dynamic Data TM Version 2 Specifications Use data from one of the many sample documents that come with Fathom. Enter your own data by typing into a case table. Paste data from other

More information

Among those 14 potential explanatory variables,non-dummy variables are:

Among those 14 potential explanatory variables,non-dummy variables are: Among those 14 potential explanatory variables,non-dummy variables are: Size: 2nd column in the dataset Land: 14th column in the dataset Bed.Rooms: 5th column in the dataset Fireplace: 7th column in the

More information

Regression Lab 1. The data set cholesterol.txt available on your thumb drive contains the following variables:

Regression Lab 1. The data set cholesterol.txt available on your thumb drive contains the following variables: Regression Lab The data set cholesterol.txt available on your thumb drive contains the following variables: Field Descriptions ID: Subject ID sex: Sex: 0 = male, 1 = female age: Age in years chol: Serum

More information

Package PTE. October 10, 2017

Package PTE. October 10, 2017 Type Package Title Personalized Treatment Evaluator Version 1.6 Date 2017-10-9 Package PTE October 10, 2017 Author Adam Kapelner, Alina Levine & Justin Bleich Maintainer Adam Kapelner

More information

Outline. Topic 16 - Other Remedies. Ridge Regression. Ridge Regression. Ridge Regression. Robust Regression. Regression Trees. Piecewise Linear Model

Outline. Topic 16 - Other Remedies. Ridge Regression. Ridge Regression. Ridge Regression. Robust Regression. Regression Trees. Piecewise Linear Model Topic 16 - Other Remedies Ridge Regression Robust Regression Regression Trees Outline - Fall 2013 Piecewise Linear Model Bootstrapping Topic 16 2 Ridge Regression Modification of least squares that addresses

More information

Introduction to SAS proc calis

Introduction to SAS proc calis Introduction to SAS proc calis /* path1.sas */ %include 'SenicRead.sas'; title2 ''; /************************************************************************ * * * Cases are hospitals * * * * stay Average

More information

book 2014/5/6 15:21 page v #3 List of figures List of tables Preface to the second edition Preface to the first edition

book 2014/5/6 15:21 page v #3 List of figures List of tables Preface to the second edition Preface to the first edition book 2014/5/6 15:21 page v #3 Contents List of figures List of tables Preface to the second edition Preface to the first edition xvii xix xxi xxiii 1 Data input and output 1 1.1 Input........................................

More information

Example Using Missing Data 1

Example Using Missing Data 1 Ronald H. Heck and Lynn N. Tabata 1 Example Using Missing Data 1 Creating the Missing Data Variable (Miss) Here is a data set (achieve subset MANOVAmiss.sav) with the actual missing data on the outcomes.

More information

Statistics and Data Analysis. Common Pitfalls in SAS Statistical Analysis Macros in a Mass Production Environment

Statistics and Data Analysis. Common Pitfalls in SAS Statistical Analysis Macros in a Mass Production Environment Common Pitfalls in SAS Statistical Analysis Macros in a Mass Production Environment Huei-Ling Chen, Merck & Co., Inc., Rahway, NJ Aiming Yang, Merck & Co., Inc., Rahway, NJ ABSTRACT Four pitfalls are commonly

More information

Multiple imputation using chained equations: Issues and guidance for practice

Multiple imputation using chained equations: Issues and guidance for practice Multiple imputation using chained equations: Issues and guidance for practice Ian R. White, Patrick Royston and Angela M. Wood http://onlinelibrary.wiley.com/doi/10.1002/sim.4067/full By Gabrielle Simoneau

More information

Performing Cluster Bootstrapped Regressions in R

Performing Cluster Bootstrapped Regressions in R Performing Cluster Bootstrapped Regressions in R Francis L. Huang / October 6, 2016 Supplementary material for: Using Cluster Bootstrapping to Analyze Nested Data with a Few Clusters in Educational and

More information

Minitab 17 commands Prepared by Jeffrey S. Simonoff

Minitab 17 commands Prepared by Jeffrey S. Simonoff Minitab 17 commands Prepared by Jeffrey S. Simonoff Data entry and manipulation To enter data by hand, click on the Worksheet window, and enter the values in as you would in any spreadsheet. To then save

More information

ST512. Fall Quarter, Exam 1. Directions: Answer questions as directed. Please show work. For true/false questions, circle either true or false.

ST512. Fall Quarter, Exam 1. Directions: Answer questions as directed. Please show work. For true/false questions, circle either true or false. ST512 Fall Quarter, 2005 Exam 1 Name: Directions: Answer questions as directed. Please show work. For true/false questions, circle either true or false. 1. (42 points) A random sample of n = 30 NBA basketball

More information

SPSS Modules Features

SPSS Modules Features SPSS Modules Features Core System Functionality (included in every license) Data access and management Data Prep features: Define Variable properties tool; copy data properties tool, Visual Bander, Identify

More information

Centering and Interactions: The Training Data

Centering and Interactions: The Training Data Centering and Interactions: The Training Data A random sample of 150 technical support workers were first given a test of their technical skill and knowledge, and then randomly assigned to one of three

More information

Package EBglmnet. January 30, 2016

Package EBglmnet. January 30, 2016 Type Package Package EBglmnet January 30, 2016 Title Empirical Bayesian Lasso and Elastic Net Methods for Generalized Linear Models Version 4.1 Date 2016-01-15 Author Anhui Huang, Dianting Liu Maintainer

More information

Package endogenous. October 29, 2016

Package endogenous. October 29, 2016 Package endogenous October 29, 2016 Type Package Title Classical Simultaneous Equation Models Version 1.0 Date 2016-10-25 Maintainer Andrew J. Spieker Description Likelihood-based

More information

Generalized least squares (GLS) estimates of the level-2 coefficients,

Generalized least squares (GLS) estimates of the level-2 coefficients, Contents 1 Conceptual and Statistical Background for Two-Level Models...7 1.1 The general two-level model... 7 1.1.1 Level-1 model... 8 1.1.2 Level-2 model... 8 1.2 Parameter estimation... 9 1.3 Empirical

More information

Gelman-Hill Chapter 3

Gelman-Hill Chapter 3 Gelman-Hill Chapter 3 Linear Regression Basics In linear regression with a single independent variable, as we have seen, the fundamental equation is $\hat{y} = b_1 x + b_0$, where $b_1 = \rho_{yx}\,\sigma_y / \sigma_x$ and $b_0 = \mu_y - b_1 \mu_x$. Bivariate Normal

More information

1 Confidence Intervals for Mean Value Parameters

1 Confidence Intervals for Mean Value Parameters 1 Confidence Intervals for Mean Value Parameters For complete separation example of Agresti (2013, Section 6.5.1), we need confidence intervals. The theory in Geyer (2009) says a $100(1 - \alpha)\%$ confidence region

More information

Nina Zumel and John Mount Win-Vector LLC

Nina Zumel and John Mount Win-Vector LLC SUPERVISED LEARNING IN R: REGRESSION Logistic regression to predict probabilities Nina Zumel and John Mount Win-Vector LLC Predicting Probabilities Predicting whether an event occurs (yes/no): classification

More information

CHAPTER 18 OUTPUT, SAVEDATA, AND PLOT COMMANDS

CHAPTER 18 OUTPUT, SAVEDATA, AND PLOT COMMANDS OUTPUT, SAVEDATA, And PLOT Commands CHAPTER 18 OUTPUT, SAVEDATA, AND PLOT COMMANDS THE OUTPUT COMMAND OUTPUT: In this chapter, the OUTPUT, SAVEDATA, and PLOT commands are discussed. The OUTPUT command

More information

Using HLM for Presenting Meta Analysis Results. R. C. Gardner Department of Psychology

Using HLM for Presenting Meta Analysis Results. R. C. Gardner Department of Psychology Data_Analysis.calm: dacmeta Using HLM for Presenting Meta Analysis Results R. C. Gardner Department of Psychology The primary purpose of meta analysis is to summarize the effect size results from a number

More information

Some methods for the quantification of prediction uncertainties for digital soil mapping: Universal kriging prediction variance.

Some methods for the quantification of prediction uncertainties for digital soil mapping: Universal kriging prediction variance. Some methods for the quantification of prediction uncertainties for digital soil mapping: Universal kriging prediction variance. Soil Security Laboratory 2018 1 Universal kriging prediction variance In

More information

Package GWRM. R topics documented: July 31, Type Package

Package GWRM. R topics documented: July 31, Type Package Type Package Package GWRM July 31, 2017 Title Generalized Waring Regression Model for Count Data Version 2.1.0.3 Date 2017-07-18 Maintainer Antonio Jose Saez-Castillo Depends R (>= 3.0.0)

More information

Chapter 6: Linear Model Selection and Regularization

Chapter 6: Linear Model Selection and Regularization Chapter 6: Linear Model Selection and Regularization As p (the number of predictors) comes close to or exceeds n (the sample size) standard linear regression is faced with problems. The variance of the

More information

Practical 4: Mixed effect models

Practical 4: Mixed effect models Practical 4: Mixed effect models This practical is about how to fit (generalised) linear mixed effects models using the lme4 package. You may need to install it first (using either the install.packages

More information

Investigation of sequential experimental approaches in logistic regression modeling

Investigation of sequential experimental approaches in logistic regression modeling Rochester Institute of Technology RIT Scholar Works Theses Thesis/Dissertation Collections 5-17-2017 Investigation of sequential experimental approaches in logistic regression modeling Darsh Thakkar dt1412@rit.edu

More information

Package tpr. R topics documented: February 20, Type Package. Title Temporal Process Regression. Version

Package tpr. R topics documented: February 20, Type Package. Title Temporal Process Regression. Version Package tpr February 20, 2015 Type Package Title Temporal Process Regression Version 0.3-1 Date 2010-04-11 Author Jun Yan Maintainer Jun Yan Regression models

More information

Using the SemiPar Package

Using the SemiPar Package Using the SemiPar Package NICHOLAS J. SALKOWSKI Division of Biostatistics, School of Public Health, University of Minnesota, Minneapolis, MN 55455, USA salk0008@umn.edu May 15, 2008 1 Introduction The

More information