library(faraway) data(savings) savings summary(savings) pairs(savings) hist(savings\$sr,breaks = 10) g <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data=savings, x = TRUE, y = TRUE) summary(g) xtx <- t(g\$x)%*%g\$x ixtx <- solve(xtx) bhat <- ixtx%*%t(g\$x)%*%g\$y Py <- g\$x%*%bhat res <- g\$y - g\$x%*%bhat sighat <- sqrt(norm(res,type="F")^2/(45)) t1 <- bhat[1]/(sighat*sqrt(ixtx[1,1])) pval1 <- (1 - pt(t1,45))*2 ybar <- mean(g\$y) one <- matrix(1,nrow = 50,ncol = 1) P0y <- ybar*one RSS0 <- sum((g\$y - P0y)^2) RSS <- norm(g\$y - Py,type="F")^2 Fstat <- (45/4)*norm(Py - P0y,type="F")^2/RSS Pval <- 1- pf(Fstat,4,45) R2 <- (RSS0 - RSS)/RSS0 # Hypothesis Testing # Testing b_ddpi = 0 gr <- lm(sr ~ pop15+pop75+dpi,savings) anova(gr,g) # Testing b_ddpi = 1 gr <- lm(sr ~ pop15+pop75+dpi+offset(ddpi),savings) anova(gr,g) # Testing b_pop15 = b_pop75 gr <- lm(sr ~ I(pop15+pop75)+dpi+ddpi,savings) anova(gr,g) # Confidence Intervals qt(0.975,45) c(-1.69-2.01*1.08,-1.69+2.01*1.08) # 95% CI for pop75 c(0.41-2.01*0.196,0.41+2.01*0.196) # 95% CI for ddpi cor(savings\$pop15,savings\$pop75) ixtx[2,3] # Diagnostics plot(g\$res,ylab="Residuals",main="Index plot of residuals") sort(g\$res)[c(1,50)] countries <- row.names(savings) identify(1:50,g\$res,countries) # Find the leverages # library(stats) x <- model.matrix(g) lev <- hat(x) plot(lev,ylab="Leverages",main="Index plot of Leverages") abline(h=2*5/50) sum(lev) names(lev) <- countries lev[lev > 0.2] identify(1:50,lev,countries) # studentized residuals gs <- summary(g) gs\$sig stud <- g\$res/(gs\$sig*sqrt(1-lev)) plot(stud,ylab="Studentized Residuals",main="Studentized Residuals") # Influential observations cook <- cooks.distance(g) plot(cook,ylab="Cooks distances") identify(1:50,cook,countries) gl <- lm(sr ~ pop15+pop75+dpi+ddpi,savings,subset=(cook < max(cook))) summary(gl) ginf <- lm.influence(g) plot(ginf\$coef[,2],ginf\$coef[,3],xlab="Change in pop15 coef", ylab="Change in pop75 coef") identify(ginf\$coef[,2],ginf\$coef[,3],countries) gj <- lm(sr ˜ pop15+pop75+dpi+ddpi,savings, subset=(countries!= "Japan")) summary(gj) # Residual plots plot(g\$fit,g\$res,xlab="Fitted",ylab="Residuals") abline(h=0) plot(savings\$pop15,g\$res,xlab="Fitted",ylab="Pop'n under 15") plot(savings\$pop75,g\$res,xlab="Fitted",ylab="Pop'n under 75") # Simulating different varaince effects par(mfrow=c(2,2)) for(i in 1:4) plot(1:50,rnorm(50)) # Constant Variance for(i in 1:4) plot(1:50,(1:50)*rnorm(50)) # Strong non-constant variance for(i in 1:4) plot(1:50,sqrt((1:50))*rnorm(50)) # Mild non-constant variance for(i in 1:4) plot(1:50,cos((1:50)*pi/25)+ rnorm(50)) # Non-linearity for(i in 1:4) plot(1:50,cos((1:50)*pi/25)+ 0.1*rnorm(50)) # Non-linearity par(mfrow=c(1,1)) # Assessing Normality qqnorm(g\$res,ylab="Raw Residuals") qqline(g\$res) qqnorm(rstudent(g),ylab="Studentized residuals") abline(0,1) hist(g\$res,10) boxplot(g\$res,main="Boxplot of savings residuals") par(mfrow=c(2,2)) for(i in 1:4){ qqnorm(rnorm(50))} # Normal for(i in 1:4) qqnorm(exp(rnorm(50))) # Lognormal -- skewed distribution for(i in 1:4) qqnorm(rcauchy(50)) # Cauchy -- long-tailed for(i in 1:4) qqnorm(runif(50)) # Uniform -- short-tailed par(mfrow=c(1,1)) # Box-Cox transformations boxcox(g,plotit=T) boxcox(g,plotit=T,lambda=seq(0.5,1.5,by=0.1))