|
Here's the R code for today's section: library(MASS) attach(mcycle) plot(accel~times)
# This is obviously not linear, fitting a # linear model is inappropriate
m1 = lm(accel~times) summary(m1)
par(mfrow=c(2,2)) plot(m1)
# Terrible: trends in the residuals, clear # nonconstant variance, poor normality # # # Let's try fitting a polynomial model: # y ~ 1 + x + x^2 + x^3 # # We'll use the I() function in the model; this # insures that the actual timepoint squared gets fit # rather than the model term being squared
# lm(y ~ x*y) <- fit y = b1 + b2*x + b3*y +b4*x*y # lm(y ~ I(x*y)) <- fit y= b1 + b2*x*y
m2 = lm(accel ~ times + I(times^2) + I(times^3))
summary(m2) plot(m2)
par(mfrow=c(1,1)) plot(m2$fit~times, type='l') points(accel~times)
# We still have something that looks significant # but the diagnostics don't look that good; the # pattern is still evident in the residuals v # fitted plot. # # Fit a higher polynomial?
m3 = lm(accel ~ times + I(times^2) + I(times^3) + I(times^4) + I(times^5)) par(mfrow=c(2,2)) summary(m3) plot(m3)
par(mfrow=c(1,1)) plot(m3$fit~times, type='l') points(accel~times)
m4 = lm(accel ~ times + I(times^2) + I(times^3) + I(times^4) + I(times^5) + I(times^6) + I(times^7) + I(times^8) + I(times^9)) par(mfrow=c(2,2)) summary(m4) plot(m4)
par(mfrow=c(1,1)) plot(m4$fit~times, type='l') points(accel~times)
X = as.matrix(cbind(rep(1,133), times, times^2, times^3, times^4)) eigen(solve((t(X)%*%X)))
# Note the huge difference in eigenvalues
X2 = as.matrix(cbind(rep(1,133), times, times^2, times^3, times^4, times^5, times^6, times^7)) eigen(solve((t(X2)%*%X2)))
# And the difference gets even worse; near singlular # matrix --> bad fits and high variance
# Another problem -- we've got (obviously) highly # colinear predictors; note the near-zero eigenvalues # # So what can we do? # This is a case where linear models can't help. This # would be more appropriately handled via time series # analysis or nonparametric analysis (which we won't # get into here, but here's a spline model just for # fun).
library(assist) times2 = times/max(times) splinemodel = ssr(accel~times2, rk=cubic(times2)) plot(splinemodel, ask=TRUE) 2 0 points(accel~times2)
################################################################# ################################################################# #################################################################
attach(trees)
LVolume = log(Volume) m1 = lm(LVolume ~ Girth*I(Girth^2)) m2 = lm(LVolume ~ Girth+I(Girth^2)+I(Girth^3))
summary(m1) summary(m2)
# Note that these work out the same -- including # interaction terms between two polynomial terms is # slightly pointless.
m3 = lm(LVolume ~ (Girth+I(Girth^2))*(Height + I(Height^2))) summary(m3) par(mfrow=c(2,2)) plot(m3)
drop1(m3, test='Chisq') drop1(m3, test='F') drop1(m3)
drop1(m3, k=log(31))
step(m3, direction="both")
|