Nov. 5 R code
Thursday, 06 November 2008 05:13

Here's the R code for today's section:

library(MASS)
attach(mcycle)
plot(accel~times)

#    This is obviously not linear, fitting a
#    linear model is inappropriate

m1 = lm(accel~times)
summary(m1)

par(mfrow=c(2,2))
plot(m1)

#    Terrible: trends in the residuals, clear
#    nonconstant variance, poor normality
#
#
#    Let's try fitting a polynomial model:
#    y ~ 1 + x + x^2 + x^3
#
#    We'll use the I() function in the model; this
#    insures that the actual timepoint squared gets fit
#    rather than the model term being squared

# lm(y ~ x*y)  <- fit y = b1 + b2*x + b3*y +b4*x*y
# lm(y ~ I(x*y))   <- fit y= b1 + b2*x*y

m2 = lm(accel ~ times + I(times^2) + I(times^3))

summary(m2)
plot(m2)

par(mfrow=c(1,1))
plot(m2$fit~times, type='l')
points(accel~times)


#    We still have something that looks significant
#    but the diagnostics don't look that good; the
#    pattern is still evident in the residuals v
#    fitted plot. 
#
#    Fit a higher polynomial?

m3 = lm(accel ~ times + I(times^2) + I(times^3) + I(times^4) + I(times^5))
par(mfrow=c(2,2))
summary(m3)
plot(m3)

par(mfrow=c(1,1))
plot(m3$fit~times, type='l')
points(accel~times)



m4 = lm(accel ~ times + I(times^2) + I(times^3) + I(times^4) + I(times^5) + I(times^6) + I(times^7) + I(times^8) + I(times^9))
par(mfrow=c(2,2))
summary(m4)
plot(m4)

par(mfrow=c(1,1))
plot(m4$fit~times, type='l')
points(accel~times)


X = as.matrix(cbind(rep(1,133), times, times^2, times^3, times^4))
eigen(solve((t(X)%*%X)))

#    Note the huge difference in eigenvalues

X2 = as.matrix(cbind(rep(1,133), times, times^2, times^3, times^4, times^5, times^6, times^7))
eigen(solve((t(X2)%*%X2)))

#     And the difference gets even worse; near singlular
#    matrix --> bad fits and high variance

#    Another problem -- we've got (obviously) highly
#    colinear predictors; note the near-zero eigenvalues
#
#    So what can we do?
#    This is a case where linear models can't help. This
#    would be more appropriately handled via time series
#    analysis or nonparametric analysis (which we won't
#    get into here, but here's a spline model just for
#    fun).

library(assist)
times2 = times/max(times)
splinemodel = ssr(accel~times2, rk=cubic(times2))
plot(splinemodel, ask=TRUE)
2
0
points(accel~times2)

#################################################################
#################################################################
#################################################################

attach(trees)

LVolume = log(Volume)
m1 = lm(LVolume ~ Girth*I(Girth^2))
m2 = lm(LVolume ~ Girth+I(Girth^2)+I(Girth^3))

summary(m1)
summary(m2)

#    Note that these work out the same -- including
#    interaction terms between two polynomial terms is
#    slightly pointless.

m3 = lm(LVolume ~ (Girth+I(Girth^2))*(Height + I(Height^2)))
summary(m3)
par(mfrow=c(2,2))
plot(m3)

drop1(m3, test='Chisq')
drop1(m3, test='F')
drop1(m3)

drop1(m3, k=log(31))

step(m3, direction="both")