R code from section
Thursday, 16 October 2008 18:25

Here's the R code I used in section.  You can just copy and paste this to a script file:

library(MASS)
data(mammals)
attach(mammals)

plot(body,brain)

plot(density(body))
plot(density(brain))
##These look exponential-ish, so let's try a log transform
## and look at the densities again
plot(density(log(body)))
plot(density(log(brain)))

plot(log(body), log(brain))

lbrain=log(brain)
lbody=log(body)

linmodel <- lm(brain~body)
logmodel <- lm(lbrain~lbody)

summary(linmodel)
summary(logmodel)

qqnorm(body)
qqnorm(lbody)

qqnorm(brain)
qqnorm(lbrain)

##Look at all the additional data stored about the linear model
names(linmodel)

## We can access them with the '$' operator

linmodel$coefficients
linmodel$fitted.values
linmodel$residuals

## Some commands that can be used with lm objects

## Diagnostic plots
par(mfrow=c(2,2))
plot(linmodel)
plot(logmodel)

## ANOVA tests
anova(linmodel)
anova(logmodel)

## Confidence intervals for parameters
confint(linmodel)
confint(logmodel)

## Extract residuals through a function
resid(linmodel)
resid(logmodel)

## Standardized residuals
plot(stdres(linmodel))
plot(stdres(logmodel))

## Studentized residuals
plot(studres(linmodel))
plot(studres(logmodel))

## cook's distance of the data points
plot(cooks.distance(linmodel))
plot(cooks.distance(logmodel))

## Find prediction and confidence intervals for new data

newlindata = data.frame(body=c(1, 10, 100))
predict(linmodel, newlindata,interval="prediction")


newlogdata = data.frame(lbody=c(0.1, 0.2, 0.3, 0.4))
predict(logmodel, newlogdata,interval="prediction")

predict(logmodel, interval="confidence", level=0.99999)


#####################
#####################
#####################
rm(list=ls(all=TRUE))

data(cement)

attach(cement)

x0=rep(1,13)

x=as.matrix(cbind(x0,x1,x2,x3,x4))

(x)%*%solve(t(x)%*%x)%*%t(x)

### remember that our hat matrix x[(x'x)^-1]x' is what we use to calculate our
### regression coefficients.  This looks ok.  But...

solve(t(x)%*%x)

### There's a large difference here; nearly singular
eigen(t(x)%*%x)

summary(lm(y~x))

### Really sensitive to small changes
x2=x
x2[12, 4]=11
x2[12, 2]=9


summary(lm(y~x2))

par(mfrow=c(2,2))
plot(lm(y~x))
plot(lm(y~x2))