11-26 section R code
Thursday, 27 November 2008 07:59
attach(stackloss)
library(MASS)

## We won't cover doing least squares since everyone should be pretty
## comfortable with that at this point.

######################################
##  Huber method
######################################

## The MASS library contains the rlm function, ("robust linear models")
## which will let us do the Huber estimator without a lot of grief.

m_huber <- rlm(stack.loss~Air.Flow+Water.Temp+Acid.Conc., data=stackloss)
summary(m_huber)

## If we want to play with the constant c, then we need to invoke rlm
## a bit differently

# First we inspect psi.huber to see the defaults
psi.huber

# define a new weight function with a different constant, in this case 1.1
psi.huber2<-function(u)psi.huber(u, k=1.1)

# Observe the difference
psi.huber(c(1,1.1,1.11, 1.345, 1.346))
psi.huber2(c(1,1.1,1.11, 1.345, 1.346))

# refit
m_huber_2 <- rlm(stack.loss~., data=stackloss, psi=psi.huber2)

######################################
##  LAD
######################################

## We could just recycle the rlm function, since it does basic m-estimation,
## by passing it a different m function.  Recall that we can get a weight
## function of 1/u for LAD...

psi.LAD <- function(u)1/u

m_LAD <- rlm(stack.loss~., data=stackloss, psi=psi.LAD)

## But we run into a missing weights problem (division by zero).  There's two
## ways to fix this.  The 'cheap trick' way is to simply assign the weight
## function the biggest weight possible if u=0...

psi.LAD2 <- function(u, deriv=0) return(abs(pmin(.Machine$double.xmax, 1/u)))
m_LAD_hack <- rlm(stack.loss~., data=stackloss, psi=psi.LAD2)
summary(m_LAD_hack)

## A better way is probably to get the quantreg package and use the rq
## function in that.

library(quantreg)
m_LAD <- rq(stack.loss~., data=stackloss)
summary(m_LAD)

## It turns out our nasty hack wasn't actually all that bad an approximation.

###################
## Least trimmed squares
###################

## Faraway recommends the 'lqs' package for this, which has since been rolled
## into MASS, so no need to install yet another package.  You can use the
## ltsreg as in faraway, or you can use the lqs function (which does exactly
## the same thing; ltsreg is just a 'wrapper' function for lqs at this time)

## There is a catch, though.  
m_lts1 = ltsreg(stack.loss ~., data=stackloss)
m_lts2 = ltsreg(stack.loss ~., data=stackloss)
m_lts1$coeff
m_lts2$coeff

## if you have more than one predictor, it's a complex problem to get the
## minimization exact.  R does a random search to try and minimize it, with
## the net effect that you might see a small difference in results each time
## you run it.  For SMALL data sets you can add 'nsamp=exact' to the command:

m_lts_exact = ltsreg(stack.loss ~., data=stackloss, nsamp='exact')

## But the computational time goes up something link k^n, where n is the size
## of the data set, so it doesn't take much to make it totally unreasonable.

############################################################
###  Bootstrapping
############################################################
## I'm only doing these two for the linear model.  You should also think about what might
## be appropriate to do for the other models/estimation techniques.
###################
#####  Case resampling
###################

repl = 1000
coeff=matrix(0,1000,4)

for(i in 1:repl){
    temp_model <- lm(stack.loss ~ ., data = stackloss[sample(21, rep=T),])
    coeff[i,] = temp_model$coeff
}

par(mfrow=c(2,2))
plot(density(coeff[,1]), main="Intercept")
plot(density(coeff[,2]), main="Air.Flow")
plot(density(coeff[,3]), main="Water.temp")
plot(density(coeff[,4]), main="Acid.Conc.")


###################
#####  Error resampling
###################

##  We'll just use the raw residuals.  Is this the best choice?

lm_resids = lm(stack.loss ~., data=stackloss)$resi
lm_coeffs = lm(stack.loss ~., data=stackloss)$coeff


for(i in 1:repl){
    newYi = as.matrix(cbind(rep(1,21), Air.Flow, Water.Temp, Acid.Conc.)) %*% (as.matrix(lm_coeffs)) + lm_resids[sample(21, rep=T)]
    temp_model <- lm(newYi ~ Air.Flow + Water.Temp + Acid.Conc.)
    coeff[i,] = temp_model$coeff
}
par(mfrow=c(2,2))
plot(density(coeff[,1]), main="Intercept")
plot(density(coeff[,2]), main="Air.Flow")
plot(density(coeff[,3]), main="Water.temp")
plot(density(coeff[,4]), main="Acid.Conc.")