| 11-26 section R code |
| Thursday, 27 November 2008 07:59 | |
|
attach(stackloss) library(MASS) ## We won't cover doing least squares since everyone should be pretty ## comfortable with that at this point. ###################################### ## Huber method ###################################### ## The MASS library contains the rlm function, ("robust linear models") ## which will let us do the Huber estimator without a lot of grief. m_huber <- rlm(stack.loss~Air.Flow+Water.Temp+Acid.Conc., data=stackloss) summary(m_huber) ## If we want to play with the constant c, then we need to invoke rlm ## a bit differently # First we inspect psi.huber to see the defaults psi.huber # define a new weight function with a different constant, in this case 1.1 psi.huber2<-function(u)psi.huber(u, k=1.1) # Observe the difference psi.huber(c(1,1.1,1.11, 1.345, 1.346)) psi.huber2(c(1,1.1,1.11, 1.345, 1.346)) # refit m_huber_2 <- rlm(stack.loss~., data=stackloss, psi=psi.huber2) ###################################### ## LAD ###################################### ## We could just recycle the rlm function, since it does basic m-estimation, ## by passing it a different m function. Recall that we can get a weight ## function of 1/u for LAD... psi.LAD <- function(u)1/u m_LAD <- rlm(stack.loss~., data=stackloss, psi=psi.LAD) ## But we run into a missing weights problem (division by zero). There's two ## ways to fix this. The 'cheap trick' way is to simply assign the weight ## function the biggest weight possible if u=0... psi.LAD2 <- function(u, deriv=0) return(abs(pmin(.Machine$double.xmax, 1/u))) m_LAD_hack <- rlm(stack.loss~., data=stackloss, psi=psi.LAD2) summary(m_LAD_hack) ## A better way is probably to get the quantreg package and use the rq ## function in that. library(quantreg) m_LAD <- rq(stack.loss~., data=stackloss) summary(m_LAD) ## It turns out our nasty hack wasn't actually all that bad an approximation. ################### ## Least trimmed squares ################### ## Faraway recommends the 'lqs' package for this, which has since been rolled ## into MASS, so no need to install yet another package. You can use the ## ltsreg as in faraway, or you can use the lqs function (which does exactly ## the same thing; ltsreg is just a 'wrapper' function for lqs at this time) ## There is a catch, though. m_lts1 = ltsreg(stack.loss ~., data=stackloss) m_lts2 = ltsreg(stack.loss ~., data=stackloss) m_lts1$coeff m_lts2$coeff ## if you have more than one predictor, it's a complex problem to get the ## minimization exact. R does a random search to try and minimize it, with ## the net effect that you might see a small difference in results each time ## you run it. For SMALL data sets you can add 'nsamp=exact' to the command: m_lts_exact = ltsreg(stack.loss ~., data=stackloss, nsamp='exact') ## But the computational time goes up something link k^n, where n is the size ## of the data set, so it doesn't take much to make it totally unreasonable. ############################################################ ### Bootstrapping ############################################################ ## I'm only doing these two for the linear model. You should also think about what might ## be appropriate to do for the other models/estimation techniques. ################### ##### Case resampling ################### repl = 1000 coeff=matrix(0,1000,4) for(i in 1:repl){ temp_model <- lm(stack.loss ~ ., data = stackloss[sample(21, rep=T),]) coeff[i,] = temp_model$coeff } par(mfrow=c(2,2)) plot(density(coeff[,1]), main="Intercept") plot(density(coeff[,2]), main="Air.Flow") plot(density(coeff[,3]), main="Water.temp") plot(density(coeff[,4]), main="Acid.Conc.") ################### ##### Error resampling ################### ## We'll just use the raw residuals. Is this the best choice? lm_resids = lm(stack.loss ~., data=stackloss)$resi lm_coeffs = lm(stack.loss ~., data=stackloss)$coeff for(i in 1:repl){ newYi = as.matrix(cbind(rep(1,21), Air.Flow, Water.Temp, Acid.Conc.)) %*% (as.matrix(lm_coeffs)) + lm_resids[sample(21, rep=T)] temp_model <- lm(newYi ~ Air.Flow + Water.Temp + Acid.Conc.) coeff[i,] = temp_model$coeff } par(mfrow=c(2,2)) plot(density(coeff[,1]), main="Intercept") plot(density(coeff[,2]), main="Air.Flow") plot(density(coeff[,3]), main="Water.temp") plot(density(coeff[,4]), main="Acid.Conc.")
|