R code for 11/19 section
Thursday, 20 November 2008 18:39

Here's the code:

## Local regression
#
#    When we can't fit a linear model, often it makes sense to fit a piecewise
# model, where we estimate the function at a particular point by using
# information from nearby points. 
#
#    Two examples of this are spline fitting and LOESS (or LOWESS) regression.
# We already applied spline fitting to the mcycle data in MASS (and Dr. Wang teaches
# an excellent class on smoothing splines), so let's look at LOESS.
#
#########
# LOESS regression
#########
# We can approximate any function in a neighborhood about a single data point
# by it's taylor series expansion.  Denote the expansion about x at the point xi
#
#    f(xi|x) ~~ a0 + a1(xi-x) + 1/2 a2(xi - x)^2 + 1/6 a3(xi - x)^3 + ...
#
#    We will truncate this at the second term (although we can use more if we
# want) and try to find the coefficients a0, a1, a2.
#
#    We want to consider points "in the neighborhood" of x, and apply more
# weight to those points that are closer to x.  There are many weight functions
# that can be used, but we're going to use the most common one, the "tri-cube"
# function:
#
#    w(x) = (1 - abs(x)^3)^3 for |x| < 1
#         0            for |x| >= 1
#
#    Let's also introduce a "bandwidth" parameter -- h -- which governs "how
# far" in each direction we're willing to look (the size of the "neighborhood"
# around x). 
#
#    For each point xi around x, we'll assign it weight w((xi-x)/h); note
# that for |xi-x|>h, this will make the weight 0 (since w(x) = 0 for |x| > 1).

library(MASS)
data(mcycle)
attach(mcycle)
plot(mcycle)

# Let's find the local fit around 14 msec; roughly when the first 'elbow' in
# in the data is, and use a bandwidth of 1.5.

BW = 5
ctr = 15

# Here's our weight function
w <- function(x) max((1 - abs(x)^3)^3,0)

# And here's a plot of it
tx = as.matrix(seq(-1,1, len=100))
plot(apply(tx,1,w))

# generate the scaled delta-times
dt = as.matrix((times - ctr)/BW)

# and the list of weights we need for WLS
wlist = apply(dt, 1, w)

# Now make our predictors: (xi-x), (xi-x)^2
X = cbind( times-ctr, (times-ctr)^2)

# And use the 'weights' option in lm to do WLS regression
M_local = lm(accel~X, weights = wlist)
summary(M_local)
B = M_local$coeff

local_x = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW))
local_fit<-function(x) B[1] + B[2]*(x-ctr) + (B[3]/2)*(x-ctr)^2

pred = apply(local_x, 1, local_fit)
plot(local_x,pred, xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel)))
points(mcycle)

## Obviously, if we shift the center a bit we should expect a different fit,
# but it should be close.

ctr = 13
BW = 5

B2 = lm(accel~cbind( times-ctr, (times-ctr)^2), weights = apply(as.matrix((times - ctr)/BW), 1, w))$coeff
local_fit2<-function(x) B2[1] + B2[2]*(x-ctr) + (B2[3]/2)*(x-ctr)^2
local_x2 = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW))
pred2 = apply(local_x2, 1, local_fit2)

plot(c(local_x, local_x2), c(pred, pred2), xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel)))
points(mcycle, pty=2)

ctr = 16
BW = 5

B3 = lm(accel~cbind( times-ctr, (times-ctr)^2), weights = apply(as.matrix((times - ctr)/BW), 1, w))$coeff
local_fit3<-function(x) B3[1] + B3[2]*(x-ctr) + (B3[3]/2)*(x-ctr)^2
local_x3 = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW))
pred3 = apply(local_x3, 1, local_fit3)

plot(c(local_x, local_x2, local_x3), c(pred, pred2,pred3), xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel)))
points(mcycle, pch=46)

local_fit3(16)

## Of course, we can also just use the built-in R package.  "span" is the smoothing parameter, and in this case
# we are using 25% of the data per point, for a bandwidth of about 14
mloess = loess(accel~times, degree=2, span=0.25)
x = seq(min(times), max(times), len=200)
plot(times, mloess$fit, type='l',xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel)))
points(mcycle)

summary(mloess)

#######################
###
#######################

library(assist)
ssr(accel~times, kernel = cubic(times), scale = 1)

times2 = (times-min(times))/diff(range(times))

spline = ssr(accel~times2, rk = cubic(times2), scale = 1)

plot(spline, ask=1)
2
0
points(mcycle$accel~times2)