|
Here's the code: ## Local regression # # When we can't fit a linear model, often it makes sense to fit a piecewise # model, where we estimate the function at a particular point by using # information from nearby points. # # Two examples of this are spline fitting and LOESS (or LOWESS) regression. # We already applied spline fitting to the mcycle data in MASS (and Dr. Wang teaches # an excellent class on smoothing splines), so let's look at LOESS. # ######### # LOESS regression ######### # We can approximate any function in a neighborhood about a single data point # by it's taylor series expansion. Denote the expansion about x at the point xi # # f(xi|x) ~~ a0 + a1(xi-x) + 1/2 a2(xi - x)^2 + 1/6 a3(xi - x)^3 + ... # # We will truncate this at the second term (although we can use more if we # want) and try to find the coefficients a0, a1, a2. # # We want to consider points "in the neighborhood" of x, and apply more # weight to those points that are closer to x. There are many weight functions # that can be used, but we're going to use the most common one, the "tri-cube" # function: # # w(x) = (1 - abs(x)^3)^3 for |x| < 1 # 0 for |x| >= 1 # # Let's also introduce a "bandwidth" parameter -- h -- which governs "how # far" in each direction we're willing to look (the size of the "neighborhood" # around x). # # For each point xi around x, we'll assign it weight w((xi-x)/h); note # that for |xi-x|>h, this will make the weight 0 (since w(x) = 0 for |x| > 1).
library(MASS) data(mcycle) attach(mcycle) plot(mcycle)
# Let's find the local fit around 14 msec; roughly when the first 'elbow' in # in the data is, and use a bandwidth of 1.5.
BW = 5 ctr = 15
# Here's our weight function w <- function(x) max((1 - abs(x)^3)^3,0)
# And here's a plot of it tx = as.matrix(seq(-1,1, len=100)) plot(apply(tx,1,w))
# generate the scaled delta-times dt = as.matrix((times - ctr)/BW)
# and the list of weights we need for WLS wlist = apply(dt, 1, w)
# Now make our predictors: (xi-x), (xi-x)^2 X = cbind( times-ctr, (times-ctr)^2)
# And use the 'weights' option in lm to do WLS regression M_local = lm(accel~X, weights = wlist) summary(M_local) B = M_local$coeff
local_x = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW)) local_fit<-function(x) B[1] + B[2]*(x-ctr) + (B[3]/2)*(x-ctr)^2
pred = apply(local_x, 1, local_fit) plot(local_x,pred, xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel))) points(mcycle)
## Obviously, if we shift the center a bit we should expect a different fit, # but it should be close.
ctr = 13 BW = 5
B2 = lm(accel~cbind( times-ctr, (times-ctr)^2), weights = apply(as.matrix((times - ctr)/BW), 1, w))$coeff local_fit2<-function(x) B2[1] + B2[2]*(x-ctr) + (B2[3]/2)*(x-ctr)^2 local_x2 = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW)) pred2 = apply(local_x2, 1, local_fit2)
plot(c(local_x, local_x2), c(pred, pred2), xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel))) points(mcycle, pty=2)
ctr = 16 BW = 5
B3 = lm(accel~cbind( times-ctr, (times-ctr)^2), weights = apply(as.matrix((times - ctr)/BW), 1, w))$coeff local_fit3<-function(x) B3[1] + B3[2]*(x-ctr) + (B3[3]/2)*(x-ctr)^2 local_x3 = as.matrix(seq(ctr-BW/5, ctr+BW/5, len=5*BW)) pred3 = apply(local_x3, 1, local_fit3)
plot(c(local_x, local_x2, local_x3), c(pred, pred2,pred3), xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel))) points(mcycle, pch=46)
local_fit3(16)
## Of course, we can also just use the built-in R package. "span" is the smoothing parameter, and in this case # we are using 25% of the data per point, for a bandwidth of about 14 mloess = loess(accel~times, degree=2, span=0.25) x = seq(min(times), max(times), len=200) plot(times, mloess$fit, type='l',xlim=c(min(times), max(times)), ylim=c(min(accel), max(accel))) points(mcycle)
summary(mloess)
####################### ### #######################
library(assist) ssr(accel~times, kernel = cubic(times), scale = 1)
times2 = (times-min(times))/diff(range(times))
spline = ssr(accel~times2, rk = cubic(times2), scale = 1)
plot(spline, ask=1) 2 0 points(mcycle$accel~times2)
|