# POLS 509 # Lecture 3: The Geometry of OLS Regression # Dr. Justin Esarey # Emory University # # February 6, 2012 ################################################## set.seed(123456) A=matrix(data=c(3,1,2,4),byrow=T,ncol=2) xy=as.matrix(c(4,2)) qr.solve(A)%*%xy # show that error and regression predictions are uncorrelated x<-runif(100, min=0, max=10) y<-2*x+3+rnorm(100, sd=1) model<-lm(y~x) model.pred<-predict(model) model.resid<-y-model.pred plot(model.resid~model.pred) summary(lm(model.resid~model.pred)) # show that projection matrices are idempotent rm(list=ls()) X<-rnorm(200, sd=1) X<-cbind(X,rep(1, 200)) b<-c(1,0.5) y<-X%*%b+rnorm(200, sd=1) Px<-X%*%qr.solve(t(X)%*%X)%*%t(X) max(Px-Px%*%Px) # show that a projection matrix equals its transpose max(t(Px)-Px) # look at predicted y compared to observed y y.hat<-Px%*%y plot(y~y.hat) abline(0,1) # "prove" the FWL Theorem rm(list=ls()) library(mvtnorm) X<-rmvnorm(200, mean=c(0,0), sigma=matrix(data=c(1,.6,.6,1), nrow=2, ncol=2)) plot(X[,1]~X[,2]) X<-cbind(X,rep(1,200)) b<-c(-3,1,0.5) y<-X%*%b+rnorm(200, sd=1) summary(lm(y~X-1)) Z<-X[,c(1,3)] y.res<-y-predict(lm(y~Z-1)) x.res<-X[,2]-predict(lm(X[,2]~Z-1)) summary(lm(y.res~x.res-1)) # FWL plots par(mfrow=c(1,2)) plot(y~X[,2], main="Raw Scatterplot") summary(lm(y~X[,2])) abline(coefficients(lm(y~X[,2]))) abline(0.5, 1, lty=2) plot(y.res~x.res, main="FWL-Corrected Scatterplot") summary(lm(y.res~x.res-1)) abline(coefficients(lm(y.res~x.res))) abline(0, 1, lty=2) # de-meaning variables rm(list=ls()) library(mvtnorm) X<-rmvnorm(200, mean=c(0,0), sigma=matrix(data=c(1,.6,.6,1), nrow=2, ncol=2)) b<-c(-3,1) y<-X%*%b+rnorm(200, sd=1)+0.5 summary(lm(y~X)) #perform demeaning X.demean<-X-predict(lm(X~rep(1,200))) y.demean<-y-predict(lm(y~rep(1,200))) summary(lm(y.demean~X.demean-1)) # demonstrate leverage rm(list=ls()) x<-runif(100, min=0, max=10) y<-2*x+3+rnorm(100, sd=1) dat<-data.frame(cbind(y,x,1:100)) names(dat)<-c("y","x","r") plot(y~x) summary(lm(y~x)) abline(coefficients(lm(y~x))) y.resid<-y-predict(lm(y~x)) model.coef<-coefficients(lm(y~x)) lev.store<-matrix(data=NA, ncol=2, nrow=100) for(i in 1:100){ dat.sub<-subset(dat, r!=i) lev.store[i,]<-coefficients(lm(y~x, data=dat.sub)) } leverage<-lev.store[,2]-model.coef[2] plot(leverage~x) abline(v=mean(x), lty=2) plot(leverage~y.resid) beta.difs<-dfbetas(lm(y~x)) plot(beta.difs[,2]~x) plot(beta.difs[,2]~y.resid)