npfit<-function(form, fam, dat, reps=1000, span.l="aicc", color=F,...){ # This function calculates the deviation between predictions from a GLM model and # non-parametrically smoothed empirical frequencies. Models that are good at prediction will tend # to have small deviation; for example, if a model predicts that Pr(y=1)=k%, about k% of observations # with this predicted probability should have y=1. dat<-subset(dat, select=all.vars(form)) dat<-na.omit(dat) n<-dim(dat)[1] store<-glm(formula=form, family=fam, data=dat) # fit the model to the data pred<-predict(store, type="response") # obtain p-hats from model YY<-eval(parse(text=paste("dat$",all.vars(form)[1], sep=""))) # call the y-variable YY dat.names<-c(names(dat),"index.counter") # add an index counter to the data dat<-data.frame(cbind(dat, 1:dim(dat)[1])) # names(dat)<-dat.names # # if an optimal bandwidth is specified, find it if(span.l=="aicc" | span.l=="gcv"){ # use Michael Friendly's function for calculating AIC/GCV from a loess loess.aic <- function (x) { # Written by Michael Friendly # http://tolstoy.newcastle.edu.au/R/help/05/11/15899.html if (!(inherits(x,"loess"))) stop("Error: argument must be a loess object") # extract values from loess object span <- x$pars$span n <- x$n traceL <- x$trace.hat sigma2 <- sum( x$residuals^2 ) / (n-1) delta1 <- x$one.delta delta2 <- x$two.delta enp <- x$enp aicc <- log(sigma2) + 1 + 2* (2*(traceL+1)) / (n-traceL-2) aicc1<- n*log(sigma2) + n* ( (delta1/delta2)*(n+enp)/(delta1^2/delta2)-2 ) gcv <- n*sigma2 / (n-traceL)^2 result <- list(span=span, aicc=aicc, aicc1=aicc1, gcv=gcv) return(result) } # this is the optimization object; it just returns the AIC or GCV for a bandwidth argument smooth.err<-function(span.arg){ assign("last.warning", NULL, envir = baseenv()) ok<-T plot.model<-withCallingHandlers(tryCatch(loess(YY~pred, degree=1, span=span.arg)), warning = function(w){ok<<-F; invokeRestart("muffleWarning")}) if(ok==T){return(eval(parse(text=paste("loess.aic(plot.model)$", span.l, sep=""))))} if(ok==F){return(2e10)} } # do the optimization, set the span argument to the optimal value span.l.name<-span.l span.l<-optimize(f=smooth.err, interval=c(0.01, 0.99))$minimum cat(span.l.name, "Chosen Span = ", span.l, "\n") } ok<-T plot.model<-withCallingHandlers(tryCatch(loess(YY~pred, degree=1, span=span.l)), warning = function(w){ok<<-F; invokeRestart("muffleWarning")}) # if a problem is detected with the GCV/AIC-chosen span, default to a 75% bandwidth if(ok==F){cat("Defaulting to span = 0.75", "\n"); span.l<-0.75; plot.model<-loess(YY~pred, degree=1, span=span.l)} y.obs<-predict(plot.model, newdata=pred) # determine observed y using loess smooth for(j in 1:length(y.obs)){y.obs[j]<-max(min(y.obs[j],1),0)} # keep y.obs in bounds # prepare for heat map plot: predict empirical y at a bunch of points y-hat tick<-(max(pred)-min(pred))/500 pr<-seq(from=min(pred),to=max(pred),by=tick) yo<-predict(plot.model,newdata=pr) for(j in 1:length(yo)){yo[j]<-max(min(yo[j],1),0)} # keep yo in bounds # determine the distribution of the heat map line # using bootstrapping y.obs.boot.store<-matrix(data=NA, nrow=reps, ncol=length(pred)) # for calculating heat map at each observation y.obs.bs<-matrix(data=NA, nrow=reps, ncol=length(pr)) # for the heat map plot cat(c("Generating Bootstrap Predictions...","\n")) pb <- txtProgressBar(min = 0, max = reps, style = 3) # text progress bar for bootstrap replicates o<-order(pred) for(i in 1:reps){ setTxtProgressBar(pb, i) boot.pred<-pred boot.y<-ifelse(runif(n, min=0, max=1)