## ## Code used in: ## Jeffrey B. Lewis and Drew A. Linzer. 2005. "Estimating Regression ## Models in which the Dependent Variable Is Based on Estimates," ## Political Analysis. 13(4): 345-364. ## ## To replicate and extend: ## Cohen, Jeffrey E. 2004. "Economic Perceptions and Executive Approval in Comparative ## Perspective." Political Behavior. 26(1): 27-43. ## ## Jeffrey Lewis (jblewis@ucla.edu) ## Drew Linzer (dlinzer@ucla.edu) ## Department of Political Science ## University of California, Los Angeles ## ## March 8, 2005 ## ## Replication_EDV.R ## ## Source data: http://people-press.org/dataarchive/signup.php3?DocID=169 ## ## Pew Research Center For The People & The Press data archive ## http://people-press.org/dataarchive/ ## Jun 3, 2003: Views of a Changing World Combined Global Attitudes dataset (large file, 3.7MB) ## library(MASS) # needed to use ginv(), the generalized inverse of a matrix library(Design) # needed to use ols() and robcov() for Huber and Efron robust standard errors, and lrm() for logistic regressions library(lasso2) # needed to use tr(), the trace of a matrix options(digits=3) par(ask=T,mar=c(5,4,0.5,0.5)) # Retrieve data from original Pew data set. # Note: it is necessary to convert the downloaded SPSS file "Pew GAP final 44 country dataset 1.1sav.sav" # to a comma-delimited text file using a program such as Stat/Transfer to allow importing into R. pewdata <- read.csv("pew gap final 44 country dataset 1.1sav.csv") # Drop China (8), Egypt (11), Vietnam (43) repdata <- pewdata[(pewdata$COUNTRY!=8) & (pewdata$COUNTRY!=11) & (pewdata$COUNTRY!=43),] # Code individual-level variable for new vs old democracy using Cohen's system; 7 countries are "new" olddem <- matrix(0,nrow=nrow(repdata),ncol=1) olddem[(repdata$COUNTRY==7)|(repdata$COUNTRY==12)|(repdata$COUNTRY==13)|(repdata$COUNTRY==19)|(repdata$COUNTRY==20)|(repdata$COUNTRY==38)|(repdata$COUNTRY==40)] <- 1 newdem <- 1-olddem # Code country-level variable for number of interviews in each country c.n <- c(table(repdata$COUNTRY)[]) # N is number of countries in cross-national study N <- nrow(matrix(c.n)) # Code country level variables (denoted "c.xxxxxx") for aggregate percentages of variables of interest c.approve <- 100*matrix((table(repdata$COUNTRY,repdata$Q35B)[,1] + table(repdata$COUNTRY,repdata$Q35B)[,2])/c.n) c.retro <- 100*matrix((table(repdata$COUNTRY,repdata$Q12)[,1] + table(repdata$COUNTRY,repdata$Q12)[,2])/c.n) c.prosp <- 100*matrix((table(repdata$COUNTRY,repdata$Q13)[,1] + table(repdata$COUNTRY,repdata$Q13)[,2])/c.n) c.olddem <- matrix(0,nrow=N,ncol=1) c.olddem[table(repdata$COUNTRY,olddem)[,2]>0] <- 1 c.newdem <- 1-c.olddem c.retro.new <- c.retro*c.newdem c.prosp.old <- c.prosp*c.olddem # Cohen Model 1 cohen1 <- lm(c.approve~c.prosp+c.retro+c.olddem) # Cohen Model 2 cohen2 <- robcov(ols(c.approve~c.prosp+c.retro+c.olddem, x=T, y=T), method='huber') # Cohen Model 3 cohen3 <- lm(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old) # Cohen Model 4 cohen4 <- robcov(ols(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old, x=T, y=T), method='huber') # Efron small-sample standard errors model.Efron <- robcov(ols(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old, x=T, y=T), method='efron') # Bind together IVs in models 3-4 as column vectors for ease of use later iv <- cbind(c.prosp,c.retro,c.olddem,c.retro.new,c.prosp.old) # WLS dvv <- c((c.approve*(100-c.approve))/c.n) model.WLS <- lm(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old,weights=1/dvv) plot(density(sqrt(dvv),adjust=1.5)$x,density(sqrt(dvv),adjust=1.5)$y,xlab="Standard error of approval of leader",ylab="Density",xlim=c(0,3),cex.axis=1.3,cex.lab=1.4,las=1,type="l",lwd=2) # savePlot(filename="dvv-dens",type="ps") # FGLS - known variance dvv <- c((c.approve*(100-c.approve))/c.n) sigmahatsq <- (sum((residuals(lm(c.approve~iv)))^2) - sum(dvv) + tr(ginv(t(cbind(1,iv)) %*% cbind(1,iv)) %*% t(cbind(1,iv)) %*% (diag(N) * dvv) %*% cbind(1,iv)))/(N-ncol(iv)-1) if (sigmahatsq < 0) sigmahatsq <- 0 model.FGLS <- lm(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old,weights=(1/(dvv+sigmahatsq))) # FGLSpv - proportional variance dvv <- 1/c.n residsq <- (resid(lm(c.approve~iv)))^2 steptwo <- lm(residsq~dvv) if (coef(steptwo)[1] < 0) steptwo <- lm(residsq~0+dvv) predval <- fitted.values(steptwo) predval[predval<0] <- 0.0001 # error check against very rare possibility of negative weight model.FGLSpv <- lm(c.approve~c.prosp+c.retro+c.olddem+c.retro.new+c.prosp.old,weights=(1/predval)) # Output results of five models: Cohen 3, Cohen 4, WLS, FGLS, FGLSpv cat("\n") cat("Replications and extensions of Cohen's original cross-national aggregate data models \n") cat("model: \t Cohen 3 OLS \t Cohen 4 White \t Efron \t WLS \t FGLS \t FGLSpv \n") cat("intercept \t", coefficients(summary(cohen3))[1,1], " \t", coefficients(cohen4)[1], " \t", coefficients(model.Efron)[1], " \t", coefficients(summary(model.WLS))[1,1], " \t", coefficients(summary(model.FGLS))[1,1], " \t", coefficients(summary(model.FGLSpv))[1,1], "\n") cat("std err \t", coefficients(summary(cohen3))[1,2], " \t", sqrt(diag(cohen4$var))[1], " \t", sqrt(diag(model.Efron$var))[1], " \t", coefficients(summary(model.WLS))[1,2], " \t", coefficients(summary(model.FGLS))[1,2], " \t", coefficients(summary(model.FGLSpv))[1,2], "\n") cat("c.prosp \t", coefficients(summary(cohen3))[2,1], "\t", coefficients(cohen4)[2], "\t", coefficients(model.Efron)[2], "\t", coefficients(summary(model.WLS))[2,1], "\t", coefficients(summary(model.FGLS))[2,1], "\t", coefficients(summary(model.FGLSpv))[2,1], "\n") cat("std err \t", coefficients(summary(cohen3))[2,2], " \t", sqrt(diag(cohen4$var))[2], " \t", sqrt(diag(model.Efron$var))[2], " \t", coefficients(summary(model.WLS))[2,2], " \t", coefficients(summary(model.FGLS))[2,2], " \t", coefficients(summary(model.FGLSpv))[2,2], "\n") cat("c.retro \t", coefficients(summary(cohen3))[3,1], "\t", coefficients(cohen4)[3], "\t", coefficients(model.Efron)[3], "\t", coefficients(summary(model.WLS))[3,1], "\t", coefficients(summary(model.FGLS))[3,1], "\t", coefficients(summary(model.FGLSpv))[3,1], "\n") cat("std err \t", coefficients(summary(cohen3))[3,2], " \t", sqrt(diag(cohen4$var))[3], " \t", sqrt(diag(model.Efron$var))[3], " \t", coefficients(summary(model.WLS))[3,2], " \t", coefficients(summary(model.FGLS))[3,2], " \t", coefficients(summary(model.FGLSpv))[3,2], "\n") cat("c.olddem \t", coefficients(summary(cohen3))[4,1], " \t", coefficients(cohen4)[4], " \t", coefficients(model.Efron)[4], " \t", coefficients(summary(model.WLS))[4,1], " \t", coefficients(summary(model.FGLS))[4,1], " \t", coefficients(summary(model.FGLSpv))[4,1], "\n") cat("std err \t", coefficients(summary(cohen3))[4,2], " \t", sqrt(diag(cohen4$var))[4], " \t", sqrt(diag(model.Efron$var))[4], " \t", coefficients(summary(model.WLS))[4,2], " \t", coefficients(summary(model.FGLS))[4,2], " \t", coefficients(summary(model.FGLSpv))[4,2], "\n") cat("c.ret.new \t", coefficients(summary(cohen3))[5,1], " \t", coefficients(cohen4)[5], " \t", coefficients(model.Efron)[5], " \t", coefficients(summary(model.WLS))[5,1], " \t", coefficients(summary(model.FGLS))[5,1], " \t", coefficients(summary(model.FGLSpv))[5,1], "\n") cat("std err \t", coefficients(summary(cohen3))[5,2], " \t", sqrt(diag(cohen4$var))[5], " \t", sqrt(diag(model.Efron$var))[5], " \t", coefficients(summary(model.WLS))[5,2], " \t", coefficients(summary(model.FGLS))[5,2], " \t", coefficients(summary(model.FGLSpv))[5,2], "\n") cat("c.pro.old \t", coefficients(summary(cohen3))[6,1], " \t", coefficients(cohen4)[6], " \t", coefficients(model.Efron)[6], " \t", coefficients(summary(model.WLS))[6,1], " \t", coefficients(summary(model.FGLS))[6,1], " \t", coefficients(summary(model.FGLSpv))[6,1], "\n") cat("std err \t", coefficients(summary(cohen3))[6,2], " \t", sqrt(diag(cohen4$var))[6], " \t", sqrt(diag(model.Efron$var))[6], " \t", coefficients(summary(model.WLS))[6,2], " \t", coefficients(summary(model.FGLS))[6,2], " \t", coefficients(summary(model.FGLSpv))[6,2], "\n") cat("R^2 \t", summary(cohen3)$r.squared, " \t", (0.4637), " \t", (0.4637), " \t", summary(model.WLS)$r.squared, " \t", summary(model.FGLS)$r.squared, " \t", summary(model.FGLSpv)$r.squared, "\n") cat("\n") # Scatterplots of country level data c.approve.new <- c.approve*c.newdem c.approve.new[c.approve.new[]==0] <- NA c.approve.old <- c.approve*c.olddem c.approve.old[c.approve.old[]==0] <- NA matplot(c.prosp,cbind(c.approve.new,c.approve.old),pch=c(1,15),xlim=c(0,100),ylim=c(0,100),cex.axis=1.2,cex.lab=1.2,las=1,xlab="% Favorable economic prospection",ylab="% Leader approval") abline(coefficients(summary(lm(c.approve.new~c.prosp)))[1,1], coefficients(summary(lm(c.approve.new~c.prosp)))[2,1]) abline(coefficients(summary(lm(c.approve.old~c.prosp)))[1,1], coefficients(summary(lm(c.approve.old~c.prosp)))[2,1],lwd=2) # savePlot(filename="country-prosp",type="ps") matplot(c.retro,cbind(c.approve.new,c.approve.old),pch=c(1,15),xlim=c(0,100),ylim=c(0,100),cex.axis=1.2,cex.lab=1.2,las=1,xlab="% Favorable economic retrospection",ylab="% Leader approval") abline(coefficients(summary(lm(c.approve.new~c.retro)))[1,1], coefficients(summary(lm(c.approve.new~c.retro)))[2,1]) abline(coefficients(summary(lm(c.approve.old~c.retro)))[1,1], coefficients(summary(lm(c.approve.old~c.retro)))[2,1],lwd=2) # savePlot(filename="country-retro",type="ps") # End scatterplots of country level data # Can these models be improved upon by running a two stage regression, now that we have individual-level data? b.prosp <- matrix(nrow=N,ncol=3) # Initialize to store coefficents and se's b.retro <- matrix(nrow=N,ncol=3) # Initialize to store coefficents and se's cnum <- 1 for (i in 1:max(repdata$COUNTRY)) { if (nrow(matrix(repdata$COUNTRY[repdata$COUNTRY==i]))!=0) { # Check is needed b/c countries not numbered sequentially approve <- repdata$Q35B[repdata$COUNTRY==i] approve[approve[]==5|approve[]==6] <- NA # Recode DK and Refused to NA approve <- ifelse(approve[]==1|approve[]==2,1,0) # Recode 1,2 to 1 and 3,4 to 0 retro <- repdata$Q12[repdata$COUNTRY==i] retro[retro[]==5|retro[]==6] <- NA # Recode DK and Refused to NA prosp <- repdata$Q13[repdata$COUNTRY==i] prosp[prosp[]==6|prosp[]==7] <- NA # Recode DK and Refused to NA countrymodel <- lrm(approve~prosp+retro) # Run country LOGIT model b.prosp[cnum,1] <- i # Store country number b.retro[cnum,1] <- i # Store country number b.prosp[cnum,2] <- coefficients(countrymodel)[2] # Store coefficient on prosp variable b.prosp[cnum,3] <- sqrt(countrymodel$var[2,2]) # Store standard error on prosp variable b.retro[cnum,2] <- coefficients(countrymodel)[3] # Store coefficient on retro variable b.retro[cnum,3] <- sqrt(countrymodel$var[3,3]) # Store standard error on retro variable cnum <- cnum+1 } } # Real GDP per capita (PPP) in 2000 from the World Bank WDI CD-ROM gdppc <- matrix(c(2187,12377,1602,2424,7625,5710,27840,1630,13991,24223,25103,1964,3821,2453,2358,3043,23626,26755,1022,17380,797,9023,896,1928,4799,3971,9051,8377,1510,11243,9401,523,6974,1208,23509,3816,34142,2441,5794,4308,3966),nrow=N) loggdppc <- log10(gdppc) cat("\n") # Plot second-stage estimates of effects of log10(GDP/cap) on coefficients of economic prospection-leader approval logit dvv <- b.prosp[,3]^2 model.OLS <- lm(b.prosp[,2]~loggdppc) model.Efron <- robcov(ols(b.prosp[,2]~loggdppc, x=T, y=T), method='efron') model.WLS <- lm(b.prosp[,2]~loggdppc,weights=1/dvv) iv <- loggdppc sigmahatsq <- (sum((residuals(model.OLS))^2) - sum(dvv) + tr(ginv(t(cbind(1,iv)) %*% cbind(1,iv)) %*% t(cbind(1,iv)) %*% (diag(N) * dvv) %*% cbind(1,iv)))/(N-ncol(iv)-1) if (sigmahatsq < 0) sigmahatsq <- 0 model.FGLS <- lm(b.prosp[,2]~loggdppc,weights=(1/(dvv+sigmahatsq))) plot(loggdppc,b.prosp[,2],ylim=c(-1.5,0.5),cex.axis=1.2,cex.lab=1.2,las=1,pch=16,xlab="log of GDP per capita, 2000",ylab="First-stage prospection logit coefficient") abline(coefficients(summary(model.OLS))[1,1],coefficients(summary(model.OLS))[2,1]) # OLS best fit line abline(coefficients(summary(model.WLS))[1,1],coefficients(summary(model.WLS))[2,1],lwd=2,lty=3) # WLS best fit line abline(coefficients(summary(model.FGLS))[1,1],coefficients(summary(model.FGLS))[2,1],lwd=2,lty=5) # FGLS best fit line y0.coord <- b.prosp[,2] - 1.96*b.prosp[,3] y1.coord <- b.prosp[,2] + 1.96*b.prosp[,3] segments(loggdppc,y0.coord,loggdppc,y1.coord) #savePlot(filename="2stage-prosp",type="ps") cat("Prospective models \n") cat("model: \t OLS-Efron \t WLS \t FGLS \n") cat("intercept \t", coefficients(model.Efron)[1], "\t", coefficients(summary(model.WLS))[1,1], "\t", coefficients(summary(model.FGLS))[1,1], "\n") cat("std err \t", sqrt(diag(model.Efron$var))[1], " \t", coefficients(summary(model.WLS))[1,2], " \t", coefficients(summary(model.FGLS))[1,2], "\n") cat("slope \t", coefficients(model.Efron)[2], "\t", coefficients(summary(model.WLS))[2,1], "\t", coefficients(summary(model.FGLS))[2,1], "\n") cat("std err \t", sqrt(diag(model.Efron$var))[2], " \t", coefficients(summary(model.WLS))[2,2], " \t", coefficients(summary(model.FGLS))[2,2], "\n \n") # Plot second-stage estimates of effects of log10(GDP/cap) on coefficients of economic retrospection-leader approval logit dvv <- b.retro[,3]^2 model.OLS <- lm(b.retro[,2]~loggdppc) model.Efron <- robcov(ols(b.retro[,2]~loggdppc, x=T, y=T), method='efron') model.WLS <- lm(b.retro[,2]~loggdppc,weights=1/dvv) iv <- loggdppc sigmahatsq <- (sum((residuals(model.OLS))^2) - sum(dvv) + tr(ginv(t(cbind(1,iv)) %*% cbind(1,iv)) %*% t(cbind(1,iv)) %*% (diag(N) * dvv) %*% cbind(1,iv)))/(N-ncol(iv)-1) if (sigmahatsq < 0) sigmahatsq <- 0 model.FGLS <- lm(b.retro[,2]~loggdppc,weights=(1/(dvv+sigmahatsq))) plot(loggdppc,b.retro[,2],ylim=c(-1.5,0.5),cex.axis=1.2,cex.lab=1.2,las=1,pch=16,xlab="log of GDP per capita, 2000",ylab="First-stage retrospection logit coefficient") abline(coefficients(summary(model.OLS))[1,1],coefficients(summary(model.OLS))[2,1]) # OLS best fit line abline(coefficients(summary(model.WLS))[1,1],coefficients(summary(model.WLS))[2,1],lwd=2,lty=3) # WLS best fit line abline(coefficients(summary(model.FGLS))[1,1],coefficients(summary(model.FGLS))[2,1],lwd=2,lty=5) # FGLS best fit line y0.coord <- b.retro[,2] - 1.96*b.retro[,3] y1.coord <- b.retro[,2] + 1.96*b.retro[,3] segments(loggdppc,y0.coord,loggdppc,y1.coord) #savePlot(filename="2stage-retro",type="ps") cat("Retrospective models \n") cat("model: \t OLS-Efron \t WLS \t FGLS \n") cat("intercept \t", coefficients(model.Efron)[1], "\t", coefficients(summary(model.WLS))[1,1], "\t", coefficients(summary(model.FGLS))[1,1], "\n") cat("std err \t", sqrt(diag(model.Efron$var))[1], " \t", coefficients(summary(model.WLS))[1,2], " \t", coefficients(summary(model.FGLS))[1,2], "\n") cat("slope \t", coefficients(model.Efron)[2], "\t", coefficients(summary(model.WLS))[2,1], "\t", coefficients(summary(model.FGLS))[2,1], "\n") cat("std err \t", sqrt(diag(model.Efron$var))[2], " \t", coefficients(summary(model.WLS))[2,2], " \t", coefficients(summary(model.FGLS))[2,2], "\n \n") cat("finished. \n") # End of program.