library(car) load("Countries2.Rdata") summary(Countries) scatterplot(Countries$gdp2011, Countries$internet_users_2011) rownames(Countries) = Countries$Country model = lm(internet_users_2011 ~ gdp2011, data = Countries) summary(model) # use the plot command for common regression diagnostics plot(model) # plot 1: residuals versus fitted values. check for heteroskedasticity. Also, can look for non-linear relationships. # plot 2: qqplot of standardized residuals vs. normal curve. Check to see if the errors are normally distributed # plot 3: scale-location. variance of the residuals appears as points higher on y-axis # plot 4: residuals vs leverage. Look for outliers that may be biasing the model # look for points with large residuals outlierTest(model) # Could use the Durbin-Watson test to check for autocorrelation # It's really not necessary here since the order of Countries # is alphabetical, and there's no reason to suspect that alphabetical # proximity influences residuals. dwt(model) # The residuals vs fitted values plot looks heteroskedastic, we can check with a Breusch-Pagan test. library(lmtest) bptest(model) # just in case, let's check the heteroskedasticity-robust errors library(sandwich) coeftest(model, vcov = vcovHC) # As an exercise, you could try the log of gdp instead # to see what really nice regression output looks like