library(car)

load("Countries2.Rdata")
summary(Countries)

scatterplot(Countries$gdp2011, Countries$internet_users_2011)
rownames(Countries) = Countries$Country

model = lm(internet_users_2011 ~ gdp2011, data = Countries)
summary(model)

# use the plot command for common regression diagnostics
plot(model)

# plot 1: residuals versus fitted values.  check for heteroskedasticity.  Also, can look for non-linear relationships.
# plot 2: qqplot of standardized residuals vs. normal curve.  Check to see if the errors are normally distributed
# plot 3: scale-location.  variance of the residuals appears as points higher on y-axis
# plot 4: residuals vs leverage. Look for outliers that may be biasing the model

# look for points with large residuals 
outlierTest(model)

# Could use the Durbin-Watson test to check for autocorrelation
# It's really not necessary here since the order of Countries
# is alphabetical, and there's no reason to suspect that alphabetical
# proximity influences residuals.
dwt(model)

# The residuals vs fitted values plot looks heteroskedastic, we can check with a Breusch-Pagan test.
library(lmtest)
bptest(model)

# just in case, let's check the heteroskedasticity-robust errors
library(sandwich)
coeftest(model, vcov = vcovHC)

# As an exercise, you could try the log of gdp instead
# to see what really nice regression output looks like