# Techniques for testing basic assumptions in R:
# normality and homogeneity of variance
#
# This script is meant to be stepped through interactively: each top-level
# call prints a summary/test result or draws a plot.

# car provides leveneTest(), used in the homogeneity-of-variance section
library(car)

# Install psych only if it is not already available, so that re-running the
# script does not reinstall the package (or require network access) each time
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych", dependencies = TRUE)
}

# use psych for the describe function
library(psych)

# Load the countries dataset, including
# corruption and internet growth variables.
# The rest of the script expects this file to define an object `Countries`.
load("Countries.Rdata")
summary(Countries)

### Normality ----

# Use a histogram to see if the distribution of gdp looks normal;
# the second call asks for more bins to show finer detail
hist(Countries$gdp2012)
hist(Countries$gdp2012, breaks = 20)

# Check some numerical measures (including skew and kurtosis) using describe
describe(Countries$gdp2012)

# Check normality using a Q-Q plot
qqnorm(Countries$gdp2012)

# Finally, use a Shapiro-Wilk test to see if normality is a plausible
# hypothesis
shapiro.test(Countries$gdp2012)

# Next, let's do the same thing with the log of gdp.
# This is a very common transformation in econometrics.
# NOTE(review): log10() yields -Inf for 0 and NaN for negative inputs;
# this presumes gdp2012 is strictly positive -- confirm against the data.
Countries$loggdp <- log10(Countries$gdp2012)

# Begin with the Shapiro-Wilk test
shapiro.test(Countries$loggdp)

# And look at the shape of the Q-Q plot
qqnorm(Countries$loggdp)

# Use a histogram to see if the distribution of loggdp looks normal
hist(Countries$loggdp)

### Homogeneity of Variance ----

# Let's compare loggdp between two groups of countries:
# those with higher fertility and those with lower fertility.
# A country is "high fertility" when its rate exceeds the sample mean;
# rows with a missing fertility_rate get an NA group.
mean_fertility <- mean(Countries$fertility_rate, na.rm = TRUE)
Countries$high_fertility <- factor(Countries$fertility_rate > mean_fertility)

# First, check the means
by(Countries$loggdp, Countries$high_fertility, mean, na.rm = TRUE)

# Check if the variances are the same for both groups
by(Countries$loggdp, Countries$high_fertility, var, na.rm = TRUE)

# Use a Levene test to see if equal variances is a plausible hypothesis
leveneTest(Countries$loggdp, Countries$high_fertility)