### T-tests to compare two means in R

# car gives us Levene's test
library(car)

# Read the saved workspace file; the rest of this script expects it
# to supply a data frame called Countries
load(file = "Countries2.Rdata")

# Print a per-column overview of the loaded data
summary(object = Countries)


# Let's take another look at log GDP between the corrupt and
# trustworthy country groups (the two levels of high_cpi).
# GDP is put on a log10 scale before testing.
Countries$loggdp <- log10(Countries$gdp2012)

# Check normality of loggdp with a Shapiro-Wilk test and a normal
# Q-Q plot before relying on a parametric test
shapiro.test(Countries$loggdp)
qqnorm(Countries$loggdp)

# Compare the group means descriptively (missing values dropped)
by(Countries$loggdp, Countries$high_cpi, mean, na.rm = TRUE)

# But is the difference statistically significant?
# Use an independent two-sample t-test to find out. Column names in
# the formula are looked up in `data`, so there is no need to repeat
# `Countries$`. Note that t.test() defaults to var.equal = FALSE,
# i.e. Welch's test, which does not assume equal group variances.
t.test(loggdp ~ high_cpi, data = Countries)

# Suppose we were just looking at countries in the Americas.
# which() keeps only rows where the comparison is TRUE; plain logical
# indexing would add an all-NA row for every country whose region is
# missing.
Americas <- Countries[which(Countries$region == "Americas"), ]
summary(Americas)

# We may ask whether the more corrupt countries in this
# group issue more or fewer takedown requests than the
# more trustworthy ones
by(Americas$total.takedowns, Americas$high_cpi, mean, na.rm = TRUE)

# Inspect a normal Q-Q plot of total takedowns to judge whether a
# parametric test is appropriate for this subset.
qqnorm(Americas$total.takedowns)

# Use the non-parametric Wilcoxon rank-sum test to compare the two
# groups. It works on ranks (a shift in location) rather than on the
# means themselves. The test is run on the Americas subset, matching
# the question posed above.
wilcox.test(total.takedowns ~ high_cpi, data = Americas)


# Let's finally compare the number of takedown requests
# issued by courts with those issued by executives / police
mean(Countries$Court.Orders, na.rm = TRUE)
mean(Countries$Executive, na.rm = TRUE)

# Because there is just one group of countries, with two
# variables per country, we need a dependent-sample test
# (paired = TRUE), which tests the within-country differences.
# Because of the large sample size, we can use the parametric
# t-test.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
t.test(Countries$Court.Orders, Countries$Executive, paired = TRUE)