### T-tests to compare two means in R

# car gives us Levene's test
library(car)

# Load Countries dataset (expected to define the `Countries` data frame)
load("Countries2.Rdata")
summary(Countries)

# Independent samples, parametric test ----

# Let's take another look at log gdp between the corrupt and
# trustworthy Country groups
Countries$loggdp <- log10(Countries$gdp2012)

# Based on a shapiro test (or a qqplot) loggdp looks normal
shapiro.test(Countries$loggdp)
qqnorm(Countries$loggdp)

# The means look different between groups
by(Countries$loggdp, Countries$high_cpi, mean, na.rm = TRUE)

# But is this statistically significant?
# Use a regular t.test to find out. Columns are looked up in `data`,
# so the formula does not need to repeat the data frame name.
t.test(loggdp ~ high_cpi, data = Countries)

# Independent samples, non-parametric test ----

# Suppose we were just looking at countries in the Americas.
# which() drops rows whose region is NA; a bare logical index would
# instead insert an all-NA row for each of them.
Americas <- Countries[which(Countries$region == "Americas"), ]
summary(Americas)

# We may ask whether the more corrupt countries in this
# group issue more or less takedown requests than the
# more trustworthy ones
by(Americas$total.takedowns, Americas$high_cpi, mean, na.rm = TRUE)

# Notice that total takedowns is not at all normal.
qqnorm(Americas$total.takedowns)

# Use the Wilcoxon rank-sum test, a rank-based (non-parametric)
# alternative to the t-test, to compare the two groups.
# The test is run on the Americas subset, matching the question above.
wilcox.test(total.takedowns ~ high_cpi, data = Americas)

# Dependent samples test ----

# Let's finally compare the number of takedown requests
# issued by courts, with those issued by executives / police
mean(Countries$Court.Orders, na.rm = TRUE)
mean(Countries$Executive, na.rm = TRUE)

# Because there is just one group of countries, with two
# variables per country, we need a dependent sample test
# (paired = TRUE)
# Because of the large sample size, we can use the parametric
# t-test.
t.test(Countries$Court.Orders, Countries$Executive, paired = TRUE)