# Need car for the scatterplot function
library(car)

# Restore the saved objects from disk; the rest of this script expects the
# file to supply a data frame called Countries.
load("Countries.Rdata")

# Copy the 2012 GDP column to the shorter name gdp used from here on,
# then print a per-column overview of the data.
Countries$gdp <- Countries$gdp2012
summary(Countries)

# Use the Country column as row names.
rownames(Countries) <- Countries$Country

# First look: internet users (2011) against GDP, with a linear fit, a smoother
# and marginal boxplots on both axes. Note the single outlier on the gdp axis.
# id.method = "identify" labels points interactively (click on the plot).
# NOTE(review): car >= 3.0 appears to have renamed several of these arguments
# (regLine, smooth = list(...), id = list(...)) -- confirm against the
# installed car version before changing them.
scatterplot(
  internet_users_2011 ~ gdp,
  reg.line = lm,
  smooth = TRUE,
  spread = TRUE,
  id.method = "identify",
  boxplots = "xy",
  span = 0.5,
  data = Countries
)
  
# Sort by GDP to find out which country is the outlier.
# order() returns positions (indices), not the sorted values themselves;
# negating gdp puts the largest values first.
order(-Countries$gdp)

# Those positions can index a plain vector ...
Countries$gdp[order(-Countries$gdp)]

# ... or pick rows of the whole data frame (keeping just two columns), so the
# top of the result shows the highest-GDP entries.
Countries2 <- Countries[order(-Countries$gdp), c("Country", "gdp")]
head(Countries2)

# Next, make a logical vector that flags the countries that HAVE internet data
# (i.e. the value is not missing).
# Comparing against NA with != does not work: any comparison involving NA
# evaluates to NA, so the line below yields a vector of NAs.
# It is kept here only to demonstrate the pitfall.
Countries$internet_users_2011 != NA

# Use the is.na function instead; negating it gives TRUE for non-missing values
!is.na(Countries$internet_users_2011)

# Can use this as an index vector to subset our dataframe, keeping only the
# rows whose internet_users_2011 value is present
Countries3 <- Countries[!is.na(Countries$internet_users_2011), ]

# Redraw the same scatterplot, this time from Countries3 (the rows with
# internet data present).
# NOTE(review): car >= 3.0 appears to have renamed several of these arguments
# (regLine, smooth = list(...), id = list(...)) -- confirm against the
# installed car version before changing them.
scatterplot(
  internet_users_2011 ~ gdp,
  reg.line = lm,
  smooth = TRUE,
  spread = TRUE,
  id.method = "identify",
  boxplots = "xy",
  span = 0.5,
  data = Countries3
)