Self-Test

Using what you have learnt so far, obtain descriptive statistics and draw histograms of first-year exam scores, computer literacy, numeracy and lectures attended.

# Import data: read the tab-delimited file, using its first row as column names.
rexam <- read.delim("rexam.dat", header = TRUE)

# Convert the numeric university code (0/1) into a labelled factor.
rexam$uni <- factor(
  rexam$uni,
  levels = 0:1,
  labels = c("Duncetown University", "Sussex University")
)

# Obtain descriptive statistics for the four outcome variables.
# NOTE(review): describe() and stat.desc() are not base R; presumably they come
# from the psych and pastecs packages loaded elsewhere -- confirm.
describe(rexam[, c("exam", "computer", "lectures", "numeracy")])
stat.desc(
  rexam[, c("exam", "computer", "lectures", "numeracy")],
  basic = FALSE,
  norm = TRUE
)

# Same stat.desc() table as above, rounded to 3 decimal digits for readability.
round(
  stat.desc(
    rexam[, c("exam", "computer", "lectures", "numeracy")],
    basic = FALSE,
    norm = TRUE
  ),
  digits = 3
)

Screen Shot 2015-08-19 at 2.59.09 AM

We can interpret absolute values of kurt.2SE and skew.2SE greater than 1, 1.29, and 1.65 as significant at p < .05, p < .01, and p < .001, respectively.

# Histogram with a normal curve on it (using exam score as the example).
# NOTE(review): the statement that builds the base histogram was lost from the
# original text; it is reconstructed here from the grouped-histogram code used
# later in this post -- confirm against the intended plot.
hist.exam <- ggplot(rexam, aes(exam)) +
  theme(legend.position = "none") +
  geom_histogram(aes(y = ..density..), fill = "white", colour = "black") +
  labs(x = "Exam score", y = "Density")

# Overlay a normal density with the sample mean and standard deviation.
hist.exam +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(rexam$exam, na.rm = TRUE),
      sd = sd(rexam$exam, na.rm = TRUE)
    ),
    colour = "black",
    size = 1
  )

Rplot01

Repeat these analyses for computer literacy and percentage of lectures attended, and interpret the results.

# Descriptive statistics by university.
# Select the columns by name so each variable keeps its own label in the
# output (binding both under the same name `data` makes them indistinguishable).
by(rexam[, c("computer", "lectures")], rexam$uni, describe)

Screen Shot 2015-08-20 at 3.04.54 PM

# Normality statistics for lectures attended, computed separately per university.
# basic = FALSE and norm = TRUE are passed through by() to stat.desc().
by(rexam$lectures, rexam$uni, stat.desc, basic = FALSE, norm = TRUE)

Screen Shot 2015-08-20 at 3.44.21 PM

# Histograms by group, using lectures as the example.
# Split the data by university first; each subset gets its own plot.
dunceData <- subset(rexam, uni == "Duncetown University")
sussexData <- subset(rexam, uni == "Sussex University")

# Density histogram for Duncetown with a normal curve based on that group's
# own mean and standard deviation.
hist.lectures.duncetown <- ggplot(dunceData, aes(lectures)) +
  theme(legend.position = "none") +
  geom_histogram(
    aes(y = ..density..),
    fill = "white",
    colour = "black",
    binwidth = 1
  ) +
  labs(x = "Lectures", y = "Density") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(dunceData$lectures, na.rm = TRUE),
      sd = sd(dunceData$lectures, na.rm = TRUE)
    ),
    colour = "blue",
    size = 1
  )

hist.lectures.duncetown

Rplot

# Density histogram of lectures attended for the Sussex subset, overlaid with
# a normal curve that uses the subset's own mean and standard deviation.
hist.lectures.sussex <- ggplot(sussexData, aes(lectures)) +
  theme(legend.position = "none") +
  geom_histogram(
    aes(y = ..density..),
    fill = "white",
    colour = "black",
    binwidth = 1
  ) +
  labs(x = "Lectures", y = "Density") +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(sussexData$lectures, na.rm = TRUE),
      sd = sd(sussexData$lectures, na.rm = TRUE)
    ),
    colour = "blue",
    size = 1
  )

# Print the plot.
hist.lectures.sussex

Rplot01

Answers to Smart Alex’s Tasks

Task 1

# Import data: read the tab-delimited file, using its first row as column names.
lecturerData <- read.delim("Lecturer Data.dat", header = TRUE)

Screen Shot 2015-08-09 at 1.14.43 AM

# Convert the numeric job code (1/2) into a labelled factor.
lecturerData$job <- factor(
  lecturerData$job,
  levels = 1:2,
  labels = c("Lecturer", "Student")
)

Screen Shot 2015-08-09 at 1.12.47 AM

# 1: mean number of friends per job group, drawn as bars with
# mean_cl_normal confidence limits overlaid as point ranges.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar <- ggplot(lecturerData, aes(job, friends))
bar +
  stat_summary(fun.y = mean, geom = "bar", fill = "grey", colour = "Black") +
  stat_summary(fun.data = mean_cl_normal, geom = "pointrange") +
  labs(x = "job", y = "friends")

Screen Shot 2015-08-09 at 12.18.21 AM

# 2: mean alcohol consumption per job group, drawn as bars with
# mean_cl_normal confidence limits overlaid as point ranges.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar <- ggplot(lecturerData, aes(job, alcohol))
bar +
  stat_summary(fun.y = mean, geom = "bar", fill = "grey", colour = "Black") +
  stat_summary(fun.data = mean_cl_normal, geom = "pointrange") +
  labs(x = "job", y = "alcohol")

Rplot

# 3: mean income per job group, drawn as bars with
# mean_cl_normal confidence limits overlaid as point ranges.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar <- ggplot(lecturerData, aes(job, income))
bar +
  stat_summary(fun.y = mean, geom = "bar", fill = "grey", colour = "Black") +
  stat_summary(fun.data = mean_cl_normal, geom = "pointrange") +
  labs(x = "job", y = "income")

Rplot01

# 4: mean neuroticism per job group, drawn as bars with
# mean_cl_normal confidence limits overlaid as point ranges.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar <- ggplot(lecturerData, aes(job, neurotic))
bar +
  stat_summary(fun.y = mean, geom = "bar", fill = "grey", colour = "Black") +
  stat_summary(fun.data = mean_cl_normal, geom = "pointrange") +
  labs(x = "job", y = "neurotic")

Rplot02

# 5: scatterplot of neuroticism against alcohol consumption with linear
# regression lines. Mapping `fill = job` in the smoother fits one line per job
# group; se = FALSE suppresses the confidence band.
scatter <- ggplot(lecturerData, aes(alcohol, neurotic))
scatter +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = job), colour = "black", se = FALSE)

Rplot

Task 2

# Import data: read the tab-delimited file, using its first row as column names.
infidelityData <- read.delim("Infidelity.dat", header = TRUE)

Screen Shot 2015-08-09 at 1.53.16 AM

# Convert the numeric gender code (1/2) into a labelled factor.
infidelityData$Gender <- factor(
  infidelityData$Gender,
  levels = 1:2,
  labels = c("Male", "Female")
)

Screen Shot 2015-08-09 at 1.54.34 AM

# Reshape the wide data into long format: one row per Gender x target score.
# The documented argument names are spelled out so the measured columns are
# selected explicitly rather than by default.
infidelity <- melt(
  infidelityData,
  id.vars = "Gender",
  measure.vars = c("Partner", "Self")
)

Screen Shot 2015-08-09 at 1.55.48 AM

# Rename the three columns of the long-format data.
names(infidelity) <- c("Gender", "Target", "Bullets")

Screen Shot 2015-08-09 at 1.56.23 AM

# Plot 1: mean Bullets per Target, with side-by-side (dodged) bars for each
# Gender and mean_cl_normal error bars aligned to the dodged bars.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar <- ggplot(infidelity, aes(Target, Bullets, fill = Gender))
bar +
  stat_summary(fun.y = mean, geom = "bar", position = "dodge") +
  stat_summary(
    fun.data = mean_cl_normal,
    geom = "errorbar",
    position = position_dodge(width = 0.90),
    width = 0.2
  ) +
  labs(x = "Target", y = "Bullets", fill = "Gender")

Rplot01

# Plot 2: the same means, but with one panel per Gender instead of dodged bars.
# Reuses the `bar` ggplot object created for plot 1; the legend is hidden
# because the facet labels already identify the groups.
# NOTE: `fun.y` is deprecated in newer ggplot2 releases in favour of `fun`.
bar +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
  facet_wrap(~ Gender) +
  labs(x = "Target", y = "Bullets") +
  theme(legend.position = "none")

Rplot02