# Dataset: http://openpsych.net/datasets/GSSsubset.7z (the script reads GSSsubset.sav from the working directory)
library(foreign) #needed to load SPSS
library(plyr) #for easy recode
library(Hmisc) #for rcorr
library(psych) #for stuff
library(gplots) #for plotmeans
# Load the GSS subset from the SPSS file and convert it to a data frame.
data <- read.spss("GSSsubset.sav")
DF <- as.data.frame(data)

# Fix wordsum ----
# -1 and 99 are treated as missing-value codes; recode both to NA in one step.
# %in% never yields NA, so values that are already missing are left untouched.
DF$wordsum[DF$wordsum %in% c(-1, 99)] <- NA

# Sex and race ----
describeBy(DF$wordsum, DF$sex)  # descriptive stats of wordsum by sex
describeBy(DF$wordsum, DF$race) # descriptive stats of wordsum by race

# Yearly changes ----
hist(DF$year) # distribution of observations over survey years

# Correlation between year and wordsum, computed two ways.
rcorr(DF$year, DF$wordsum)
cor(DF$year, DF$wordsum, use = "pairwise.complete.obs")

# Mean wordsum per year, ignoring missing values.
year.mean <- by(DF$wordsum, INDICES = DF$year, FUN = mean, na.rm = TRUE)
year.mean.matrix <- as.matrix(year.mean) # one-column matrix, one row per year

# Plot the yearly means against the actual year. Plotting the matrix alone
# would use the row index on the x-axis; the years are carried as the names
# of the by() result.
plot(
  as.numeric(names(year.mean)),
  year.mean.matrix[, 1],
  xlab = "year",
  ylab = "mean wordsum"
)

# Much easier way of plotting means by year; sample-size labels are turned
# off. No clear Flynn effect.
plotmeans(formula = wordsum ~ year, data = DF, n.label = FALSE)