Set working directory and Read in data
# set my working directory
setwd("~/AndreCollaborations/QueenExperimentBurnham")
# read in my data
QueenDF <- read.table("2016QueensHam.csv", header=TRUE, sep = ",", stringsAsFactors = FALSE)
###########################################################################
# function name: RepANOVA
# description:conducts a repeated measures ANOVA on a data set and plots
# parameters:
# data = data frame,
# column = name of continuous variable column in quotations
###########################################################################
RepANOVA <- function(data, column){
data$x <- data[,column]
# repeated measures on continuous variable
aov.out <- aov(x~Origin * Time + Error(FieldID), data=data)
#PLOT:
library(plyr)
sum <- ddply(data, c("Origin", "Time"), summarise,
n = length(x),
mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE),
se = sd / sqrt(n))
sum <- sum[complete.cases(sum),]
# split data frame by origin
sumsplit <- split(sum, sum$Origin)
# create matrix of values and vector of time
Times <- c(1:length(sumsplit$Local$mean))
Local <- sumsplit$Local$mean
California <- sumsplit$California$mean
mat <- as.matrix(cbind(Local, California))
# plot matrix
matplot(x=Times, y=mat,
type="l",
xlab="Time",
ylab=as.character(column),
lwd=2,
lty=1,
font.lab=2,
bty="l",
col=2:5)
grid(col="gray")
# create legend for plot
legend(x=1,y=max(Local),
c("Local",
"California"),
pch=19,
col=2:5,
bty="n",
bg="white")
# return matrix and stats for ANOVA
return(list(mat, aov.out, summary(aov.out)))
}
###########################################################################
# END OF FUNCITON
###########################################################################
# testing function on 4 variables in my data set:
RepANOVA(data=QueenDF, column="Brood")
## [[1]]
## Local California
## [1,] 3.900000 3.900000
## [2,] 4.650000 4.000000
## [3,] 2.842105 3.375000
## [4,] 5.235294 3.428571
## [5,] 6.000000 4.555556
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 4.169591
##
## Stratum 1: FieldID
##
## Terms:
## Origin Time Origin:Time Residuals
## Sum of Squares 18.78369 25.45107 1.60155 102.16223
## Deg. of Freedom 1 1 1 36
##
## Residual standard error: 1.684589
## Estimated effects may be unbalanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 11.40339 19.93090 206.74904
## Deg. of Freedom 1 1 129
##
## Residual standard error: 1.26598
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 18.78 18.784 6.619 0.01436 *
## Time 1 25.45 25.451 8.968 0.00494 **
## Origin:Time 1 1.60 1.602 0.564 0.45739
## Residuals 36 102.16 2.838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 11.40 11.403 7.115 0.008624 **
## Origin:Time 1 19.93 19.931 12.436 0.000584 ***
## Residuals 129 206.75 1.603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RepANOVA(data=QueenDF, column="Mass")
## [[1]]
## Local California
## [1,] 25.21000 23.43000
## [2,] 38.14737 27.86250
## [3,] 43.77647 39.93333
## [4,] 57.87059 40.68889
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 36.19685
##
## Stratum 1: FieldID
##
## Terms:
## Origin Time Origin:Time Residuals
## Sum of Squares 3189.240 2537.774 7.600 14134.736
## Deg. of Freedom 1 1 1 36
##
## Residual standard error: 19.81493
## Estimated effects may be unbalanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 9481.922 831.916 6453.132
## Deg. of Freedom 1 1 85
##
## Residual standard error: 8.713162
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 3189 3189 8.123 0.00719 **
## Time 1 2538 2538 6.463 0.01546 *
## Origin:Time 1 8 8 0.019 0.89012
## Residuals 36 14135 393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 9482 9482 124.89 < 2e-16 ***
## Origin:Time 1 832 832 10.96 0.00137 **
## Residuals 85 6453 76
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RepANOVA(data=QueenDF, column="Varroa")
## [[1]]
## Local California
## [1,] 7.4000000 9.5000000
## [2,] 0.8947368 0.8666667
## [3,] 1.1176471 1.4000000
## [4,] 1.5294118 1.5555556
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 3.376
##
## Stratum 1: FieldID
##
## Terms:
## Origin Time Origin:Time Residuals
## Sum of Squares 43.7453 1.1257 14.5706 466.4698
## Deg. of Freedom 1 1 1 34
##
## Residual standard error: 3.704011
## Estimated effects may be unbalanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 855.6476 100.2137 1941.5554
## Deg. of Freedom 1 1 85
##
## Residual standard error: 4.779313
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 43.7 43.75 3.189 0.0831 .
## Time 1 1.1 1.13 0.082 0.7763
## Origin:Time 1 14.6 14.57 1.062 0.3100
## Residuals 34 466.5 13.72
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 855.6 855.6 37.460 2.77e-08 ***
## Origin:Time 1 100.2 100.2 4.387 0.0392 *
## Residuals 85 1941.6 22.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RepANOVA(data=QueenDF, column="Nosema")
## [[1]]
## Local California
## [1,] 1665000.0 1268421
## [2,] 851470.6 3292857
## [3,] 325000.0 2200000
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 1492708
##
## Stratum 1: FieldID
##
## Terms:
## Origin Time Origin:Time Residuals
## Sum of Squares 3.156111e+13 2.589290e+12 9.222707e+11 1.385609e+14
## Deg. of Freedom 1 1 1 35
##
## Residual standard error: 1989694
## Estimated effects may be unbalanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 5.619501e+10 4.933546e+13 1.795622e+14
## Deg. of Freedom 1 1 55
##
## Residual standard error: 1806867
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 3.156e+13 3.156e+13 7.972 0.00779 **
## Time 1 2.589e+12 2.589e+12 0.654 0.42413
## Origin:Time 1 9.223e+11 9.223e+11 0.233 0.63234
## Residuals 35 1.386e+14 3.959e+12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 5.620e+10 5.620e+10 0.017 0.896098
## Origin:Time 1 4.934e+13 4.934e+13 15.111 0.000275 ***
## Residuals 55 1.796e+14 3.265e+12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# create DF with factor variables and only Brood
PracDF <- QueenDF[,c("Brood", "FieldID", "Yard", "Origin", "Time")]
# subset by factor
Cal <- subset(PracDF, Origin=="California")
Loc <- subset(PracDF, Origin=="Local")
# Split by Time
calSplit <- split(Cal, Cal$Time)
locSplit <- split(Loc, Loc$Time)
# need 10 different distributions (for each time point for each group)
# (seeing what actual data look like)
# T1
hist(calSplit$"1"$Brood, breaks = c(2:5))
hist(locSplit$"1"$Brood, breaks = c(2:5))
# T2
hist(calSplit$"2"$Brood)
hist(locSplit$"2"$Brood)
# T3
hist(calSplit$"3"$Brood)
hist(locSplit$"3"$Brood)
# T4
hist(calSplit$"4"$Brood)
hist(locSplit$"4"$Brood)
# T5
hist(calSplit$"5"$Brood)
hist(locSplit$"5"$Brood)
# summary stats for distributions that are approximatly normal (mean followd by sd)
# T2
list(mean(calSplit$"2"$Brood, na.rm=TRUE), sd(calSplit$"2"$Brood, na.rm = TRUE))
## [[1]]
## [1] 4
##
## [[2]]
## [1] 1.054093
list(mean(locSplit$"2"$Brood, na.rm=TRUE), sd(locSplit$"2"$Brood, na.rm = TRUE))
## [[1]]
## [1] 4.65
##
## [[2]]
## [1] 0.875094
# T3
list(mean(calSplit$"3"$Brood, na.rm=TRUE), sd(calSplit$"3"$Brood, na.rm = TRUE))
## [[1]]
## [1] 3.375
##
## [[2]]
## [1] 1.892969
list(mean(locSplit$"3"$Brood, na.rm=TRUE), sd(locSplit$"3"$Brood, na.rm = TRUE))
## [[1]]
## [1] 2.842105
##
## [[2]]
## [1] 1.118688
# T4
list(mean(calSplit$"4"$Brood, na.rm=TRUE), sd(calSplit$"4"$Brood, na.rm = TRUE))
## [[1]]
## [1] 3.428571
##
## [[2]]
## [1] 1.910066
list(mean(locSplit$"4"$Brood, na.rm=TRUE), sd(locSplit$"4"$Brood, na.rm = TRUE))
## [[1]]
## [1] 5.235294
##
## [[2]]
## [1] 1.521899
# T5
list(mean(calSplit$"5"$Brood, na.rm=TRUE), sd(calSplit$"5"$Brood, na.rm = TRUE))
## [[1]]
## [1] 4.555556
##
## [[2]]
## [1] 1.424001
list(mean(locSplit$"5"$Brood, na.rm=TRUE), sd(locSplit$"5"$Brood, na.rm = TRUE))
## [[1]]
## [1] 6
##
## [[2]]
## [1] 1.457738
#######################################################################################
# function name: multiDist
# description: creates data set based on brood data with multiple calls to uniform and
# the normal distribution for differnt time poitns (diff parameters)
# parameters:
# n = sample size for each each group (local and california) (default=20 same as my data)
#######################################################################################
multiDist <- function(n=20){
# creating uniform dist for time 1 for each groups
T1cal <- runif(n=n,min=4,max=4)
T1loc <- runif(n=n,min=4,max=4)
#-----------------------------------------------------------
# creating normal ditributions for times 2 - 5 for each group
T2cal <- rnorm(n=n, mean = 4, sd = 1.04)
T2loc <- rnorm(n=n, mean = 4.65, sd = 0.88)
T3cal <- rnorm(n=n, mean = 3.375, sd = 1.89)
T3loc <- rnorm(n=n, mean = 2.84, sd = 1.1)
T4cal <- rnorm(n=n, mean = 3.4, sd = 1.9)
T4loc <- rnorm(n=n, mean = 5.2, sd = 1.5)
T5cal <- rnorm(n=n, mean = 4.6, sd = 1.42)
T5loc <- rnorm(n=n, mean = 6, sd = 1.46)
#-----------------------------------------------------------
# prepare vectors of necessary data for RepANOVA()
Time <- as.factor(c(rep(1, n * 2),rep(2, n * 2),rep(3, n * 2),rep(4, n * 2),rep(5, n * 2)))
Brood <- c(T1cal, T1loc, T2cal, T2loc, T3cal, T3loc, T4cal, T4loc, T5cal, T5loc)
Origin <- rep(c(rep("California", n), rep("Local", n)),5)
FieldID <- as.factor(rep(c(1:(2*n)),5))
# create data frame
DF <- data.frame(FieldID, Time, Brood, Origin)
return(DF)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# actual data
RepANOVA(data=QueenDF, column="Brood")
## [[1]]
## Local California
## [1,] 3.900000 3.900000
## [2,] 4.650000 4.000000
## [3,] 2.842105 3.375000
## [4,] 5.235294 3.428571
## [5,] 6.000000 4.555556
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 4.169591
##
## Stratum 1: FieldID
##
## Terms:
## Origin Time Origin:Time Residuals
## Sum of Squares 18.78369 25.45107 1.60155 102.16223
## Deg. of Freedom 1 1 1 36
##
## Residual standard error: 1.684589
## Estimated effects may be unbalanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 11.40339 19.93090 206.74904
## Deg. of Freedom 1 1 129
##
## Residual standard error: 1.26598
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 18.78 18.784 6.619 0.01436 *
## Time 1 25.45 25.451 8.968 0.00494 **
## Origin:Time 1 1.60 1.602 0.564 0.45739
## Residuals 36 102.16 2.838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 11.40 11.403 7.115 0.008624 **
## Origin:Time 1 19.93 19.931 12.436 0.000584 ***
## Residuals 129 206.75 1.603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# fake data with my sample size (n=20)
RepANOVA(data=multiDist(n=20), column="Brood")
## [[1]]
## Local California
## [1,] 4.000000 4.000000
## [2,] 4.668601 4.330724
## [3,] 2.632493 3.457106
## [4,] 5.415585 3.049757
## [5,] 6.330766 5.075593
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 4.296063
##
## Stratum 1: FieldID
##
## Terms:
## Origin Residuals
## Sum of Squares 19.64724 46.29907
## Deg. of Freedom 1 38
##
## Residual standard error: 1.10381
## 4 out of 5 effects not estimable
## Estimated effects are balanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 147.15053 60.02026 260.56183
## Deg. of Freedom 4 4 152
##
## Residual standard error: 1.309283
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 19.65 19.647 16.12 0.00027 ***
## Residuals 38 46.30 1.218
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 4 147.15 36.79 21.460 4.74e-14 ***
## Origin:Time 4 60.02 15.01 8.753 2.19e-06 ***
## Residuals 152 260.56 1.71
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# fake data with smaller sample size (n=5)
RepANOVA(data=multiDist(n=5), column="Brood")
## [[1]]
## Local California
## [1,] 4.000000 4.000000
## [2,] 4.790075 3.743744
## [3,] 2.757697 1.704736
## [4,] 4.930416 3.705702
## [5,] 4.474060 4.472225
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 3.857865
##
## Stratum 1: FieldID
##
## Terms:
## Origin Residuals
## Sum of Squares 5.530609 10.925234
## Deg. of Freedom 1 8
##
## Residual standard error: 1.168612
## 4 out of 5 effects not estimable
## Estimated effects are balanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 34.23851 3.72805 56.39383
## Deg. of Freedom 4 4 32
##
## Residual standard error: 1.327519
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 5.531 5.531 4.05 0.079 .
## Residuals 8 10.925 1.366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 4 34.24 8.560 4.857 0.00356 **
## Origin:Time 4 3.73 0.932 0.529 0.71532
## Residuals 32 56.39 1.762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#######################################################################################
# function name: RanDatBling
# description: creates data set based on brood data with multiple calls to uniform and
# the normal distribution for differnt time poitns (diff parameters) that is ideal!!!!
# parameters:
# n = sample size for each each group (local and california) (default=20 same as my data)
#######################################################################################
RanDatBling <- function(n=20){
# creating uniform dist for time 1 for each groups
T1cal <- runif(n=n,min=4,max=4)
T1loc <- runif(n=n,min=4,max=4)
#-----------------------------------------------------------
# creating normal ditributions for times 2 - 5 for each group
T2cal <- rnorm(n=n, mean = 4, sd = 1.04)
T2loc <- rnorm(n=n, mean = 5, sd = 0.88)
T3cal <- rnorm(n=n, mean = 3, sd = 1.1)
T3loc <- rnorm(n=n, mean = 6, sd = 1.1)
T4cal <- rnorm(n=n, mean = 2, sd = 1.9)
T4loc <- rnorm(n=n, mean = 7, sd = 1.5)
T5cal <- rnorm(n=n, mean = 1, sd = 1.42)
T5loc <- rnorm(n=n, mean = 8, sd = 1.46)
#-----------------------------------------------------------
# prepare vectors of necessary data for RepANOVA()
Time <- as.factor(c(rep(1, n * 2),rep(2, n * 2),rep(3, n * 2),rep(4, n * 2),rep(5, n * 2)))
Brood <- c(T1cal, T1loc, T2cal, T2loc, T3cal, T3loc, T4cal, T4loc, T5cal, T5loc)
Origin <- rep(c(rep("California", n), rep("Local", n)),5)
FieldID <- as.factor(rep(c(1:(2*n)),5))
# create data frame
DF <- data.frame(FieldID, Time, Brood, Origin)
return(DF)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# fake data with my sample size (n=20)
RepANOVA(data=RanDatBling(n=20), column="Brood")
## [[1]]
## Local California
## [1,] 4.000000 4.000000
## [2,] 4.521693 3.858958
## [3,] 5.920371 3.080946
## [4,] 7.583831 2.054104
## [5,] 8.138882 1.365143
##
## [[2]]
##
## Call:
## aov(formula = x ~ Origin * Time + Error(FieldID), data = data)
##
## Grand Mean: 4.452393
##
## Stratum 1: FieldID
##
## Terms:
## Origin Residuals
## Sum of Squares 499.6357 46.4621
## Deg. of Freedom 1 38
##
## Residual standard error: 1.105752
## 4 out of 5 effects not estimable
## Estimated effects are balanced
##
## Stratum 2: Within
##
## Terms:
## Time Origin:Time Residuals
## Sum of Squares 19.9927 349.9941 262.3854
## Deg. of Freedom 4 4 152
##
## Residual standard error: 1.313857
## Estimated effects may be unbalanced
##
## [[3]]
##
## Error: FieldID
## Df Sum Sq Mean Sq F value Pr(>F)
## Origin 1 499.6 499.6 408.6 <2e-16 ***
## Residuals 38 46.5 1.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 4 20.0 5.00 2.895 0.0241 *
## Origin:Time 4 350.0 87.50 50.688 <2e-16 ***
## Residuals 152 262.4 1.73
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1