​
###########################################################################################
# Data Analysis for California Local
# P. Alexander Burnham
# Sept. 19, 2017
###########################################################################################
​
# Preliminaries:
# Clear memory of characters:
rm(list=ls())
​
# Set Working Directory: 
setwd("~/AndreCollaborations/QueenExperimentBurnham")
​
###########################################################################################
# Read in Virus Data:
Virus <- read.table("CompiledLoCalPCRdata.csv", 
                       header=TRUE, 
                       sep = ",", 
                       stringsAsFactors = FALSE) 
​
EcoDat <- read.table("EcoDat.csv", 
                       header=TRUE, 
                       sep = ",", 
                       stringsAsFactors = FALSE) 
​
OldPCR <- read.table("RNAVirus.csv", 
                     header=TRUE, 
                     sep = ",", 
                     stringsAsFactors = FALSE) 
​
​
​
###########################################################################################
# Functions
​
###########################################################################
# function name: PrelimClean (requires dplyr package)
# description: removes unneeded PCR file columns, gblock, NTC and duplicates 
# parameters: 
# data = data frame, 
# returns a DF that is partially cleaned
###########################################################################
​
PrelimClean <- function(data=MigVirus){
  
  # take only columns that we want:
  library(dplyr)
  data <- select(data, Sample.Name, Target.Name, Cq.Mean, Cq.Standard.Deviation, Quantity.Mean, Quantity.Standard.Deviation, Run)
  
  # remove duplicate rows
  data<-data[!duplicated(data), ]
  
  # remove NTC rows from dataframe:
  data<-data[!(data$Sample.Name=="No Sample"),]
  
  # remove Gblock rows from dataframe:
  data<-data[!(data$Sample.Name=="G-Block"),]
  
  return(data)
}
​
###########################################################################
# END OF FUNCITON
###########################################################################
​
​
​
###########################################################################
# function name: VirusNorm
# description: normalizes virus data with dilutions and constants 
# parameters: number of bees and a data frame 
# returns a dataframe with normalized virus values 
###########################################################################
​
VirusNorm <- function(number_bees = 50, data=data){
  
  # set constant values for genome copies per bee calculations:
  crude_extr <- 100
  eluteRNA <- 50
  GITCperbee <- 200
  cDNA_eff <- 0.1
  rxn_vol <- 3
  
  #create column for total_extr_vol
  total_extr_vol <- (GITCperbee * number_bees)
  
  # create column for genome copies per bee:
  data$genomeCopy <- ((((((data$Quantity.Mean / cDNA_eff) / rxn_vol) * data$dil.factor) * eluteRNA) / crude_extr) * total_extr_vol) / number_bees
  
  # norm_genomeCopy is 0 if NA
  data$genomeCopy[is.na(data$genomeCopy)] <- 0
  
  return(data)
  
}
​
###########################################################################
# END OF FUNCITON
###########################################################################
​
​
​
​
​
​
###########################################################################
# function name: actinNormal
# description: normalizes virus data with actin values 
# parameters: a data frame with actin values
# returns a dataframe with normalized virus values 
###########################################################################
​
actinNormal <- function(data=MigVirus){
  
  # pull only actin values out of dataframe
  ActinOnly <- data[which(data$Target.Name=="ACTIN"),]
  
  # create DF of ACTIN genome copies and lab ID:
  ActinDF <- data.frame(ActinOnly$Sample.Name, ActinOnly$Run, ActinOnly$genomeCopy)
  colnames(ActinDF) <- c("Sample.Name", "Run", "ACT_genomeCopy")
  
  # merge ACTIN dataframe with main dataframe:
  #Need rownames and all.x=TRUE because data frames are different sizes.
  data <- merge(data, ActinDF, by=c("Sample.Name", "Run"), all.x=TRUE)
  
  # find mean of all ACTIN values:
  ActinMean <- mean(ActinOnly$genomeCopy, na.rm = TRUE)
  
  # create column for normalized genome copies per bee:
  data$NormGenomeCopy <- (data$genomeCopy/data$ACT_genomeCopy)*ActinMean
  
  return(data)
}
​
​
###########################################################################
# END OF FUNCITON
###########################################################################
​
​
​
​
​
​
###########################################################################
# function name: CT_Threash
# description: creates binary data and makes genome copy 0 if below Ct threash
# parameters: dataframe
###########################################################################
​
CT_Threash <- function(data=data){
  
  splitDF <- split(data, data$Target.Name)
  
  # make DWV norm_genome_copbee 0 if Ct value is > 32.918
  splitDF$DWV$NormGenomeCopy[which(splitDF$DWV$Cq.Mean > 32.918)] <- 0
  splitDF$BQCV$NormGenomeCopy[which(splitDF$BQCV$Cq.Mean > 32.525)] <- 0
  splitDF$IAPV$NormGenomeCopy[which(splitDF$IAPV$Cq.Mean > 30.796)] <- 0
  
  splitDF$DWV$virusBINY  <- ifelse(splitDF$DWV$Cq.Mean > 32.918, 0, 1)
  splitDF$BQCV$virusBINY  <- ifelse(splitDF$BQCV$Cq.Mean > 32.525, 0, 1)
  splitDF$IAPV$virusBINY  <- ifelse(splitDF$IAPV$Cq.Mean > 30.796, 0, 1)
  
  # merge split dataframe back into "BombSurv" dataframe:
  data <- rbind(splitDF$DWV, splitDF$BQCV, splitDF$IAPV)
  
  # norm_genomeCopy is 0 if NA
  data$virusBINY[is.na(data$virusBINY)] <- 0
  
  return(data)
  
}
​
​
​
###############################################################################################
################################### PROGRAM BODY ##############################################
###############################################################################################
​
library(plyr)
library(ggplot2)
library(dplyr)
library(lme4)
library(car)
​
#--------------------------------------------------------------------------
# data cleaning
​
# preliminary data cleaning
CleanVirus <- PrelimClean(data=Virus) 
​
# merge with eco data
CleanVirus <- merge(CleanVirus, EcoDat, by="Sample.Name")
​
# use dilution factors to calcualte normalized virus load
CleanVirus <- VirusNorm(data=CleanVirus, number_bees = 50)
​
# use actin to normalize normalized viral load
CleanVirus <- actinNormal(data=CleanVirus)
​
# remove actin from data frame
CleanVirus <- CleanVirus[!(CleanVirus$Target.Name=="ACTIN"),]
​
# adds virus binary data and makes norm genome copy 0 if above threashold CT
CleanVirus <- CT_Threash(data=CleanVirus)
​
​
# merge in old PCR data
OldPCR <- select(OldPCR, ID, Target.Name, Time, Band_Intensity)
CleanVirus <- merge(CleanVirus, OldPCR, by=c("ID", "Target.Name", "Time"))
​
# log transform virus data:
CleanVirus$logVirus <- log(CleanVirus$NormGenomeCopy + 1)
​
​
​
​
​
​
​
​
# summary stats for plotting purposes:
VirusSummary <- ddply(CleanVirus, c("Origin", "Target.Name", "Time"), summarise, 
                      n = length(logVirus),
                      mean = mean(logVirus, na.rm = TRUE),
                      sd = sd(logVirus, na.rm = TRUE),
                      se = sd / sqrt(n))
​
# split data frame by virus type:
splitVir <- split(VirusSummary, VirusSummary$Target.Name)
​
​
​
colors <- c("white", "gray32")
​
p1 <- ggplot(splitVir$DWV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="DWV", y = "Viral Load log(genome copies/bee)") + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 25)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(.5, .9), panel.border = element_blank(),axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
​
#------------------------------------------------------------------------
#BQCV
​
p2 <- ggplot(splitVir$BQCV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="BQCV", y = NULL) + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 25)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(3, 3), axis.text.y=element_blank(), axis.ticks.y=element_blank(),  panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line.y = element_blank()) 
​
#------------------------------------------------------------------------
#IAPV
​
p3 <- ggplot(splitVir$IAPV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="IAPV", y = NULL) + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 25)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(3, 3),axis.text.y=element_blank(),axis.ticks.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
​
# merge figures into 1 side by side figure:
grid.newpage()
grid.draw(cbind(ggplotGrob(p1), ggplotGrob(p2), ggplotGrob(p3), size = "last"))
​
​
​
#------------------------------------------------------------------------
# IAPV - use repeated measures anova to look at differences between local 
​
# split data frame by virus type:
splitDat <- split(CleanVirus, CleanVirus$Target.Name)
​
# and california through time:  
aov.out <- aov(logVirus ~ Origin * Time + Error(ID), data=splitDat$IAPV)
​
# look at summary of model: SIGNIFICANT
summary(aov.out)
​
​
# summary stats for plotting purposes:
VirusSummary <- ddply(CleanVirus, c("Origin", "Target.Name", "Time"), summarise, 
                      n = length(virusBINY),
                      mean = mean(virusBINY, na.rm = TRUE),
                      sd = sd(virusBINY, na.rm = TRUE),
                      se = sd / sqrt(n))
​
# split data frame by virus type:
splitVir <- split(VirusSummary, VirusSummary$Target.Name)
​
colors <- c("white", "gray32")
​
​
​
p1 <- ggplot(splitVir$DWV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="DWV", y = "Virus Prevalence") + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 1)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(3,3), panel.border = element_blank(),axis.line.y = element_line(colour = 'black', size=0.5, linetype='solid'), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
​
​
​
#------------------------------------------------------------------------
#BQCV
p2 <- ggplot(splitVir$BQCV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="BQCV", y = NULL) + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 1)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(3, 3), axis.text.y=element_blank(), axis.ticks.y=element_blank(),  panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line.y = element_blank()) 
​
​
​
#------------------------------------------------------------------------
#IAPV
p3 <- ggplot(splitVir$IAPV, aes(x=Time, y=mean, fill=Origin)) + 
  geom_bar(stat="identity", col="black",  
           position=position_dodge()) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), 
                width=.4,
                position=position_dodge(.9)) + labs(x="IAPV", y = NULL) + theme_classic(base_size = 17) + coord_cartesian(ylim = c(0, 1)) + scale_fill_manual(values=colors, name="Queen Origin", labels=c("California", "Local")) + theme(legend.position=c(.5, .9),axis.text.y=element_blank(),axis.ticks.y=element_blank(), axis.line.y = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
​
# merge figures into 1 side by side figure:
grid.newpage()
grid.draw(cbind(ggplotGrob(p1), ggplotGrob(p2), ggplotGrob(p3), size = "last"))
​
​
CleanVirus$Band_Intensity
​
​
​
​
​
​
​
```
​
​