P. Alexander Burnham
xxxxxxxxxx
#########################################################################################
# Functions I Wrote and Use on a Daily Basis
# P. Alexander Burnham
# September 18, 2017
########################################################################################
# BEGIN FUNCTIONS ########################################################################################
########################################################################################
###########################################################################
# function name: RepANOVA
# description:conducts a repeated measures ANOVA on a data set and plots
# parameters:
# data = data frame, in this case requires columns for SamplingEvent, ID
# and Treatment plus a column for yvar
# column = name of continuous variable column in quotations
###########################################################################
RepANOVA <- function(data, column, key=10){
data$x <- data[,column]
# repeated measures on continuous variable
aov.out <- aov(x~Treatment * SamplingEvent + Error(ID), data=data)
#PLOT:
library(plyr)
sum <- ddply(data, c("Treatment", "SamplingEvent"), summarise,
n = length(x),
mean = mean(x, na.rm = TRUE),
sd = sd(x, na.rm = TRUE),
se = sd / sqrt(n))
# split data frame by origin
sumsplit <- split(sum, sum$Treatment)
# create matrix of values and vector of time
SamplingEvent <- c(1:3)
Migratory <- sumsplit$Migratory$mean
Stationary <- sumsplit$Stationary$mean
Exposed <- sumsplit$Exposed$mean
mat <- as.matrix(cbind(Migratory, Stationary, Exposed))
# plot matrix
matplot(x=SamplingEvent, y=mat,
type="l",
xlab="Sampling Event",
ylab=as.character(column),
lwd=2,
lty=1,
font.lab=2,
bty="l",
col=2:6)
grid(col="gray")
# create legend for plot
legend(x=1,y=max(Migratory),
c("Migratory",
"Stationary",
"Exposed"),
pch=19,
col=2:6,
bty="n",
bg="white")
# return matrix and stats for ANOVA
return(list(mat, aov.out, summary(aov.out)))
}
###########################################################################
# END OF FUNCITON
###########################################################################
###########################################################################
# function name: PrelimClean (requires dplyr package)
# description: removes unneeded PCR file columns, gblock, NTC and duplicates
# parameters:
# data = data frame,
# returns a DF that is partially cleaned
###########################################################################
PrelimClean <- function(data=MigVirus){
# take only columns that we want:
library(dplyr)
data <- select(data, Sample.Name, Target.Name, Cq.Mean, Cq.Standard.Deviation, Quantity.Mean, Quantity.Standard.Deviation, Run)
# remove duplicate rows
data<-data[!duplicated(data), ]
# remove NTC rows from dataframe:
data<-data[!(data$Sample.Name=="No Sample"),]
# remove Gblock rows from dataframe:
data<-data[!(data$Sample.Name=="G-Block"),]
return(data)
}
###########################################################################
# END OF FUNCITON
###########################################################################
###########################################################################
# function name: VirusNorm
# description: normalizes virus data with dilutions and constants
# parameters: number of bees and a data frame
# returns a dataframe with normalized virus values
###########################################################################
VirusNorm <- function(number_bees = 50, data=data){
# set constant values for genome copies per bee calculations:
crude_extr <- 100
eluteRNA <- 50
GITCperbee <- 200
cDNA_eff <- 0.1
rxn_vol <- 3
#create column for total_extr_vol
total_extr_vol <- (GITCperbee * number_bees)
# create column for genome copies per bee:
data$genomeCopy <- ((((((data$Quantity.Mean / cDNA_eff) / rxn_vol) * data$dil.factor) * eluteRNA) / crude_extr) * total_extr_vol) / number_bees
# norm_genomeCopy is 0 if NA
data$genomeCopy[is.na(data$genomeCopy)] <- 0
return(data)
}
###########################################################################
# END OF FUNCITON
###########################################################################
###########################################################################
# function name: actinNormal
# description: normalizes virus data with actin values
# parameters: a data frame with actin values
# returns a dataframe with normalized virus values
###########################################################################
actinNormal <- function(data=MigVirus){
# pull only actin values out of dataframe
ActinOnly <- data[which(data$Target.Name=="ACTIN"),]
# create DF of ACTIN genome copies and lab ID:
ActinDF <- data.frame(ActinOnly$Sample.Name, ActinOnly$Run, ActinOnly$genomeCopy)
colnames(ActinDF) <- c("Sample.Name", "Run", "ACT_genomeCopy")
# merge ACTIN dataframe with main dataframe:
#Need rownames and all.x=TRUE because data frames are different sizes.
data <- merge(data, ActinDF, by=c("Sample.Name", "Run"), all.x=TRUE)
# find mean of all ACTIN values:
ActinMean <- mean(ActinOnly$genomeCopy, na.rm = TRUE)
# create column for normalized genome copies per bee:
data$NormGenomeCopy <- (data$genomeCopy/data$ACT_genomeCopy)*ActinMean
return(data)
}
###########################################################################
# END OF FUNCITON
###########################################################################
###########################################################################
# function name: CT_Threash
# description: creates binary data and makes genome copy 0 if below Ct threash
# parameters: dataframe
###########################################################################
CT_Threash <- function(data=data){
splitDF <- split(data, data$Target.Name)
# make DWV norm_genome_copbee 0 if Ct value is > 32.918
splitDF$DWV$NormGenomeCopy[which(splitDF$DWV$Cq.Mean > 32.918)] <- 0
splitDF$BQCV$NormGenomeCopy[which(splitDF$BQCV$Cq.Mean > 32.525)] <- 0
splitDF$IAPV$NormGenomeCopy[which(splitDF$IAPV$Cq.Mean > 30.796)] <- 0
splitDF$DWV$virusBINY <- ifelse(splitDF$DWV$Cq.Mean > 32.918, 0, 1)
splitDF$BQCV$virusBINY <- ifelse(splitDF$BQCV$Cq.Mean > 32.525, 0, 1)
splitDF$IAPV$virusBINY <- ifelse(splitDF$IAPV$Cq.Mean > 30.796, 0, 1)
# merge split dataframe back into "BombSurv" dataframe:
data <- rbind(splitDF$DWV, splitDF$BQCV, splitDF$IAPV)
# norm_genomeCopy is 0 if NA
data$virusBINY[is.na(data$virusBINY)] <- 0
return(data)
}
###########################################################################
# function name: VirusMerger2000
# description: merges binary and viral load data for 3 viruses to data frame
# parameters: needs two data frames (data1 is the main one)
###########################################################################
VirusMerger2000 <- function(data1=DF2, data2=DF2){
MigVirusSplit <- split(data1, data1$target_name)
BQCV <- select(MigVirusSplit$BQCV, sample_name, NormGenomeCopy, virusBINY)
DWV <- select(MigVirusSplit$DWV, sample_name, NormGenomeCopy, virusBINY)
IAPV <- select(MigVirusSplit$IAPV, sample_name, NormGenomeCopy, virusBINY)
sample_name <- BQCV$sample_name
BQCVload <- BQCV$NormGenomeCopy
BQCVbinary <- BQCV$virusBINY
DWVload <- DWV$NormGenomeCopy
DWVbinary <- DWV$virusBINY
IAPVload <- IAPV$NormGenomeCopy
IAPVbinary <- IAPV$virusBINY
DF <- data.frame(sample_name, BQCVload, DWVload, IAPVload, DWVbinary, IAPVbinary, BQCVbinary)
data <- merge(data2, DF, by = c("sample_name"), all.x = T)
return(data)
}
###########################################################################
# END OF FUNCITON
###########################################################################