# Preliminaries ----
# List the objects currently in the workspace, then remove all of them.
ls()
## character(0)
rm(list = ls())
# Set the working directory so the relative data path below resolves.
setwd("~/ComputationalBiology/CompBio")
# Read the comma-separated data file (first row holds the column names);
# text columns are kept as character vectors rather than factors.
Data <- read.table(
  file = "RawData/hbdata.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Compact overview of the data frame: dimensions, column types, first values.
str(object = Data)
## 'data.frame': 172 obs. of 18 variables:
## $ FieldID : chr "Y1L1" "Y1L2" "Y1L3" "Y1L4" ...
## $ Time : chr "T1" "T1" "T1" "T1" ...
## $ Origin : chr "Local" "Local" "Local" "Local" ...
## $ Yard : chr "Yard1" "Yard1" "Yard1" "Yard1" ...
## $ Mass : num NA NA NA NA NA NA NA NA NA NA ...
## $ Nosema : int NA NA NA NA NA NA NA NA NA NA ...
## $ Varroa : int NA NA NA NA NA NA NA NA NA NA ...
## $ Brood : int 4 4 4 4 4 4 4 4 4 4 ...
## $ NosemaPA : int NA NA NA NA NA NA NA NA NA NA ...
## $ VarroaPA : int NA NA NA NA NA NA NA NA NA NA ...
## $ MassDay : int NA NA NA NA NA NA NA NA NA NA ...
## $ NosemaDay : int NA NA NA NA NA NA NA NA NA NA ...
## $ VarroaDay : int NA NA NA NA NA NA NA NA NA NA ...
## $ BroodDay : int -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 ...
## $ MassDate : chr NA NA NA NA ...
## $ NosemaDate: chr NA NA NA NA ...
## $ VarroaDate: chr NA NA NA NA ...
## $ BroodDate : chr "5/30/16" "5/30/16" "5/30/16" "5/30/16" ...
# Count the number of rows for each value of the Origin column.
table(Data[["Origin"]])
##
## California Local
## 79 93
# Print the first six rows of the data frame.
head(x = Data, n = 6L)
## FieldID Time Origin Yard Mass Nosema Varroa Brood NosemaPA VarroaPA
## 1 Y1L1 T1 Local Yard1 NA NA NA 4 NA NA
## 2 Y1L2 T1 Local Yard1 NA NA NA 4 NA NA
## 3 Y1L3 T1 Local Yard1 NA NA NA 4 NA NA
## 4 Y1L4 T1 Local Yard1 NA NA NA 4 NA NA
## 5 Y1L5 T1 Local Yard1 NA NA NA 4 NA NA
## 6 Y1L6 T1 Local Yard1 NA NA NA 4 NA NA
## MassDay NosemaDay VarroaDay BroodDay MassDate NosemaDate VarroaDate
## 1 NA NA NA -4 <NA> <NA> <NA>
## 2 NA NA NA -4 <NA> <NA> <NA>
## 3 NA NA NA -4 <NA> <NA> <NA>
## 4 NA NA NA -4 <NA> <NA> <NA>
## 5 NA NA NA -4 <NA> <NA> <NA>
## 6 NA NA NA -4 <NA> <NA> <NA>
## BroodDate
## 1 5/30/16
## 2 5/30/16
## 3 5/30/16
## 4 5/30/16
## 5 5/30/16
## 6 5/30/16
# Print the last six rows of the data frame.
tail(x = Data, n = 6L)
## FieldID Time Origin Yard Mass Nosema Varroa Brood NosemaPA
## 167 Y1C9 T5 California Yard1 23.2 3100000 0 4 NA
## 168 Y2C1 T5 California Yard2 35.8 3275000 2 5 NA
## 169 Y2C3 T5 California Yard2 61.6 2550000 1 5 NA
## 170 Y2C4 T5 California Yard2 39.2 1300000 4 1 NA
## 171 Y2C7 T5 California Yard2 37.4 2300000 1 5 NA
## 172 Y2C10 T5 California Yard2 76.0 2200000 0 6 NA
## VarroaPA MassDay NosemaDay VarroaDay BroodDay MassDate NosemaDate
## 167 0 67 67 61 61 8/9/16 8/9/16
## 168 1 67 67 61 61 8/9/16 8/9/16
## 169 1 67 67 61 61 8/9/16 8/9/16
## 170 1 67 67 61 61 8/9/16 8/9/16
## 171 1 67 67 61 61 8/9/16 8/9/16
## 172 0 67 67 61 61 8/9/16 8/9/16
## VarroaDate BroodDate
## 167 8/3/16 8/3/16
## 168 8/3/16 8/3/16
## 169 8/3/16 8/3/16
## 170 8/3/16 8/3/16
## 171 8/3/16 8/3/16
## 172 8/3/16 8/3/16
# Per-column summary: quantiles and NA counts for numeric columns,
# length/class/mode for character columns.
summary(object = Data)
## FieldID Time Origin
## Length:172 Length:172 Length:172
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Yard Mass Nosema Varroa
## Length:172 Min. :12.8 Min. : 0 Min. : 0.000
## Class :character 1st Qu.:24.4 1st Qu.: 100000 1st Qu.: 1.000
## Mode :character Median :31.6 Median : 762500 Median : 1.000
## Mean :36.2 Mean : 1492708 Mean : 3.376
## 3rd Qu.:40.6 3rd Qu.: 1943750 3rd Qu.: 4.000
## Max. :91.4 Max. :10075000 Max. :36.000
## NA's :45 NA's :76 NA's :47
## Brood NosemaPA VarroaPA MassDay
## Min. :0.00 Min. :0.0000 Min. :0.000 Min. :11.00
## 1st Qu.:4.00 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.:11.00
## Median :4.00 Median :1.0000 Median :1.000 Median :34.00
## Mean :4.17 Mean :0.9143 Mean :0.816 Mean :37.99
## 3rd Qu.:5.00 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:53.00
## Max. :8.00 Max. :1.0000 Max. :1.000 Max. :67.00
## NA's :1 NA's :102 NA's :47 NA's :40
## NosemaDay VarroaDay BroodDay MassDate
## Min. :13.00 Min. :12.00 Min. :-4.00 Length:172
## 1st Qu.:13.00 1st Qu.:12.00 1st Qu.:12.00 Class :character
## Median :46.00 Median :24.00 Median :24.00 Mode :character
## Mean :38.02 Mean :33.05 Mean :23.17
## 3rd Qu.:67.00 3rd Qu.:47.00 3rd Qu.:40.00
## Max. :67.00 Max. :61.00 Max. :61.00
## NA's :75 NA's :40
## NosemaDate VarroaDate BroodDate
## Length:172 Length:172 Length:172
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
Solutions to the problems:
1)
Find: \s{2,}
Replace: ,
2)
Find: (\w+),\s(\w+),\s(.*)
Replace: \2 \1 (\3)
3a)
Find: [.mp3]{4}\s+
Replace: .mp3\n
3b)
Find: (\d+)\s(.*)([.mp3]{4})
Replace: \2_\1\3
4a)
Find: ([A-Z])(\w*),(\w*),(.*,)(\d+)
Replace: \1_\3,\5
4b)
Find: ([A-Z])(\w*),([\w]{4})(.*,)(.*,)(\d+)
Replace: \1_\3,\6