# data
xVar <- 1:10
yVar <- runif(10)
dataFrame <- data.frame(xVar,yVar)
# model
regModel <- lm(yVar~xVar,data=dataFrame)
# model output
print(regModel)
##
## Call:
## lm(formula = yVar ~ xVar, data = dataFrame)
##
## Coefficients:
## (Intercept) xVar
## 0.709479 -0.003963
print(summary(regModel))
##
## Call:
## lm(formula = yVar ~ xVar, data = dataFrame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28713 -0.07299 0.01080 0.07893 0.26695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.709479 0.113304 6.262 0.000243 ***
## xVar -0.003963 0.018261 -0.217 0.833615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1659 on 8 degrees of freedom
## Multiple R-squared: 0.005853, Adjusted R-squared: -0.1184
## F-statistic: 0.0471 on 1 and 8 DF, p-value: 0.8336
# plot
plot(y=dataFrame$yVar,x=dataFrame$xVar,pch=21,bg="lightblue",cex=2)
abline(regModel)
# data
xVar <- as.factor(rep(c("Control","Heated","Cooled"),each=5))
yVar <- c(rgamma(10,shape=5,scale=5),rgamma(5,shape=5,scale=10))
dataFrame <- data.frame(xVar,yVar)
# model
anovaModel <- aov(yVar~xVar,data=dataFrame)
# model output
print(anovaModel)
## Call:
## aov(formula = yVar ~ xVar, data = dataFrame)
##
## Terms:
## xVar Residuals
## Sum of Squares 2981.435 3202.527
## Deg. of Freedom 2 12
##
## Residual standard error: 16.33638
## Estimated effects may be unbalanced
summary(anovaModel)
## Df Sum Sq Mean Sq F value Pr(>F)
## xVar 2 2981 1490.7 5.586 0.0193 *
## Residuals 12 3203 266.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot
boxplot(yVar~xVar,data=dataFrame,col=c("grey","thistle","orchid"))
# data
vec1 <- c(50,66,22)
vec2 <- c(120,22,30)
dataMatrix <- rbind(vec1,vec2)
rownames(dataMatrix) <- c("Cold","Warm")
colnames(dataMatrix) <-c("Aphaenogaster",
"Camponotus",
"Crematogaster")
# model + model output
print(chisq.test(dataMatrix))
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 48.914, df = 2, p-value = 2.391e-11
# plot
mosaicplot(x=dataMatrix,
col=c("goldenrod","grey","black"),
shade=FALSE)
barplot(height=dataMatrix,
beside=TRUE,
col=c("cornflowerblue","tomato"))
# data
xVar <- rgamma(n=20,shape=5,scale=5)
yVar <- rbinom(n=20,size=1,p=0.5)
dataFrame <- data.frame(xVar,yVar)
# model
logRegMod <- glm(yVar ~ xVar,
data=dataFrame,
family=binomial(link="logit"))
# model output
print(logRegMod)
##
## Call: glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Coefficients:
## (Intercept) xVar
## 1.70227 -0.06478
##
## Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
## Null Deviance: 27.53
## Residual Deviance: 25.94 AIC: 29.94
summary(logRegMod)
##
## Call:
## glm(formula = yVar ~ xVar, family = binomial(link = "logit"),
## data = dataFrame)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4925 -1.1161 0.7621 0.9819 1.8633
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.70227 1.35767 1.254 0.210
## xVar -0.06478 0.05540 -1.169 0.242
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.526 on 19 degrees of freedom
## Residual deviance: 25.936 on 18 degrees of freedom
## AIC: 29.936
##
## Number of Fisher Scoring iterations: 4
# plot
plot(x=dataFrame$xVar, y=dataFrame$yVar,pch=21,bg="tan",cex=2.5)
curve(predict(logRegMod,data.frame(xVar=x),type="response"),add=TRUE,lwd=2)
1) Set up a new markdown file for this homework. For each of the 4 models create a function to run the model. You will need to think carefully about the formal parameters for the input, the default values, and the output from your function. The output should just include model results, not any graphics.
# Create fake Data Set:
Pop1 <- runif(20)
Pop2 <- runif(20)
Factor1 <- c(rep("A", 10), rep("B", 10))
Factor2 <- as.factor(c(rep(1, 10), rep(0, 10)))
MyDF <- data.frame(Pop1, Pop2, Factor1, Factor2)
###########################################################################
# Function for a simple linear regression (takes cont. y and x variables)
###########################################################################
Regression <- function(x=1:10, y=runif(10)){
regModel <- lm(y~x)
z <- list(regModel, summary(regModel))
return(z)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
Regression()
## [[1]]
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.88550 -0.04699
##
##
## [[2]]
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35746 -0.26297 -0.02917 0.25538 0.43328
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.88550 0.22262 3.978 0.00408 **
## x -0.04699 0.03588 -1.310 0.22667
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3259 on 8 degrees of freedom
## Multiple R-squared: 0.1765, Adjusted R-squared: 0.07361
## F-statistic: 1.715 on 1 and 8 DF, p-value: 0.2267
# Test Function with fake Data Set:
Regression(x=MyDF$Pop1, y=MyDF$Pop2)
## [[1]]
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 0.4757 0.1111
##
##
## [[2]]
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44392 -0.21719 -0.06129 0.25113 0.45066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4757 0.1653 2.877 0.010 *
## x 0.1111 0.2798 0.397 0.696
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2825 on 18 degrees of freedom
## Multiple R-squared: 0.008678, Adjusted R-squared: -0.0464
## F-statistic: 0.1576 on 1 and 18 DF, p-value: 0.6961
###########################################################################
######### Function for a simple ANOVA (takes cont. y and disc. x variables)
###########################################################################
myANOVA <- function(x=as.factor(c(rep("cat", 5), rep("dog", 5))), y=runif(10)){
AnModel <- aov(y~x)
z <- list(AnModel, summary(AnModel))
return(z)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
myANOVA()
## [[1]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0268161 0.8287665
## Deg. of Freedom 1 8
##
## Residual standard error: 0.321863
## Estimated effects may be unbalanced
##
## [[2]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0268 0.02682 0.259 0.625
## Residuals 8 0.8288 0.10360
# Test Function with fake Data Set:
myANOVA(x=MyDF$Factor1, y=MyDF$Pop2)
## [[1]]
## Call:
## aov(formula = y ~ x)
##
## Terms:
## x Residuals
## Sum of Squares 0.0248241 1.4247597
## Deg. of Freedom 1 18
##
## Residual standard error: 0.281342
## Estimated effects may be unbalanced
##
## [[2]]
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 0.0248 0.02482 0.314 0.582
## Residuals 18 1.4248 0.07915
###########################################################################
# Function for a contingency anlaysis (takes disc. y and disc. x variables)
###########################################################################
myCONT <- function(x=as.factor(c(rep("cat", 3), rep("dog", 17))), y=as.factor(c(rep("0", 10), rep("1", 10)))){
dataMatrix <- rbind(x,y)
chiModel <- chisq.test(dataMatrix)
z <- list(chiModel, summary(chiModel))
return(z)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
myCONT()
## Warning in chisq.test(dataMatrix): Chi-squared approximation may be
## incorrect
## [[1]]
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 1.6197, df = 19, p-value = 1
##
##
## [[2]]
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 40 -none- numeric
## expected 40 -none- numeric
## residuals 40 -none- numeric
## stdres 40 -none- numeric
# Test Function with fake Data Set:
myCONT(x=MyDF$Factor1, y=MyDF$Factor2)
## Warning in chisq.test(dataMatrix): Chi-squared approximation may be
## incorrect
## [[1]]
##
## Pearson's Chi-squared test
##
## data: dataMatrix
## X-squared = 6.6667, df = 19, p-value = 0.9958
##
##
## [[2]]
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## method 1 -none- character
## data.name 1 -none- character
## observed 40 -none- numeric
## expected 40 -none- numeric
## residuals 40 -none- numeric
## stdres 40 -none- numeric
###########################################################################
# Function for a logistic regression (takes disc. y and cont. x variables)
###########################################################################
LogReg <- function(x=runif(20), y=as.factor(c(rep("0", 10), rep("1", 10)))){
logRegMod <- glm(y~x,family=binomial(link="logit"))
z <- list(logRegMod, summary(logRegMod))
return(z)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
LogReg()
## [[1]]
##
## Call: glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Coefficients:
## (Intercept) x
## -0.3647 0.8009
##
## Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
## Null Deviance: 27.73
## Residual Deviance: 27.37 AIC: 31.37
##
## [[2]]
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.36322 -1.13423 -0.02011 1.21856 1.31599
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3647 0.7634 -0.478 0.633
## x 0.8009 1.3567 0.590 0.555
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 27.372 on 18 degrees of freedom
## AIC: 31.372
##
## Number of Fisher Scoring iterations: 4
# Test Function with fake Data Set:
LogReg(x=MyDF$Pop1, y=MyDF$Factor2)
## [[1]]
##
## Call: glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Coefficients:
## (Intercept) x
## 0.8077 -1.4757
##
## Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
## Null Deviance: 27.73
## Residual Deviance: 27.19 AIC: 31.19
##
## [[2]]
##
## Call:
## glm(formula = y ~ x, family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.33671 -1.12504 -0.07615 1.15935 1.41314
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.8077 1.2166 0.664 0.507
## x -1.4757 2.0531 -0.719 0.472
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 27.190 on 18 degrees of freedom
## AIC: 31.19
##
## Number of Fisher Scoring iterations: 4
2) Now, for each of the 4 statistical models, write a graphics function that will generate a nice plot of the results. The formal parameters for input to your graphics function should be the same as the input for your corresponding stats function. Again, illustrate the graphics function for your default settings and for the tiny fake data set you created in 1.
###########################################################################
# function that plots data from regression (takes cont. y and x variables):
###########################################################################
RegressionPlot <- function(x=1:10, y=runif(10)){
p <- plot(y=y,x=x,pch=21,bg="lightblue",cex=2, main="REGRESSION PLOT")
regModel <- lm(y~x)
abline(regModel)
return(p)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run the function with stock values:
RegressionPlot()
## NULL
# Test Function with fake Data Set:
RegressionPlot(x=MyDF$Pop1, y=MyDF$Pop2)
## NULL
###########################################################################
# Function that plots box plots from ANOVA (takes cont. y and x variables)
###########################################################################
myANOVAplot <- function(x=as.factor(c(rep("cat", 5), rep("dog", 5))), y=runif(10)){
p <- boxplot(y~x, main="ANOVA PLOT")
return(p)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
myANOVAplot()
## $stats
## [,1] [,2]
## [1,] 0.007710193 0.01264113
## [2,] 0.027367512 0.11695454
## [3,] 0.248532671 0.19298769
## [4,] 0.723226626 0.58424330
## [5,] 0.885160061 0.86299149
##
## $n
## [1] 5 5
##
## $conf
## [,1] [,2]
## [1,] -0.2431596 -0.1371974
## [2,] 0.7402250 0.5231728
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "cat" "dog"
# Test Function with fake Data Set:
myANOVAplot(x=MyDF$Factor1, y=MyDF$Pop2)
## $stats
## [,1] [,2]
## [1,] 0.08352479 0.1248698
## [2,] 0.35611704 0.2963937
## [3,] 0.57059685 0.4195975
## [4,] 0.77422063 0.7734556
## [5,] 0.96052560 0.9747175
##
## $n
## [1] 10 10
##
## $conf
## [,1] [,2]
## [1,] 0.3616956 0.1812383
## [2,] 0.7794981 0.6579566
##
## $out
## numeric(0)
##
## $group
## numeric(0)
##
## $names
## [1] "A" "B"
###########################################################################
#### Function for a contingency plot (takes disc. y and disc. x variables)
###########################################################################
myCONTplot <- function(x=as.factor(c(rep("cat", 10), rep("dog", 10))), y=as.factor(c(rep("0", 10), rep("1", 10)))){
dataMatrix <- rbind(x,y)
p <- mosaicplot(x=dataMatrix,
shade=FALSE)
return(p)
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
myCONTplot()
## NULL
# Test Function with fake Data Set:
myCONTplot(x=MyDF$Factor1, y=MyDF$Factor2)
## NULL
###########################################################################
### Function for a logistic reg. plot (takes disc. y and cont. x variables)
###########################################################################
LogRegplot <- function(x=runif(20), y=as.factor(c(rep("0", 10), rep("1", 10)))){
logRegMod <- glm(y~x,family=binomial(link="logit"))
plot(x=x, y=y,pch=21,bg="tan",cex=2.5)
curve(predict(logRegMod,data.frame(x),type="response"),add=TRUE,lwd=2)
return()
}
###########################################################################
# END OF FUNCITON
###########################################################################
# run function on default values
LogRegplot()
## NULL
# Test Function with fake Data Set:
LogRegplot(x=MyDF$Pop1, y=MyDF$Factor1)
## NULL