# Using Markdown in Hugo post

Hongtao Hao / 2020-01-28

# Exercise 4.1

df <- HairEyeColor # Changing the name of HairEyecolor
show (df) # displaying the dataframe
## , , Sex = Male
##
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    32   11    10     3
##   Brown    53   50    25    15
##   Red      10   10     7     7
##   Blond     3   30     5     8
##
## , , Sex = Female
##
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    36    9     5     2
##   Brown    66   34    29    14
##   Red      16    7     7     7
##   Blond     4   64     5     8
e.h.freq <- apply (df, c('Eye','Hair'), sum) # To get a table where 'Eye color' is the row and 'Hair color' is the column. This line of code will sum across sex.
e.h.prop <- e.h.freq / sum (e.h.freq) # Generating joint proportions in each cell.
show (round (e.h.prop, 2)) # Round off the values in e.h.prop to the 2nd number of decimal places and then display the dataframe. 
##        Hair
## Eye     Black Brown  Red Blond
##   Brown  0.11  0.20 0.04  0.01
##   Blue   0.03  0.14 0.03  0.16
##   Hazel  0.03  0.09 0.02  0.02
##   Green  0.01  0.05 0.02  0.03
h.freq <- apply (df, c('Hair'), sum) # This will sum across sex and eye, and display the number of people having a specific hair color.
h.prop <- h.freq / sum (h.freq) # This will display the marginal propabilities (Hair)
show ( round(h.prop,2) ) # Round off and display the dataframe.
## Black Brown   Red Blond
##  0.18  0.48  0.12  0.21
e.freq <- apply (df, c('Eye'), sum) # This will sum across sex and hair, and display the number of people having a specific eye color.
e.prop <- e.freq /sum (e.freq) # This will display the marginal probabilities (Eye)
show (round(e.prop, 2) ) # Round off and display the dataframe. 
## Brown  Blue Hazel Green
##  0.37  0.36  0.16  0.11
e.h.prop ['Blue', ] / e.prop ['Blue'] # This is p(h|e=blue), i.e., the probabilities of the hair colors given that eye color is blue. 
##      Black      Brown        Red      Blond
## 0.09302326 0.39069767 0.07906977 0.43720930
# To get the probabilities of hair colors given Brown eyes:
e.h.prop ['Brown',] / e.prop ['Brown']
##      Black      Brown        Red      Blond
## 0.30909091 0.54090909 0.11818182 0.03181818
# To get the probabilities of eye colors given Brown hair:
e.h.prop [, 'Brown'] / h.prop ['Brown']
##     Brown      Blue     Hazel     Green
## 0.4160839 0.2937063 0.1888112 0.1013986

# Exercise 4.2

set.seed(47405)
N <- 500
flipSequence <- sample (x = c(0,1), prob = c(1-pHeads,pHeads), size = N, replace = T)
num.flips <- c (1:N)
plot (num.flips, prop.heads, type = 'o', log='x',
col="skyblue", xlim = c(1,N), ylim = c(0.0,1.0), cex.axis = 1.5,
xlab = "Flip Number", ylab = "Proportion Heads", cex.lab = 1.5,
main = "Running Proportion of Heads", cex.main=1.5)
flipLetters <- paste (c("T", "H")[flipSequence[1:10]+1],collapse = "")
displayString <- paste0( "Flip Sequence =" , flipLetters , "...")
text (N, .5, displayString, adj = c(1,0.5), cex = 1.3)
text (N, .4, paste("End Proportion = ", prop.heads[N]), adj=c(1,0.5), cex=1.3) # 3

set.seed(1)
x <- seq(5, 15, length=1000)
y <- dnorm(x, mean=10, sd=.20)
plot(x, y, type="l", lwd=1, col="red") # 3A

set.seed(1)
dnorm(9.9,mean=10, sd=.20)
##  1.760327

# 3B

set.seed(1)
# Generating a random sample of 100,000 values from the normal distribution:
heapOdata <- rnorm (100000, mean = 10.0, sd = .20)
hist(heapOdata, breaks = 51, col = "orange", main = "rnorm") # Having a quick overview of this set of random numbers. # 3C

num.between <- sum( heapOdata >= 9.8 & heapOdata < 10.0 ) # counting how many numbers fall between the interval
prob.mass <- num.between / 100000 # Calculating probability mass
approx.prob <- prob.mass / (10.0-9.8) # Calculating probability density (probability mass / bin width)
approx.prob 
##  1.69395
# which is approximately the same as Part A.

# 4

According to Baye’s rule, among all joint outcomes with “negative results”, the proportion of people having the disease is $$p (user|-)$$. Specifically:

$p (user|-) = \frac{p (-|user)\cdot p(user)}{p(-)}$ We should notice that the prior, $$p(user)$$ is now the posterior computed from the Table 5.4 in the book. Also notice that,

$P(-) = \sum_{θ^*} \cdot P (-|θ^∗) \cdot P(θ^*)$ Now, I will compute the posterior from Table 5.4:

$P (user|+) = \frac {0.99 \cdot 0.001}{0.99 \cdot 0.001 + 0.05 \cdot (1-0.001)}$ Using R, we know that the result is:

posterior <- (0.99*0.001)/(0.99*0.001+0.05*(1-0.001))
posterior
##  0.01943463

$\begin{eqnarray} P (user|-) & = & \frac{P (-|user)\cdot P(user)}{P(-)}\\ \\ & = & \frac{P (-|user)\cdot P(user)}{\sum_{θ^*} \cdot P (-|θ^∗) \cdot P(θ^*)}\\ \\ & = & \frac {0.01 \cdot 0.01943463 }{P(-|user) \cdot P(user) + P(-|non-user) \cdot P (non-user)} \\ \\ & = & \frac {0.01 \cdot 0.01943463 }{0.01 \cdot 0.01943463 + 0.95 \cdot (1-0.01943463)} \end{eqnarray}$

Calculating using R:

(0.01*posterior) / {0.01*posterior+(0.95*(1-posterior))}
##  0.0002085862

Therefore, the probability that the person has the disease given that the re-test shows “negative” is 0.0002085862.

# 5

## 5A

Test User Non-user Margin
+ 99 4995 5094
- 1 94905 94906
Margin 100 99,900 100,000

## 5B

The proportion of people who have the disease given that the test result is positive:

99/5094
##  0.01943463

## 5C & 5D

upper.left <- 10000 * 0.99
upper.left
##  9900
upper.right <- 9990000 * 0.05
upper.right
##  499500
lower.left <- 10000 * 0.99 * 0.01
lower.left
##  99
lower.right <- 9990000 * 0.05 * 0.95
lower.right
##  474525
lower.left / (lower.left+lower.right) # The result is the same as the answer to Exercise 5.1. 
##  0.0002085862