############################################################################################

# single-number bets at roulette

# unbuffer the output

# setwd( 'C:/DD/Teaching/AMS-131/Summer-2019' )

print( population.data.set <- c( rep( -1, 37 ), 35 ) )

#  [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
# [28] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 35

hist( population.data.set, prob = T, breaks = ( -2:36 ) - 0.5,
  main = 'Population PMF: single number', xlab = 'your net gain on a single play' )

n <- 1000

M <- 1000000

random.number.generator.seed <- 4391557

set.seed( random.number.generator.seed )

simulated.sums <- rep( NA, M )

system.time( {

  for ( m in 1:M ) {

    simulated.sample.data.set <- sample( population.data.set, n, replace = T )

    simulated.sums[ m ] <- sum( simulated.sample.data.set )

  }

} 

)

#  user  system elapsed       n = 1000   M = 100000
#   2.7     0.0     2.7

#  user  system elapsed       n = 1000   M = 1000000 
# 26.31    0.03   26.36

estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 )

paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed,
  'estimated probability of coming out ahead =',
  estimated.probability.of.coming.out.ahead )

# [1] "n = 1000 M = 1e+06 seed = 4391557 estimated probability of coming out ahead = 0.397075"

hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain',
  breaks = 'FD' )

abline( v = 0, lwd = 2, col = 'red' )

table( simulated.sums ) / M

# simulated.sums

#     -100      -64      -28        8       44       80      116      152      188      224      260 
# 0.069067 0.187200 0.251781 0.221875 0.145672 0.075289 0.032287 0.011754 0.003770 0.000962 0.000265 

#      296      332      368 
# 0.000066 0.000009 0.000003

# normal approximation to P( coming out ahead )

print( mu <- mean( population.data.set ) )

# [1] -0.05263158

# expected value (EV) of sum S is n * mu

print( expected.value.of.sum <- n * mu )

# [1] -52.63158

print( sigma <- sqrt( mean( ( population.data.set - mu )^2 ) ) )

# [1] 5.762617

# standard deviation (SD) of sum S is sigma * sqrt( n )

print( standard.deviation.of.sum <- sigma * sqrt( n ) )

# [1] 182.23

# normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD )

print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) /
  standard.deviation.of.sum ) )

# [1] 0.3863597

# monte carlo estimate of this probability, which is more accurate, is 0.397075

( 0.3863597 - 0.397075 ) / 0.397075

# [1] -0.02698558

# even with n = 1000, normal approximation is too low by about 2.7%

hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain',
  breaks = 50 )

abline( v = 0, lwd = 2, col = 'red' )

S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 )

lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ),
  lwd = 2, col = 'blue' )

# install.packages( 'moments' )

library( moments )

print( population.skewness <- skewness( population.data.set ) )

# [1] 5.918364

print( population.kurtosis <- kurtosis( population.data.set ) - 3 )

# [1] 33.02703

print( theoretical.skewness.of.S <- population.skewness / sqrt( n ) )

# [1] 0.1871551

skewness( simulated.sums )

# [1] 0.1887953

print( theoretical.kurtosis.of.S <- population.kurtosis / n )

# [1] 0.03302703

kurtosis( simulated.sums ) - 3

# [1] 0.03185558

n.grid <- c( 1, 10, 50, 100, 500, 1000, 5000, 10000 )

skewness.grid <- population.skewness / sqrt( n.grid )

kurtosis.grid <- population.kurtosis / n.grid 

cbind( n.grid, skewness.grid, kurtosis.grid )

#      n.grid skewness.grid kurtosis.grid

# [1,]      1    5.91836354  33.027027027
# [2,]     10    1.87155088   3.302702703
# [3,]     50    0.83698300   0.660540541
# [4,]    100    0.59183635   0.330270270
# [5,]    500    0.26467726   0.066054054
# [6,]   1000    0.18715509   0.033027027
# [7,]   5000    0.08369830   0.006605405
# [8,]  10000    0.05918364   0.003302703

# even after making 10,000 bets on a single number,
# the skewness of the distribution of (your net gain)
# is still about 0.06

n <- 10000

M <- 100000

random.number.generator.seed <- 1037969

set.seed( random.number.generator.seed )

simulated.sums <- rep( NA, M )

system.time( {

  for ( m in 1:M ) {

    simulated.sample.data.set <- sample( population.data.set, n, replace = T )

    simulated.sums[ m ] <- sum( simulated.sample.data.set )

    if ( ( m %% 10000 ) == 0 ) print( m )

  }

} 

)


estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 )

paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed,
  'estimated probability of coming out ahead =',
  estimated.probability.of.coming.out.ahead )

[1] "n = 10000 M = 1e+05 seed = 1037969 estimated probability of coming out ahead = 0.18404"

hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain',
  breaks = 'FD' )

abline( v = 0, lwd = 2, col = 'red' )

# normal approximation to P( coming out ahead )

# expected value (EV) of sum S is n * mu

print( expected.value.of.sum <- n * mu )

# [1] -526.3158

# standard deviation (SD) of sum S is sigma * sqrt( n )

print( standard.deviation.of.sum <- sigma * sqrt( n ) )

# [1] 576.2617

# normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD )

print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) /
  standard.deviation.of.sum ) )

# [1] [1] 0.1805351

# monte carlo estimate of this probability, which is more accurate, is 0.18404

( 0.1805351 - 0.18404 ) / 0.18404

# [1] [1] -0.01904423

abs( 0.1805351 - 0.18404 )

# [1] 0.0035049

# even with n = 10000, normal approximation is too low by about 1.9%,
# but in absolute terms it's only off by about 0.004

hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain',
  breaks = 50 )

abline( v = 0, lwd = 2, col = 'red' )

S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 )

lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ),
  lwd = 2, col = 'blue' )

##########################################################################################

# betting on red or black (or odd or even) at roulette

print( population.data.set <- c( rep( -1, 20 ), rep( 1, 18 ) ) )

#  [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1  1  1  1  1  1  1  1  1  1  1
# [32]  1  1  1  1  1  1  1

hist( population.data.set, prob = T, breaks = ( -2:36 ) - 0.5,
  main = 'Population PMF: red or black', xlab = 'your net gain on a single play' )

n <- 1000

M <- 100000

random.number.generator.seed <- 4391557

set.seed( random.number.generator.seed )

simulated.sums <- rep( NA, M )

system.time( {

  for ( m in 1:M ) {

    simulated.sample.data.set <- sample( population.data.set, n, replace = T )

    simulated.sums[ m ] <- sum( simulated.sample.data.set )

  }

} 

)

#  user  system elapsed       n = 1000   M = 100000
#   2.7     0.0     2.7

#  user  system elapsed       n = 1000   M = 1000000 
# 26.31    0.03   26.36

estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 )

paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed,
  'estimated probability of coming out ahead =',
  estimated.probability.of.coming.out.ahead )

# [1] "n = 1000 M = 1e+06 seed = 4391557 estimated probability of coming out ahead = 0.044885"

hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain',
  breaks = 'FD' )

abline( v = 0, lwd = 2, col = 'red' )

table( simulated.sums ) / M

# normal approximation to P( coming out ahead )

print( mu <- mean( population.data.set ) )

# [1] -0.05263158

# expected value (EV) of sum S is n * mu

print( expected.value.of.sum <- n * mu )

# [1] -52.63158

print( sigma <- sqrt( mean( ( population.data.set - mu )^2 ) ) )

# [1] 0.998614

# standard deviation (SD) of sum S is sigma * sqrt( n )

print( standard.deviation.of.sum <- sigma * sqrt( n ) )

# [1] 31.57895

# normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD )

print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) /
  standard.deviation.of.sum ) )

# [1] 0.04779035

# monte carlo estimate of this probability, which is more accurate, is 0.044885

( 0.04779035 - 0.044885 ) / 0.044885

# [1] 0.06472875

# even with n = 1000, normal approximation is too high by about 6.4%

hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain',
  breaks = 50 )

abline( v = 0, lwd = 2, col = 'red' )

S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 )

lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ),
  lwd = 2, col = 'blue' )

# install.packages( 'moments' )

library( moments )

print( population.skewness <- skewness( population.data.set ) )

# [1] 5.918364

print( population.kurtosis <- kurtosis( population.data.set ) - 3 )

# [1] 33.02703

print( theoretical.skewness.of.S <- population.skewness / sqrt( n ) )

# [1] 0.1871551

skewness( simulated.sums )

# [1] 0.1887953

print( theoretical.kurtosis.of.S <- population.kurtosis / n )

# [1] 0.03302703

kurtosis( simulated.sums ) - 3

# [1] 0.03185558

n.grid <- c( 1, 10, 50, 100, 500, 1000, 5000, 10000 )

skewness.grid <- population.skewness / sqrt( n.grid )

kurtosis.grid <- population.kurtosis / n.grid 

cbind( n.grid, skewness.grid, kurtosis.grid )

#      n.grid skewness.grid kurtosis.grid

# [1,]      1    5.91836354  33.027027027
# [2,]     10    1.87155088   3.302702703
# [3,]     50    0.83698300   0.660540541
# [4,]    100    0.59183635   0.330270270
# [5,]    500    0.26467726   0.066054054
# [6,]   1000    0.18715509   0.033027027
# [7,]   5000    0.08369830   0.006605405
# [8,]  10000    0.05918364   0.003302703

# even after making 10,000 bets on a single number,
# the skewness of the distribution of (your net gain)
# is still about 0.06

n <- 10000

M <- 100000

random.number.generator.seed <- 1037969

set.seed( random.number.generator.seed )

simulated.sums <- rep( NA, M )

system.time( {

  for ( m in 1:M ) {

    simulated.sample.data.set <- sample( population.data.set, n, replace = T )

    simulated.sums[ m ] <- sum( simulated.sample.data.set )

    if ( ( m %% 10000 ) == 0 ) print( m )

  }

} 

)


estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 )

paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed,
  'estimated probability of coming out ahead =',
  estimated.probability.of.coming.out.ahead )

[1] "n = 10000 M = 1e+05 seed = 1037969 estimated probability of coming out ahead = 0.18404"

hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain',
  breaks = 'FD' )

abline( v = 0, lwd = 2, col = 'red' )

# normal approximation to P( coming out ahead )

# expected value (EV) of sum S is n * mu

print( expected.value.of.sum <- n * mu )

# [1] -526.3158

# standard deviation (SD) of sum S is sigma * sqrt( n )

print( standard.deviation.of.sum <- sigma * sqrt( n ) )

# [1] 576.2617

# normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD )

print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) /
  standard.deviation.of.sum ) )

# [1] [1] 0.1805351

# monte carlo estimate of this probability, which is more accurate, is 0.18404

( 0.1805351 - 0.18404 ) / 0.18404

# [1] [1] -0.01904423

abs( 0.1805351 - 0.18404 )

# [1] 0.0035049

# even with n = 10000, normal approximation is too low by about 1.9%,
# but in absolute terms it's only off by about 0.004

hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain',
  breaks = 50 )

abline( v = 0, lwd = 2, col = 'red' )

S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 )

lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ),
  lwd = 2, col = 'blue' )