############################################################################################ # single-number bets at roulette # unbuffer the output # setwd( 'C:/DD/Teaching/AMS-131/Summer-2019' ) print( population.data.set <- c( rep( -1, 37 ), 35 ) ) # [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 # [28] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 35 hist( population.data.set, prob = T, breaks = ( -2:36 ) - 0.5, main = 'Population PMF: single number', xlab = 'your net gain on a single play' ) n <- 1000 M <- 1000000 random.number.generator.seed <- 4391557 set.seed( random.number.generator.seed ) simulated.sums <- rep( NA, M ) system.time( { for ( m in 1:M ) { simulated.sample.data.set <- sample( population.data.set, n, replace = T ) simulated.sums[ m ] <- sum( simulated.sample.data.set ) } } ) # user system elapsed n = 1000 M = 100000 # 2.7 0.0 2.7 # user system elapsed n = 1000 M = 1000000 # 26.31 0.03 26.36 estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 ) paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed, 'estimated probability of coming out ahead =', estimated.probability.of.coming.out.ahead ) # [1] "n = 1000 M = 1e+06 seed = 4391557 estimated probability of coming out ahead = 0.397075" hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain', breaks = 'FD' ) abline( v = 0, lwd = 2, col = 'red' ) table( simulated.sums ) / M # simulated.sums # -100 -64 -28 8 44 80 116 152 188 224 260 # 0.069067 0.187200 0.251781 0.221875 0.145672 0.075289 0.032287 0.011754 0.003770 0.000962 0.000265 # 296 332 368 # 0.000066 0.000009 0.000003 # normal approximation to P( coming out ahead ) print( mu <- mean( population.data.set ) ) # [1] -0.05263158 # expected value (EV) of sum S is n * mu print( expected.value.of.sum <- n * mu ) # [1] -52.63158 print( sigma <- sqrt( mean( ( population.data.set - mu )^2 ) ) ) # [1] 5.762617 # standard deviation (SD) of sum S is sigma * sqrt( n ) print( standard.deviation.of.sum <- sigma * sqrt( n ) ) # [1] 182.23 # normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD ) print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) / standard.deviation.of.sum ) ) # [1] 0.3863597 # monte carlo estimate of this probability, which is more accurate, is 0.397075 ( 0.3863597 - 0.397075 ) / 0.397075 # [1] -0.02698558 # even with n = 1000, normal approximation is too low by about 2.7% hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain', breaks = 50 ) abline( v = 0, lwd = 2, col = 'red' ) S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 ) lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ), lwd = 2, col = 'blue' ) # install.packages( 'moments' ) library( moments ) print( population.skewness <- skewness( population.data.set ) ) # [1] 5.918364 print( population.kurtosis <- kurtosis( population.data.set ) - 3 ) # [1] 33.02703 print( theoretical.skewness.of.S <- population.skewness / sqrt( n ) ) # [1] 0.1871551 skewness( simulated.sums ) # [1] 0.1887953 print( theoretical.kurtosis.of.S <- population.kurtosis / n ) # [1] 0.03302703 kurtosis( simulated.sums ) - 3 # [1] 0.03185558 n.grid <- c( 1, 10, 50, 100, 500, 1000, 5000, 10000 ) skewness.grid <- population.skewness / sqrt( n.grid ) kurtosis.grid <- population.kurtosis / n.grid cbind( n.grid, skewness.grid, kurtosis.grid ) # n.grid skewness.grid kurtosis.grid # [1,] 1 5.91836354 33.027027027 # [2,] 10 1.87155088 3.302702703 # [3,] 50 0.83698300 0.660540541 # [4,] 100 0.59183635 0.330270270 # [5,] 500 0.26467726 0.066054054 # [6,] 1000 0.18715509 0.033027027 # [7,] 5000 0.08369830 0.006605405 # [8,] 10000 0.05918364 0.003302703 # even after making 10,000 bets on a single number, # the skewness of the distribution of (your net gain) # is still about 0.06 n <- 10000 M <- 100000 random.number.generator.seed <- 1037969 set.seed( random.number.generator.seed ) simulated.sums <- rep( NA, M ) system.time( { for ( m in 1:M ) { simulated.sample.data.set <- sample( population.data.set, n, replace = T ) simulated.sums[ m ] <- sum( simulated.sample.data.set ) if ( ( m %% 10000 ) == 0 ) print( m ) } } ) estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 ) paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed, 'estimated probability of coming out ahead =', estimated.probability.of.coming.out.ahead ) [1] "n = 10000 M = 1e+05 seed = 1037969 estimated probability of coming out ahead = 0.18404" hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain', breaks = 'FD' ) abline( v = 0, lwd = 2, col = 'red' ) # normal approximation to P( coming out ahead ) # expected value (EV) of sum S is n * mu print( expected.value.of.sum <- n * mu ) # [1] -526.3158 # standard deviation (SD) of sum S is sigma * sqrt( n ) print( standard.deviation.of.sum <- sigma * sqrt( n ) ) # [1] 576.2617 # normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD ) print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) / standard.deviation.of.sum ) ) # [1] [1] 0.1805351 # monte carlo estimate of this probability, which is more accurate, is 0.18404 ( 0.1805351 - 0.18404 ) / 0.18404 # [1] [1] -0.01904423 abs( 0.1805351 - 0.18404 ) # [1] 0.0035049 # even with n = 10000, normal approximation is too low by about 1.9%, # but in absolute terms it's only off by about 0.004 hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain', breaks = 50 ) abline( v = 0, lwd = 2, col = 'red' ) S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 ) lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ), lwd = 2, col = 'blue' ) ########################################################################################## # betting on red or black (or odd or even) at roulette print( population.data.set <- c( rep( -1, 20 ), rep( 1, 18 ) ) ) # [1] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1 1 # [32] 1 1 1 1 1 1 1 hist( population.data.set, prob = T, breaks = ( -2:36 ) - 0.5, main = 'Population PMF: red or black', xlab = 'your net gain on a single play' ) n <- 1000 M <- 100000 random.number.generator.seed <- 4391557 set.seed( random.number.generator.seed ) simulated.sums <- rep( NA, M ) system.time( { for ( m in 1:M ) { simulated.sample.data.set <- sample( population.data.set, n, replace = T ) simulated.sums[ m ] <- sum( simulated.sample.data.set ) } } ) # user system elapsed n = 1000 M = 100000 # 2.7 0.0 2.7 # user system elapsed n = 1000 M = 1000000 # 26.31 0.03 26.36 estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 ) paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed, 'estimated probability of coming out ahead =', estimated.probability.of.coming.out.ahead ) # [1] "n = 1000 M = 1e+06 seed = 4391557 estimated probability of coming out ahead = 0.044885" hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain', breaks = 'FD' ) abline( v = 0, lwd = 2, col = 'red' ) table( simulated.sums ) / M # normal approximation to P( coming out ahead ) print( mu <- mean( population.data.set ) ) # [1] -0.05263158 # expected value (EV) of sum S is n * mu print( expected.value.of.sum <- n * mu ) # [1] -52.63158 print( sigma <- sqrt( mean( ( population.data.set - mu )^2 ) ) ) # [1] 0.998614 # standard deviation (SD) of sum S is sigma * sqrt( n ) print( standard.deviation.of.sum <- sigma * sqrt( n ) ) # [1] 31.57895 # normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD ) print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) / standard.deviation.of.sum ) ) # [1] 0.04779035 # monte carlo estimate of this probability, which is more accurate, is 0.044885 ( 0.04779035 - 0.044885 ) / 0.044885 # [1] 0.06472875 # even with n = 1000, normal approximation is too high by about 6.4% hist( simulated.sums, prob = T, main = 'n = 1,000', xlab = 'Your Net Gain', breaks = 50 ) abline( v = 0, lwd = 2, col = 'red' ) S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 ) lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ), lwd = 2, col = 'blue' ) # install.packages( 'moments' ) library( moments ) print( population.skewness <- skewness( population.data.set ) ) # [1] 5.918364 print( population.kurtosis <- kurtosis( population.data.set ) - 3 ) # [1] 33.02703 print( theoretical.skewness.of.S <- population.skewness / sqrt( n ) ) # [1] 0.1871551 skewness( simulated.sums ) # [1] 0.1887953 print( theoretical.kurtosis.of.S <- population.kurtosis / n ) # [1] 0.03302703 kurtosis( simulated.sums ) - 3 # [1] 0.03185558 n.grid <- c( 1, 10, 50, 100, 500, 1000, 5000, 10000 ) skewness.grid <- population.skewness / sqrt( n.grid ) kurtosis.grid <- population.kurtosis / n.grid cbind( n.grid, skewness.grid, kurtosis.grid ) # n.grid skewness.grid kurtosis.grid # [1,] 1 5.91836354 33.027027027 # [2,] 10 1.87155088 3.302702703 # [3,] 50 0.83698300 0.660540541 # [4,] 100 0.59183635 0.330270270 # [5,] 500 0.26467726 0.066054054 # [6,] 1000 0.18715509 0.033027027 # [7,] 5000 0.08369830 0.006605405 # [8,] 10000 0.05918364 0.003302703 # even after making 10,000 bets on a single number, # the skewness of the distribution of (your net gain) # is still about 0.06 n <- 10000 M <- 100000 random.number.generator.seed <- 1037969 set.seed( random.number.generator.seed ) simulated.sums <- rep( NA, M ) system.time( { for ( m in 1:M ) { simulated.sample.data.set <- sample( population.data.set, n, replace = T ) simulated.sums[ m ] <- sum( simulated.sample.data.set ) if ( ( m %% 10000 ) == 0 ) print( m ) } } ) estimated.probability.of.coming.out.ahead <- mean( simulated.sums > 0 ) paste( 'n =', n, 'M =', M, 'seed =', random.number.generator.seed, 'estimated probability of coming out ahead =', estimated.probability.of.coming.out.ahead ) [1] "n = 10000 M = 1e+05 seed = 1037969 estimated probability of coming out ahead = 0.18404" hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain', breaks = 'FD' ) abline( v = 0, lwd = 2, col = 'red' ) # normal approximation to P( coming out ahead ) # expected value (EV) of sum S is n * mu print( expected.value.of.sum <- n * mu ) # [1] -526.3158 # standard deviation (SD) of sum S is sigma * sqrt( n ) print( standard.deviation.of.sum <- sigma * sqrt( n ) ) # [1] 576.2617 # normal approximation to P( S > 0 ) is 1 - Phi( ( 0 - EV ) / SD ) print( normal.approximation <- 1 - pnorm( ( 0 - expected.value.of.sum ) / standard.deviation.of.sum ) ) # [1] [1] 0.1805351 # monte carlo estimate of this probability, which is more accurate, is 0.18404 ( 0.1805351 - 0.18404 ) / 0.18404 # [1] [1] -0.01904423 abs( 0.1805351 - 0.18404 ) # [1] 0.0035049 # even with n = 10000, normal approximation is too low by about 1.9%, # but in absolute terms it's only off by about 0.004 hist( simulated.sums, prob = T, main = 'n = 10,000', xlab = 'Your Net Gain', breaks = 50 ) abline( v = 0, lwd = 2, col = 'red' ) S.grid <- seq( min( simulated.sums ), max( simulated.sums ), length = 500 ) lines( S.grid, dnorm( S.grid, expected.value.of.sum, standard.deviation.of.sum ), lwd = 2, col = 'blue' )