##################################################################### # exploring the Binomial family of distributions, # indexed by n >= 1 (the number of Bernoulli trials) # and 0 < p < 1 (the success probability on each trial) help( dbinom ) dbinom( 0, 5, 0.25 ) [1] 0.2373047 dbinom( 0:5, 5, 0.25 ) [1] 0.2373046875 0.3955078125 0.2636718750 0.0878906250 0.0146484375 0.0009765625 y P( Y = y ) 0 0.2373046875 1 0.3955078125 2 0.2636718750 3 0.0878906250 4 0.0146484375 5 0.0009765625 cbind( 0:5, dbinom( 0:5, 5, 0.25 ) ) [,1] [,2] [1,] 0 0.2373046875 [2,] 1 0.3955078125 [3,] 2 0.2636718750 [4,] 3 0.0878906250 [5,] 4 0.0146484375 [6,] 5 0.0009765625 set.seed( 1 ) y.star <- rbinom( 1000000, 5, 0.25 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.25 )' ) # the distribution is unimodal and positively skewed # (i.e., it has a long right-hand tail) y.star <- rbinom( 1000000, 5, 0.5 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.5 )' ) # holding n fixed at 5 and changing p from 0.25 to 0.5, # the distribution stays unimodal and is now symmetric # around 5 * 0.5 = 2.5 y.star <- rbinom( 1000000, 5, 0.75 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.75 )' ) # holding n fixed at 5 and changing p from 0.5 to 0.75, # the distribution stays unimodal and is now skewed again, # but with a long left-hand tail # conjecture 1: the mean, or expected value, # of the Binomial( n, p ) distribution is n * p # conjecture 2: changing p to ( 1 - p ) and holding # n constant results in a distribution that is what you # get by symmetrically flipping the previous distribution # around the point n * p y.star <- rbinom( 1000000, 5, 0.95 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.95 )' ) # holding n fixed at 5 and changing p from 0.75 to 0.95, # the distribution stays unimodal and is even more strongly skewed, # with a long left-hand tail; it's now J-shaped y.star <- rbinom( 1000000, 5, 0.05 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.05 )' ) # holding n fixed at 5 and changing p from 0.95 to 0.05, # the distribution stays unimodal and has the same amount # of skew, but now with a long right-hand tail; # it's now reverse-J-shaped # conjecture 3: p controls the basic shape of the distribution -- # for 0 < p < 0.5, the distribution has positive skew (i.e., # a long right-hand tail); for p = 0.5 it's symmetric (skew = 0); # and for 0.5 < p < 1, the distribution has negative skew (i.e., # a long left-hand tail) # now let's hold p constant and increase n par( mfrow = c( 2, 1 ) ) y.star <- rbinom( 1000000, 5, 0.5 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.5 )', xlim = c( 0, 25 ), ylim = c( 0, 0.325 ) ) y.star <- rbinom( 1000000, 25, 0.5 ) hist( y.star, prob = T, breaks = ( 0:26 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 25, p = 0.5 )', xlim = c( 0, 25 ), ylim = c( 0, 0.325 ) ) # holding p fixed at 0.5 and changing n from 5 to 25, # the distribution has shifted to the right and become # more spread out but has retained its symmetric shape y.star <- rbinom( 1000000, 5, 0.25 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.25 )', xlim = c( 0, 25 ), ylim = c( 0, 0.4 ) ) y.star <- rbinom( 1000000, 25, 0.25 ) hist( y.star, prob = T, breaks = ( 0:26 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 25, p = 0.25 )', xlim = c( 0, 25 ), ylim = c( 0, 0.4 ) ) # holding p fixed at 0.25 and changing n from 5 to 25, # the distribution has shifted to the right and become # more spread out but has become substantially more symmetric # conjecture 4: as n increases for fixed p, # the binomial distribution becomes more spread out par( mfrow = c( 3, 1 ) ) y.star <- rbinom( 1000000, 5, 0.95 ) hist( y.star, prob = T, breaks = ( 0:6 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 5, p = 0.95 )', xlim = c( 0, 50 ), ylim = c( 0, 0.8 ) ) y.star <- rbinom( 1000000, 25, 0.95 ) hist( y.star, prob = T, breaks = ( 0:26 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 25, p = 0.95 )', xlim = c( 0, 50 ), ylim = c( 0, 0.8 ) ) y.star <- rbinom( 1000000, 50, 0.95 ) hist( y.star, prob = T, breaks = ( 0:51 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( n = 50, p = 0.95 )', xlim = c( 0, 50 ), ylim = c( 0, 0.8 ) ) # conjecture 5: we can drive the skewness down to 0 # for any fixed p by increasing n, but the rate # at which the skewness vanished depends on p: # the farther p is from 0.5, the bigger n needs to be # to push the skewness close to 0 # conjecture 6: all binomial distributions are unimodal ##################################################################### # exploring the Poisson family of distributions, # indexed by lambda (the rate parameter) help( dpois ) dpois( 0:10, 3.0 ) [1] 0.0497870684 0.1493612051 0.2240418077 0.2240418077 0.1680313557 [6] 0.1008188134 0.0504094067 0.0216040315 0.0081015118 0.0027005039 [11] 0.0008101512 cbind( 0:10, dpois( 0:10, 3 ) ) [,1] [,2] [1,] 0 0.0497870684 [2,] 1 0.1493612051 [3,] 2 0.2240418077 [4,] 3 0.2240418077 [5,] 4 0.1680313557 [6,] 5 0.1008188134 [7,] 6 0.0504094067 [8,] 7 0.0216040315 [9,] 8 0.0081015118 [10,] 9 0.0027005039 [11,] 10 0.0008101512 par( mfrow = c( 2, 1 ) ) set.seed( 1 ) y.star <- rpois( 1000000, 3.0 ) table( y.star ) y.star 0 1 2 3 4 5 6 7 8 9 10 49953 149428 223438 224257 168330 101087 50025 21699 8112 2625 763 11 12 13 14 206 57 19 1 hist( y.star, prob = T, breaks = ( 0:15 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( lambda = 3.0 )', xlim = c( 0, 14 ) ) # with lambda = 3.0, the distribution has positive skew # (i.e., a long right-hand tail) y.star <- rpois( 1000000, 0.5 ) table( y.star ) y.star 0 1 2 3 4 5 6 7 606999 303068 75538 12601 1617 159 16 2 hist( y.star, prob = T, breaks = ( 0:16 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( lambda = 0.5 )', xlim = c( 0, 15 ) ) # for lambda small enough, the distribution is so skewed # that it's reverse-J-shaped par( mfrow = c( 3, 1 ) ) y.star <- rpois( 1000000, 0.5 ) hist( y.star, prob = T, breaks = ( 0:25 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( lambda = 0.5 )', xlim = c( 0, 24 ) ) y.star <- rpois( 1000000, 3.0 ) hist( y.star, prob = T, breaks = ( 0:25 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( lambda = 3.0 )', xlim = c( 0, 24 ) ) y.star <- rpois( 1000000, 6.0 ) table( y.star ) y.star 0 1 2 3 4 5 6 7 8 9 10 2487 14965 44675 89076 133991 160524 160310 138126 102909 69004 41432 11 12 13 14 15 16 17 18 19 20 21 22359 11233 5231 2271 882 344 133 35 11 1 1 max( y.star ) [1] 21 hist( y.star, prob = T, breaks = ( 0:25 ) - 0.5, xlab = 'Simulated Y values', ylab = 'Probability', main = '( lambda = 6.0 )', xlim = c( 0, 24 ) ) # conjecture 7: all Poisson distributions are unimodal, and all # have positive skew # conjecture 8: as lambda increases, the distribution shifts # to the right, becomes more spread out, and exhibits less # positive skew; you can make the skew go to 0 by making # lambda bigger and bigger ######################################################################## fact (not yet proven in this class): all of the above conjectures are true ########################################################################