# Goal: sample from poisson distribution

# Cookie dough with chocolate chips example 
# Average number of chips in one fistful of dough is lambda (intensity of Poisson process) 
# Consider dividing fistful into n pieces, each of which has one or zero chips
# prob of having chip is lambda/n.  
# Get actual Poisson distribution by taking n very large.

# Binomial distribution
# Binom(n,p): Take coin with prob p of being heads. Flip n times.  Count the number of heads.
# The coin flips are independent of one another.  
# In previous cell, total number of chips is given Binom(n,lambda/n)
# sum n copies of randint(0,1) is same as Binom(n,1/2)

# simulate binomial distribution
def binom(n,p):
    """Sample from Binom(n,p) distribution."""
    run_sum = 0 # running total of number of successes 
    for i in range(n):
        if random()<p: # this happens with probability p
            run_sum += 1
    return run_sum

binom(3,1/2)

1

import matplotlib.pyplot as plt

plt.hist([binom(3,1/3) for i in range(10000)], bins=range(0,5), align="left", rwidth=0.5)

(array([2989., 4452., 2165.,  394.]),
 array([0, 1, 2, 3, 4]),
 <a list of 4 Patch objects>)

# if we keep p constant, make n large, Central Limit Theorem and Law of Large numbers
# are relevant 
plt.hist([binom(100,1/2) for i in range(10000)])

(array([  18.,  138.,  786., 2119., 2308., 2768., 1437.,  366.,   56.,
           4.]),
 array([32. , 35.8, 39.6, 43.4, 47.2, 51. , 54.8, 58.6, 62.4, 66.2, 70. ]),
 <a list of 10 Patch objects>)

# Look at approximation to Poisson 
lam = 3.0 # intensity of the process
n=100 
p=lam/n 
plt.hist([binom(n,p) for i in range(10000)],bins=range(0,15), align="left", rwidth=0.5)

(array([4.200e+02, 1.503e+03, 2.314e+03, 2.316e+03, 1.657e+03, 1.026e+03,
        4.750e+02, 1.890e+02, 6.700e+01, 2.600e+01, 6.000e+00, 1.000e+00,
        0.000e+00, 0.000e+00]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),
 <a list of 14 Patch objects>)

lam = 3.0 # intensity of the process
n=1000 
p=lam/n 
plt.hist([binom(n,p) for i in range(10000)],bins=range(0,15), align="left", rwidth=0.5)

(array([5.390e+02, 1.506e+03, 2.205e+03, 2.258e+03, 1.653e+03, 9.880e+02,
        5.190e+02, 2.250e+02, 7.100e+01, 2.400e+01, 8.000e+00, 3.000e+00,
        0.000e+00, 1.000e+00]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),
 <a list of 14 Patch objects>)

# Observation: the above two distributions (n=100,n=1000) are very similar 
# suggest that as n->infinity, we get some limiting distribution
# Poisson distribution is this limit.  
# Poisson(lam) is limit of Binom(n,lam/n) as n-> infinity

# Properties of Poisson(lambda), lambda>0 real number
# - Discrete distribution (not continuous)
# - Average value is lambda 
# - Prob = k is greater than 0 if k non-negative integer

lam = 1.0 # intensity of the process
n=1000 
p=lam/n 
plt.hist([binom(n,p) for i in range(10000)],bins=range(0,15), align="left", rwidth=0.5)

(array([3.713e+03, 3.741e+03, 1.754e+03, 6.080e+02, 1.440e+02, 3.100e+01,
        7.000e+00, 2.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),
 <a list of 14 Patch objects>)

lam = 10.0 # intensity of the process
n=1000 
p=lam/n 
plt.hist([binom(n,p) for i in range(10000)],bins=range(0,40), align="left", rwidth=0.5)

(array([0.000e+00, 1.000e+00, 1.800e+01, 7.200e+01, 1.640e+02, 3.980e+02,
        6.350e+02, 9.000e+02, 1.135e+03, 1.257e+03, 1.296e+03, 1.103e+03,
        9.330e+02, 7.320e+02, 5.330e+02, 3.470e+02, 2.090e+02, 1.560e+02,
        7.100e+01, 1.700e+01, 1.000e+01, 8.000e+00, 3.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        0.000e+00, 0.000e+00, 0.000e+00]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39]),
 <a list of 39 Patch objects>)

# Poisson can be defined in terms of its Probability Mass Function (pmf)
# (analagoue of Probability Density Function, but for discrete r.v.)
# pmf is a function f(x) that gives prob that a distribution takes some particular value x
# For instance, randint(0,1) pmf is f(0)=1/2, f(1)=1/2, f(x)=0 for any other x

#pmf for Poisson(lam):
# f(k) = lam^k * exp(-lam)/k!
# (note function is not even defined for k not a non-negative integer)

# PMF of Poisson(1)
lam=1
list_plot([lam^k * exp(-lam)/factorial(k) for k in range(0,10)])

# prob of Poisson(1) equaling 9:
f(k)=lam^k * exp(-lam)/factorial(k)
N(f(9))

1.01377711963030e-6

N(f(15)) # goes to 0 very quickly

2.81323432020840e-13

# PMF of Poisson(3)
lam=3
list_plot([lam^k * exp(-lam)/factorial(k) for k in range(0,10)])