In [1]:
# Last time: Law of Large Numbers
# (X_1 + X_2 + ... + X_n)/n -> Ave(X_i) as n -> infinity,
# with high probability,
# where the X_i are independent, identically distributed (iid).
In [2]:
# code copied from last time:
import matplotlib.pyplot as plt # for histograms
# (the other functions used below -- random(), randint(), N(), list_plot(), show() -- are Sage built-ins)
In [ ]:
# Now demonstrate LLN for (0,1) uniform r.v.s
In [59]:
def ave_rand_01cts(n):
    """Generate n independent instances of random().
    Return the average of these."""
    list_rand = [random() for i in range(n)]
    return N(sum(list_rand)/n)  # N() gives a decimal approximation
In [25]:
ave_rand_01cts(2)
Out[25]:
0.644102072847346
In [29]:
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
Out[29]:
(array([ 75., 156., 253., 309., 415., 500., 598., 704., 741., 815., 950.,
        830., 749., 680., 545., 525., 368., 329., 248., 154.,  56.]),
 array([0.01069358, 0.05762407, 0.10455457, 0.15148506, 0.19841556,
        0.24534605, 0.29227655, 0.33920704, 0.38613754, 0.43306803,
        0.47999853, 0.52692902, 0.57385952, 0.62079001, 0.66772051,
        0.714651  , 0.7615815 , 0.80851199, 0.85544249, 0.90237298,
        0.94930348, 0.99623397]),
 <a list of 21 Patch objects>)
In [30]:
num_trials = 10000
n=3 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
Out[30]:
(array([  14.,   49.,  105.,  189.,  307.,  449.,  599.,  788.,  873.,
         994., 1046., 1032.,  914.,  849.,  615.,  480.,  315.,  195.,
         123.,   51.,   13.]),
 array([0.01679141, 0.0623599 , 0.10792838, 0.15349687, 0.19906535,
        0.24463384, 0.29020232, 0.3357708 , 0.38133929, 0.42690777,
        0.47247626, 0.51804474, 0.56361323, 0.60918171, 0.65475019,
        0.70031868, 0.74588716, 0.79145565, 0.83702413, 0.88259262,
        0.9281611 , 0.97372959]),
 <a list of 21 Patch objects>)
In [32]:
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
Out[32]:
(array([6.000e+00, 2.200e+01, 5.700e+01, 1.160e+02, 2.390e+02, 4.240e+02,
        7.060e+02, 9.840e+02, 1.224e+03, 1.439e+03, 1.328e+03, 1.218e+03,
        9.190e+02, 5.670e+02, 3.780e+02, 2.070e+02, 1.040e+02, 3.400e+01,
        2.400e+01, 3.000e+00, 1.000e+00]),
 array([0.40157922, 0.41154438, 0.42150953, 0.43147469, 0.44143985,
        0.451405  , 0.46137016, 0.47133532, 0.48130047, 0.49126563,
        0.50123079, 0.51119594, 0.5211611 , 0.53112626, 0.54109142,
        0.55105657, 0.56102173, 0.57098689, 0.58095204, 0.5909172 ,
        0.60088236, 0.61084751]),
 <a list of 21 Patch objects>)
In [ ]:
# LLN : these histograms get more and more concentrated near Ave(X_i)=0.5
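In [ ]:
# To quantify "more and more concentrated", a minimal check reusing
# ave_rand_01cts from above: the empirical standard deviation of the
# sample mean should shrink as n grows.
num_trials = 10000
for n in [2, 3, 100]:
    samples = [ave_rand_01cts(n) for i in range(num_trials)]
    m = sum(samples)/num_trials
    emp_std = sqrt(sum((x - m)**2 for x in samples)/num_trials)
    print(n, N(emp_std))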
In [ ]:
# Now, look at actual sample means X_1, (X_1+X_2)/2, (X_1+X_2+X_3)/3,...
# reusing the X_i's for later sample means.  
In [33]:
def sample_means_01cts(n):
    """Generate X_1,...,X_n by sampling random().
    Return list of sample means"""
    randlist = [random() for i in range(n)]
    return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
In [35]:
sample_means_01cts(8)
Out[35]:
[0.833343709771760,
 0.759808231809042,
 0.572769403923202,
 0.499957873796816,
 0.529233766981981,
 0.601201709931481,
 0.637165239121753,
 0.603251240039679]
In [40]:
list_plot(sample_means_01cts(100), plotjoined=True)
Out[40]:
[line plot of the 100 running sample means]
In [42]:
p1=list_plot(sample_means_01cts(100), plotjoined=True)
p2=list_plot(sample_means_01cts(100), plotjoined=True, color='black')
p3=list_plot(sample_means_01cts(100), plotjoined=True, color='red')
p4=list_plot(sample_means_01cts(100), plotjoined=True, color='green')
p5=list_plot(sample_means_01cts(100), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)
In [ ]:
# above demonstrates LLN for (0,1) uniform r.v. 
In [ ]:
# Now do same for {0,1} uniform (discrete)
In [43]:
def sample_means_01discrete(n):
    """Generate X_1,...,X_n by sampling randint(0,1).
    Return list of sample means"""
    randlist = [randint(0,1) for i in range(n)]
    return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
In [45]:
sample_means_01discrete(3)
Out[45]:
[1.00000000000000, 0.500000000000000, 0.333333333333333]
In [46]:
p1=list_plot(sample_means_01discrete(100), plotjoined=True)
p2=list_plot(sample_means_01discrete(100), plotjoined=True, color='black')
p3=list_plot(sample_means_01discrete(100), plotjoined=True, color='red')
p4=list_plot(sample_means_01discrete(100), plotjoined=True, color='green')
p5=list_plot(sample_means_01discrete(100), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)
In [ ]:
# the sample means still concentrate around 0.5 as n gets larger
In [47]:
# Moving on, the question is: what is the distribution around Ave(X_i),
# when properly rescaled?
# Q1: How much does the sample mean (X_1+...+X_n)/n typically vary from
# Ave(X_i), in terms of n?  (By the LLN, the amount it varies should go
# to 0 as n goes to infinity.)
# A: Since the X_i are independent, Var(X_1+...+X_n) = n*Var(X_i), so the
# variance of the sample mean (X_1+...+X_n)/n is n*Var(X_i)/n^2 = Var(X_i)/n.
# (E.g. for n=100 uniform(0,1) r.v.s, Var(X_i) = 1/12, so the sample mean has
# standard deviation sqrt(1/1200) ~ 0.029 -- consistent with the n=100
# histogram above, which spans roughly 0.40 to 0.61.)
# So the variance of (X_1+...+X_n)/sqrt(n) doesn't depend on n.
In [55]:
def sqrt_ave_rand_cts(n):
    """Generate n independent instances of random()-1/2
    (centered so that the mean is 0).
    Return their sum, divided by sqrt(n)."""
    list_rand = [random()-1/2 for i in range(n)]
    return N(sum(list_rand)/sqrt(n))  # N() gives a decimal approximation
In [50]:
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
Out[50]:
(array([ 47., 132., 245., 337., 410., 463., 602., 690., 778., 864., 903.,
        889., 759., 692., 561., 476., 391., 317., 226., 166.,  52.]),
 array([-0.70204625, -0.63547808, -0.56890992, -0.50234176, -0.43577359,
        -0.36920543, -0.30263727, -0.23606911, -0.16950094, -0.10293278,
        -0.03636462,  0.03020355,  0.09677171,  0.16333987,  0.22990803,
         0.2964762 ,  0.36304436,  0.42961252,  0.49618069,  0.56274885,
         0.62931701,  0.69588517]),
 <a list of 21 Patch objects>)
In [51]:
num_trials = 10000
n=8 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
Out[51]:
(array([   7.,   31.,   51.,  132.,  260.,  430.,  664.,  921., 1202.,
        1267., 1311., 1199.,  953.,  656.,  441.,  253.,  150.,   45.,
          20.,    4.,    3.]),
 array([-0.97553539, -0.87850308, -0.78147076, -0.68443845, -0.58740613,
        -0.49037382, -0.3933415 , -0.29630919, -0.19927687, -0.10224456,
        -0.00521224,  0.09182007,  0.18885239,  0.2858847 ,  0.38291702,
         0.47994934,  0.57698165,  0.67401397,  0.77104628,  0.8680786 ,
         0.96511091,  1.06214323]),
 <a list of 21 Patch objects>)
In [52]:
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
Out[52]:
(array([1.000e+00, 2.000e+00, 1.200e+01, 3.300e+01, 8.900e+01, 1.860e+02,
        3.480e+02, 6.390e+02, 9.840e+02, 1.204e+03, 1.423e+03, 1.425e+03,
        1.266e+03, 9.400e+02, 6.600e+02, 3.970e+02, 2.290e+02, 1.100e+02,
        3.400e+01, 9.000e+00, 9.000e+00]),
 array([-1.14612867, -1.04231858, -0.9385085 , -0.83469841, -0.73088832,
        -0.62707823, -0.52326814, -0.41945806, -0.31564797, -0.21183788,
        -0.10802779, -0.0042177 ,  0.09959238,  0.20340247,  0.30721256,
         0.41102265,  0.51483274,  0.61864283,  0.72245291,  0.826263  ,
         0.93007309,  1.03388318]),
 <a list of 21 Patch objects>)
In [60]:
# all three histograms above have similar x-axis ranges, i.e.
# we've found the right scaling
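In [ ]:
# A minimal numerical check of the scaling, using sqrt_ave_rand_cts from
# above: the empirical variance of (X_1+...+X_n)/sqrt(n) should be roughly
# Var(X_i) = 1/12 (about 0.083) for every n.
num_trials = 10000
for n in [2, 8, 100]:
    samples = [sqrt_ave_rand_cts(n) for i in range(num_trials)]
    m = sum(samples)/num_trials
    emp_var = sum((x - m)**2 for x in samples)/num_trials
    print(n, N(emp_var))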
In [ ]:
# now try above with {-1/2,1/2} discrete 
In [54]:
def sqrt_ave_rand_discrete(n):
    """Generate n independent instances of randint(0,1)-1/2
    (centered so that the mean is 0).
    Return their sum, divided by sqrt(n)."""
    list_rand = [randint(0,1)-1/2 for i in range(n)]
    return N(sum(list_rand)/sqrt(n))  # N() gives a decimal approximation
In [56]:
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
Out[56]:
(array([2559.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0., 5015.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0., 2426.]),
 array([-0.70710678, -0.63976328, -0.57241978, -0.50507627, -0.43773277,
        -0.37038927, -0.30304576, -0.23570226, -0.16835876, -0.10101525,
        -0.03367175,  0.03367175,  0.10101525,  0.16835876,  0.23570226,
         0.30304576,  0.37038927,  0.43773277,  0.50507627,  0.57241978,
         0.63976328,  0.70710678]),
 <a list of 21 Patch objects>)
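In [ ]:
# Why only three bars when n=2: (Y_1+Y_2)/sqrt(2) with Y_i in {-1/2, 1/2}
# can take only three values, with probabilities 1/4, 1/2, 1/4 -- matching
# the counts above (roughly 2500, 5000, 2500 out of 10000).  A quick check
# of the possible values:
sorted(set(N((a+b)/sqrt(2)) for a in [-1/2, 1/2] for b in [-1/2, 1/2]))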
In [57]:
num_trials = 10000
n=8 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
Out[57]:
(array([  31.,    0.,  304.,    0.,    0., 1104.,    0., 2241.,    0.,
           0., 2839.,    0.,    0., 2077.,    0., 1041.,    0.,    0.,
         326.,    0.,   37.]),
 array([-1.41421356, -1.27952656, -1.14483955, -1.01015254, -0.87546554,
        -0.74077853, -0.60609153, -0.47140452, -0.33671751, -0.20203051,
        -0.0673435 ,  0.0673435 ,  0.20203051,  0.33671751,  0.47140452,
         0.60609153,  0.74077853,  0.87546554,  1.01015254,  1.14483955,
         1.27952656,  1.41421356]),
 <a list of 21 Patch objects>)
In [58]:
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
Out[58]:
(array([1.000e+00, 9.000e+00, 1.200e+01, 7.400e+01, 1.680e+02, 1.510e+02,
        5.520e+02, 8.600e+02, 1.268e+03, 1.454e+03, 8.220e+02, 1.489e+03,
        1.266e+03, 8.850e+02, 5.390e+02, 1.590e+02, 1.970e+02, 6.200e+01,
        2.300e+01, 7.000e+00, 2.000e+00]),
 array([-1.9       , -1.71904762, -1.53809524, -1.35714286, -1.17619048,
        -0.9952381 , -0.81428571, -0.63333333, -0.45238095, -0.27142857,
        -0.09047619,  0.09047619,  0.27142857,  0.45238095,  0.63333333,
         0.81428571,  0.9952381 ,  1.17619048,  1.35714286,  1.53809524,
         1.71904762,  1.9       ]),
 <a list of 21 Patch objects>)
In [ ]:
# The limiting distribution for large n is again a bell curve, broadly
# similar to the one in the continuous r.v. case:
# a demonstration of the Central Limit Theorem
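In [ ]:
# A sketch of that comparison (assuming numpy is available alongside
# matplotlib): overlay the normal density with mean 0 and variance 1/12
# predicted by the CLT (Var(X_i) = 1/12 for random()-1/2) on a normalized
# histogram of sqrt_ave_rand_cts(100).
import numpy as np
num_trials = 10000
n = 100
data = [sqrt_ave_rand_cts(n) for i in range(num_trials)]
plt.hist(data, bins=21, density=True)  # density=True: bar areas sum to 1
sigma = float(sqrt(1/12))              # std of the limiting normal
xs = np.linspace(-1.2, 1.2, 200)
plt.plot(xs, np.exp(-xs**2/(2*sigma**2))/(sigma*np.sqrt(2*np.pi)))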