# last time: Law of Large numbers
# (X_1+ X_2 + ... + X_n)/n -> Ave(X_i), as n-> infinity
# where X_i are independent, identically distributed (iid)

# Code from last time
import matplotlib.pyplot as plt # for histogram

def ave_rand_01cts(n):
    """Generate n independent instances of random() (0,1) uniform.
    Returns average of these."""
    list_rand = [random() for i in range(n)]
    return sum(list_rand)/n

ave_rand_01cts(1)

0.782676521297451

plt.hist([ave_rand_01cts(1) for i in range(10000)])
# for single sample, histogram gives the distribution of random()

(array([ 947., 1023., 1008., 1008.,  984.,  999.,  982., 1018., 1020.,
        1011.]),
 array([3.94529161e-05, 1.00027346e-01, 2.00015239e-01, 3.00003133e-01,
        3.99991026e-01, 4.99978919e-01, 5.99966812e-01, 6.99954705e-01,
        7.99942599e-01, 8.99930492e-01, 9.99918385e-01]),
 <a list of 10 Patch objects>)

plt.hist([ave_rand_01cts(3) for i in range(10000)])

(array([  91.,  406.,  900., 1550., 2020., 1970., 1636.,  944.,  380.,
         103.]),
 array([0.03421618, 0.12722333, 0.22023048, 0.31323763, 0.40624479,
        0.49925194, 0.59225909, 0.68526624, 0.77827339, 0.87128054,
        0.96428769]),
 <a list of 10 Patch objects>)

plt.hist([ave_rand_01cts(10) for i in range(10000)])

(array([  22.,  160.,  637., 1695., 2615., 2591., 1588.,  563.,  119.,
          10.]),
 array([0.17436969, 0.24030623, 0.30624278, 0.37217932, 0.43811587,
        0.50405241, 0.56998896, 0.6359255 , 0.70186205, 0.76779859,
        0.83373513]),
 <a list of 10 Patch objects>)

plt.hist([ave_rand_01cts(100) for i in range(10000)])

(array([  32.,  141.,  614., 1665., 2620., 2577., 1566.,  608.,  160.,
          17.]),
 array([0.39909187, 0.4193099 , 0.43952792, 0.45974595, 0.47996397,
        0.500182  , 0.52040002, 0.54061805, 0.56083607, 0.5810541 ,
        0.60127212]),
 <a list of 10 Patch objects>)

# above histograms concentrated near 0.5, as number of copies
# added together increases.

# Goal: see how sample means: X_1, (X_1+X_2)/2, (X_1+X_2+X_3)/3, ... behave

def sample_means_01discrete(n):
    """Generate X_1,...,X_n from sampling randint(0,1).
    Return list of sample means."""
    randlist = [randint(0,1) for i in range(n)]
    # print(randlist)
    return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
    # N() takes decimal approx

sample_means_01discrete(4)

[0, 1, 0, 1]

[0.000000000000000, 0.500000000000000, 0.333333333333333, 0.500000000000000]

list_plot(sample_means_01discrete(200), plotjoined=True)

# plot several realizations of above on same graph
p1=list_plot(sample_means_01discrete(200), plotjoined=True) 
p2=list_plot(sample_means_01discrete(200), plotjoined=True, color='red')
p3=list_plot(sample_means_01discrete(200), plotjoined=True, color='green')
p4=list_plot(sample_means_01discrete(200), plotjoined=True, color='black') 
p5=list_plot(sample_means_01discrete(200), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)

# All of the above eventually concentrate around 0.5
# Demonstration of LLN for {0,1} discrete r.v.

# Now do above for continuous r.v., random()

def sample_means_01cts(n):
    """Generate X_1,...,X_n from sampling randint(0,1).
    Return list of sample means."""
    randlist = [random() for i in range(n)]
    # print(randlist)
    return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
    # N() takes decimal approx

# plot several realizations of 01cts on same graph
p1=list_plot(sample_means_01cts(200), plotjoined=True) 
p2=list_plot(sample_means_01cts(200), plotjoined=True, color='red')
p3=list_plot(sample_means_01cts(200), plotjoined=True, color='green')
p4=list_plot(sample_means_01cts(200), plotjoined=True, color='black') 
p5=list_plot(sample_means_01cts(200), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)

# Moving on, question is what is distribution around Ave(X_i),
# when properly rescaled?  
# Q1: How much does (X_1+...+X_n)/n typically vary from Ave(X_i)
# in terms of n ? 
# A1: Variance is Var(X_i)/n
# So the variance of (X1+...+Xn)/(sqrt(n)) doesn't depend on n

def sqrt_ave_rand_01cts(n):
    """Generate n independent instances of random() (0,1) uniform."""
    list_rand = [random() for i in range(n)]
    return N((sum(list_rand)-n*0.5)/sqrt(n)) 
    # normalizing so average 0, and variance doesn't depend on n

num_trials = 10000 # number of times to repeat experiment
n = 2 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)])

(array([ 204.,  620., 1001., 1411., 1795., 1788., 1388., 1011.,  573.,
         209.]),
 array([-6.96622978e-01, -5.57169456e-01, -4.17715933e-01, -2.78262411e-01,
        -1.38808888e-01,  6.44634692e-04,  1.40098157e-01,  2.79551680e-01,
         4.19005203e-01,  5.58458725e-01,  6.97912248e-01]),
 <a list of 10 Patch objects>)

num_trials = 10000 # number of times to repeat experiment
n = 10 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)])

(array([2.000e+00, 2.900e+01, 2.430e+02, 1.027e+03, 2.462e+03, 2.983e+03,
        2.184e+03, 8.710e+02, 1.760e+02, 2.300e+01]),
 array([-1.23672374, -1.00884134, -0.78095894, -0.55307655, -0.32519415,
        -0.09731175,  0.13057065,  0.35845304,  0.58633544,  0.81421784,
         1.04210024]),
 <a list of 10 Patch objects>)

num_trials = 10000 # number of times to repeat experiment
n = 100 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)], bins=21)

(array([1.000e+00, 3.000e+00, 9.000e+00, 1.600e+01, 3.300e+01, 1.180e+02,
        2.640e+02, 4.850e+02, 8.350e+02, 1.169e+03, 1.514e+03, 1.538e+03,
        1.402e+03, 1.111e+03, 7.270e+02, 4.230e+02, 2.300e+02, 6.800e+01,
        4.100e+01, 9.000e+00, 4.000e+00]),
 array([-1.27928016, -1.16641363, -1.05354711, -0.94068059, -0.82781407,
        -0.71494755, -0.60208102, -0.4892145 , -0.37634798, -0.26348146,
        -0.15061494, -0.03774841,  0.07511811,  0.18798463,  0.30085115,
         0.41371767,  0.5265842 ,  0.63945072,  0.75231724,  0.86518376,
         0.97805029,  1.09091681]),
 <a list of 21 Patch objects>)

# the range of all three histograms above have similar x-axis ranges, i.e.
# we've found the right scaling

# Now try above with discrete r.v.

def sqrt_ave_rand_01discrete(n):
    """Generate n independent instances of randint(0,1). """
    list_rand = [randint(0,1) for i in range(n)]
    return N((sum(list_rand)-n*0.5)/sqrt(n)) 
    # normalizing so average 1, variance doesn't depend on n

num_trials = 10000 # number of times to repeat experiment
n = 2 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])

(array([2592.,    0.,    0.,    0.,    0., 4859.,    0.,    0.,    0.,
        2549.]),
 array([-7.07106781e-01, -5.65685425e-01, -4.24264069e-01, -2.82842712e-01,
        -1.41421356e-01, -1.11022302e-16,  1.41421356e-01,  2.82842712e-01,
         4.24264069e-01,  5.65685425e-01,  7.07106781e-01]),
 <a list of 10 Patch objects>)

num_trials = 10000 # number of times to repeat experiment
n = 10 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])

(array([ 106.,  457.,    0., 1181., 2044., 2414., 2125., 1167.,  409.,
          97.]),
 array([-1.58113883, -1.26491106, -0.9486833 , -0.63245553, -0.31622777,
         0.        ,  0.31622777,  0.63245553,  0.9486833 ,  1.26491106,
         1.58113883]),
 <a list of 10 Patch objects>)

num_trials = 10000 # number of times to repeat experiment
n = 100 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])

(array([   4.,   28.,  256., 1053., 1704., 3049., 2506., 1125.,  235.,
          40.]),
 array([-2.1 , -1.72, -1.34, -0.96, -0.58, -0.2 ,  0.18,  0.56,  0.94,
         1.32,  1.7 ]),
 <a list of 10 Patch objects>)

# Limiting distribution for large n is broadly similar (bell curve)
# to distribution for continuous r.v. case
# Demonstration of the Central Limit Theorem