# last time: Law of Large numbers
# (X_1+ X_2 + ... + X_n)/n -> Ave(X_i), as n-> infinity
# where X_i are independent, identically distributed (iid)
# Code from last time
import matplotlib.pyplot as plt # for histogram
def ave_rand_01cts(n):
    """Return the average of n independent draws of random().

    Each draw is a (0,1) uniform sample; the draws are summed and the
    total is divided by n.
    """
    total = sum(random() for _ in range(n))
    return total / n
# Single call: with n=1 the "average" is just one draw of random().
ave_rand_01cts(1)
# Histogram of 10000 independent evaluations of ave_rand_01cts(1).
plt.hist([ave_rand_01cts(1) for i in range(10000)])
# for single sample, histogram gives the distribution of random()
# Same experiment, now averaging 3, 10 and 100 draws per evaluation.
plt.hist([ave_rand_01cts(3) for i in range(10000)])
plt.hist([ave_rand_01cts(10) for i in range(10000)])
plt.hist([ave_rand_01cts(100) for i in range(10000)])
# The histograms above concentrate more tightly near 0.5 as the number
# of copies averaged together increases.
# Goal: see how sample means: X_1, (X_1+X_2)/2, (X_1+X_2+X_3)/3, ... behave
def sample_means_01discrete(n):
    """Generate X_1,...,X_n from sampling randint(0,1).

    Return the list of sample means
    X_1, (X_1+X_2)/2, ..., (X_1+...+X_n)/n,
    each converted with N().  The list is empty when n is 0.
    """
    from itertools import accumulate

    draws = [randint(0, 1) for _ in range(n)]
    # accumulate() yields the running totals X_1, X_1+X_2, ... in a single
    # pass, instead of re-summing every prefix of the list (which is O(n^2)).
    # N() takes decimal approx
    return [N(total / (i + 1)) for i, total in enumerate(accumulate(draws))]
# Quick check on a short sample: the list of the first 4 sample means.
sample_means_01discrete(4)
# Plot one realization of 200 successive sample means.
list_plot(sample_means_01discrete(200), plotjoined=True)
# plot several realizations of above on same graph
# Each call below draws a fresh random sample; all but the first pass an
# explicit color so the realizations can be told apart.
p1=list_plot(sample_means_01discrete(200), plotjoined=True)
p2=list_plot(sample_means_01discrete(200), plotjoined=True, color='red')
p3=list_plot(sample_means_01discrete(200), plotjoined=True, color='green')
p4=list_plot(sample_means_01discrete(200), plotjoined=True, color='black')
p5=list_plot(sample_means_01discrete(200), plotjoined=True, color='orange')
# The five plots are added together and shown as one graph.
show(p1+p2+p3+p4+p5)
# All of the above eventually concentrate around 0.5
# Demonstration of LLN for {0,1} discrete r.v.
# Now do above for continuous r.v., random()
def sample_means_01cts(n):
    """Generate X_1,...,X_n from sampling random() (continuous r.v.).

    Return the list of sample means
    X_1, (X_1+X_2)/2, ..., (X_1+...+X_n)/n,
    each converted with N().  The list is empty when n is 0.
    """
    from itertools import accumulate

    draws = [random() for _ in range(n)]
    # accumulate() yields the running totals X_1, X_1+X_2, ... in a single
    # pass, instead of re-summing every prefix of the list (which is O(n^2)).
    # The totals can differ from a fresh sum() of each prefix only by
    # floating-point rounding.
    # N() takes decimal approx
    return [N(total / (i + 1)) for i, total in enumerate(accumulate(draws))]
# plot several realizations of 01cts on same graph
# Each call below draws a fresh random sample of 200 sample means; all but
# the first pass an explicit color so the realizations can be told apart.
# Note: p1..p5 are reassigned here, replacing any earlier plots of those names.
p1=list_plot(sample_means_01cts(200), plotjoined=True)
p2=list_plot(sample_means_01cts(200), plotjoined=True, color='red')
p3=list_plot(sample_means_01cts(200), plotjoined=True, color='green')
p4=list_plot(sample_means_01cts(200), plotjoined=True, color='black')
p5=list_plot(sample_means_01cts(200), plotjoined=True, color='orange')
# The five plots are added together and shown as one graph.
show(p1+p2+p3+p4+p5)
# Moving on, question is what is distribution around Ave(X_i),
# when properly rescaled?
# Q1: How much does (X_1+...+X_n)/n typically vary from Ave(X_i)
# in terms of n ?
# A1: Variance of the sample mean (X_1+...+X_n)/n is Var(X_i)/n
# So the variance of (X_1+...+X_n)/sqrt(n) is n*Var(X_i)/n = Var(X_i),
# which doesn't depend on n
def sqrt_ave_rand_01cts(n):
    """Return the centered, sqrt(n)-rescaled sum of n draws of random().

    Draws n independent instances of random() ((0,1) uniform), subtracts
    n*0.5 from their sum, divides by sqrt(n) and converts with N().
    """
    total = sum(random() for _ in range(n))
    # normalizing so average 0, and variance doesn't depend on n
    centered = total - n*0.5
    return N(centered/sqrt(n))
# Histogram of sqrt_ave_rand_01cts(n) over num_trials repetitions, n = 2.
num_trials = 10000 # number of times to repeat experiment
n = 2 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)])
# Same experiment with n = 10.
num_trials = 10000 # number of times to repeat experiment
n = 10 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)])
# Same experiment with n = 100; this call also passes bins=21.
num_trials = 10000 # number of times to repeat experiment
n = 100 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01cts(n) for i in range(num_trials)], bins=21)
# all three histograms above have similar x-axis ranges, i.e.
# we've found the right scaling
# Now try above with discrete r.v.
def sqrt_ave_rand_01discrete(n):
    """Return the centered, sqrt(n)-rescaled sum of n draws of randint(0,1).

    Draws n independent instances of randint(0,1), subtracts n*0.5 from
    their sum, divides by sqrt(n) and converts with N().
    """
    total = sum(randint(0, 1) for _ in range(n))
    # normalizing so average 0, variance doesn't depend on n
    centered = total - n*0.5
    return N(centered/sqrt(n))
# Histogram of sqrt_ave_rand_01discrete(n) over num_trials repetitions, n = 2.
num_trials = 10000 # number of times to repeat experiment
n = 2 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])
# Same experiment with n = 10.
num_trials = 10000 # number of times to repeat experiment
n = 10 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])
# Same experiment with n = 100.
num_trials = 10000 # number of times to repeat experiment
n = 100 # number of copies of r.v. to add together
plt.hist([sqrt_ave_rand_01discrete(n) for i in range(num_trials)])
# Limiting distribution for large n is broadly similar (bell curve)
# to distribution for continuous r.v. case
# Demonstration of the Central Limit Theorem