# Last time: Law of Large numbers
# (X_1+ X_2 + ... + X_n)/n -> Ave(X_i), as n-> infinity,
# with high probability.
# where X_i are independent, identically distributed (iid)
# code copied from last time:
import matplotlib.pyplot as plt # for histogram
# Now demonstrate LLN for (0,1) uniform r.v.s
def ave_rand_01cts(n):
"""Generate n independent instances of random().
Returns average of these."""
list_rand = [random() for i in range(n)]
return N(sum(list_rand)/n)
# N() takes decimal approximation
ave_rand_01cts(2)
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=3 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([ave_rand_01cts(n) for i in range(num_trials)], bins=21)
# LLN : these histograms get more and more concentrated near Ave(X_i)=0.5
# Now, look at actual sample means X_1, (X_1+X_2)/2, (X_1+X_2+X_3)/3,...
# reusing the X_i's for later sample means.
def sample_means_01cts(n):
"""Generate X_1,...,X_n by sampling random().
Return list of sample means"""
randlist = [random() for i in range(n)]
return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
sample_means_01cts(8)
list_plot(sample_means_01cts(100), plotjoined=True)
p1=list_plot(sample_means_01cts(100), plotjoined=True)
p2=list_plot(sample_means_01cts(100), plotjoined=True, color='black')
p3=list_plot(sample_means_01cts(100), plotjoined=True, color='red')
p4=list_plot(sample_means_01cts(100), plotjoined=True, color='green')
p5=list_plot(sample_means_01cts(100), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)
# above demonstrates LLN for (0,1) uniform r.v.
# Now do same for {0,1} uniform (discrete)
def sample_means_01discrete(n):
"""Generate X_1,...,X_n by sampling randint(0,1).
Return list of sample means"""
randlist = [randint(0,1) for i in range(n)]
return [N(sum(randlist[0:i+1])/(i+1)) for i in range(n)]
sample_means_01discrete(3)
p1=list_plot(sample_means_01discrete(100), plotjoined=True)
p2=list_plot(sample_means_01discrete(100), plotjoined=True, color='black')
p3=list_plot(sample_means_01discrete(100), plotjoined=True, color='red')
p4=list_plot(sample_means_01discrete(100), plotjoined=True, color='green')
p5=list_plot(sample_means_01discrete(100), plotjoined=True, color='orange')
show(p1+p2+p3+p4+p5)
# still concentrate around 0.5 as n gets larger
# Moving on, question is: what is distribution around Ave(X_i),
# when properly rescaled?
# Q1: How much does sample mean (X1+...Xn)/n typically vary from Ave(X_i),
# in terms of n (LLN: the amounts that it varies should go to 0 as
# n goes to infinity)
# A: variance of sample mean is (1/n)Var(X_i)
# So the variance of (X1+...+Xn)/(sqrt(n)) doesn't depend on n
def sqrt_ave_rand_cts(n):
"""Generate n independent instances of random()-1/2.
(Since want average to be 0)
Return sum, divided by sqrt(n) """
list_rand = [random()-1/2 for i in range(n)]
return N(sum(list_rand)/sqrt(n))
# N() takes decimal approximation
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=8 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([sqrt_ave_rand_cts(n) for i in range(num_trials)], bins=21)
# all three histograms above have similar x-axis ranges, i.e.
# we've found the right scaling
# now try above with {-1/2,1/2} discrete
def sqrt_ave_rand_discrete(n):
"""Generate n independent instances of randint(0,1)-1/2.
(Since want average to be 0)
Return sum, divided by sqrt(n) """
list_rand = [randint(0,1)-1/2 for i in range(n)]
return N(sum(list_rand)/sqrt(n))
# N() takes decimal approximation
num_trials = 10000
n=2 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=8 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
num_trials = 10000
n=100 # num r.v. being added together
plt.hist([sqrt_ave_rand_discrete(n) for i in range(num_trials)], bins=21)
# Limiting distribution for large n is broadly similar (bell curve)
# to distribution for continuous r.v. case
# Demonstration of the Central Limit Theorem