In [1]:
# Probability density function (pdf) of the standard normal distribution:
#   f(x) = 1/(sqrt(2*pi)) * exp(-x^2/2)
# Plot it for x in the interval [-5, 5].
plot(1/(sqrt(2*pi)) * exp(-x^2/2), (x, -5, 5))
Out[1]:
In [2]:
# Probability that a normally distributed random variable has a value
# in (0, 0.5): integrate the pdf over that interval.
prob_0_to_half = integral(1/(sqrt(2*pi)) * exp(-x^2/2), x, 0, 0.5)
# N(-) gives a numerical approximation of its argument.
N(prob_0_to_half)
Out[2]:
0.191462461274013
In [21]:
# Probability that the value is in (-10, 10), approximated to 30 digits.
# The result is almost exactly 1.
prob_within_10 = integral(1/(sqrt(2*pi)) * exp(-x^2/2), x, -10, 10)
N(prob_within_10, digits=30)
Out[21]:
0.999999999999999999999984760294
In [4]:
# Exact (symbolic) probability that the value is in (-10, 10).
# The exact value is not a nice number.
integral(1/(sqrt(2*pi)) * exp(-x^2/2), x, -10, 10)
Out[4]:
erf(5*sqrt(2))
In [5]:
# Probability that the value is in (-oo, oo), i.e. the total area
# under the pdf, which should be 1.
integral(1/(sqrt(2*pi)) * exp(-x^2/2), x, -oo, oo)
Out[5]:
1
In [6]:
# Expected value E(X): integrate x * f(x) over the whole real line.
integral(x * 1/(sqrt(2*pi)) * exp(-x^2/2), x, -oo, oo)
Out[6]:
0
In [7]:
# Variance: this integral is the second moment E(X^2), which equals the
# variance here because E(X) = 0.
integral(x^2 * 1/(sqrt(2*pi)) * exp(-x^2/2), (x,-oo,oo))
# The "standard normal distribution" has expected value = 0, variance = 1
Out[7]:
1
In [8]:
# Code copied from last time
# Histogram of averages of uniform random numbers between -1 and 1
import matplotlib.pyplot as plt # python package that has histogram function
num_iters = 100000  # number of repetitions recorded in the histogram
n = 1               # number of uniform draws averaged in each repetition
results = [] # one numerical average per repetition
for trial in range(num_iters):
    # n uniform random numbers between -1 and 1
    draws = [2*random()-1 for k in range(n)]
    # average of the draws, stored as a numerical value via N(-)
    results.append(N(sum(draws)/n))
plt.hist(results,bins=31)
Out[8]:
(array([3255., 3202., 3237., 3286., 3236., 3202., 3280., 3220., 3285.,
        3245., 3080., 3240., 3140., 3178., 3209., 3273., 3214., 3249.,
        3251., 3240., 3203., 3181., 3121., 3162., 3258., 3313., 3185.,
        3212., 3317., 3229., 3297.]),
 array([-0.99999002, -0.93547459, -0.87095916, -0.80644372, -0.74192829,
        -0.67741286, -0.61289743, -0.548382  , -0.48386656, -0.41935113,
        -0.3548357 , -0.29032027, -0.22580484, -0.16128941, -0.09677397,
        -0.03225854,  0.03225689,  0.09677232,  0.16128775,  0.22580319,
         0.29031862,  0.35483405,  0.41934948,  0.48386491,  0.54838034,
         0.61289578,  0.67741121,  0.74192664,  0.80644207,  0.8709575 ,
         0.93547294,  0.99998837]),
 <a list of 31 Patch objects>)
In [9]:
# The histogram above shows num_iters independent realizations
# of a single uniform (-1,1) random variable (the case n = 1).
# If num_iters is high, its shape should approximate the pdf.
# The actual pdf is a straight horizontal line (constant 1/2) above (-1,1).
In [10]:
# Histogram of averages of n uniform random numbers between -1 and 1
import matplotlib.pyplot as plt # python package that has histogram
num_iters = 100000  # number of repetitions recorded in the histogram
n = 10              # number of random variables to sum up in each repetition
results = [] # one numerical average per repetition
for trial in range(num_iters):
    # n uniform random numbers between -1 and 1
    draws = [2*random()-1 for k in range(n)]
    # average (sum divided by n), stored as a numerical value via N(-)
    results.append(N(sum(draws)/n))
plt.hist(results,bins=51)
Out[10]:
(array([4.000e+00, 1.000e+00, 6.000e+00, 1.700e+01, 1.600e+01, 3.500e+01,
        7.300e+01, 1.310e+02, 1.830e+02, 2.910e+02, 4.570e+02, 5.890e+02,
        8.080e+02, 1.166e+03, 1.514e+03, 2.016e+03, 2.517e+03, 3.109e+03,
        3.671e+03, 4.220e+03, 4.692e+03, 5.128e+03, 5.605e+03, 5.935e+03,
        6.091e+03, 6.063e+03, 6.176e+03, 5.842e+03, 5.565e+03, 4.959e+03,
        4.464e+03, 3.941e+03, 3.286e+03, 2.761e+03, 2.243e+03, 1.805e+03,
        1.323e+03, 1.041e+03, 7.360e+02, 5.120e+02, 3.920e+02, 2.260e+02,
        1.480e+02, 1.220e+02, 6.000e+01, 3.300e+01, 1.400e+01, 8.000e+00,
        3.000e+00, 1.000e+00, 1.000e+00]),
 array([-0.721649  , -0.69310098, -0.66455295, -0.63600492, -0.6074569 ,
        -0.57890887, -0.55036084, -0.52181282, -0.49326479, -0.46471676,
        -0.43616874, -0.40762071, -0.37907268, -0.35052466, -0.32197663,
        -0.2934286 , -0.26488058, -0.23633255, -0.20778452, -0.1792365 ,
        -0.15068847, -0.12214044, -0.09359242, -0.06504439, -0.03649636,
        -0.00794834,  0.02059969,  0.04914772,  0.07769575,  0.10624377,
         0.1347918 ,  0.16333983,  0.19188785,  0.22043588,  0.24898391,
         0.27753193,  0.30607996,  0.33462799,  0.36317601,  0.39172404,
         0.42027207,  0.44882009,  0.47736812,  0.50591615,  0.53446417,
         0.5630122 ,  0.59156023,  0.62010825,  0.64865628,  0.67720431,
         0.70575233,  0.73430036]),
 <a list of 51 Patch objects>)
In [11]:
# By the Law of Large Numbers, as n gets bigger,
# (1/n)(X_1 + ... + X_n) is very likely to be close to E(X) = 0
In [12]:
# Instead of taking the average by dividing by n,
# now we divide the sum by sqrt(n).
# Use uniform random numbers between -1 and 1.
# NOTE: a uniform (-1,1) random variable has variance 1/3 (not 1), so the
# histogram below approximates a normal distribution with variance 1/3.
import matplotlib.pyplot as plt # python package that has histogram
num_iters = 100000  # number of repetitions recorded in the histogram
n = 15              # number of random variables summed in each repetition
root_n = sqrt(n)    # does not change between repetitions, so compute it once
results = [] # one rescaled sum per repetition
for trial in range(num_iters):
    # n uniform random numbers between -1 and 1
    draws = [2*random()-1 for k in range(n)]
    # sum divided by sqrt(n) (rather than n), stored numerically via N(-)
    results.append(N(sum(draws)/root_n))
plt.hist(results,bins=51)
Out[12]:
(array([2.000e+00, 4.000e+00, 4.000e+00, 1.400e+01, 3.100e+01, 5.000e+01,
        8.400e+01, 1.310e+02, 1.940e+02, 2.790e+02, 4.580e+02, 6.110e+02,
        8.750e+02, 1.109e+03, 1.556e+03, 1.941e+03, 2.463e+03, 2.973e+03,
        3.619e+03, 4.127e+03, 4.730e+03, 5.186e+03, 5.499e+03, 5.910e+03,
        6.128e+03, 6.262e+03, 6.088e+03, 5.748e+03, 5.451e+03, 4.908e+03,
        4.464e+03, 3.890e+03, 3.309e+03, 2.756e+03, 2.271e+03, 1.838e+03,
        1.507e+03, 1.129e+03, 7.890e+02, 5.380e+02, 3.720e+02, 2.510e+02,
        1.900e+02, 1.030e+02, 6.300e+01, 4.300e+01, 2.200e+01, 1.900e+01,
        6.000e+00, 4.000e+00, 1.000e+00]),
 array([-2.27150657, -2.181812  , -2.09211744, -2.00242288, -1.91272832,
        -1.82303375, -1.73333919, -1.64364463, -1.55395007, -1.4642555 ,
        -1.37456094, -1.28486638, -1.19517181, -1.10547725, -1.01578269,
        -0.92608813, -0.83639356, -0.746699  , -0.65700444, -0.56730987,
        -0.47761531, -0.38792075, -0.29822619, -0.20853162, -0.11883706,
        -0.0291425 ,  0.06055207,  0.15024663,  0.23994119,  0.32963575,
         0.41933032,  0.50902488,  0.59871944,  0.68841401,  0.77810857,
         0.86780313,  0.95749769,  1.04719226,  1.13688682,  1.22658138,
         1.31627595,  1.40597051,  1.49566507,  1.58535963,  1.6750542 ,
         1.76474876,  1.85444332,  1.94413789,  2.03383245,  2.12352701,
         2.21322157,  2.30291614]),
 <a list of 51 Patch objects>)
In [13]:
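# Central Limit Theorem
# For any distribution X with E(X)=0 and E(X^2)=1, we have that
# (1/sqrt(n))(X_1 + X_2 + ... + X_n)
# converges in distribution to the standard normal distribution
# (expected value 0, variance 1) as n goes to infinity,
# where the X_i are *independent* instances of X.
# Note: uniform (-1,1) has E(X)=0 but E(X^2)=1/3, so the histogram above
# approximates a normal distribution with variance 1/3; using sqrt(3)*X_i
# instead would give the standard normal.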