Chapter 3. Sampling the Imaginary
In [0]:
import pandas as pd
import seaborn as sns
import torch
import pyro
import pyro.distributions as dist
import pyro.ops.stats as stats
import rethinking
Code 3.1¶
In [1]:
PrPV = 0.95
PrPM = 0.01
PrV = 0.001
PrP = PrPV * PrV + PrPM * (1 - PrV)
PrVP = PrPV * PrV / PrP
PrVP
Out[1]:
Code 3.2¶
In [2]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=9,
probs=p_grid).log_prob(torch.tensor(6.)).exp()
posterior = likelihood * prior
posterior = posterior / sum(posterior)
Code 3.3¶
In [3]:
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))
Code 3.4¶
In [4]:
ax = sns.scatterplot(range(len(samples)), samples, s=80, alpha=0.4, edgecolor="none")
ax.set(xlabel="sample number", ylabel="proportion water (p)");
Out[4]:
Code 3.5¶
In [5]:
ax = sns.distplot(samples)
ax.set(xlabel="proportion water (p)", ylabel="Density");
Out[5]:
Code 3.6¶
In [6]:
# add up posterior probability where p < 0.5
(posterior[p_grid < 0.5]).sum()
Out[6]:
Code 3.7¶
In [7]:
(samples < 0.5).sum().float() / 1e4
Out[7]:
Code 3.8¶
In [8]:
((samples > 0.5) & (samples < 0.75)).sum().float() / 1e4
Out[8]:
Code 3.9¶
In [9]:
stats.quantile(samples, 0.8)
Out[9]:
Code 3.10¶
In [10]:
stats.quantile(samples, [0.1, 0.9])
Out[10]:
Code 3.11¶
In [11]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=3,
probs=p_grid).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))
Code 3.12¶
In [12]:
stats.pi(samples, prob=0.5)
Out[12]:
Code 3.13¶
In [13]:
stats.hpdi(samples, prob=0.5)
Out[13]:
Code 3.14¶
In [14]:
p_grid[posterior.argmax()]
Out[14]:
Code 3.15¶
In [15]:
adj = 0.01
silverman_factor = (0.75 * samples.size(0)) ** (-0.2)
bandwidth = adj * silverman_factor * samples.std()
x = torch.linspace(samples.min(), samples.max(), 1000)
y = dist.Normal(samples, bandwidth).log_prob(x.unsqueeze(-1)).logsumexp(-1).exp()
x[y.argmax()]
Out[15]:
Code 3.16¶
In [16]:
print(samples.mean())
print(samples.median())
Out[16]:
Code 3.17¶
In [17]:
(posterior * (0.5 - p_grid).abs()).sum()
Out[17]:
Code 3.18¶
In [18]:
loss = (posterior * (p_grid.unsqueeze(1) - p_grid).abs()).sum(1)
Code 3.19¶
In [19]:
p_grid[loss.argmin()]
Out[19]:
Code 3.20¶
In [20]:
dist.Binomial(total_count=2, probs=0.7).log_prob(torch.arange(3.)).exp()
Out[20]:
Code 3.21¶
In [21]:
dist.Binomial(total_count=2, probs=0.7).sample().long()
Out[21]:
Code 3.22¶
In [22]:
dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([10])).long()
Out[22]:
Code 3.23¶
In [23]:
dummy_w = dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([int(1e5)]))
dummy_w.long().bincount().float() / 1e5
Out[23]:
Code 3.24¶
In [24]:
dummy_w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([int(1e5)]))
ax = sns.distplot(dummy_w, kde=False)
ax.set(xlabel="dummy water count", ylabel="Frequency");
Out[24]:
Code 3.25¶
In [25]:
w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([int(1e4)]))
Code 3.26¶
In [26]:
w = dist.Binomial(total_count=9, probs=samples).sample()
Code 3.27¶
In [27]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=3,
probs=p_grid).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
torch.manual_seed(100)
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))
Code 3.28¶
In [28]:
birth1 = [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1]
birth2 = [0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
Code 3.29¶
In [29]:
homeworkch3 = pd.read_csv("../data/homeworkch3.csv")
Code 3.30¶
In [30]:
sum(birth1) + sum(birth2)
Out[30]: