# Chapter 3. Sampling the Imaginary

In [0]:
import pandas as pd
import seaborn as sns
import torch

import pyro
import pyro.distributions as dist
import pyro.ops.stats as stats

import rethinking


#### Code 3.1¶

In [1]:
PrPV = 0.95
PrPM = 0.01
PrV = 0.001
PrP = PrPV * PrV + PrPM * (1 - PrV)
PrVP = PrPV * PrV / PrP
PrVP

Out[1]:
0.08683729433272395

#### Code 3.2¶

In [2]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=9,
probs=p_grid).log_prob(torch.tensor(6.)).exp()
posterior = likelihood * prior
posterior = posterior / sum(posterior)


#### Code 3.3¶

In [3]:
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))


#### Code 3.4¶

In [4]:
ax = sns.scatterplot(range(len(samples)), samples, s=80, alpha=0.4, edgecolor="none")
ax.set(xlabel="sample number", ylabel="proportion water (p)");

Out[4]:

#### Code 3.5¶

In [5]:
ax = sns.distplot(samples)
ax.set(xlabel="proportion water (p)", ylabel="Density");

Out[5]:

#### Code 3.6¶

In [6]:
# add up posterior probability where p < 0.5
(posterior[p_grid < 0.5]).sum()

Out[6]:
tensor(0.1719)

#### Code 3.7¶

In [7]:
(samples < 0.5).sum().float() / 1e4

Out[7]:
tensor(0.1698)

#### Code 3.8¶

In [8]:
((samples > 0.5) & (samples < 0.75)).sum().float() / 1e4

Out[8]:
tensor(0.6057)

#### Code 3.9¶

In [9]:
stats.quantile(samples, 0.8)

Out[9]:
tensor(0.7618)

#### Code 3.10¶

In [10]:
stats.quantile(samples, [0.1, 0.9])

Out[10]:
tensor([0.4515, 0.8148])

#### Code 3.11¶

In [11]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=3,
probs=p_grid).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))


#### Code 3.12¶

In [12]:
stats.pi(samples, prob=0.5)

Out[12]:
tensor([0.7107, 0.9329])

#### Code 3.13¶

In [13]:
stats.hpdi(samples, prob=0.5)

Out[13]:
tensor([0.8448, 1.0000])

#### Code 3.14¶

In [14]:
p_grid[posterior.argmax()]

Out[14]:
tensor(1.)

#### Code 3.15¶

In [15]:
adj = 0.01
silverman_factor = (0.75 * samples.size(0)) ** (-0.2)
bandwidth = adj * silverman_factor * samples.std()
x = torch.linspace(samples.min(), samples.max(), 1000)
y = dist.Normal(samples, bandwidth).log_prob(x.unsqueeze(-1)).logsumexp(-1).exp()
x[y.argmax()]

Out[15]:
tensor(0.9880)

#### Code 3.16¶

In [16]:
print(samples.mean())
print(samples.median())

Out[16]:
tensor(0.8023)
tensor(0.8448)


#### Code 3.17¶

In [17]:
(posterior * (0.5 - p_grid).abs()).sum()

Out[17]:
tensor(0.3129)

#### Code 3.18¶

In [18]:
loss = (posterior * (p_grid.unsqueeze(1) - p_grid).abs()).sum(1)


#### Code 3.19¶

In [19]:
p_grid[loss.argmin()]

Out[19]:
tensor(0.8408)

#### Code 3.20¶

In [20]:
dist.Binomial(total_count=2, probs=0.7).log_prob(torch.arange(3.)).exp()

Out[20]:
tensor([0.0900, 0.4200, 0.4900])

#### Code 3.21¶

In [21]:
dist.Binomial(total_count=2, probs=0.7).sample().long()

Out[21]:
tensor(2)

#### Code 3.22¶

In [22]:
dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([10])).long()

Out[22]:
tensor([1, 2, 1, 1, 2, 2, 0, 1, 1, 0])

#### Code 3.23¶

In [23]:
dummy_w = dist.Binomial(total_count=2, probs=0.7).sample(torch.Size([int(1e5)]))
dummy_w.long().bincount().float() / 1e5

Out[23]:
tensor([0.0918, 0.4171, 0.4911])

#### Code 3.24¶

In [24]:
dummy_w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([int(1e5)]))
ax = sns.distplot(dummy_w, kde=False)
ax.set(xlabel="dummy water count", ylabel="Frequency");

Out[24]:

#### Code 3.25¶

In [25]:
w = dist.Binomial(total_count=9, probs=0.7).sample(torch.Size([int(1e4)]))


#### Code 3.26¶

In [26]:
w = dist.Binomial(total_count=9, probs=samples).sample()


#### Code 3.27¶

In [27]:
p_grid = torch.linspace(start=0, end=1, steps=1000)
prior = torch.tensor(1.).repeat(1000)
likelihood = dist.Binomial(total_count=3,
probs=p_grid).log_prob(torch.tensor(3.)).exp()
posterior = likelihood * prior
posterior = posterior / posterior.sum()
torch.manual_seed(100)
samples = dist.Empirical(p_grid, posterior.log()).sample(torch.Size([int(1e4)]))


#### Code 3.28¶

In [28]:
birth1 = [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1]
birth2 = [0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1,
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]


#### Code 3.29¶

In [29]:
homeworkch3 = pd.read_csv("../data/homeworkch3.csv")


#### Code 3.30¶

In [30]:
sum(birth1) + sum(birth2)

Out[30]:
111