Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue/228/calpreddistr #291

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
f24601e
creation of class structure for uncondition and condition PIT
torluca Aug 11, 2022
1d0d0ea
initial implementation of condition pit from Dey+22 in pit.py, adding…
torluca Aug 19, 2022
e84ead2
Merge branch 'main' of https://github.com/LSSTDESC/RAIL into issue/22…
torluca Aug 19, 2022
c7aff36
merging main branch into issue/228/calpreddistr
torluca Oct 20, 2022
18eee8d
restructuring of the branch according to main branch
torluca Oct 20, 2022
8def6e6
correction of bugs in the ConditionPIT class in pit.py, using pit com…
torluca Oct 21, 2022
be7c09e
Merge branch 'main' into issue/228/calpreddistr
torluca Nov 7, 2022
d19a66f
creation of a demo notebook showing how to use the implemented calibr…
torluca Nov 7, 2022
78eeb9f
Merge branch 'main' into issue/228/calpreddistr
eacharles Dec 5, 2022
fe6785b
added torch to requirements
eacharles Dec 5, 2022
ac562c2
trivial recommit of pyproject.toml to try and force actions
eacharles Dec 5, 2022
cd56370
added prettytable to dev reqs
eacharles Dec 5, 2022
e523e3e
move prettyable to main deps
eacharles Dec 5, 2022
c37317b
Merge branch 'main' into issue/228/calpreddistr
torluca Jan 10, 2023
bea8142
added unit test for condition pit
torluca Jan 10, 2023
d0ed97d
added unit test for condition pit
torluca Jan 10, 2023
4a1c31b
added unit test for condition pit
torluca Jan 10, 2023
ecd1a98
replace individual npy files with single npz table for condition pit …
torluca Jan 10, 2023
b70124a
replace individual npy files with single npz table for condition pit …
torluca Jan 10, 2023
917c859
replace individual npy files with single npz table for condition pit …
torluca Jan 10, 2023
5cd6d8d
add couple no cover statements to pit.py
sschmidt23 Jan 10, 2023
99036fd
added pragma no cover to lines 185 and 220 in pit.py
torluca Jan 11, 2023
0fa60cc
added pragma no cover to lines 185 and 220 in pit.py
torluca Jan 11, 2023
3df14c0
added pragma no cover to lines 185 and 220 in pit.py
torluca Jan 11, 2023
a198175
try adding an mpirun to github action
sschmidt23 Jan 11, 2023
153c6c9
remove the mpirun, as it did not fix problem
sschmidt23 Jan 11, 2023
5d171b4
commented some lines in the conditionpit unit test to check whether a…
torluca Jan 12, 2023
e8dd246
Merge branch 'issue/228/calpreddistr' of https://github.com/LSSTDESC/…
torluca Jan 12, 2023
9df8ca3
updating branch with latest changes in main due to dsps branch merging
torluca Jan 13, 2023
3d89fb5
merge main 12.06.2023 into branch
torluca Jun 12, 2023
437794f
remove HEAD
torluca Jun 12, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,290 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Demo notebook for the calibrated predictive distributions implementation in RAIL"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"Author: Luca Tortorelli, Bitrateep Dey\n",
"\n",
"last run successfully: Nov 7, 2022\n",
"\n",
"The purpose of this notebook is to demonstrate the implementation of the calibrated predictive distribution (Dey at al. 2022) in RAIL.\n",
"Bitrateep provided a test data in .npz format (/src/rail/examples/testdata/bpz_test_red.npz) that contained:\n",
"- a galaxy catalogue with spectroscopic redshifts, magnitudes and their errors\n",
"- conditional density estimates for each galaxy, PDFs evaluated with a photo-z method on representative sample of object\n",
"- redshift grid of the conditional density estimate"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import pandas as pd\n",
"import qp\n",
"from src.rail.evaluation.metrics.pit import ConditionPIT"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"We do a small degree of preprocessing before feeding the data into RAIL"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# read Bitrateep's test data\n",
"\n",
"root = 'src/rail/examples/testdata/'\n",
"data = np.load(os.path.join(root, 'bpz_test_red.npz'), allow_pickle=True)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# display the keywords to access the data\n",
"for name in data.keys(): print(name)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# conveniently read the galaxy catalogue as pandas dataframe\n",
"cat = pd.DataFrame(data[\"test_cat\"])"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# create new features for training the method, in this case colours and their errors\n",
"\n",
"cat[\"UG\"] = cat[\"U\"]-cat[\"G\"]\n",
"cat[\"UGERR\"] = np.sqrt(cat[\"UERR\"]**2 + cat[\"GERR\"]**2)\n",
"cat[\"UR\"] = cat[\"U\"]-cat[\"R\"]\n",
"cat[\"URERR\"] = np.sqrt(cat[\"UERR\"]**2 + cat[\"RERR\"]**2)\n",
"cat[\"UI\"] = cat[\"U\"]-cat[\"I\"]\n",
"cat[\"UIERR\"] = np.sqrt(cat[\"UERR\"]**2 + cat[\"IERR\"]**2)\n",
"cat[\"UZ\"] = cat[\"U\"]-cat[\"Z\"]\n",
"cat[\"UZERR\"] = np.sqrt(cat[\"UERR\"]**2 + cat[\"ZERR\"]**2)\n",
"cat[\"UY\"] = cat[\"U\"]-cat[\"Y\"]\n",
"cat[\"UYERR\"] = np.sqrt(cat[\"UERR\"]**2 + cat[\"YERR\"]**2)\n",
"\n",
"cat[\"GR\"] = cat[\"G\"]-cat[\"R\"]\n",
"cat[\"GRERR\"] = np.sqrt(cat[\"GERR\"]**2 + cat[\"RERR\"]**2)\n",
"cat[\"GI\"] = cat[\"G\"]-cat[\"I\"]\n",
"cat[\"GIERR\"] = np.sqrt(cat[\"GERR\"]**2 + cat[\"IERR\"]**2)\n",
"cat[\"GZ\"] = cat[\"G\"]-cat[\"Z\"]\n",
"cat[\"GZERR\"] = np.sqrt(cat[\"GERR\"]**2 + cat[\"ZERR\"]**2)\n",
"cat[\"GY\"] = cat[\"G\"]-cat[\"Y\"]\n",
"cat[\"GYERR\"] = np.sqrt(cat[\"GERR\"]**2 + cat[\"YERR\"]**2)\n",
"\n",
"cat[\"RI\"] = cat[\"R\"]-cat[\"I\"]\n",
"cat[\"RIERR\"] = np.sqrt(cat[\"RERR\"]**2 + cat[\"IERR\"]**2)\n",
"cat[\"RZ\"] = cat[\"R\"]-cat[\"Z\"]\n",
"cat[\"RZERR\"] = np.sqrt(cat[\"RERR\"]**2 + cat[\"ZERR\"]**2)\n",
"cat[\"RY\"] = cat[\"R\"]-cat[\"Y\"]\n",
"cat[\"RYERR\"] = np.sqrt(cat[\"RERR\"]**2 + cat[\"YERR\"]**2)\n",
"\n",
"cat[\"IZ\"] = cat[\"I\"]-cat[\"Z\"]\n",
"cat[\"IZERR\"] = np.sqrt(cat[\"IERR\"]**2 + cat[\"ZERR\"]**2)\n",
"cat[\"IY\"] = cat[\"I\"]-cat[\"Y\"]\n",
"cat[\"IYERR\"] = np.sqrt(cat[\"IERR\"]**2 + cat[\"YERR\"]**2)\n",
"\n",
"cat[\"ZY\"] = cat[\"Z\"]-cat[\"Y\"]\n",
"cat[\"ZYERR\"] = np.sqrt(cat[\"ZERR\"]**2 + cat[\"YERR\"]**2)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# normalise the conditional density estimates across the redshift grid\n",
"z_grid = data[\"z_grid\"]\n",
"\n",
"cde = data[\"cde_test\"] # conditional density estimate\n",
"norm = np.trapz(cde, z_grid) # normalize across the redshift grid\n",
"norm[norm==0] = 1\n",
"cde = cde/norm[:,None]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# define the number of galaxies to train the method and split the sample into training and testing set\n",
"SEED = 299792458\n",
"\n",
"num_calib = 800\n",
"n_gal = len(cat)\n",
"num_test = n_gal - num_calib\n",
"\n",
"rng = np.random.default_rng(SEED)\n",
"indices = rng.permutation(n_gal) # creating index permutation for splitting in train and test\n",
"\n",
"cde_calib = cde[indices[:num_calib]] # splitting cde in training set\n",
"cde_test = cde[indices[num_calib:]] # and test set\n",
"\n",
"z_calib = cat[\"SPECZ\"][indices[:num_calib]].values\n",
"z_test = cat[\"SPECZ\"][indices[num_calib:]].values\n",
"\n",
"cat_calib = cat.iloc[indices[:num_calib]]\n",
"cat_test = cat.iloc[indices[num_calib:]]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# define a list of features for the method\n",
"features = [\"I\", \"UG\", \"GR\", \"RI\", \"IZ\", \"ZY\", \"IZERR\", \"RIERR\", \"GRERR\", \"UGERR\", \"IERR\", \"ZYERR\"]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# store the conditional density estimates for the training and test set into qp ensembles\n",
"qp_ens_cde_calib = qp.Ensemble(qp.interp, data=dict(xvals=z_grid, yvals=cde_calib))\n",
"qp_ens_cde_test = qp.Ensemble(qp.interp, data=dict(xvals=z_grid, yvals=cde_test))"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"Initialisation of the ConditionPIT class"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"cond_pit = ConditionPIT(cde_calib, cde_test, z_grid, z_calib, z_test, cat_calib[features].values,\n",
" cat_test[features].values, qp_ens_cde_calib)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# train the method using the provided data\n",
"cond_pit.train(patience=10, n_epochs=10, lr=0.001, weight_decay=0.01, batch_size=100, frac_mlp_train=0.9,\n",
" lr_decay=0.95, oversample=50, n_alpha=201, checkpt_path=\"checkpoint_GPZ_wide_CDE_test.pt\",\n",
" hidden_layers=[2, 2, 2])"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# compute the local pit\n",
"pit_local, pit_local_fit = cond_pit.evaluate(model_checkpt_path='checkpoint_GPZ_wide_CDE_test.pt',\n",
" model_hidden_layers=[2, 2, 2], nn_type='monotonic',\n",
" batch_size=100, num_basis=40, num_cores=1)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# plot the local P-P plot diagnostics\n",
"cond_pit.diagnostics(pit_local, pit_local_fit)"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,4 @@ show_error_codes = true
strict_equality = true
warn_redundant_casts = true
warn_unreachable = true
warn_unused_ignores = true
warn_unused_ignores = true
64 changes: 64 additions & 0 deletions src/rail/evaluation/metrics/condition_pit_utils/MonotonicNN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright (C) 2021,2022 Bitrateep Dey, University of Pittsburgh, USA

import torch
import torch.nn as nn
from .NeuralIntegral import NeuralIntegral
from .ParallelNeuralIntegral import ParallelNeuralIntegral


def _flatten(sequence):
flat = [p.contiguous().view(-1) for p in sequence]
return torch.cat(flat) if len(flat) > 0 else torch.tensor([])

def clipped_relu(x, device):
return torch.minimum(torch.maximum(torch.Tensor([0]).to(device),x), torch.Tensor([1]).to(device))

class IntegrandNN(nn.Module):
def __init__(self, in_d, hidden_layers):
super(IntegrandNN, self).__init__()
self.net = []
hs = [in_d] + hidden_layers + [1]
for h0, h1 in zip(hs, hs[1:]):
self.net.extend([
nn.Linear(h0, h1),
nn.ReLU(),
])
self.net.pop() # pop the last ReLU for the output layer
self.net.append(nn.ELU())
self.net = nn.Sequential(*self.net)

def forward(self, x, h):
return self.net(torch.cat((x, h), 1)) + 1.

class MonotonicNN(nn.Module):
def __init__(self, in_d, hidden_layers, nb_steps=50, sigmoid=False, dev="cpu"):
super(MonotonicNN, self).__init__()
self.integrand = IntegrandNN(in_d, hidden_layers)
self.net = []
hs = [in_d-1] + hidden_layers + [2]
for h0, h1 in zip(hs, hs[1:]):
self.net.extend([
nn.Linear(h0, h1),
nn.ReLU(),
])
self.net.pop() # pop the last ReLU for the output layer
# It will output the scaling and offset factors.
self.net = nn.Sequential(*self.net)
self.device = dev
self.nb_steps = nb_steps
self.sigmoid = sigmoid

'''
The forward procedure takes as input x which is the variable for which the integration must be made, h is just other conditionning variables.
'''
def forward(self, x_input):
x = x_input[:, 0][:, None]
h = x_input[:, 1:]
x0 = torch.zeros(x.shape).to(self.device)
out = self.net(h)
offset = out[:, [0]]
scaling = torch.exp(out[:, [1]])
if self.sigmoid:
return torch.sigmoid(scaling*ParallelNeuralIntegral.apply(x0, x, self.integrand, _flatten(self.integrand.parameters()), h, self.nb_steps) + offset)
else:
return scaling*ParallelNeuralIntegral.apply(x0, x, self.integrand, _flatten(self.integrand.parameters()), h, self.nb_steps) + offset
Loading