From d4c20ada8ab88d4d49ed6fc126abda8f69fd364a Mon Sep 17 00:00:00 2001
From: Dar Dahlen
Date: Tue, 14 May 2024 11:24:05 -0700
Subject: [PATCH] Remove NEO population model (#18)

* remove neo construction

* black update
---
 CHANGELOG.md                              |  14 +
 .../definitions.py => population.py}      | 114 +---
 src/neospy/population/__init__.py         |   8 -
 src/neospy/population/neas.py             | 451 ------
 src/neospy/population/utils.py            | 280 -----
 src/neospy/{population => }/power_law.py  | 223 ++++-----
 6 files changed, 129 insertions(+), 961 deletions(-)
 rename src/neospy/{population/definitions.py => population.py} (69%)
 delete mode 100644 src/neospy/population/__init__.py
 delete mode 100644 src/neospy/population/neas.py
 delete mode 100644 src/neospy/population/utils.py
 rename src/neospy/{population => }/power_law.py (70%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f64de7..c85e10c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [Unreleased]
+
+### Changed
+
+- Moved the population definitions up to the base level of the package and renamed the module to `population`.
+- Generalized the broken power law sampler to operate on arbitrary values rather than diameters.
+
+### Removed
+
+- Removed construction of populations entirely.
+- Removed the `population` folder; the remaining contents were moved up to the base
+  level of the package.
+
 ## [0.2.1] - 2024 - 5 - 13
 
 ### Added
diff --git a/src/neospy/population/definitions.py b/src/neospy/population.py
similarity index 69%
rename from src/neospy/population/definitions.py
rename to src/neospy/population.py
index 81f7da7..26d8df3 100644
--- a/src/neospy/population/definitions.py
+++ b/src/neospy/population.py
@@ -4,10 +4,9 @@
 Group definitions are computed strictly from perihelion distance and eccentricity.
 """
-from __future__ import annotations
 import matplotlib.pyplot as plt  # type: ignore
 import numpy as np
-from ..conversion import compute_aphelion, compute_semi_major
+from .conversion import compute_aphelion, compute_semi_major
 from numpy.typing import NDArray
 
 MAX_ECCENTRICITY = 0.995
@@ -262,63 +261,6 @@ def which_group(
     return groups
 
 
-def neo_amor_complete(peri_dist, ecc, h_mag):
-    """
-    Observationally complete set of Amors.
-
-    Returns `True` if the object is in the Amors and is a part of what is expected to
-    be observationally complete.
-
-    Parameters
-    ----------
-    peri_dist:
-        Perihelion distance in units of AU.
-    ecc:
-        Eccentricity of the orbit.
-    h_mag:
-        The H magnitude of the object.
-    """
-    return neo_amor(peri_dist, ecc) & (h_mag < 20.0)
-
-
-def neo_apollo_complete(peri_dist, ecc, h_mag):
-    """
-    Observationally complete set of Apollos.
-
-    Returns `True` if the object is in the Apollos and is a part of what is expected to
-    be observationally complete.
-
-    Parameters
-    ----------
-    peri_dist:
-        Perihelion distance in units of AU.
-    ecc:
-        Eccentricity of the orbit.
-    h_mag:
-        The H magnitude of the object.
-    """
-    return neo_apollo(peri_dist, ecc) & (h_mag < 20.0)
-
-
-def neo_aten_complete(peri_dist, ecc, h_mag):
-    """
-    Observationally complete set of Atens.
-
-    Returns `True` if the object is in the Atens and is a part of what is expected to
-    be observationally complete.
-
-    Parameters
-    ----------
-    peri_dist:
-        Perihelion distance in units of AU.
-    ecc:
-        Eccentricity of the orbit.
-    h_mag:
-        The H magnitude of the object.
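-
-    A minimal usage sketch (the values are hypothetical):
-
-    .. testcode::
-        :skipif: True
-
-        import numpy as np
-
-        peri_dist = np.array([0.8, 1.1])
-        ecc = np.array([0.3, 0.2])
-        h_mag = np.array([19.0, 21.5])
-        # Boolean mask of objects which are Atens brighter than H of 20.
-        mask = neo_aten_complete(peri_dist, ecc, h_mag)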
- """ - return neo_aten(peri_dist, ecc) & (h_mag < 20.0) - - def mba(peri_dist, ecc, *_): """ Orbital element filter to select all main belt objects. @@ -360,60 +302,6 @@ def neo(peri_dist, ecc, *_): ) -def neo_complete(peri_dist, ecc, h_mag): - """ - Orbital element filter to select all NEO objects. - - Returns `True` if the object is an NEO and is a part of what is expected to be - observationally complete. - - Parameters - ---------- - peri_dist: - Perihelion distance in units of AU. - ecc: - Eccentricity of the orbit. - h_mag: - The H magnitude of the object. - """ - return ( - neo_atira(peri_dist, ecc) - | neo_aten(peri_dist, ecc) - | neo_apollo(peri_dist, ecc) - | neo_amor(peri_dist, ecc) - ) & (h_mag < 20.0) - - -def nearly_neo_complete(peri_dist, ecc, h_mag): - """ - Orbital element filter to select all objects which are either NEO or nearly NEO. - - Returns `True` if the object is ~NEO and is a part of what is expected to be - observationally complete. - - This function exists to solve a KDE orbit sampling problem: - A histogram of the perihelion distance of NEOs shows an increase in object count up - to the NEO/Mar-crosser boundary, leading to a hard cut at around perihelion of 1.3 - The KDE estimator has a commonly known issue with dataset with hard cuts like this, - and has a tendency to "round" the corners of the histogram near this cutoff. To - solve this issue, we sample from a collection of data beyond the limit of our group - of interest, and select only the objects which match the properties we want. What - this in effect does is over-sample, then subselect, this moves the "rounding" issue - to an area where we will always reject the data, thus resulting in clean cuts where - we expect. - - Parameters - ---------- - peri_dist: - Perihelion distance in units of AU. - ecc: - Eccentricity of the orbit. - h_mag: - The H magnitude of the object. - """ - return (peri_dist <= 1.4) & (ecc < MAX_ECCENTRICITY) & (ecc >= 0.0) & (h_mag < 20.0) - - def plot_groups(): """ Plot orbital element groups defined above. diff --git a/src/neospy/population/__init__.py b/src/neospy/population/__init__.py deleted file mode 100644 index af295c9..0000000 --- a/src/neospy/population/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from . import utils, definitions, neas, power_law - -__all__ = [ - "utils", - "definitions", - "neas", - "power_law", -] diff --git a/src/neospy/population/neas.py b/src/neospy/population/neas.py deleted file mode 100644 index 67e3d58..0000000 --- a/src/neospy/population/neas.py +++ /dev/null @@ -1,451 +0,0 @@ -from __future__ import annotations -import numpy as np -import pandas as pd # type: ignore -import logging - -from . import utils -from .definitions import ( - neo_amor_complete, - neo_apollo_complete, - neo_aten_complete, - neo_complete, - nearly_neo_complete, - neo_atira, -) -from .power_law import CumulativePowerLaw -from ..conversion import compute_H -from ..time import Time -from ..vector import CometElements, State -from ..mpc import num_to_base62 -from ..flux import NeatmParams - - -logger = logging.getLogger(__name__) - - -neo_sdt_diameters = CumulativePowerLaw( - slopes=[3.2, 1.6217413993016, 2.75], - cutoffs=[0.07, 1.5], - num_1km_obj=993, - max_possible=50, -) -""" -Diameter fit from the 2017 SDT Report -The middle slope in the paper is 1.6, here it is set to 1.64 to make the total -number of objects larger than 140m be 25k objects. 
-This is within the error bars of the paper.
-"""
-
-
-class NEOGroup:
-    """
-    Parameters
-    ----------
-    name :
-        The name of the group.
-    group_id:
-        An id string to be inserted into the object name.
-    frac_dark : float
-        The fraction of objects which come from the darker of the two Rayleigh
-        distributions of visible albedo.
-    filter_func:
-        The orbital filter function for this group as defined in
-        `neospy.population.definitions`.
-    pop_fraction : float
-        The relative fraction of the total NEO population which is selected by the
-        specified filter function; for example, the Apollos are about `0.551`.
-
-    Population fractions are from Granvik 2018
-    Debiased orbit and absolute-magnitude distributions for near-Earth objects
-    https://www.boulder.swri.edu/~bottke/Reprints/Granvik_2018_Icarus_312_181_Debiased_Orbit_Mag_NEO.pdf
-
-    That paper's relative fractions don't add up to 100%, so these values have been
-    normalized to sum to 100%, and the Vatira count was added to the Atiras.
-    """
-
-    def __init__(self, name, group_id, frac_dark, filter_func, pop_fraction):
-        self.name = name
-        self.group_id = group_id
-        self.frac_dark = frac_dark
-        self.filter_func = filter_func
-        self.pop_fraction = pop_fraction
-
-    def sample_orbits(self, n_samples, seed=42):
-        orbits = utils.sample_orbits(
-            nearly_neo_complete, n_samples * 10, seed=seed, bandwidth=0.2
-        )
-        keep = self.filter_func(orbits.peri_dist, orbits.ecc, np.zeros(len(orbits)))
-        orbits = orbits[keep][:n_samples]
-
-        # The specified filter may select too small a percentage of the full dataset
-        # for sample-then-select to work. In that case we sample directly from this
-        # group instead. This is primarily for the Atiras, since they are such a tiny
-        # fraction.
-        if len(orbits) < n_samples:
-            orbits = utils.sample_orbits(
-                self.filter_func, n_samples, seed=seed, bandwidth=0.2
-            )
-        return orbits
-
-    def sample_beaming(self, n_samples, seed=42):
-        return utils.sample_beaming(
-            "neowise_neos.xml",
-            neo_complete,
-            n_samples,
-            seed=seed + 2,
-            bandwidth=0.1,
-        )
-
-    def sample_vis_albedo(self, n_samples, seed=42):
-        """
-        Sample visible albedos using the double Rayleigh distribution described in the
-        2016 Wright paper: https://arxiv.org/pdf/1606.07421.pdf
-
-        The double Rayleigh distribution is fully described by 3 values: the sigma
-        value for each of the two distributions, and the relative fraction of one
-        distribution vs the other.
-
-        The sigmas are hard coded from the paper above, but it's expected that each NEO
-        group will have a different relative share of "dark" vs "light" albedos.
-
-        Parameters
-        ----------
-        n_samples : int
-            The number of visible and IR albedos to compute.
-        seed : int
-            The random seed used for all generation of random numbers. This can be used
-            to accurately reconstruct the same objects.
-        """
-        dark_sigma = 0.030
-        bright_sigma = 0.168
-        rng = np.random.default_rng(seed)
-
-        # This breaks the sampling problem into 2 bins, depending on whether the object
-        # came from the bright or the dark distribution.
-        # Each Rayleigh distribution is invertible on its own, but together they don't
-        # have a nice analytical form. Splitting it up makes it trivial to sample.
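-        # For reference: a Rayleigh distribution with scale sigma has CDF
-        # F(x) = 1 - exp(-x^2 / (2 * sigma^2)), so inverting F at a uniform
-        # draw u gives x = sigma * sqrt(-2 * ln(1 - u)). That is exactly the
-        # expression used for `sample_a` and `sample_b` below.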
-        in_pop_a = rng.random(n_samples) <= self.frac_dark
-
-        # Using CDF inverse transformation sampling on each population
-        sample_a = dark_sigma * np.sqrt(-2 * np.log(1 - rng.random(n_samples)))
-        sample_b = bright_sigma * np.sqrt(-2 * np.log(1 - rng.random(n_samples)))
-
-        # Select the samples based on whether they came from the first or the second
-        # distribution
-        sample = sample_a.copy()
-        sample[~in_pop_a] = sample_b[~in_pop_a]
-
-        return sample
-
-    def sample_albedos(self, n_samples, bandwidth=0.1, seed=42):
-        """
-        Sample NEO albedos using existing data; returns a pandas dataframe with 2
-        columns.
-
-        The visible albedos are sampled using the `sample_vis_albedo` method, which is
-        controlled by a single parameter, `frac_dark`. This is the relative fraction
-        of objects which come from the darker of the two Rayleigh distributions;
-        other than that, the visible albedos are fully defined.
-
-        The `ir_albedo` is created by taking the full dataset of all observationally
-        complete NEOs and computing the `IR_albedo / V_albedo` distribution, then
-        sampling from that. These values are multiplied by the `V_albedo` distribution
-        from the analytically defined `sample_vis_albedo` to estimate an `IR_albedo`.
-
-        Then there is a rejection process where the albedos are required to be within
-        physically reasonable bounds, see the code for details.
-
-        Parameters
-        ----------
-        n_samples : int
-            The number of visible and IR albedos to compute.
-        bandwidth : float
-            The bandwidth of the Gaussian KDE used to sample the IR/Vis albedo
-            ratio.
-        seed : int
-            The random seed used for all generation of random numbers. This can be used
-            to accurately reconstruct the same objects.
-        """
-        albedo_ratio_data = utils.pds_data_filtered(
-            "neowise_neos.xml", neo_complete, fit_code="VBI"
-        )
-
-        for scale in [1.5, 3, 10]:
-            extra_samples = int(np.ceil(n_samples * scale))
-            v_albedo = self.sample_vis_albedo(extra_samples, seed=seed)
-            ir_ratios = utils.sample_values(
-                np.transpose(albedo_ratio_data[["V_albedo", "IR_albedo"]]),
-                extra_samples,
-                bandwidth=bandwidth,
-                seed=seed + 1,
-            )
-            ir_ratios = np.array(ir_ratios[1] / ir_ratios[0]).ravel()
-            ir_albedo = v_albedo * ir_ratios
-
-            keep = (v_albedo > 0.015) & (v_albedo < 0.7)
-            keep = keep & (ir_albedo > 0.015) & (ir_albedo < 0.85)
-            v_albedo = v_albedo[keep][:n_samples]
-            ir_albedo = ir_albedo[keep][:n_samples]
-            if len(v_albedo) == n_samples:
-                return pd.DataFrame(
-                    np.transpose([v_albedo, ir_albedo]),
-                    columns=["V_albedo", "IR_albedo"],
-                )
-        raise ValueError(
-            "Filter was too aggressive with provided data, failed to find valid sample."
-        )
-
-    def sample_diameters(self, min_size, max_size=np.inf, n_objects=None, seed=42):
-        """
-        Generate a sorted fair sample of diameters for this group.
-
-        Parameters
-        ----------
-        min_size : float
-            The minimum diameter of objects to make, in units of km.
-        max_size : float
-            The maximum diameter of objects to make, in units of km.
-        n_objects:
-            If this is None, then the expected number of objects is made for the
-            specified size range; if this is set to a value, then exactly that number
-            of objects is made.
-        seed : int
-            The random seed used for all generation of random numbers. This can be used
-            to accurately reconstruct the same objects.
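-
-        A minimal usage sketch (the size range is hypothetical, in km):
-
-        .. testcode::
-            :skipif: True
-
-            # Fair sample of Amor diameters between 140 m and 1 km, scaled by
-            # the group's population fraction.
-            diams = Amor.sample_diameters(0.14, max_size=1.0, seed=42)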
- """ - scale = self.pop_fraction if n_objects is None else None - return neo_sdt_diameters.sample( - min_size, max_size=max_size, scale=scale, seed=seed, n_objects=n_objects - ) - - def sample_objects( - self, - min_size, - max_size=np.inf, - n_objects=None, - batch_size=100_000, - seed=42, - id_start=0, - epoch=None, - ): - """ - Construct a generator of an NEO population down to the specified diameter size. - - To reduce memory pressure, this is structured as a generator function, and all - objects can be collected using something like: - - .. testcode:: - :skipif: True - - for obj_batch in neo_sample_objects(filt, min_size, 0.254): - # Do something with the batch of object properties - - Parameters - ---------- - min_size : float - The minimum diameter of objects to make, this is in units of km. - max_size : float - The maximum diameter of objects to make, units of km. - n_objects: - If this is None, then the expected number of objects are made for the - specified size range, if this is set to a value, then exactly that number - of objects are made. - batch_size : int - The number of objects inside of each generated pandas dataframe. - seed : int - The random seed used for all generation of random numbers. This can be used - to accurately reconstruct the same objects. - id_start : int - Each object has a object id assigned to it, this is the offset value to add - to all objects created in this call. - """ - - if epoch is None: - epoch = Time.from_current_time().jd - prefix = "S" + num_to_base62(seed, 2) + self.group_id - - diams = self.sample_diameters( - min_size, seed=seed, n_objects=n_objects, max_size=max_size - ) - logger.info("Generated % diameters", len(diams)) - - n_batches = int(np.ceil(len(diams) / batch_size)) - - for i in range(n_batches): - batch = diams[i * batch_size : (i + 1) * batch_size] - batch_len = len(batch) - ids = np.arange(i * batch_size, (i + 1) * batch_size)[:batch_len] + id_start - - albedos = self.sample_albedos(batch_len, seed=seed + 1) - beaming = self.sample_beaming(batch_len, seed=seed + 2) - orbits = self.sample_orbits(batch_len, seed=seed + 3) - - objects = pd.DataFrame( - np.transpose( - [ - beaming.Beaming_param, - albedos.V_albedo, - albedos.IR_albedo, - batch, - orbits.peri_dist, - orbits.ecc, - orbits.incl, - orbits.lon_node, - orbits.peri_arg, - orbits.peri_time, - orbits.epoch, - ] - ), - columns=[ - "beaming", - "vis_albedo", - "ir_albedo", - "diameter", - "peri_dist", - "ecc", - "incl", - "lon_node", - "peri_arg", - "peri_time", - "epoch", - ], - ) - - objects["h_mag"] = compute_H( - albedo=objects.vis_albedo, diameter=objects.diameter - ) - objects["g_phase"] = 0.15 - objects["emissivity"] = 0.9 - objects["desig"] = [prefix + num_to_base62(n, 6) for n in ids] - objects["efrho"] = 0.0 - - yield objects - - -Amor = NEOGroup( - name="Amor", - group_id="03", - frac_dark=0.358, - filter_func=neo_amor_complete, - pop_fraction=0.400, -) -"""Sampler for the Amor Group""" - -Aten = NEOGroup( - name="Aten", - group_id="01", - frac_dark=0.104, - filter_func=neo_aten_complete, - pop_fraction=0.035, -) -"""Sampler for the Aten Group""" - -Apollo = NEOGroup( - name="Apollo", - group_id="02", - frac_dark=0.308, - filter_func=neo_apollo_complete, - pop_fraction=0.551, -) -"""Sampler for the Apollo Group""" - -Atira = NEOGroup( - name="Atira", - group_id="00", - frac_dark=0.104, - filter_func=neo_atira, - pop_fraction=0.014, -) -"""Sampler for the Atria Group""" - - -def create_neo_population( - filename: str, - min_size: float = 0.14, - max_size: float = 
-    seed: int = 42,
-    epoch=None,
-):
-    """
-    Create a new NEO population containing objects down to the specified minimum
-    diameter in km.
-
-    .. testcode::
-        :skipif: True
-
-        create_neo_population("neos")
-
-    Parameters
-    ----------
-    filename :
-        The filename for the new database.
-    min_size :
-        The minimum diameter of objects to make, in units of km.
-    max_size :
-        The maximum diameter of objects to make, in units of km.
-    seed :
-        The random seed used for all generation of random numbers. The same seed and
-        min diameter will result in the same database being built.
-    epoch : None or float
-        The epoch of observation for the database. If no epoch is specified, the
-        database will be built using the current time.
-    """
-    logger.info("Beginning construction of database...")
-
-    if epoch is None:
-        epoch = Time.from_current_time().jd
-
-    id_start = 0
-
-    names = []
-    pos = []
-    vel = []
-    properties: list = []
-    for group in [Atira, Aten, Apollo, Amor]:
-        logger.info("Working on %s", group.name)
-        for vals in group.sample_objects(
-            min_size=min_size,
-            max_size=max_size,
-            batch_size=1_000_000,
-            seed=seed,
-            id_start=id_start,
-        ):
-            vals = vals.copy()
-            logger.info(
-                "%d %s %0.1f+ m diameter",
-                len(vals),
-                group.name,
-                min(vals.diameter * 1000),
-            )
-            state = CometElements(
-                epoch,
-                vals.desig,
-                vals.ecc,
-                vals.incl,
-                vals.peri_dist,
-                vals.peri_arg,
-                vals.peri_time,
-                vals.lon_node,
-            ).as_state
-            names.extend(vals.desig)
-            pos.append(state.pos)
-            vel.append(state.vel)
-            properties.extend(
-                NeatmParams.new_neos(
-                    o.desig,
-                    [o.ir_albedo, o.ir_albedo],
-                    o.h_mag,
-                    o.diameter,
-                    o.v_albedo,
-                    o.beaming,
-                    o.g_phase,
-                )
-                for o in vals.itertuples()
-            )
-
-            id_start += len(vals)
-    pos_arr = np.vstack(pos)
-    vel_arr = np.vstack(vel)
-
-    state = State(epoch, names, pos_arr, vel_arr, [None for _ in names])
-    state.save(f"{filename}_state.bin")
-    NeatmParams.save_list(properties, f"{filename}_props.bin")
diff --git a/src/neospy/population/utils.py b/src/neospy/population/utils.py
deleted file mode 100644
index efce7b8..0000000
--- a/src/neospy/population/utils.py
+++ /dev/null
@@ -1,280 +0,0 @@
-from __future__ import annotations
-from ..mpc import mpc_known_orbit_filtered
-from ..pds import pds_data_filtered
-from ..vector import Vector
-from ..flux import neatm
-import numpy as np
-from scipy.stats import gaussian_kde  # type: ignore
-import pandas as pd  # type: ignore
-from numpy.typing import NDArray
-from typing import Optional
-
-
-def sample_values(
-    data: NDArray[np.floating],
-    n_samples: int,
-    bandwidth: Optional[float] = None,
-    seed: int = 42,
-) -> np.ndarray:
-    """
-    Create new samples which approximate the distribution of the provided data using
-    Gaussian kernel density estimation (KDE).
-
-    Data is normalized against its standard deviations, making the bandwidth parameter
-    meaningful across large scale differences in inputs.
-
-    Parameters
-    ----------
-    data:
-        An array of values which can be cast to a numpy array from which to generate
-        samples.
-    n_samples:
-        How many samples to generate from the provided dataset.
-    bandwidth:
-        The bandwidth of the Gaussian KDEs in fractions of standard deviation of the
-        data.
-    seed:
-        A random seed to use to generate the values; the same seed will result in the
-        same draws.
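-
-    A minimal usage sketch (toy data):
-
-    .. testcode::
-        :skipif: True
-
-        import numpy as np
-
-        # Two rows of toy data, 1000 points each.
-        data = np.random.default_rng(0).normal(size=(2, 1000))
-        # Returns an array of shape (2, 500) approximating the input density.
-        new = sample_values(data, 500, bandwidth=0.1, seed=42)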
- """ - std = np.array(np.std(data, axis=1))[:, np.newaxis] - data /= std - kde = gaussian_kde(data, bw_method=bandwidth) - return kde.resample(n_samples, seed=seed) * std - - -def sample_orbits(filt, n_samples, bandwidth=0.05, seed=42): - """ - Sample the MPC orbital database using the specified filter function and generate - new orbital elements which also obey the filter. - - .. testcode:: - :skipif: True - - mpc_filter = neospy.population.definitions.mba_inner - new = neospy.population.utils.sample_orbits(mpc_filter, len(mpc_data)) - - Parameters - ---------- - filter: - Filter function which defines which group to select. The filter function must - accept 3 parameters, `peri_dist, eccentricity, h_mag`, and return a bool. - See `neospy.population.definitions` for a collection of filter functions which - are used to generation model populations. - n_samples: - How many samples to generate from the provided dataset. - bandwidth: - The bandwidth of the gaussian KDEs in fractions of standard deviation of the - orbital parameters. - seed: - A random seed to use to generate the values, the same seed will result in the - same draws. - """ - columns = ["incl", "peri_arg", "peri_dist", "peri_time", "ecc", "lon_node"] - mpc_orbits = mpc_known_orbit_filtered(filt) - epoch = mpc_orbits.epoch.median() - for scale in [1.25, 3, 10]: - extra_samples = int(np.ceil(n_samples * scale)) - data = pd.DataFrame( - sample_values( - np.transpose(mpc_orbits[columns]), - extra_samples, - bandwidth, - seed=seed, - ).T, - columns=columns, - ) - h_mag = np.zeros(len(data)) - filt_bool = filt(data.peri_dist, data.ecc, h_mag) - filt_bool = filt_bool & (data.incl >= 0.0) - data["epoch"] = epoch - if len(data[filt_bool]) >= n_samples: - data["peri_arg"] %= 360 - data["lon_node"] %= 360 - return data[filt_bool][:n_samples] - raise ValueError( - "Filter was too aggressive with provided data, failed to find valid sample." - ) - - -def sample_beaming(collection, orbit_filter, n_samples, bandwidth=0.05, seed=42): - """ - Create a Beaming Parameter sample. - - This samples the PDS dataset with the specified orbital elements filter, returning - new beaming parameters. This assumes that beaming is uncorrelated with other values - within the specified orbit filter. - - Parameters - ---------- - collection: - A PDS data filename, such as ``"neowise_neos.xml"`` - orbit_filter: - Filter function which defines which group to select. The filter function must - accept 3 parameters, `peri_dist, eccentricity, h_mag`, and return a bool. - See `neospy.population.definitions` for a collection of filter functions which - are used to generation model populations. - n_samples: - How many samples to generate from the provided dataset. - bandwidth: - The bandwidth of the gaussian KDEs in fractions of standard deviation of the - orbital parameters. - seed: - A random seed to use to generate the values, the same seed will result in the - same draws. - """ - beaming_data = pds_data_filtered(collection, orbit_filter, fit_code="B") - for scale in [1.5, 3, 10]: - extra_samples = int(np.ceil(n_samples * scale)) - - beaming = sample_values( - np.array(beaming_data["Beaming_param"])[np.newaxis, :], - extra_samples, - bandwidth=bandwidth, - seed=seed, - ).ravel() - keep = (0 < beaming) & (beaming < np.pi) - beaming = beaming[keep][:n_samples] - if len(beaming) == n_samples: - return pd.DataFrame(np.transpose([beaming]), columns=["Beaming_param"]) - raise ValueError( - "Filter was too aggressive with provided data, failed to find valid sample." 
-    )
-
-
-def sample_albedos(collection, orbit_filter, n_samples, bandwidth=0.05, seed=42):
-    """
-    Create a V and IR albedo sample using the PDS dataset.
-
-    This samples the PDS dataset with the specified orbital elements filter, returning
-    new albedo parameters. This assumes that albedo is uncorrelated with other values
-    within the specified orbit filter.
-
-    This assumes that the objects with `V` fits in the PDS data are I.I.D. (in other
-    words, a representative sample of the group specified by the filter).
-
-    Objects which have an `I` fit do not appear to be an I.I.D. sample, but those fits
-    are used to estimate the ratio of V_albedo / IR_albedo, and the V_albedo values
-    calculated from the strictly `V` fits are then used to create an IR albedo.
-
-    Parameters
-    ----------
-    collection:
-        A PDS data filename, such as ``"neowise_neos.xml"``
-    orbit_filter:
-        Filter function which defines which group to select. The filter function must
-        accept 3 parameters, `peri_dist, eccentricity, h_mag`, and return a bool.
-        See `neospy.population.definitions` for a collection of filter functions which
-        are used to generate model populations.
-    n_samples:
-        How many samples to generate from the provided dataset.
-    bandwidth:
-        The bandwidth of the Gaussian KDEs in fractions of standard deviation of the
-        orbital parameters.
-    seed:
-        A random seed to use to generate the values; the same seed will result in the
-        same draws.
-    """
-    albedo_ratio_data = pds_data_filtered(collection, orbit_filter, fit_code="VBI")
-    vis_albedo_data = pds_data_filtered(collection, orbit_filter, fit_code="V")
-    for scale in [1.5, 3, 10]:
-        extra_samples = int(np.ceil(n_samples * scale))
-        v_albedo = sample_values(
-            np.array(vis_albedo_data["V_albedo"])[np.newaxis, :],
-            extra_samples,
-            bandwidth=bandwidth,
-            seed=seed,
-        ).ravel()
-        ir_ratios = sample_values(
-            np.transpose(albedo_ratio_data[["V_albedo", "IR_albedo"]]),
-            extra_samples,
-            bandwidth=bandwidth,
-            seed=seed,
-        )
-        ir_ratios = np.array(ir_ratios[1] / ir_ratios[0]).ravel()
-        ir_albedo = v_albedo * ir_ratios
-
-        keep = (v_albedo > 0.015) & (v_albedo < 0.7)
-        keep = keep & (ir_albedo > 0.015) & (ir_albedo < 0.7)
-        v_albedo = v_albedo[keep][:n_samples]
-        ir_albedo = ir_albedo[keep][:n_samples]
-        if len(v_albedo) == n_samples:
-            return pd.DataFrame(
-                np.transpose([v_albedo, ir_albedo]), columns=["V_albedo", "IR_albedo"]
-            )
-    raise ValueError(
-        "Filter was too aggressive with provided data, failed to find valid sample."
-    )
-
-
-def visibility_test(
-    peri_dist: NDArray[np.floating],
-    vis_albedo: NDArray[np.floating],
-    beaming: NDArray[np.floating],
-    diameter: NDArray[np.floating],
-    wavelength: float = 8e-06,
-    solar_elong: float = 120,
-    min_flux: float = 50e-6,
-) -> np.ndarray:
-    """
-    Given lists of physical parameters, determine if the object would ever be visible
-    if it is located at the optimal geometry for observation.
-
-    This places the object at the specified solar elongation angle from the observer,
-    at the perihelion distance from the sun.
-
-    Parameters
-    ----------
-    peri_dist:
-        Perihelion distance of the object in AU.
-    vis_albedo:
-        The visible albedo of the object.
-    beaming:
-        The object's beaming parameter.
-    diameter:
-        The diameter in km.
-    wavelength:
-        The wavelength at which to calculate the flux from the object using NEATM.
-    solar_elong:
-        The solar elongation at which to place the object for the test.
-    min_flux:
-        The minimum flux used to determine if the object could possibly be visible.
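-
-    A minimal usage sketch (the physical parameters are hypothetical):
-
-    .. testcode::
-        :skipif: True
-
-        import numpy as np
-
-        # One object: q = 1.1 AU, visible albedo 0.15, beaming 1.4, 500 m.
-        vis = visibility_test(
-            peri_dist=np.array([1.1]),
-            vis_albedo=np.array([0.15]),
-            beaming=np.array([1.4]),
-            diameter=np.array([0.5]),
-        )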
- """ - - visible = [] - lon = ( - np.pi - - np.arcsin(np.sin(np.radians(solar_elong)) / peri_dist) - - np.radians(solar_elong) - ) - obj2sun = -Vector( - np.transpose( - [peri_dist * np.cos(lon), peri_dist * np.sin(lon), np.zeros_like(lon)] - ) - ) - - obj2sc = Vector( - np.transpose( - [1 - peri_dist * np.cos(lon), -peri_dist * np.sin(lon), np.zeros_like(lon)] - ) - ) - for o2s, o2o, alb, beam, r in zip( # type: ignore - obj2sun, - obj2sc, - vis_albedo, - beaming, - diameter, - ): - flux_ujy = neatm( - o2s, - o2o, - geom_albedo=alb, - G=0.15, - beaming=beam, - emissivity=0.9, - diameter=r, - wavelength=wavelength, - ) - visible.append(flux_ujy > min_flux) - - return np.array(visible) diff --git a/src/neospy/population/power_law.py b/src/neospy/power_law.py similarity index 70% rename from src/neospy/population/power_law.py rename to src/neospy/power_law.py index fed460e..a9b863a 100644 --- a/src/neospy/population/power_law.py +++ b/src/neospy/power_law.py @@ -1,63 +1,15 @@ +""" +Broken Power Law + +This may be used to draw samples fairly from a broken power law. +""" + from __future__ import annotations import numpy as np from functools import partial from numpy.typing import NDArray -def _smooth_power_law_segment(x, a, b, x_b, offset, delta): - """ - A broken power law curve evaluated at the position x. - - Parameters - ---------- - x: - Point on the power law curve to evaluate. - a: - Slope of the power law before the break point. - b: - Slope of the power law after the break point. - x_b: - The x position of the break point. - offset: - The value of the curve at the `x_b` point. - delta: - Rounding term which changes how curved the break point is. - """ - return ( - offset - * (x / x_b) ** (-a) - * (0.5 + 0.5 * (x / x_b) ** (1 / delta)) ** ((a - b) * delta) - ) - - -def _smooth_power_law_segment_der(x, a, b, x_b, offset, delta): - """ - The derivative of the power law segment defined above. - - Parameters - ---------- - x: - Point on the power law curve to evaluate. - a: - Slope of the power law before the break point. - b: - Slope of the power law after the break point. - x_b: - The x position of the break point. - offset: - The value of the curve at the `x_b` point. - delta: - Rounding term which changes how curved the break point is. - """ - x = x / x_b - return offset * ( - x ** (-a - 1) - * (-(2 ** (delta * (b - a)))) - * (x ** (1 / delta) + 1) ** (a * delta - b * delta - 1) - * (a + b * x ** (1 / delta)) - ) - - class CumulativePowerLaw: """ Representation of a cumulative power law made up of individual power law segments. @@ -70,23 +22,22 @@ class CumulativePowerLaw: slopes: The slopes of the different sections of the power law. cutoffs: - The break points between the different sections of the power laws. This is - diameters in km where the breakpoints happen. - num_1km_obj: - The number of objects larger than 1 km, this is the single offset required to + The break points between the different sections of the power laws. + num_larger_than_1: + The number of objects larger than 1, this is the single offset required to calculate the offsets of each of the individual power law segments. max_possible: - The maximum diameter of object possible, any diameter larger than this will - return a total of 0 objects, regardless of slopes. + The maximum value possible, any value larger than this will return a total of 0, + regardless of slopes. delta: Curvature parameter which adjusts how rounded the joints are between the individual power law sections. 
""" - def __init__(self, slopes, cutoffs, num_1km_obj, max_possible, delta=0.2): + def __init__(self, slopes, cutoffs, num_larger_than_1, max_possible, delta=0.2): self.slopes = slopes self.cutoffs = np.array(cutoffs, dtype=float) - self.num_1km_obj = num_1km_obj + self.num_larger_than_1 = num_larger_than_1 self.max_possible = max_possible self.delta = delta @@ -96,13 +47,13 @@ def __init__(self, slopes, cutoffs, num_1km_obj, max_possible, delta=0.2): ) self.slope_pairs = list(zip(slopes, slopes[1:])) - # Given the slopes, cutoffs, and number of 1km objects, calculate the offset - # values for each slope region which will make the regions meet correctly at - # the boundaries. + # Given the slopes, cutoffs, and number of values larger than 1, calculate the + # offset values for each slope region which will make the regions meet correctly + # at the boundaries. one_region = np.digitize(1, self.cutoff_mids) offsets = [ - num_1km_obj + num_larger_than_1 / _smooth_power_law_segment( 1.0, *self.slope_pairs[one_region], self.cutoffs[one_region], 1.0, delta ) @@ -170,104 +121,158 @@ def __init__(self, slopes, cutoffs, num_1km_obj, max_possible, delta=0.2): ) ) - def num_larger_than(self, diameter: NDArray[np.floating]): + def num_larger_than(self, value: NDArray[np.floating]): """ - Calculate the cumulative number of objects larger than the specified diameters. + Calculate the cumulative values of objects larger than the specified value. Parameters ---------- - diameter: - A list of diameters in km. + value: + A list of values. """ - diameter = np.array(diameter) + value = np.array(value) - # Calculate the value for each element in diameter - idx = np.digitize(diameter, self.cutoff_mids) - vals = np.array([self.partials[i](x) for i, x in zip(idx, diameter)]) - vals[diameter > self.max_possible] = 0 + # Calculate the value for each element + idx = np.digitize(value, self.cutoff_mids) + vals = np.array([self.partials[i](x) for i, x in zip(idx, value)]) + vals[value > self.max_possible] = 0 return vals - def diam_of_nth_largest(self, nth_largest: list) -> list: + def value_of_nth_largest(self, nth_largest: list) -> list: """ - Calculate the diameter of the n-th largest objects. + Calculate the value of the n-th largest. Parameters ---------- nth_largest: - A list of indices of the nth largest objects, this does not have to be an - int. + A list of indices of the nth largest, this does not have to be an int. 
""" nth_largest = np.array_split( np.array(nth_largest), int(np.ceil(len(nth_largest) / 1000)) ) cutoffs = np.array(list(self.cutoff_mids) + [np.inf]) - all_diams = [] + all_values = [] for batch in nth_largest: - batch_diams = [] + batch_vals = [] for p, p_d in zip(self.partials, self.partials_der): - diams = np.ones_like(batch) * 0.01 + value = np.ones_like(batch) * 0.01 for _ in range(10000): - diams[diams < 0] = 0.001 - step = (p(diams) - batch) / p_d(diams) + value[value < 0] = 0.001 + step = (p(value) - batch) / p_d(value) if max(abs(step)) < 1e-10: break - diams = diams - 0.1 * step - batch_diams.append(diams) - all_diams.append(batch_diams) - all_diams_arr = np.hstack(all_diams) + value = value - 0.1 * step + batch_vals.append(value) + all_values.append(batch_vals) + all_values_arr = np.hstack(all_values) return [ - all_diams_arr[d, idx] + all_values_arr[d, idx] for idx, d in enumerate( - np.argmax(all_diams_arr < cutoffs[:, np.newaxis], axis=0) + np.argmax(all_values_arr < cutoffs[:, np.newaxis], axis=0) ) ] - def sample(self, min_size, max_size=np.inf, scale=1.0, n_objects=None, seed=42): + def sample(self, min_value, max_value=np.inf, scale=1.0, n_samples=None, seed=42): """ - Sample the diameter distribution. + Sample the distribution. - This returns a sorted array of diameters which are constructed using Inverse + This returns a sorted array of values which are constructed using Inverse Transform Sampling from the fit. Parameters ---------- - min_size : float - The minimum object size in km diameter. - max_size : float - The maximum object size in km diameter. + min_value : float + The minimum value. + max_value : float + The maximum value. scale : float The relative fraction of the total population to sample from, for example, - if this value is set to 0.5, then the total count of each diameter size + if this value is set to 0.5, then the total count of each value range will be half of the expected true population count. This makes it so sub-populations may be sampled assuming they all are relative fractions of the same total distribution. - n_objects : int + n_samples : int If this is set to a value, then scale must be set to None. - This will sample exactly this number of diameters fairly, if this is set to - None, this will sample the expected number of objects between the min and + This will sample exactly this number of samples fairly, if this is set to + None, this will sample the expected number of values between the min and maximum specified, scaled by the `scale` key word. seed : int The random seed used for all generation of random numbers. This can be used to accurately reconstruct the same objects. 
""" rng = np.random.default_rng(seed) - if n_objects is not None and scale is not None: + if n_samples is not None and scale is not None: raise ValueError("Either scale may be set or n_objects, but not both.") - max_index, min_index = self.num_larger_than([min_size, max_size]) + max_index, min_index = self.num_larger_than([min_value, max_value]) - if n_objects is None: - n_objects = int((max_index - min_index) * scale) + if n_samples is None: + n_samples = int((max_index - min_index) * scale) - rand_idx = rng.random(n_objects) * (max_index - min_index) + min_index - diameters = self.diam_of_nth_largest(rand_idx) - return np.array(sorted(diameters, reverse=True)) + rand_idx = rng.random(n_samples) * (max_index - min_index) + min_index + values = self.value_of_nth_largest(rand_idx) + return np.array(sorted(values, reverse=True)) def __repr__(self): return ( type(self).__name__ + f"(slopes={list(self.slopes)},\n\tcutoffs={list(self.cutoffs)}," - f"\n\tnum_1km_obj={self.num_1km_obj},\n\tmax_possible={self.max_possible})" + f"\n\num_larger_than_1={self.num_larger_than_1}," + f"\n\tmax_possible={self.max_possible})" ) + + +def _smooth_power_law_segment(x, a, b, x_b, offset, delta): + """ + A broken power law curve evaluated at the position x. + + Parameters + ---------- + x: + Point on the power law curve to evaluate. + a: + Slope of the power law before the break point. + b: + Slope of the power law after the break point. + x_b: + The x position of the break point. + offset: + The value of the curve at the `x_b` point. + delta: + Rounding term which changes how curved the break point is. + """ + return ( + offset + * (x / x_b) ** (-a) + * (0.5 + 0.5 * (x / x_b) ** (1 / delta)) ** ((a - b) * delta) + ) + + +def _smooth_power_law_segment_der(x, a, b, x_b, offset, delta): + """ + The derivative of the power law segment defined above. + + Parameters + ---------- + x: + Point on the power law curve to evaluate. + a: + Slope of the power law before the break point. + b: + Slope of the power law after the break point. + x_b: + The x position of the break point. + offset: + The value of the curve at the `x_b` point. + delta: + Rounding term which changes how curved the break point is. + """ + x = x / x_b + return offset * ( + x ** (-a - 1) + * (-(2 ** (delta * (b - a)))) + * (x ** (1 / delta) + 1) ** (a * delta - b * delta - 1) + * (a + b * x ** (1 / delta)) + )