From eae45d800f50a3a55bf06963548d9d4acfd9d08d Mon Sep 17 00:00:00 2001 From: Dar Dahlen Date: Fri, 3 May 2024 16:13:05 -0700 Subject: [PATCH] Remove redundant functions and rename (#15) * clarify that unpacked designations are preferred * changelog --- CHANGELOG.md | 7 + src/neospy/mpc.py | 27 -- src/neospy/population/__init__.py | 5 +- src/neospy/population/definitions.py | 57 --- src/neospy/population/main_belt.py | 404 ------------------ src/neospy/population/neas.py | 2 +- .../population/{diameters.py => power_law.py} | 10 +- src/neospy/population/utils.py | 2 +- src/neospy/rust/horizons.rs | 2 +- 9 files changed, 17 insertions(+), 499 deletions(-) delete mode 100644 src/neospy/population/main_belt.py rename src/neospy/population/{diameters.py => power_law.py} (97%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 275a4d0..485cda6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,10 +14,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Change documentation to clarify that designations are unpacked. - Updated MPC Obs codes to include NEO Surveyor with code C58. - Updated NAIF ID list to include new designations for about 20-30 comets. - Renamed underlying `_rust` binary to `_core`. - Moved NAIF ID list to a dedicated CSV file which is read during compilation. +- Rename `population.diameter` to `population.power_law`. + +### Removed + +- Removed main belt construction tools, out of scope and not accurate enough. +- Removed redundant MPC name resolver function. ## [0.2.0] - 2024-3-16 diff --git a/src/neospy/mpc.py b/src/neospy/mpc.py index e4e05d9..59d43ef 100644 --- a/src/neospy/mpc.py +++ b/src/neospy/mpc.py @@ -468,33 +468,6 @@ def pack_designation(unpacked): return pack_provisional_designation(unpacked) -@lru_cache -def fetch_known_packed_principal_designations(force_download=False): - """ - Download the most recent copy of the MPCs orbit catalogue and create a dictionary - mapping all packed designations to the MPC's 'Principal Designation' for the object. - - This principal designation is the primary packed provisional designation that the - object has. Since all objects have one of these regardless of whether or not they - are numbered. - - """ - url = "https://minorplanetcenter.net/Extended_Files/mpcorb_extended.json.gz" - objs = cached_gzip_json_download(url, force_download) - - mapper = {} - - for o in objs: - desig = pack_provisional_designation(o["Principal_desig"]) - for other_desg in o.get("Other_desigs", []): - other_desg = pack_provisional_designation(other_desg) - mapper[other_desg] = desig - if "Number" in o: - num = pack_permanent_designation(int(o["Number"][1:-1])) - mapper[num] = desig - return mapper - - @lru_cache() def fetch_known_packed_designations(force_download=False): """ diff --git a/src/neospy/population/__init__.py b/src/neospy/population/__init__.py index bbf990e..af295c9 100644 --- a/src/neospy/population/__init__.py +++ b/src/neospy/population/__init__.py @@ -1,9 +1,8 @@ -from . import utils, main_belt, definitions, neas, diameters +from . import utils, definitions, neas, power_law __all__ = [ "utils", - "main_belt", "definitions", "neas", - "diameters", + "power_law", ] diff --git a/src/neospy/population/definitions.py b/src/neospy/population/definitions.py index 6541bb1..81f7da7 100644 --- a/src/neospy/population/definitions.py +++ b/src/neospy/population/definitions.py @@ -262,63 +262,6 @@ def which_group( return groups -def mba_inner_complete(peri_dist, ecc, h_mag): - """ - Observationally complete set of inner main belt objects. - - Returns `True` if the object is in the inner main belt and is a part of what is - expected to be observationally complete. - - Parameters - ---------- - peri_dist: - Perihelion distance in units of AU. - ecc: - Eccentricity of the orbit. - h_mag: - The H magnitude of the object. - """ - return mba_inner(peri_dist, ecc) & (h_mag < 17.75) - - -def mba_middle_complete(peri_dist, ecc, h_mag): - """ - Observationally complete set of middle main belt objects. - - Returns `True` if the object is in the middle main belt and is a part of what is - expected to be observationally complete. - - Parameters - ---------- - peri_dist: - Perihelion distance in units of AU. - ecc: - Eccentricity of the orbit. - h_mag: - The H magnitude of the object. - """ - return mba_middle(peri_dist, ecc) & (h_mag < 17.25) - - -def mba_outer_complete(peri_dist, ecc, h_mag): - """ - Observationally complete set of outer main belt objects. - - Returns `True` if the object is in the outer main belt and is a part of what is - expected to be observationally complete. - - Parameters - ---------- - peri_dist: - Perihelion distance in units of AU. - ecc: - Eccentricity of the orbit. - h_mag: - The H magnitude of the object. - """ - return mba_outer(peri_dist, ecc) & (h_mag < 16.75) - - def neo_amor_complete(peri_dist, ecc, h_mag): """ Observationally complete set of Amors. diff --git a/src/neospy/population/main_belt.py b/src/neospy/population/main_belt.py deleted file mode 100644 index 75ac633..0000000 --- a/src/neospy/population/main_belt.py +++ /dev/null @@ -1,404 +0,0 @@ -from __future__ import annotations -import numpy as np -import pandas as pd # type: ignore -import logging -from scipy.optimize import least_squares # type: ignore - -from ..conversion import compute_H -from ..time import Time -from ..vector import CometElements -from ..flux import NeatmParams - - -from .utils import ( - sample_albedos, - sample_beaming, - sample_orbits, - visibility_test, -) -from .diameters import CumulativePowerLaw -from .definitions import ( - mba_inner_complete, - mba_middle_complete, - mba_outer_complete, -) -from ..mpc import num_to_base62 -from ..pds import pds_data_filtered - -logger = logging.getLogger(__name__) - - -mba_inner_diameters = CumulativePowerLaw( - slopes=[2.87492, 1.0, 2.88164], - cutoffs=[13.4677, 82.9235], - num_1km_obj=257038, - max_possible=196.371, -) -""" -Diameter fit for the inner main belt, these were fit using the -`mba_fit_diameter_distribution` function defined below, and the parameters are saved -here as reference. -""" - -mba_middle_diameters = CumulativePowerLaw( - slopes=[2.89788, 1.03716, 5.0], - cutoffs=[13.06984, 115.4161], - num_1km_obj=928025, - max_possible=231.689, -) -""" -Diameter fit for the middle main belt, these were fit using the -`mba_fit_diameter_distribution` function defined below, and the parameters are saved -here as reference. -""" - -mba_outer_diameters = CumulativePowerLaw( - slopes=[2.54969, 1.21393, 3.2284], - cutoffs=[19.58096, 87.5279], - num_1km_obj=1816513, - max_possible=453.239, -) -""" -Diameter fit for the outer main belt, these were fit using the -`mba_fit_diameter_distribution` function defined below, and the parameters are saved -here as reference. -""" - - -def mba_fit_diameter_distribution(filt): - """ - Creates cumulative object count functions which define the number of MBAs greater - than a specified diameter in km. - - This returns a CumulativePowerLaw class. - - This class enables estimating the number of objects larger than a specified - diameter. They are constructed using the PDS dataset for the main belt, and the - data is linearly extrapolated to down to arbitrarily small diameters. - - Parameters - ---------- - filt : - The `neospy.population.definition` function which selects an population group. - This should be `mba_inner_complete` or similar. - """ - - # Plotting the cumulative total number of objects as a function of size shows that - # the diameters seem to be split into 3 distinct regimes, along with a falloff of - # small object counts due to them being too small to observe. Each of these 3 - # zones follow a linear slope, along with a cutoff size where no objects are seen - # below that size. - - diams = pds_data_filtered("neowise_mainbelt.xml", filt, "D").Diameter - - # Finding the minimum size which exists in the dataset, this uses an approximation - # of the hessian to determine when the population stops growing as the diameters - # get smaller. - cutoff = [] - for i in range(50, 200): - bins = np.logspace(np.log10(min(diams)), np.log10(max(diams)), i) - counts, bins = np.histogram(diams, bins=bins) - bin_centers = (bins[1:] + bins[:-1]) / 2 - cutoff.append(bin_centers[np.argmin(np.diff(counts))]) - cutoff = np.median(cutoff) - - # Now that the observability cutoff has been estimated, we can find the slope of the - # 3 distinct regions. Select and histogram the data after this cutoff - bins = np.linspace(cutoff, max(diams), 50) - counts, bins = np.histogram(diams, bins=bins) - - bounds = ([1.0, 50, 0, 1, 1, 1], [50, 500, 1e12, 5, 5, 5]) - - def _diff(params, diams, freq): - cutoff_0, cutoff_1, offset_0, slope_0, slope_1, slope_2 = params - t = CumulativePowerLaw( - slopes=[slope_0, slope_1, slope_2], - cutoffs=[cutoff_0, cutoff_1], - num_1km_obj=offset_0, - max_possible=np.inf, - ) - return np.array(freq) - t.num_larger_than(diams) - - fit = least_squares( - _diff, - [15.0, 100.0, 300000.0, 2.6, 1.2, 4.7], - args=(bins[:-1], np.cumsum(counts[::-1])[::-1]), - bounds=bounds, - ).x - cutoff_0, cutoff_1, offset_0, slope_0, slope_1, slope_2 = fit - - return CumulativePowerLaw( - slopes=[slope_0, slope_1, slope_2], - cutoffs=[cutoff_0, cutoff_1], - num_1km_obj=offset_0, - max_possible=np.max(diams), - ) - - -class MBAGroup: - def __init__(self, name, group_id, filter_func, diameter_sampler): - self.name = name - self.group_id = group_id - self.diameter_sampler = diameter_sampler - self.filter_func = filter_func - - def sample_orbits(self, n_samples, seed=42): - return sample_orbits(self.filter_func, n_samples, seed=seed) - - def sample_beaming(self, n_samples, seed=43): - return sample_beaming( - "neowise_mainbelt.xml", self.filter_func, n_samples, seed=seed - ) - - def sample_albedos(self, n_samples, seed=44): - return sample_albedos( - "neowise_mainbelt.xml", self.filter_func, n_samples, seed=seed - ) - - def sample_diameters(self, min_size, max_size=np.inf, n_objects=None, seed=42): - scale = None if n_objects is not None else 1.0 - return self.diameter_sampler.sample( - min_size, max_size=max_size, seed=seed, n_objects=n_objects, scale=scale - ) - - def sample_objects( - self, - min_size, - max_size=np.inf, - n_objects=None, - batch_size=100_000, - seed=42, - id_start=0, - epoch=None, - keep_only_visible=True, - ): - """ - Construct a generator of an MBA population down to the specified diameter size. - - To reduce memory pressure, this is structured as a generator function. - - Parameters - ---------- - min_size : float - The minimum diameter of objects to make, this is in units of km. - max_size : float - The maximum diameter of objects to make, units of km. - n_objects: - If this is None, then the expected number of objects are made for the - specified size range, if this is set to a value, then exactly that number - of objects are made. - batch_size : int - The number of objects inside of each generated pandas dataframe. - seed : int - The random seed used for all generation of random numbers. This can be used - to accurately reconstruct the same objects. - id_start : int - Each object has a object id assigned to it, this is the offset value to add - to all objects created in this call. - keep_only_visible : bool - If this is `True`, only objects which are potentially visible are kept. - """ - - if epoch is None: - epoch = Time.from_current_time().jd - prefix = "S" + num_to_base62(seed, 2) + self.group_id - - diams = self.sample_diameters( - min_size, seed=seed, n_objects=n_objects, max_size=max_size - ) - n_obj = len(diams) - logger.info("Generated % diameters", n_obj) - - n_batches = int(np.ceil(len(diams) / batch_size)) - - for i in range(n_batches): - batch = diams[i * batch_size : (i + 1) * batch_size] - batch_len = len(batch) - ids = np.arange(i * batch_size, (i + 1) * batch_size)[:batch_len] + id_start - - albedos = self.sample_albedos(batch_len, seed=seed + 1 + i * 6) - beaming = self.sample_beaming(batch_len, seed=seed + 2 + i * 6) - orbits = self.sample_orbits(batch_len, seed=seed + 3 + i * 6) - - objects = pd.DataFrame( - np.transpose( - [ - beaming.Beaming_param, - albedos.V_albedo, - albedos.IR_albedo, - batch, - orbits.peri_dist, - orbits.ecc, - orbits.incl, - orbits.lon_node, - orbits.peri_arg, - orbits.peri_time, - ] - ), - columns=[ - "beaming", - "vis_albedo", - "ir_albedo", - "diameter", - "peri_dist", - "ecc", - "incl", - "lon_node", - "peri_arg", - "peri_time", - ], - ) - - objects["h_mag"] = compute_H( - albedo=objects.vis_albedo, diameter=objects.diameter - ) - objects["g_phase"] = 0.15 - objects["desig"] = [prefix + num_to_base62(n, 6) for n in ids] - objects["epoch"] = epoch - objects["efrho"] = 0.0 - - if keep_only_visible: - vis = visibility_test( - objects.peri_dist, - objects.vis_albedo, - objects.beaming, - objects.diameter, - ) - objects = objects[vis] - - yield objects - - -MBA_Inner = MBAGroup( - name="Inner Main Belt", - group_id="10", - filter_func=mba_inner_complete, - diameter_sampler=mba_inner_diameters, -) - - -MBA_Middle = MBAGroup( - name="Middle Main Belt", - group_id="11", - filter_func=mba_middle_complete, - diameter_sampler=mba_middle_diameters, -) - - -MBA_Outer = MBAGroup( - name="Outer Main Belt", - group_id="12", - filter_func=mba_outer_complete, - diameter_sampler=mba_outer_diameters, -) - - -def create_mba_population( - db_name, - min_sizes=None, - seed=43, - epoch=None, - batch_size=1_000_000, - keep_only_visible=True, -): - """ - Create a new databases using the samplers above. - - There will be a number of databases for each MBA group (inner, middle, outer). - - Objects will not be placed into the database if they are never visible from the NEO - Surveyor, this results in significant reduction of the total number of objects - saved. - - These databases will at most contain ``batch_size`` number of objects. This is to - reduce the memory pressure during simulation, 100_000 will result in around 50 gigs - of memory used during a regular simulation run. Something around 25k is appropriate - for most laptops. - - Databases are constructed with the provided name plus `_0001.db` added on the end. - - .. testcode:: - :skipif: True - - # Create all objects for the 3 sections of the main belt, larger than 3, 4, and - # 7 km in size for the 3 groups. - create_mba_population("mbas", min_size=[3, 4, 7]) - - Parameters - ---------- - db_name : str - The filename for the new database. - min_sizes : float - A list of 3 minimum diameters of objects for the inner/middle/outer MBAs, this - is in units of km. For example: [1, 1.5, 3] for 1km+ IMB, 1.5km+ MMB, 3km+ OMB - seed : int - The random seed used for all generation of random numbers. The same seed and min - diameter will result in the same database being built. - epoch : None or float - The epoch of observation for the database. If no epoch is specified, the - database will be built using the first day of the survey. This is the ideal time - to minimize simulation time. - batch_size : int - The largest number of objects to place inside of a database before creating a - new database. - keep_only_visible : bool - If this is `True`, only objects which are potentially visible are kept. - """ - min_sizes = [0.4, 0.5, 0.75] if min_sizes is None else min_sizes - logger.info("Beginning construction of database...") - - if epoch is None: - epoch = Time.from_current_time().jd - - diam_samplers = [MBA_Inner, MBA_Middle, MBA_Outer] - - db_idx = 0 - for min_size, sampler in zip(min_sizes, diam_samplers): - logger.info("Working on %s", sampler.name) - id_start = 0 - for vals in sampler.sample_objects( - min_size=min_size, - batch_size=batch_size, - seed=seed + db_idx, - id_start=id_start, - keep_only_visible=keep_only_visible, - ): - group_name = sampler.name.split(" ")[0].lower() - full_name = db_name + f"_{group_name}_{str(db_idx).zfill(4)}" - id_start += len(vals) - db_idx += 1 - state = CometElements( - epoch, - list(vals.desig), - vals.ecc, - vals.incl, - vals.peri_dist, - vals.peri_arg, - vals.peri_time, - vals.lon_node, - ).as_state - - # pylint: disable=too-many-function-args - properties_list = [ - NeatmParams.new_neos( - o.desig, - [o.ir_albedo, o.ir_albedo], - o.h_mag, - o.diameter, - o.v_albedo, - o.beaming, - o.g_phase, - ) - for o in vals.itertuples() - ] - - state.save(f"{full_name}_state.bin") - NeatmParams.save_list(properties_list, f"{full_name}_props.bin") - - logger.info( - "Completed database %s of %d objects %0.2f+ km.", - full_name, - len(vals), - min(vals.diameter), - ) - logger.info("Finished %s, constructed %d objects", sampler.name, id_start) diff --git a/src/neospy/population/neas.py b/src/neospy/population/neas.py index 90a205e..67e3d58 100644 --- a/src/neospy/population/neas.py +++ b/src/neospy/population/neas.py @@ -12,7 +12,7 @@ nearly_neo_complete, neo_atira, ) -from .diameters import CumulativePowerLaw +from .power_law import CumulativePowerLaw from ..conversion import compute_H from ..time import Time from ..vector import CometElements, State diff --git a/src/neospy/population/diameters.py b/src/neospy/population/power_law.py similarity index 97% rename from src/neospy/population/diameters.py rename to src/neospy/population/power_law.py index 8acfd30..fed460e 100644 --- a/src/neospy/population/diameters.py +++ b/src/neospy/population/power_law.py @@ -147,12 +147,12 @@ def __init__(self, slopes, cutoffs, num_1km_obj, max_possible, delta=0.2): self.partials = [] self.partials_der = [] - for i in range(len(self.slope_pairs)): + for i, (a, b) in enumerate(self.slope_pairs): self.partials.append( partial( _smooth_power_law_segment, - a=self.slope_pairs[i][0], - b=self.slope_pairs[i][1], + a=a, + b=b, x_b=self.cutoffs[i], offset=self.offsets[i], delta=delta, @@ -162,8 +162,8 @@ def __init__(self, slopes, cutoffs, num_1km_obj, max_possible, delta=0.2): self.partials_der.append( partial( _smooth_power_law_segment_der, - a=self.slope_pairs[i][0], - b=self.slope_pairs[i][1], + a=a, + b=b, x_b=self.cutoffs[i], offset=self.offsets[i], delta=delta, diff --git a/src/neospy/population/utils.py b/src/neospy/population/utils.py index 58eb25a..efce7b8 100644 --- a/src/neospy/population/utils.py +++ b/src/neospy/population/utils.py @@ -51,7 +51,7 @@ def sample_orbits(filt, n_samples, bandwidth=0.05, seed=42): .. testcode:: :skipif: True - mpc_filter = neospy.population.definitions.complete_mba_inner_filter + mpc_filter = neospy.population.definitions.mba_inner new = neospy.population.utils.sample_orbits(mpc_filter, len(mpc_data)) Parameters diff --git a/src/neospy/rust/horizons.rs b/src/neospy/rust/horizons.rs index 6844f22..1263247 100644 --- a/src/neospy/rust/horizons.rs +++ b/src/neospy/rust/horizons.rs @@ -61,7 +61,7 @@ impl HorizonsCovariance { #[pyclass(frozen, get_all, module = "neospy")] #[derive(Clone, Debug, Deserialize, Serialize)] pub struct HorizonsProperties { - /// The MPC packed designation of the object. + /// The MPC designation of the object. desig: String, /// An optional group name to associate the object with a group.