From 5013967cacb51650530bd8dea772449a150212a6 Mon Sep 17 00:00:00 2001 From: Dar Dahlen Date: Tue, 9 Jul 2024 14:19:34 -0700 Subject: [PATCH] Spk optimizations - 15-20% speedup of orbit propagation (#73) * Optimizations in interpolation This leads to about 15-25% speedup in querying state vectors. This will lead to commensurate speedups in integration. --- CHANGELOG.md | 2 + src/neospy/ztf.py | 5 +- src/neospy_core/src/spice/interpolation.rs | 42 ++++++++---- src/neospy_core/src/spice/pck_segments.rs | 5 +- src/neospy_core/src/spice/spk_segments.rs | 78 +++++++++++++++------- 5 files changed, 86 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97639a9..51a9a36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Optimized SPICE kernel loading for Type 2 records, which is what DE440 is saved as. + This means effectively all n-body propagation is now 15-20% faster than before. - Removed `SpiceKernel` as a class, lowering all its methods to the submodule level, see #68 for more discussion. - Removed `Time` object which was a wrapper over astropy.Time, instead making a diff --git a/src/neospy/ztf.py b/src/neospy/ztf.py index 8bfc68d..0dcb0d1 100644 --- a/src/neospy/ztf.py +++ b/src/neospy/ztf.py @@ -5,7 +5,6 @@ from functools import lru_cache import os from collections import defaultdict -import numpy as np from .cache import cached_file_download, cache_path from .fov import ZtfCcdQuad, ZtfField, FOVList @@ -98,9 +97,7 @@ def fetch_ZTF_fovs(year: int): ) # Exposures are 30 seconds - jds_str = [x.split("+")[0] for x in irsa_query["obsdate"]] - jds = np.array(Time(jds_str, "iso", "utc").jd) - + jds = [Time.from_iso(x + ":00").jd for x in irsa_query["obsdate"]] obs_info = find_obs_code("ZTF") # ZTF fields are made up of up to 64 individual CCD quads, here we first construct diff --git a/src/neospy_core/src/spice/interpolation.rs b/src/neospy_core/src/spice/interpolation.rs index cd66490..3fc374a 100644 --- a/src/neospy_core/src/spice/interpolation.rs +++ b/src/neospy_core/src/spice/interpolation.rs @@ -11,14 +11,24 @@ use nalgebra::DVector; /// This is useful for reading values out of JPL SPK format, be aware though that time /// scaling is also important for that particular use case. /// +/// This evaluates the coefficients at a single point of time, but for 3 sets of +/// coefficients at once. This is specifically done for performance reasons. +/// /// # Arguments /// /// * `t` - Time at which to evaluate the chebyshev polynomials. -/// * `coef` - List of coefficients of the chebyshev polynomials. +/// * `coefx` - Slice of coefficients of the chebyshev polynomials. +/// * `coefy` - Slice of coefficients of the chebyshev polynomials. +/// * `coefz` - Slice of coefficients of the chebyshev polynomials. /// #[inline(always)] -pub fn chebyshev_evaluate_both(x: f64, coef: &[f64]) -> Result<(f64, f64), NEOSpyError> { - let n_coef = coef.len(); +pub fn chebyshev3_evaluate_both( + x: f64, + coefx: &[f64], + coefy: &[f64], + coefz: &[f64], +) -> Result<([f64; 3], [f64; 3]), NEOSpyError> { + let n_coef = coefx.len(); if n_coef < 2 { return Err(NEOSpyError::IOError( @@ -28,38 +38,42 @@ pub fn chebyshev_evaluate_both(x: f64, coef: &[f64]) -> Result<(f64, f64), NEOSp } let x2 = 2.0 * x; - let mut val = 0.0; + let mut val = [ + coefx[0] + coefx[1] * x, + coefy[0] + coefy[1] * x, + coefz[0] + coefz[1] * x, + ]; let mut second_t = 1.0; let mut last_t = x; let mut next_t; - val += coef[0] * second_t; - val += coef[1] * last_t; - // The derivative of the first kind is defined by the recurrence relation: // d T_i / dx = i * U_{i-1} - let mut der_vel; let mut second_u = 1.0; let mut last_u = x2; let mut next_u; - der_vel = coef[1] * second_u; + let mut der_val = [coefx[1], coefy[1], coefz[1]]; - for (idx, &c) in coef.iter().enumerate().skip(2) { + for (idx, ((x, y), z)) in coefx.iter().zip(coefy).zip(coefz).enumerate().skip(2) { next_t = x2 * last_t - second_t; - val += c * next_t; + val[0] += x * next_t; + val[1] += y * next_t; + val[2] += z * next_t; second_t = last_t; last_t = next_t; next_u = x2 * last_u - second_u; - der_vel += c * last_u * (idx as f64); + der_val[0] += x * last_u * (idx as f64); + der_val[1] += y * last_u * (idx as f64); + der_val[2] += z * last_u * (idx as f64); second_u = last_u; last_u = next_u; } - Ok((val, der_vel)) + Ok((val, der_val)) } /// Interpolate using Hermite interpolation. @@ -70,7 +84,7 @@ pub fn chebyshev_evaluate_both(x: f64, coef: &[f64]) -> Result<(f64, f64), NEOSp /// * `x` - The values of the function `f` evaluated at the specified times. /// * `dx` - The values of the derivative of the function `f`. /// * `eval_time` - Time at which to evaluate the interpolation function. -pub fn hermite_interpolation(times: &[&f64], x: &[f64], dx: &[f64], eval_time: f64) -> (f64, f64) { +pub fn hermite_interpolation(times: &[f64], x: &[f64], dx: &[f64], eval_time: f64) -> (f64, f64) { assert_eq!(times.len(), x.len()); assert_eq!(times.len(), dx.len()); diff --git a/src/neospy_core/src/spice/pck_segments.rs b/src/neospy_core/src/spice/pck_segments.rs index 5cb8502..c638746 100644 --- a/src/neospy_core/src/spice/pck_segments.rs +++ b/src/neospy_core/src/spice/pck_segments.rs @@ -173,9 +173,8 @@ impl PckSegmentType2 { let dec_coef = &record[(self.n_coef + 2)..(2 * self.n_coef + 2)]; let w_coef = &record[(2 * self.n_coef + 2)..(3 * self.n_coef + 2)]; - let (ra, ra_der) = chebyshev_evaluate_both(t, ra_coef)?; - let (dec, dec_der) = chebyshev_evaluate_both(t, dec_coef)?; - let (w, w_der) = chebyshev_evaluate_both(t, w_coef)?; + let ([ra, dec, w], [ra_der, dec_der, w_der]) = + chebyshev3_evaluate_both(t, ra_coef, dec_coef, w_coef)?; // rem_euclid is equivalent to the modulo operator, so this maps w to [0, 2pi] let w = w.rem_euclid(std::f64::consts::TAU); diff --git a/src/neospy_core/src/spice/spk_segments.rs b/src/neospy_core/src/spice/spk_segments.rs index c3441fb..24a7bd9 100644 --- a/src/neospy_core/src/spice/spk_segments.rs +++ b/src/neospy_core/src/spice/spk_segments.rs @@ -314,12 +314,33 @@ pub struct SpkSegmentType2 { record_len: usize, } +/// Type 2 Record View +/// A view into a record of type 2, provided mainly for clarity to the underlying +/// data structure. +struct Type2RecordView<'a> { + t_mid: &'a f64, + t_step: &'a f64, + + x_coef: &'a [f64], + y_coef: &'a [f64], + z_coef: &'a [f64], +} + impl SpkSegmentType2 { - fn get_record(&self, idx: usize) -> &[f64] { + fn get_record(&self, idx: usize) -> Type2RecordView { unsafe { - self.array + let vals = self + .array .data - .get_unchecked(idx * self.record_len..(idx + 1) * self.record_len) + .get_unchecked(idx * self.record_len..(idx + 1) * self.record_len); + + Type2RecordView { + t_mid: vals.get_unchecked(0), + t_step: vals.get_unchecked(1), + x_coef: vals.get_unchecked(2..(self.n_coef + 2)), + y_coef: vals.get_unchecked((self.n_coef + 2)..(2 * self.n_coef + 2)), + z_coef: vals.get_unchecked((2 * self.n_coef + 2)..(3 * self.n_coef + 2)), + } } } @@ -333,29 +354,26 @@ impl SpkSegmentType2 { let record_index = ((jd - jd_start) / self.jd_step).floor() as usize; let record = self.get_record(record_index); - let x_coef = unsafe { record.get_unchecked(2..(self.n_coef + 2)) }; - let y_coef = unsafe { record.get_unchecked((self.n_coef + 2)..(2 * self.n_coef + 2)) }; - let z_coef = unsafe { record.get_unchecked((2 * self.n_coef + 2)..(3 * self.n_coef + 2)) }; + let t_step = record.t_step; - let t_mid = unsafe { record.get_unchecked(0) }; - let t_step = unsafe { record.get_unchecked(1) }; - let t = (jd - t_mid) / t_step; + let t = (jd - record.t_mid) / t_step; let t_step_scaled = 86400.0 / t_step / AU_KM; - let (x, vx) = chebyshev_evaluate_both(t, x_coef)?; - let (y, vy) = chebyshev_evaluate_both(t, y_coef)?; - let (z, vz) = chebyshev_evaluate_both(t, z_coef)?; + let (p, v) = chebyshev3_evaluate_both(t, record.x_coef, record.y_coef, record.z_coef)?; Ok(( - [x / AU_KM, y / AU_KM, z / AU_KM], - [vx * t_step_scaled, vy * t_step_scaled, vz * t_step_scaled], + [p[0] / AU_KM, p[1] / AU_KM, p[2] / AU_KM], + [ + v[0] * t_step_scaled, + v[1] * t_step_scaled, + v[2] * t_step_scaled, + ], )) } } impl From for SpkSegmentType2 { fn from(array: DafArray) -> Self { - // let n_records = array[array.len() - 1] as usize; let record_len = array[array.len() - 2] as usize; let jd_step = array[array.len() - 3]; @@ -527,9 +545,23 @@ impl From for SpkSegmentType13 { } } +/// Type 13 Record View +/// A view into a record of type 13, provided mainly for clarity to the underlying +/// data structure. +struct Type13RecordView<'a> { + pos: &'a [f64; 3], + vel: &'a [f64; 3], +} + impl SpkSegmentType13 { - fn get_record(&self, idx: usize) -> &[f64] { - unsafe { self.array.data.get_unchecked(idx * 6..(idx + 1) * 6) } + fn get_record(&self, idx: usize) -> Type13RecordView { + unsafe { + let rec = self.array.data.get_unchecked(idx * 6..(idx + 1) * 6); + Type13RecordView { + pos: rec[0..3].try_into().unwrap(), + vel: rec[3..6].try_into().unwrap(), + } + } } fn get_times(&self) -> &[f64] { @@ -562,19 +594,15 @@ impl SpkSegmentType13 { let mut pos = [0.0; 3]; let mut vel = [0.0; 3]; - let times: Box<[&f64]> = times - .iter() - .skip(start_idx) - .take(self.window_size) - .collect(); for idx in 0..3 { let p: Box<[f64]> = (0..self.window_size) - .map(|i| self.get_record(i + start_idx)[idx]) + .map(|i| self.get_record(i + start_idx).pos[idx]) .collect(); let dp: Box<[f64]> = (0..self.window_size) - .map(|i| self.get_record(i + start_idx)[idx + 3]) + .map(|i| self.get_record(i + start_idx).vel[idx]) .collect(); - let (p, v) = hermite_interpolation(×, &p, &dp, jd); + let (p, v) = + hermite_interpolation(×[start_idx..start_idx + self.window_size], &p, &dp, jd); pos[idx] = p / AU_KM; vel[idx] = v / AU_KM * 86400.; }