diff --git a/doc/code_structure.rst b/doc/code_structure.rst new file mode 100644 index 0000000..d821600 --- /dev/null +++ b/doc/code_structure.rst @@ -0,0 +1,88 @@ +Code Organization +================= + +Goals of NEOSpy +--------------- +NEOSpy is a collection of tools for calculating the orbits and expected fluxes for minor +planets specifically for the purpose of estimating which objects are visible in current, +past, or future sky surveys. Specifically the goal is that these calculations may be +performed on the full set of all known asteroids in a reasonable amount of time on a +laptop. + +Propagation +~~~~~~~~~~~ +There are a number of existing tools which enable the propagation of orbits for +many millions or billions of years, but these typically are designed for a comparatively +small number of objects (perhaps 10-100 thousand), whereas NEOSpy's design intent is +10-100 million objects over the course of decades. The total compute approximately +scales like:: + + computation time ~ (number of objects) x (length of time being simulated) + +However solving the large number of objects for a relatively short length of time can be +optimized differently than a small number of objects for Giga-years. + +Flux Estimation +~~~~~~~~~~~~~~~ +After propagation has been performed, it is important to estimate the total expected +flux of the objects from the point of view of an observer. There are a number of models +for this available, including the Near Earth Asteroid Thermal Model (NEATM), the Fast +Rotator Model (FRM), and the HG-Magnitude system which is common for asteroids in the +visible band. + +SPICE Kernel Interoperability +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +SPICE is a commonly used software package which has been in development for several +decades at this point, and is often used to keep track of the ephemeris of planets, +satellites (both natural and artificial), asteroids, and comets. Essentially the motion +of anything in the Solar System can be encoded in some flavor of SPICE kernel. The +primary downside of using cSPICE is that there is no native support for multi-core cpu +queries (an artifact of the age of the code). NEOSpy has native multi-core support for +the majority of all commonly used SPICE kernels. + + +High level Structure +-------------------- + +Why Rust? +~~~~~~~~~ +Given the goals listed above, it is clear that performance plays a key roll in design +considerations of the tool. As a result of this, the Rust language was chosen as the +primary language for a majority of the business logic. A Python "wrapper" was written on +top of the Rust core, allowing users to call the compiled Rust code from Python. This +wrapper lowers the barrier to entry for users doing data-analysis or simulations without +having to learn the underlying Rust. + +Rust has a number of advantages over existing languages, it's performance is typically +comparable to C++/C, however due to the structure of the language it is less prone to +the most common type of errors to do with memory allocation and management. In addition +to this, Rust has excellent native multi-core support, especially for embarrassingly +parallel problems such as the orbit propagation required for NEOSpy. + +NEOSpy Core +~~~~~~~~~~~ +The Rust core of the library, which does the underlying orbit and flux calculations is +written entirely without any reference to Python. This core part is available as +`neospy_core`, and programming can be done entirely within Rust for tools which do not +require the Python wrapper. This design was chosen so that systems tools which would +benefit from orbit computation can be written without having to have Python installed. +It is important to note that if performance is a concern, then removing the Python is an +important step to get the maximum possible performance. + +Core Python Wrapper +~~~~~~~~~~~~~~~~~~~ +The Rust library described above then has Python wrappers written over it, allowing +users to call these compiled tools inside of Python. In order to do this, some +boiler-plate code is required to glue these independent parts together. This is where +the `rust` folder inside of NEOSpy comes from. Inside of this folder there are rust +files which are mostly a one-to-one mapping to their respective counterparts inside of +the `neospy_core`. Ideally there should be no 'business' logic contained within these +wrappers, and they should largely exist to provide convenient mappings from the Python +concepts to the Rust internal organization. + +NEOSpy Python +~~~~~~~~~~~~~ +The remaining part of the code which is strictly Python is mostly quality of life +functions, plotting, and web query code. There is little to no mathematics or physics +contained within the Python. The exception being the population related code, which +manages the fair sampling of the known asteroids to generation synthetic populations. diff --git a/doc/images/orbit_models.png b/doc/images/orbit_models.png new file mode 100644 index 0000000..9e993c6 Binary files /dev/null and b/doc/images/orbit_models.png differ diff --git a/doc/images/orbit_models_short.png b/doc/images/orbit_models_short.png new file mode 100644 index 0000000..cc48651 Binary files /dev/null and b/doc/images/orbit_models_short.png differ diff --git a/doc/index.rst b/doc/index.rst index 6838796..677f944 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -32,6 +32,8 @@ move out of frame. :maxdepth: 2 background + code_structure + propagation .. toctree:: :maxdepth: 1 diff --git a/doc/propagation.rst b/doc/propagation.rst new file mode 100644 index 0000000..ab8a75d --- /dev/null +++ b/doc/propagation.rst @@ -0,0 +1,108 @@ +Orbit Propagation +================= + +Orbit propagation can be an immensely complicated topic, spanning from two-body +Keplerian motion, to models of gravitational fields including thousands of terms. Being +able to pick the correct amount of precision required is difficult, and NEOSpy makes a +number of judicious choices which are reasonable for the vast majority of cases. + +What follows is a discussion of the pros-cons of various models of gravitational forces, +and when the model is "good enough" for the problem at hand. + +If we wish to predict the on-sky position or motion of an object, the level of precision +required depends greatly upon the distance which the object is from the observer. +Being accurate to 100nm when we are looking 5 au away is not necessary, alternatively, +when we are considering an NEO such as Apophis (which will come very close to hitting +the Earth), meters matter. + +.. figure:: images/orbit_models.png + :width: 600 + + This is a plot of an arbitrary, rather large Main Belt Asteroid, and its orbital + propagation error going back 100 years. Specifically error in orbit propagation + comparing internal propagation against JPL Horizons SPICE kernels. This compares the + difference in position for the two-body approximation, orbit propagation assuming + there are no massive asteroids in the main belt, and finally including the 5 most + massive main belt asteroids. Note that JPL Horizons includes at least 16 of the + largest asteroids, so there is still residual error over the course of 100 years. + This error however is only on the order of 100km. + +Forces +------ + +First it is useful to list out forces which objects in space may experience, to get an +understanding of what is required in order to accurately model their motion. These are +listed in approximately the order of their effects on objects (though order may change +due to varying circumstances). + +- *Newtonian Gravity* - Typically what is imagined when people say the force of gravity. +- *Corrections for Relativity (GR)* - The orbit of Mercury precesses due to effects from + relativity, and in fact many NEOs, or even objects which get close enough to planets + will experience effects from relativity. +- *Gravity from Minor Planets* - Main belt asteroids are often non-negligible. + The motion of objects through the main belt often needs to include the mass of + asteroids such as Ceres, which makes up an appreciable fraction of the total mass of + the belt. +- *Oblateness of the Sun/Planets* - The Sun and Planets are not ideal spheres, and as a + result, cannot be exactly modeled as a point source. This non-sphericity can be + written as an expansion of spherical harmonic-like terms, commonly written as `J` + terms. The first non-trivial term of this expansion is the oblateness, referred to + as `J2`. This term by itself will cause objects in orbit of the central mass to + have their longitude of ascending node precess as a function of how inclined they + are with respect to the equator of the central mass. +- *Non-gravitational Forces* - The forces above are a result of gravity, however there + are many potential forces which an object may experience, these include things such + as outgassing, radiation pressure, the Yarkovsky effect, and the Poynting-Robertson + effect. These effects typically have some relation to the total solar radiation that + the object is receiving, and can frequently be written as a polynomial with respect + to the distance that the object is from the Sun. + + +Accuracy +-------- + +Accuracy of the orbit propagation depends on how precisely the forces above are +computed, and the precision of the numerical integrator used. Forces such as the +Yarkovsky effect cause relatively small forces on asteroids, but will add up to be +non-negligible over the course of kilo-year or mega-years. A common issue however is +that we often don't have accurate enough measurements of the objects to be able to +accurately measure the contribution of these forces. + +Close encounters with other objects also cause significant accuracy issues as well. The +force between objects follows a `1/r^2` relationship, meaning as the distance goes to +zero, small changes in the distance cause large changes in the force. We do not have +infinitely precise measurements of the objects orbits, meaning there is some positional +error in our knowledge of every object. This error "blows up" during close encounters +because of the small `r` effect. Many objects which cross the path of planets frequently +will have relatively close encounters with planets with some regularity, making long +term predictions of their orbits difficult. Typically it only takes a few close +encounters to completely ruin any hope of predicting the position of an object in the +future or past. This makes the inner solar system a chaotic system in the mathematical +sense, small deviations of input parameters have large implications for future behavior +(despite the fact that everything should in theory be precisely computable). + +The result of the problems above, objects in the inner solar system of often very +difficult to model more than a few hundred/thousand years into the future with any +accuracy. + +Performance +----------- + +To achieve the highest accuracy, all forces listed above must be included. The downside +for this is that it can be computationally expensive. If we wish to predict the position +of minor planets for NEOWISE frames as an example, which are captured every 10-15 +seconds, it would be wildly inefficient. Over the course of a few minutes objects motion +can be modeled as linear, over hours (even days) the two-body approximation is often +good enough. As a result of this, NEOSpy has tools for adaptively changing the +approximation used in order to get good computational performance. The typical method is +to use a full N-Body simulation to get the highest precision, then use that solution for +the next day or so (adjustable) with two-body mechanics to query hundreds of times. + +.. figure:: images/orbit_models_short.png + :width: 600 + + Demonstration of how quickly linear motion, two-body motion, and N-Body deviate from + the true position over the course of a few weeks. Linear motion is invalid within a + few minutes, but two-body takes several days for this object to become significantly + inaccurate. The dotted black line is how far an object must move tangentially to + have an error of 0.1 arcseconds when it is 1 au from the observer (about 74km). diff --git a/src/neospy/data.py b/src/neospy/data.py index 6e06be8..9d81241 100644 --- a/src/neospy/data.py +++ b/src/neospy/data.py @@ -7,7 +7,6 @@ import urllib import os import zipfile -from importlib import resources from urllib3.util.retry import Retry @@ -25,12 +24,10 @@ logger = logging.getLogger(__name__) __all__ = [ - "data_path", "cache_path", "cached_file_download", "cached_gzip_json_download", "cached_zip_download", - "data_ls", "get_cache_size", ] @@ -50,24 +47,6 @@ def cache_path(subfolder=""): return path -def data_path(subfolder="", module_path="neospy.data"): - """ - Helper function to the absolute path of the neospy data path. - - The data path is where core data files are stored for neospy, such as required - spice kernels. - """ - # Python > 3.9 needs this - try: - return os.path.abspath(resources.files(module_path).joinpath(subfolder)) - except AttributeError: - pass - - # python <= 3.9 needs this - with resources.path(module_path, "") as p: - return os.path.abspath(p / subfolder) - - def cache_ls(subfolder=""): """List the contents of the neospy cache directory""" path = os.path.join(cache_path(), subfolder) @@ -76,11 +55,6 @@ def cache_ls(subfolder=""): return sorted(os.listdir(path)) -def data_ls(): - """List the contents of the neospy data directory specified.""" - return sorted(os.listdir(data_path("."))) - - def get_cache_size(): """ Return the total size of the cache directory in MB. @@ -88,7 +62,7 @@ def get_cache_size(): return ( sum( os.path.getsize(os.path.join(dirpath, filename)) - for dirpath, dirnames, filenames in os.walk(cache_path()) + for dirpath, _, filenames in os.walk(cache_path()) for filename in filenames ) / 1024**2 @@ -142,7 +116,7 @@ def cached_gzip_json_download(url, force_download=False, subfolder=""): def cached_zip_download(url, force_download=False, subfolder=""): """ - Download a zipped file to the `neospy/data` directory and unzip it in place. + Download a zipped file to the cache directory and unzip it in place. Returning the path to the folder where it is unzipped. This operation is cached, so requesting the same URL will result in the previously diff --git a/src/neospy/spice.py b/src/neospy/spice.py index 75f9c3c..da31fde 100644 --- a/src/neospy/spice.py +++ b/src/neospy/spice.py @@ -9,7 +9,7 @@ from .time import Time from . import _rust # pylint: disable=no-name-in-module from .constants import AU_KM -from .data import data_path, cached_file_download, cache_path +from .data import cached_file_download, cache_path from .vector import Frames, State __all__ = ["SpiceKernels"] @@ -306,12 +306,10 @@ def cache_kernel_reload(): folders into the spice loaded memory. """ cache_files = glob.glob(os.path.join(cache_path(), "kernels", "**.bsp")) - cache_files.extend(glob.glob(os.path.join(data_path(), "**.bsp"))) _rust.spk_reset() _rust.spk_load(cache_files) cache_files = glob.glob(os.path.join(cache_path(), "kernels", "**.bpc")) - cache_files.extend(glob.glob(os.path.join(data_path(), "**.bpc"))) _rust.pck_reset() _rust.pck_load(cache_files)