From 04389b8a34de2a2f3596bff843789cec353a46ea Mon Sep 17 00:00:00 2001 From: hsin-c <109615347+hsin-c@users.noreply.github.com> Date: Mon, 22 Jan 2024 08:22:34 -0800 Subject: [PATCH 1/5] Fix Loss Function to Improve Model Convergence for `AutoEncoder` (#1460) This PR addresses an issue in the dfencoder model related to its convergence behavior. Previously, the model exhibited difficulty in converging when trained exclusively with numerical features. This PR fixes the way different loss types are combined in the model's loss function to ensure that backpropagation works correctly. Note: This may alter the exact values resulting from calling `fit()` on the model. Before, categorical features were weighted much higher than binary or numerical categories (all numerical features shared a combined weight of 1, all binaries features shared a combined weight of 1, and each categorical feature had a weight of 1). Now all features are weighted equally which may impact the trained weights. Closes #1455 Authors: - https://github.com/hsin-c - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1460 --- morpheus/models/dfencoder/autoencoder.py | 81 +++++++++++-------- tests/dfencoder/test_autoencoder.py | 25 +++++- .../test_dfencoder_distributed_e2e.py | 61 +++++++------- 3 files changed, 101 insertions(+), 66 deletions(-) diff --git a/morpheus/models/dfencoder/autoencoder.py b/morpheus/models/dfencoder/autoencoder.py index 820362cf8d..df429bbdf5 100644 --- a/morpheus/models/dfencoder/autoencoder.py +++ b/morpheus/models/dfencoder/autoencoder.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -233,6 +233,9 @@ def get_scaler(self, name): } return scalers[name] + def get_feature_count(self): + return len(self.numeric_fts) + len(self.binary_fts) + len(self.categorical_fts) + def _init_numeric(self, df=None): """Initializes the numerical features of the model by either using preset numerical scaler parameters or by using the input data. @@ -626,8 +629,10 @@ def preprocess_data( return preprocessed_data def compute_loss(self, num, bin, cat, target_df, should_log=True, _id=False): + num_target, bin_target, codes = self.compute_targets(target_df) - return self.compute_loss_from_targets( + + mse, bce, cce, net = self.compute_loss_from_targets( num=num, bin=bin, cat=cat, @@ -638,6 +643,10 @@ def compute_loss(self, num, bin, cat, target_df, should_log=True, _id=False): _id=_id, ) + net = net.cpu().item() + + return mse, bce, cce, net + def compute_loss_from_targets(self, num, bin, cat, num_target, bin_target, cat_target, should_log=True, _id=False): """Computes the loss from targets. 
@@ -670,38 +679,45 @@ def compute_loss_from_targets(self, num, bin, cat, num_target, bin_target, cat_t should_log = True else: should_log = False - net_loss = [] - mse_loss = self.mse(num, num_target) - net_loss += list(mse_loss.mean(dim=0).cpu().detach().numpy()) - mse_loss = mse_loss.mean() - bce_loss = self.bce(bin, bin_target) - net_loss += list(bce_loss.mean(dim=0).cpu().detach().numpy()) - bce_loss = bce_loss.mean() - cce_loss = [] - for i, ft in enumerate(self.categorical_fts): - loss = self.cce(cat[i], cat_target[i]) - loss = loss.mean() - cce_loss.append(loss) - val = loss.cpu().item() - net_loss += [val] + # Calculate the numerical loss (per feature) + mse_loss: torch.Tensor = self.mse(num, num_target).mean(dim=0) + + # Calculate the binary loss (per feature) + bce_loss: torch.Tensor = self.bce(bin, bin_target).mean(dim=0) + + # To calc the categorical loss, we need to average the loss of each categorical feature independently (since + # they will have a different number of categories) + cce_loss_list = [] + + for i in range(len(self.categorical_fts)): + # Take the full mean but ensure the output is a 1x1 tensor to make it easier to concatenate + cce_loss_list.append(self.cce(cat[i], cat_target[i]).mean(dim=0, keepdim=True)) + + if (len(cce_loss_list) > 0): + cce_loss = torch.cat(cce_loss_list) + else: + cce_loss = torch.Tensor().to(self.device) + + # The net loss should have one loss per feature + net_loss = 0 + for loss in [mse_loss, bce_loss, cce_loss]: + if len(loss) > 0: + net_loss += loss.sum() + net_loss /= self.get_feature_count() + if should_log: + # Convert it to a list of numpy + net_loss_list = torch.cat((mse_loss, bce_loss, cce_loss)).tolist() + if self.training: - self.logger.training_step(net_loss) + self.logger.training_step(net_loss_list) elif _id: - self.logger.id_val_step(net_loss) + self.logger.id_val_step(net_loss_list) elif not self.training: - self.logger.val_step(net_loss) - - net_loss = np.array(net_loss).mean() - return mse_loss, 
bce_loss, cce_loss, net_loss + self.logger.val_step(net_loss_list) - def do_backward(self, mse, bce, cce): - # running `backward()` seperately on mse/bce/cce is equivalent to summing them up and run `backward()` once - loss_fn = mse + bce - for ls in cce: - loss_fn += ls - loss_fn.backward() + return mse_loss.mean(), bce_loss.mean(), cce_loss.mean(), net_loss def compute_baseline_performance(self, in_, out_): """ @@ -729,6 +745,7 @@ def compute_baseline_performance(self, in_, out_): codes_pred.append(pred) mse_loss, bce_loss, cce_loss, net_loss = self.compute_loss(num_pred, bin_pred, codes_pred, out_, should_log=False) + if isinstance(self.logger, BasicLogger): self.logger.baseline_loss = net_loss return net_loss @@ -981,11 +998,11 @@ def _fit_batch(self, input_swapped, num_target, bin_target, cat_target, **kwargs cat_target=cat_target, should_log=True, ) - self.do_backward(mse, bce, cce) + net_loss.backward() self.optim.step() self.optim.zero_grad() - return net_loss + return net_loss.cpu().item() def _compute_baseline_performance_from_dataset(self, validation_dataset): self.eval() @@ -1028,7 +1045,7 @@ def _compute_batch_baseline_performance( cat_target=cat_target, should_log=False ) - return net_loss + return net_loss.cpu().item() def _validate_dataset(self, validation_dataset, rank=None): """Runs a validation loop on the given validation dataset, computing and returning the average loss of both the original @@ -1108,7 +1125,7 @@ def _validate_batch(self, input_original, input_swapped, num_target, bin_target, cat_target=cat_target, should_log=True, ) - return orig_net_loss, net_loss + return orig_net_loss.cpu().item(), net_loss.cpu().item() def _populate_loss_stats_from_dataset(self, dataset): """Populates the `self.feature_loss_stats` dict with feature losses computed using the provided dataset. 
diff --git a/tests/dfencoder/test_autoencoder.py b/tests/dfencoder/test_autoencoder.py index 1cf16eff49..70a85ec781 100755 --- a/tests/dfencoder/test_autoencoder.py +++ b/tests/dfencoder/test_autoencoder.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,7 @@ import typing from unittest.mock import patch +import numpy as np import pandas as pd import pytest import torch @@ -374,7 +375,7 @@ def test_auto_encoder_get_anomaly_score(train_ae: autoencoder.AutoEncoder, train train_ae.fit(train_df, epochs=1) anomaly_score = train_ae.get_anomaly_score(train_df) assert len(anomaly_score) == len(train_df) - assert round(anomaly_score.mean().item(), 2) == 2.28 + assert round(anomaly_score.mean().item(), 2) == 2.29 assert round(anomaly_score.std().item(), 2) == 0.11 @@ -478,8 +479,24 @@ def test_auto_encoder_get_results(train_ae: autoencoder.AutoEncoder, train_df: p assert 'max_abs_z' in results.columns assert 'mean_abs_z' in results.columns - assert round(results.loc[0, 'max_abs_z'], 2) == 2.5 + assert np.isclose(results.loc[0, 'max_abs_z'], 2.51, atol=1e-2) # Numpy float has different precision checks than python float, so we wrap it. 
- assert round(float(results.loc[0, 'mean_abs_z']), 3) == 0.335 + assert np.isclose(results.loc[0, 'mean_abs_z'], 0.361, atol=1e-3) assert results.loc[0, 'z_loss_scaler_type'] == 'z' + + +@pytest.mark.usefixtures("manual_seed") +def test_auto_encoder_num_only_convergence(train_ae: autoencoder.AutoEncoder): + num_df = pd.DataFrame({ + 'num_feat_1': [5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9], + 'num_feat_2': [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1], + }) + + train_ae.fit(num_df, epochs=50) + + avg_loss = np.sum([np.array(loss[1]) + for loss in train_ae.logger.train_fts.values()], axis=0) / len(train_ae.logger.train_fts) + + # Make sure the model converges with numerical feats only + assert avg_loss[-1] < avg_loss[0] / 2 diff --git a/tests/dfencoder/test_dfencoder_distributed_e2e.py b/tests/dfencoder/test_dfencoder_distributed_e2e.py index bd9d855173..6ec7913ae5 100644 --- a/tests/dfencoder/test_dfencoder_distributed_e2e.py +++ b/tests/dfencoder/test_dfencoder_distributed_e2e.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -42,43 +42,44 @@ "log_count", "location_incr", "app_incr", + "has_error", ] LOSS_TYPES = ["train", "val", "id_val"] # 75th quantile of the losses from 100 times of offline training LOSS_TARGETS = { "train": { - "log_count": 0.33991, - "location_incr": 0.30789, - "app_incr": 0.17698, - "has_error": 0.00878, - "app_name": 0.13066, - "browser_type": 0.39804, - "os": 0.09882, - "country": 0.06063, - "city": 0.32344, + "log_count": 0.31612, + "location_incr": 0.27285, + "app_incr": 0.13989, + "has_error": 0.00536, + "app_name": 0.13652, + "browser_type": 0.39303, + "os": 0.00115, + "country": 0.00102, + "city": 0.30947 }, "val": { - "log_count": 0.3384, - "location_incr": 0.31456, - "app_incr": 0.16201, - "has_error": 0.00614, - "app_name": 0.11907, - "browser_type": 0.38239, - "os": 0.00064, - "country": 0.0042, - "city": 0.32161, + "log_count": 0.27835, + "location_incr": 0.28686, + "app_incr": 0.13064, + "has_error": 0.00364, + "app_name": 0.13276, + "browser_type": 0.36868, + "os": 2e-05, + "country": 0.00168, + "city": 0.31735 }, "id_val": { - "log_count": 0.07079, - "location_incr": 0.05318, - "app_incr": 0.03659, - "has_error": 0.0046, - "app_name": 0.03542, - "browser_type": 0.0915, - "os": 0.00057, - "country": 0.00343, - "city": 0.08525, - }, + "log_count": 0.04845, + "location_incr": 0.02274, + "app_incr": 0.01639, + "has_error": 0.00255, + "app_name": 0.04597, + "browser_type": 0.08826, + "os": 2e-05, + "country": 0.00146, + "city": 0.07591 + } } LOSS_TOLERANCE_RATIO = 1.25 @@ -146,7 +147,7 @@ def _run_test(rank, world_size): min_cats=1, device=rank, preset_numerical_scaler_params=preset_numerical_scaler_params, - binary_feature_list=[], + binary_feature_list=['has_error'], preset_cats=preset_cats, eval_batch_size=1024, patience=5, From 1b52ab865eac6d0cbb9cf0fa3cfbc7c2efd143f6 Mon Sep 17 00:00:00 2001 From: David Gardner 
<96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 22 Jan 2024 09:13:26 -0800 Subject: [PATCH 2/5] Create a base mixin class for ingress & egress stages (#1473) * Creates a new `BoundaryStageMixin` class * Allowing `LinearBoundaryEgressStage` and `LinearBoundaryIngressStage` to share a common class and be distinguished from other stages Closes #638 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1473 --- morpheus/pipeline/boundary_stage_mixin.py | 22 +++++++ morpheus/pipeline/pipeline.py | 9 ++- .../stages/boundary/linear_boundary_stage.py | 7 ++- tests/pipeline/test_pipeline.py | 62 ++++++++++++++++++- 4 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 morpheus/pipeline/boundary_stage_mixin.py diff --git a/morpheus/pipeline/boundary_stage_mixin.py b/morpheus/pipeline/boundary_stage_mixin.py new file mode 100644 index 0000000000..f721bcfa33 --- /dev/null +++ b/morpheus/pipeline/boundary_stage_mixin.py @@ -0,0 +1,22 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC + + +class BoundaryStageMixin(ABC): + """ + Mixin intended to be added to both ingress and egress boundary stages, currently this only adds the ability to + identify boundary stages. + """ diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 568c848df4..4859244c10 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ from tqdm import tqdm from morpheus.config import Config +from morpheus.pipeline.boundary_stage_mixin import BoundaryStageMixin from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.receiver import Receiver from morpheus.pipeline.sender import Sender @@ -175,9 +176,9 @@ def add_edge(self, end_port_idx=end_port.port_number) def add_segment_edge(self, - egress_stage: Stage, + egress_stage: BoundaryStageMixin, egress_segment: str, - ingress_stage: Stage, + ingress_stage: BoundaryStageMixin, ingress_segment: str, port_pair: typing.Union[str, typing.Tuple[str, typing.Type, bool]]): """ @@ -205,6 +206,7 @@ def add_segment_edge(self, * bool: If the type is a shared pointer (typically should be `False`) """ self._assert_not_built() + assert isinstance(egress_stage, BoundaryStageMixin), "Egress stage must be a BoundaryStageMixin" egress_edges = self._segments[egress_segment]["egress_ports"] egress_edges.append({ "port_pair": port_pair, @@ -213,6 +215,7 @@ def add_segment_edge(self, "receiver_segment": ingress_segment }) + assert isinstance(ingress_stage, BoundaryStageMixin), "Ingress stage must be a BoundaryStageMixin" ingress_edges = self._segments[ingress_segment]["ingress_ports"] 
ingress_edges.append({ "port_pair": port_pair, diff --git a/morpheus/stages/boundary/linear_boundary_stage.py b/morpheus/stages/boundary/linear_boundary_stage.py index 14d1db1858..ad8db9ebc2 100644 --- a/morpheus/stages/boundary/linear_boundary_stage.py +++ b/morpheus/stages/boundary/linear_boundary_stage.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ from mrc.core import operators as ops from morpheus.config import Config +from morpheus.pipeline.boundary_stage_mixin import BoundaryStageMixin from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource @@ -28,7 +29,7 @@ logger = logging.getLogger(__name__) -class LinearBoundaryEgressStage(PassThruTypeMixin, SinglePortStage): +class LinearBoundaryEgressStage(BoundaryStageMixin, PassThruTypeMixin, SinglePortStage): """ The LinearBoundaryEgressStage acts as an egress point from one linear segment to another. Given an existing linear pipeline that we want to connect to another segment, a linear boundary egress stage would be added, in conjunction @@ -81,7 +82,7 @@ def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> return input_node -class LinearBoundaryIngressStage(PreallocatorMixin, SingleOutputSource): +class LinearBoundaryIngressStage(BoundaryStageMixin, PreallocatorMixin, SingleOutputSource): """ The LinearBoundaryIngressStage acts as source ingress point from a corresponding egress in another linear segment. 
Given an existing linear pipeline that we want to connect to another segment, a linear boundary egress stage would diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 40ca0b9612..1d93dc22c0 100755 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,8 +26,13 @@ from _utils.stages.multi_message_pass_thru import MultiMessagePassThruStage from _utils.stages.multi_port_pass_thru import MultiPortPassThruStage from morpheus.config import Config +from morpheus.messages import ControlMessage +from morpheus.messages import MessageMeta +from morpheus.messages import MultiMessage from morpheus.pipeline import LinearPipeline from morpheus.pipeline import Pipeline +from morpheus.stages.boundary.linear_boundary_stage import LinearBoundaryEgressStage +from morpheus.stages.boundary.linear_boundary_stage import LinearBoundaryIngressStage from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage @@ -195,3 +200,58 @@ def test_add_edge_input_port_errors(config: Config, num_inputs: int): with pytest.raises(AssertionError): pipe.add_edge(start_stage.output_ports[0], end_stage) + + +@pytest.mark.parametrize("data_type", [int, float, str, MessageMeta, ControlMessage, MultiMessage]) +def test_add_segment_edge(config: Config, data_type: type): + pipe = Pipeline(config) + + boundary_egress = LinearBoundaryEgressStage(config, boundary_port_id="seg_1", data_type=data_type) + boundary_ingress = 
LinearBoundaryIngressStage(config, boundary_port_id="seg_1", data_type=data_type) + + pipe.add_stage(boundary_egress, "seg_1") + pipe.add_stage(boundary_ingress, "seg_2") + pipe.add_segment_edge(boundary_egress, "seg_1", boundary_ingress, "seg_2", ("seg_1", object, False)) + + +def test_add_segment_edge_assert_not_built(config: Config): + pipe = Pipeline(config) + + src_stage = InMemSourceXStage(config, data=list(range(3))) + boundary_egress = LinearBoundaryEgressStage(config, boundary_port_id="seg_1", data_type=int) + boundary_ingress = LinearBoundaryIngressStage(config, boundary_port_id="seg_1", data_type=int) + + pipe.add_stage(src_stage, "seg_1") + pipe.add_stage(boundary_egress, "seg_1") + pipe.add_edge(src_stage, boundary_egress, "seg_1") + pipe.add_stage(boundary_ingress, "seg_2") + pipe.build() + + with pytest.raises(AssertionError): + pipe.add_segment_edge(boundary_egress, "seg_1", boundary_ingress, "seg_2", ("seg_1", object, False)) + + +def test_add_segment_edge_bad_egress(config: Config): + pipe = Pipeline(config) + + bad_egress = InMemorySinkStage(config) + boundary_ingress = LinearBoundaryIngressStage(config, boundary_port_id="seg_1", data_type=int) + + pipe.add_stage(bad_egress, "seg_1") + pipe.add_stage(boundary_ingress, "seg_2") + + with pytest.raises(AssertionError): + pipe.add_segment_edge(bad_egress, "seg_1", boundary_ingress, "seg_2", ("seg_1", object, False)) + + +def test_add_segment_edge_bad_ingress(config: Config): + pipe = Pipeline(config) + + boundary_egress = LinearBoundaryEgressStage(config, boundary_port_id="seg_1", data_type=int) + bad_ingress = InMemSourceXStage(config, data=list(range(3))) + + pipe.add_stage(boundary_egress, "seg_1") + pipe.add_stage(bad_ingress, "seg_2") + + with pytest.raises(AssertionError): + pipe.add_segment_edge(boundary_egress, "seg_1", bad_ingress, "seg_2", ("seg_1", object, False)) From c0237fe7d51f9343a6d432da372a4e87d8c753cd Mon Sep 17 00:00:00 2001 From: David Gardner 
<96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 22 Jan 2024 10:35:57 -0800 Subject: [PATCH 3/5] Install headers & morpheus-config.cmake (#1448) * Allows users to create C++ stages/messages without a source code checkout of morpheus. * Explicitly install cpython extensions as a part of the python install. * Remove usage of `YAPF_EXCLUDE_FLAGS` work-around for an old yapf bug which has been fixed in our current version. * Update C++ developer guide examples to optionally build independently of morpheus. * Install versioneer as a conda package, remove in-repo copy of versioneer.py Closes #331 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) - Christopher Harris (https://github.com/cwharris) URL: https://github.com/nv-morpheus/Morpheus/pull/1448 --- .devcontainer/devcontainer.json | 6 +- CMakeLists.txt | 104 +- MANIFEST.in | 3 +- ci/conda/recipes/morpheus/morpheus_build.sh | 4 +- ci/scripts/common.sh | 7 +- ci/scripts/fix_all.sh | 4 +- ci/scripts/github/common.sh | 13 +- ci/scripts/github/docs.sh | 7 +- ci/scripts/github/test.sh | 13 +- ci/scripts/python_checks.sh | 4 +- cmake/dependencies.cmake | 38 +- .../all_cuda-118_arch-x86_64.yaml | 9 +- .../dev_cuda-118_arch-x86_64.yaml | 9 +- dependencies.yaml | 5 + docker/Dockerfile | 20 +- .../guides/3_simple_cpp_stage.md | 41 +- .../guides/4_source_cpp_stage.md | 34 +- examples/CMakeLists.txt | 8 +- .../3_simple_cpp_stage/.gitattributes | 1 + .../3_simple_cpp_stage/CMakeLists.txt | 74 +- .../3_simple_cpp_stage/MANIFEST.in | 2 + 
.../3_simple_cpp_stage/compile.sh | 31 + .../3_simple_cpp_stage/pyproject.toml | 33 + .../3_simple_cpp_stage/setup.cfg | 35 + .../3_simple_cpp_stage/setup.py | 16 + .../3_simple_cpp_stage/{ => src}/run.py | 4 +- .../src/simple_cpp_stage/__init__.py | 19 + .../src/simple_cpp_stage/_lib/CMakeLists.txt | 27 + .../simple_cpp_stage}/_lib/__init__.py | 0 .../simple_cpp_stage}/_lib/pass_thru.cpp | 18 +- .../simple_cpp_stage}/_lib/pass_thru.hpp | 2 +- .../_lib/pass_thru_cpp/__init__.pyi | 14 + .../src/simple_cpp_stage/_version.py | 683 ++++++ .../{ => src/simple_cpp_stage}/pass_thru.py | 7 +- .../4_rabbitmq_cpp_stage/.gitattributes | 1 + .../4_rabbitmq_cpp_stage/CMakeLists.txt | 77 +- .../4_rabbitmq_cpp_stage/MANIFEST.in | 2 + .../4_rabbitmq_cpp_stage/README.md | 29 +- .../4_rabbitmq_cpp_stage/_lib/__init__.py | 0 .../cmake/Configure_SimpleAmqpClient.cmake | 6 +- .../cmake/Configure_rabbitmq.cmake | 6 +- .../cmake/dependencies.cmake | 4 +- .../4_rabbitmq_cpp_stage/compile.sh | 31 + .../4_rabbitmq_cpp_stage/pyproject.toml | 33 + .../4_rabbitmq_cpp_stage/setup.cfg | 38 + .../4_rabbitmq_cpp_stage/setup.py | 16 + .../src/rabbitmq_cpp_stage/__init__.py | 19 + .../rabbitmq_cpp_stage/_lib/CMakeLists.txt | 30 + .../rabbitmq_cpp_stage/_lib}/__init__.py | 0 .../_lib/rabbitmq_cpp_stage/__init__.pyi | 15 + .../_lib/rabbitmq_source.cpp | 8 +- .../_lib/rabbitmq_source.hpp | 2 +- .../src/rabbitmq_cpp_stage/_version.py | 683 ++++++ .../rabbitmq_source_stage.py | 21 +- .../write_to_rabbitmq_stage.py | 2 +- .../{ => src}/read_simple.py | 4 +- .../{ => src}/write_simple.py | 4 +- examples/developer_guide/CMakeLists.txt | 19 + external/utilities | 2 +- morpheus/_lib/cmake/libmorpheus.cmake | 93 +- morpheus/_lib/doca/CMakeLists.txt | 4 +- morpheus/_lib/tests/CMakeLists.txt | 25 +- pyproject.toml | 1 + setup.cfg | 12 +- setup.py | 22 +- .../developer_guide/test_pass_thru.py | 5 +- versioneer.py | 2155 ----------------- 67 files changed, 2185 insertions(+), 2479 deletions(-) create mode 100644 
examples/developer_guide/3_simple_cpp_stage/.gitattributes create mode 100644 examples/developer_guide/3_simple_cpp_stage/MANIFEST.in create mode 100755 examples/developer_guide/3_simple_cpp_stage/compile.sh create mode 100644 examples/developer_guide/3_simple_cpp_stage/pyproject.toml create mode 100644 examples/developer_guide/3_simple_cpp_stage/setup.cfg create mode 100644 examples/developer_guide/3_simple_cpp_stage/setup.py rename examples/developer_guide/3_simple_cpp_stage/{ => src}/run.py (94%) create mode 100644 examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/__init__.py create mode 100644 examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/CMakeLists.txt rename examples/developer_guide/3_simple_cpp_stage/{ => src/simple_cpp_stage}/_lib/__init__.py (100%) rename examples/developer_guide/3_simple_cpp_stage/{ => src/simple_cpp_stage}/_lib/pass_thru.cpp (80%) rename examples/developer_guide/3_simple_cpp_stage/{ => src/simple_cpp_stage}/_lib/pass_thru.hpp (96%) create mode 100644 examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru_cpp/__init__.pyi create mode 100644 examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_version.py rename examples/developer_guide/3_simple_cpp_stage/{ => src/simple_cpp_stage}/pass_thru.py (88%) create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/.gitattributes create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/MANIFEST.in delete mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/_lib/__init__.py create mode 100755 examples/developer_guide/4_rabbitmq_cpp_stage/compile.sh create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/pyproject.toml create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/setup.cfg create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/setup.py create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/__init__.py create mode 100644 
examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/CMakeLists.txt rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src/rabbitmq_cpp_stage/_lib}/__init__.py (100%) create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_cpp_stage/__init__.pyi rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src/rabbitmq_cpp_stage}/_lib/rabbitmq_source.cpp (96%) rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src/rabbitmq_cpp_stage}/_lib/rabbitmq_source.hpp (98%) create mode 100644 examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_version.py rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src/rabbitmq_cpp_stage}/rabbitmq_source_stage.py (85%) rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src/rabbitmq_cpp_stage}/write_to_rabbitmq_stage.py (97%) rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src}/read_simple.py (94%) rename examples/developer_guide/4_rabbitmq_cpp_stage/{ => src}/write_simple.py (92%) create mode 100644 examples/developer_guide/CMakeLists.txt delete mode 100644 versioneer.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 86f88d3799..720df1508d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -99,7 +99,10 @@ "features": { "ghcr.io/devcontainers/features/docker-from-docker": {}, - "ghcr.io/devcontainers/features/dotnet:1": {} + "ghcr.io/devcontainers/features/dotnet:1": { + "version": "6.0", + "installUsingApt": false + } }, "customizations": { @@ -116,7 +119,6 @@ ], "settings": { "cmake.cmakePath": "/tmp/.current-conda-env/bin/cmake", - "cmake.languageSupport.dotnetPath": "/usr/bin/dotnet", "C_Cpp.intelliSenseEngine": "disabled", "python.terminal.activateEnvironment": false } diff --git a/CMakeLists.txt b/CMakeLists.txt index 2e493963b0..77fd97a444 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 
(c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,10 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. - cmake_minimum_required(VERSION 3.25 FATAL_ERROR) - list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus") # Global options (Keep sorted!) @@ -42,19 +40,9 @@ set(MORPHEUS_RAPIDS_VERSION "23.06" CACHE STRING "Sets default versions for RAPI set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data") mark_as_advanced(MORPHEUS_CACHE_DIR) -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS ON) -set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) -set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) -set(CMAKE_INSTALL_RPATH "$ORIGIN") - -# Disable compile commands until after dependencies -set(CMAKE_EXPORT_COMPILE_COMMANDS OFF) - enable_testing() -if (MORPHEUS_USE_IWYU AND MORPHEUS_USE_CCACHE) +if(MORPHEUS_USE_IWYU AND MORPHEUS_USE_CCACHE) message(FATAL_ERROR "MORPHEUS_USE_IWYU and MORPHEUS_USE_CCACHE cannot be set simultaneously") endif() @@ -80,66 +68,88 @@ set(MORPHEUS_UTILS_RAPIDS_VERSION ${MORPHEUS_RAPIDS_VERSION} CACHE STRING "" FOR # Load morpheus utils and update CMake paths include(morpheus_utils/load) -# Configure project package manager morpheus_utils_initialize_package_manager( - MORPHEUS_USE_CONDA - BUILD_SHARED_LIBS + MORPHEUS_USE_CONDA + BUILD_SHARED_LIBS ) -# Configure CUDA architecture -# NOTE: This MUST occur before any 'project' calls because of rapids_cmake requirements. -if (DEFINED MORPHEUS_CUDA_ARCHITECTURES) +# Initialize CUDA +# This is a two-step process. We need to call morpheus_utils_initialize_cuda_arch which in turn calls +# rapids_cuda_init_architectures prior to calling project(). 
This is because rapids_cuda_init_architectures defines a +# `CMAKE_PROJECT__INCLUDE` hook which is invoked by the project() call. This hook is what allows us to +# set `CMAKE_CUDA_ARCHITECTURES=rapids` when performing a release build which will be expanded to the current list of +# supported architectures by our version of rapids. +# +# After the call to project() we can then call morpheus_utils_enable_cuda() which will set some CUDA+clang settings +# which can only be performed after calling project(), but which must be set prior to calling enable_language(CUDA) +if(DEFINED MORPHEUS_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES "${MORPHEUS_CUDA_ARCHITECTURES}") endif() morpheus_utils_initialize_cuda_arch(morpheus) -# Set a default build type if none was specified -rapids_cmake_build_type(Release) # Project definition +# Note intentionally excluding CUDA from the LANGUAGES list allowing us to set some clang specific settings later when +# we call morpheus_utils_enable_cuda() project(morpheus - VERSION 24.03.00 - LANGUAGES C CXX CUDA) + VERSION 24.03.00 + LANGUAGES C CXX +) + +# This sets some clang specific settings for CUDA prior to calling enable_language(CUDA) +morpheus_utils_enable_cuda() rapids_cmake_write_version_file(${CMAKE_BINARY_DIR}/autogenerated/include/morpheus/version.hpp) -# Ccache configuration +# Set a default build type if none was specified +rapids_cmake_build_type(Release) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set(CMAKE_INSTALL_RPATH "$ORIGIN") + +# Setup cache before dependencies +# Configure CCache if requested include(environment/init_ccache) +# Disable exporting compile commands for dependencies +set(CMAKE_EXPORT_COMPILE_COMMANDS OFF) + +# Create a custom target to allow preparing for style checks +add_custom_target(${PROJECT_NAME}_style_checks + COMMENT "Building dependencies for style 
checks" +) + # Configure all dependencies include(dependencies) -#################################### -# - Post dependencies setup -------- -morpheus_utils_compiler_set_defaults(MORPHEUS_USE_CLANG_TIDY) - -# Setup IWYU if enabled -include(environment/init_iwyu) +# Enable for all first party code +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # To make it easier for CI to find output files, set the default executable suffix to .x if not set if("${CMAKE_EXECUTABLE_SUFFIX}" STREQUAL "") set(CMAKE_EXECUTABLE_SUFFIX ".x") endif() -# Create a custom target to allow preparing for style checks -add_custom_target(${PROJECT_NAME}_style_checks - COMMENT "Building dependencies for style checks" -) - - -################################## -##### Morpheus Python Setup ###### -################################## +# ################################### +# - Post dependencies setup -------- +morpheus_utils_compiler_set_defaults(MORPHEUS_USE_CLANG_TIDY) -# Re-enable compile commands -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# Setup IWYU if enabled +include(environment/init_iwyu) +# ################################# +# #### Morpheus Python Setup ###### +# ################################# morpheus_utils_python_configure() # Include the main morpheus code morpheus_utils_create_python_package(morpheus - PROJECT_DIRECTORY "${CMAKE_SOURCE_DIR}" - SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus" + PROJECT_DIRECTORY "${CMAKE_SOURCE_DIR}" + SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus" ) add_subdirectory(morpheus) @@ -174,9 +184,9 @@ if(MORPHEUS_ENABLE_DEBUG_INFO) morpheus_utils_print_all_targets() morpheus_utils_print_target_properties( - TARGETS - morpheus morpheus._lib.llm - WRITE_TO_FILE + TARGETS + morpheus + WRITE_TO_FILE ) morpheus_utils_print_global_properties( diff --git a/MANIFEST.in b/MANIFEST.in index c8c2c6df7a..4503f2bcc4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -include versioneer.py include morpheus/_version.py recursive-include morpheus/data * 
-recursive-include morpheus *.so py.typed *.pyi +recursive-include morpheus *.cpython*.so py.typed *.pyi diff --git a/ci/conda/recipes/morpheus/morpheus_build.sh b/ci/conda/recipes/morpheus/morpheus_build.sh index 5381dee0a4..721f7a3e44 100644 --- a/ci/conda/recipes/morpheus/morpheus_build.sh +++ b/ci/conda/recipes/morpheus/morpheus_build.sh @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -90,7 +90,7 @@ cmake -B ${BUILD_DIR} \ . # Build the components -cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install # Install just the python wheel components ${PYTHON} -m pip install -vv ${BUILD_DIR}/dist/*.whl diff --git a/ci/scripts/common.sh b/ci/scripts/common.sh index d942aec853..3bfa4e0870 100644 --- a/ci/scripts/common.sh +++ b/ci/scripts/common.sh @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,14 +22,11 @@ export PY_ROOT="${MORPHEUS_ROOT}" export PY_CFG="${PY_ROOT}/setup.cfg" export PY_DIRS="${PY_ROOT} ci/scripts" -# work-around for known yapf issue https://github.com/google/yapf/issues/984 -export YAPF_EXCLUDE_FLAGS="-e versioneer.py -e morpheus/_version.py" - # Determine the commits to compare against. If running in CI, these will be set. 
Otherwise, diff with main export BASE_SHA=${CHANGE_TARGET:-${BASE_SHA:-$(${SCRIPT_DIR}/gitutils.py get_merge_target)}} export COMMIT_SHA=${GIT_COMMIT:-${COMMIT_SHA:-HEAD}} -export CPP_FILE_REGEX='^(\.\/)?(morpheus|tests)\/.*\.(cc|cpp|h|hpp)$' +export CPP_FILE_REGEX='^(\.\/)?(examples|morpheus|tests)\/.*\.(cc|cpp|h|hpp)$' export PYTHON_FILE_REGEX='^(\.\/)?(?!\.|build|external).*\.(py|pyx|pxd)$' # Use these options to skip any of the checks diff --git a/ci/scripts/fix_all.sh b/ci/scripts/fix_all.sh index 449902cafe..8585e73cc6 100755 --- a/ci/scripts/fix_all.sh +++ b/ci/scripts/fix_all.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -105,5 +105,5 @@ fi # Run yapf if [[ "${SKIP_YAPF}" == "" ]]; then echo "Running yapf..." - python3 -m yapf -i --style ${PY_CFG} ${YAPF_EXCLUDE_FLAGS} -r ${PY_MODIFIED_FILES[@]} + python3 -m yapf -i --style ${PY_CFG} -r ${PY_MODIFIED_FILES[@]} fi diff --git a/ci/scripts/github/common.sh b/ci/scripts/github/common.sh index 685ba7282a..8ee908c1a3 100644 --- a/ci/scripts/github/common.sh +++ b/ci/scripts/github/common.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -71,10 +71,14 @@ function update_conda_env() { # Deactivate the environment first before updating conda deactivate - rapids-logger "Checking for updates to conda env" - # Update the packages - rapids-mamba-retry env update -n morpheus --prune -q --file "$1" + if [[ "${SKIP_CONDA_ENV_UPDATE}" == "" ]]; then + rapids-logger "Checking for updates to conda env" + + + # Update the packages + rapids-mamba-retry env update -n morpheus --prune -q --file "$1" + fi # Finally, reactivate conda activate morpheus @@ -107,7 +111,6 @@ function fetch_base_branch_gh_api() { function fetch_base_branch_local() { rapids-logger "Retrieving base branch from git" - git remote remove upstream git remote add upstream ${GIT_UPSTREAM_URL} git fetch upstream --tags source ${MORPHEUS_ROOT}/ci/scripts/common.sh diff --git a/ci/scripts/github/docs.sh b/ci/scripts/github/docs.sh index d03f79ebb0..f928d02a38 100755 --- a/ci/scripts/github/docs.sh +++ b/ci/scripts/github/docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,10 +40,11 @@ ${MORPHEUS_ROOT}/scripts/fetch_data.py fetch docs examples git submodule update --init --recursive rapids-logger "Configuring for docs" -cmake -B build -G Ninja ${CMAKE_BUILD_ALL_FEATURES} -DMORPHEUS_PYTHON_BUILD_STUBS=OFF -DMORPHEUS_BUILD_DOCS=ON . +cmake -B build -G Ninja ${CMAKE_BUILD_ALL_FEATURES} -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} -DMORPHEUS_PYTHON_BUILD_STUBS=OFF -DMORPHEUS_BUILD_DOCS=ON . 
rapids-logger "Building docs" -cmake --build build --target morpheus_docs +cmake --build build --parallel ${PARALLEL_LEVEL} --target install +cmake --build build --parallel ${PARALLEL_LEVEL} --target morpheus_docs rapids-logger "Archiving the docs" tar cfj "${WORKSPACE_TMP}/docs.tar.bz" build/docs/html diff --git a/ci/scripts/github/test.sh b/ci/scripts/github/test.sh index cb45189733..fe4fe23813 100755 --- a/ci/scripts/github/test.sh +++ b/ci/scripts/github/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,13 +26,7 @@ rapids-dependency-file-generator \ update_conda_env env.yaml -rapids-logger "Check versions" -python3 --version -x86_64-conda-linux-gnu-cc --version -x86_64-conda-linux-gnu-c++ --version -cmake --version -ninja --version -sccache --version +log_toolchain git submodule update --init --recursive @@ -41,6 +35,7 @@ CMAKE_FLAGS="${CMAKE_FLAGS} -DCMAKE_BUILD_RPATH_USE_ORIGIN=ON" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_STUBS=ON" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_WHEEL=OFF" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" +CMAKE_FLAGS="${CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX}" if [[ "${LOCAL_CI}" == "" ]]; then CMAKE_FLAGS="${CMAKE_FLAGS} -DCCACHE_PROGRAM_PATH=$(which sccache)" fi @@ -49,7 +44,7 @@ rapids-logger "Configuring cmake for Morpheus with ${CMAKE_FLAGS}" cmake -B build -G Ninja ${CMAKE_FLAGS} . 
rapids-logger "Building Morpheus" -cmake --build build --parallel ${PARALLEL_LEVEL} +cmake --build build --parallel ${PARALLEL_LEVEL} --target install if [[ "${LOCAL_CI}" == "" ]]; then rapids-logger "sccache usage for morpheus build:" diff --git a/ci/scripts/python_checks.sh b/ci/scripts/python_checks.sh index 536d3fb974..7455b2edfc 100755 --- a/ci/scripts/python_checks.sh +++ b/ci/scripts/python_checks.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +59,7 @@ if [[ -n "${MORPHEUS_MODIFIED_FILES}" ]]; then if [[ "${SKIP_YAPF}" == "" ]]; then # Run yapf. Will return 1 if there are any diffs - YAPF_OUTPUT=`python3 -m yapf --style ${PY_CFG} ${YAPF_EXCLUDE_FLAGS} --diff ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` + YAPF_OUTPUT=`python3 -m yapf --style ${PY_CFG} --diff ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` YAPF_RETVAL=$? fi diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index c01516fa29..09032a6b46 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,44 +17,48 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "dep") morpheus_utils_initialize_cpm(MORPHEUS_CACHE_DIR) - +# Show some setup variables (only prints if VERBOSE) morpheus_utils_print_config() # First, load the package_config functions include(${CMAKE_CURRENT_LIST_DIR}/package_config/register_api.cmake) # Load direct physical package dependencies first, so we fail early. Add all dependencies to our export set -rapids_find_package(Protobuf - REQUIRED +rapids_find_package(Protobuf REQUIRED BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports ) -find_package(CUDAToolkit REQUIRED) +rapids_find_package(CUDAToolkit REQUIRED + BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports + INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports +) if(MORPHEUS_BUILD_BENCHMARKS) # google benchmark # - Expects package to pre-exist in the build environment # ================ rapids_find_package(benchmark REQUIRED - GLOBAL_TARGETS benchmark::benchmark - BUILD_EXPORT_SET ${PROJECT_NAME}-exports - INSTALL_EXPORT_SET ${PROJECT_NAME}-exports - FIND_ARGS - CONFIG + GLOBAL_TARGETS benchmark::benchmark + BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports + INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports + FIND_ARGS CONFIG ) endif() +# glog +# ==== +morpheus_utils_configure_glog() + if(MORPHEUS_BUILD_TESTS) # google test # - Expects package to pre-exist in the build environment # =========== rapids_find_package(GTest REQUIRED - GLOBAL_TARGETS GTest::gtest GTest::gmock GTest::gtest_main GTest::gmock_main - BUILD_EXPORT_SET ${PROJECT_NAME}-exports - INSTALL_EXPORT_SET ${PROJECT_NAME}-exports - FIND_ARGS - CONFIG + GLOBAL_TARGETS GTest::gtest GTest::gmock GTest::gtest_main GTest::gmock_main + BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports + INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports + FIND_ARGS CONFIG ) endif() @@ -74,6 +78,10 @@ 
morpheus_utils_configure_pybind11() # ===== morpheus_utils_configure_rdkafka() +# RxCpp +# ===== +morpheus_utils_configure_rxcpp() + # MRC (Should come after all third party but before NVIDIA repos) # ===== morpheus_utils_configure_mrc() diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 884973ebf0..95de177e43 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -1,5 +1,5 @@ # This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +# To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. channels: - conda-forge - huggingface @@ -17,6 +17,7 @@ dependencies: - boost-cpp=1.82 - boto3 - breathe=4.34.0 +- ccache>=3.7 - clangdev=16 - click >=8 - click>=8 @@ -46,6 +47,7 @@ dependencies: - flake8 - gcc_linux-64=11.2 - git-lfs +- glog=0.6 - grpcio - gxx_linux-64=11.2 - huggingface_hub=0.10.1 @@ -70,6 +72,7 @@ dependencies: - openai=0.28 - papermill=2.3.4 - pip +- pkg-config - pluggy=1.0 - pre-commit - protobuf=4.21.* @@ -100,6 +103,7 @@ dependencies: - typing_utils=0.1 - ucx=1.14 - ujson=5.8 +- versioneer - watchdog=2.1 - websockets - yapf=0.40.1 @@ -112,4 +116,5 @@ dependencies: - pyarrow_hotfix - pymilvus==2.3.2 - pytest-kafka==0.6.0 -name: all_cuda-118_arch-x86_64 +name: all_cuda-118_arch-x86_64_py-310 + diff --git a/conda/environments/dev_cuda-118_arch-x86_64.yaml b/conda/environments/dev_cuda-118_arch-x86_64.yaml index b13d2e8d64..3552134802 100644 --- a/conda/environments/dev_cuda-118_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-118_arch-x86_64.yaml @@ -1,5 +1,5 @@ # This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+# To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. channels: - conda-forge - huggingface @@ -15,6 +15,7 @@ dependencies: - benchmark=1.6.0 - boost-cpp=1.82 - breathe=4.34.0 +- ccache>=3.7 - clangdev=16 - click >=8 - click>=8 @@ -38,6 +39,7 @@ dependencies: - flake8 - gcc_linux-64=11.2 - git-lfs +- glog=0.6 - grpcio - gxx_linux-64=11.2 - include-what-you-use=0.20 @@ -55,6 +57,7 @@ dependencies: - numpydoc=1.4 - nvtabular=23.06 - pip +- pkg-config - pluggy=1.0 - pre-commit - protobuf=4.21.* @@ -79,6 +82,7 @@ dependencies: - tritonclient=2.26 - typing_utils=0.1 - ucx=1.14 +- versioneer - watchdog=2.1 - websockets - yapf=0.40.1 @@ -88,4 +92,5 @@ dependencies: - pyarrow_hotfix - pymilvus==2.3.2 - pytest-kafka==0.6.0 -name: dev_cuda-118_arch-x86_64 +name: dev_cuda-118_arch-x86_64_py-310 + diff --git a/dependencies.yaml b/dependencies.yaml index 6b31678d78..ea6ecf5fa5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -56,6 +56,7 @@ files: cuda: ["11.8"] arch: [x86_64] includes: + - data_retrieval - build_cpp - development - benchmark_cpp @@ -128,13 +129,16 @@ dependencies: - gxx_linux-64=11.2 - cmake=3.25 - boost-cpp=1.82 + - ccache>=3.7 - cuda-nvcc - cudf=23.06 - cxx-compiler - cython=0.29.24 + - glog=0.6 - librdkafka=1.9.2 - ninja=1.10 - nlohmann_json=3.9 + - pkg-config # for mrc cmake - protobuf=4.21.* - pybind11-stubgen=0.10 - rapidjson=1.1.0 @@ -163,6 +167,7 @@ dependencies: - include-what-you-use=0.20 - isort - pylint>=2.17.4,<2.18 # 2.17.4 contains a fix for toml support + - versioneer - yapf=0.40.1 docs: diff --git a/docker/Dockerfile b/docker/Dockerfile index 4c89accc20..b4adc5b27c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.3 -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -112,9 +112,7 @@ RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \ conda config --set ssl_verify false &&\ conda config --add pkgs_dirs /opt/conda/pkgs &&\ # Install mamba, boa and git here. Conda build breaks with other git installs - /opt/conda/bin/mamba install -y -n base -c conda-forge "boa" "git >=2.35.3" "python=${PYTHON_VER}" "tini=0.19" &&\ - source activate base - # conda clean -afy + /opt/conda/bin/mamba install -y -n base -c conda-forge "boa" "git >=2.35.3" "python=${PYTHON_VER}" "tini=0.19" # ============ Stage: conda_env ============ # Create the conda environment and install all dependencies @@ -127,8 +125,7 @@ RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \ # Create the environment and install as little dependencies as possible CONDA_ALWAYS_YES=true /opt/conda/bin/mamba create -n morpheus -c conda-forge \ python=${PYTHON_VER} && \ - # Clean and activate - # conda clean -afy && \ + # Activate the environment automatically from .bashrc conda init bash && \ echo "conda activate morpheus" >> ~/.bashrc @@ -187,9 +184,7 @@ RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \ conda config --env --set channel_alias ${CONDA_CHANNEL_ALIAS} &&\ /opt/conda/bin/mamba env update -n morpheus --file docker/conda/environments/cuda${CUDA_MAJOR_VER}.${CUDA_MINOR_VER}_dev.yml &&\ # Remove channel_alias to use the normal channel in the container - conda config --env --remove-key channel_alias &&\ - # Clean and activate - conda clean -afy + conda config --env --remove-key channel_alias # ============ Stage: conda_bld_morpheus ============ # Now build the morpheus conda package @@ -255,7 +250,8 @@ RUN --mount=type=bind,from=conda_bld_morpheus,source=/opt/conda/conda-bld,target -c nvidia \ -c pytorch \ -c conda-forge morpheus &&\ - /opt/conda/bin/mamba env update -n morpheus --file 
docker/conda/environments/cuda${CUDA_MAJOR_VER}.${CUDA_MINOR_VER}_runtime.yml + /opt/conda/bin/mamba env update -n morpheus --file docker/conda/environments/cuda${CUDA_MAJOR_VER}.${CUDA_MINOR_VER}_runtime.yml && \ + conda clean -afy # Only copy specific files/folders over that are necessary for runtime COPY "${MORPHEUS_ROOT_HOST}/docker" "./docker" @@ -290,10 +286,10 @@ RUN --mount=type=cache,id=apt,target=/var/cache/apt \ rm -rf /var/lib/apt/lists/* # Install camouflage needed for unittests to mock a triton server -# Pin to v0.9 until #967 is resolved RUN source activate morpheus && \ npm install -g camouflage-server@0.15 && \ - npm cache clean --force + npm cache clean --force && \ + conda clean -afy # Setup git to allow other users to access /workspace. Requires git 2.35.3 or # greater. See https://marc.info/?l=git&m=164989570902912&w=2. Only enable for diff --git a/docs/source/developer_guide/guides/3_simple_cpp_stage.md b/docs/source/developer_guide/guides/3_simple_cpp_stage.md index da2ef93624..2b203ef42b 100644 --- a/docs/source/developer_guide/guides/3_simple_cpp_stage.md +++ b/docs/source/developer_guide/guides/3_simple_cpp_stage.md @@ -1,5 +1,5 @@ # Simple C++ Stage -> **Note**: The code for this guide can be found in the `examples/developer_guide/3_simple_cpp_stage` directory of the Morpheus repository. To build the C++ examples, pass `-DMORPHEUS_BUILD_EXAMPLES=ON` to CMake when building Morpheus. Users building Morpheus with the provided `scripts/compile.sh` script can do do by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: -> ```bash -> CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +## Building the Example +The code for this guide can be found in the `examples/developer_guide/3_simple_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. 
The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to CMake. Users building Morpheus with the `scripts/compile.sh` script at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +```bash +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +``` + +The second method is to build the example as a standalone project. From the root of the Morpheus repo execute: +```bash +cd examples/developer_guide/3_simple_cpp_stage +./compile.sh +# Optionally install the package into the current python environment +pip install ./ +``` + +## Overview Morpheus offers the choice of writing pipeline stages in either Python or C++. For many use cases, a Python stage is perfectly fine. However, in the event that a Python stage becomes a bottleneck for the pipeline, then writing a C++ implementation for the stage becomes advantageous. The C++ implementations of Morpheus stages and messages utilize the [pybind11](https://pybind11.readthedocs.io/en/stable/index.html) library to provide Python bindings. So far we have been defining our stages in Python, the option of defining a C++ implementation is only available to stages implemented as classes. Many of the stages included with Morpheus have both a Python and a C++ implementation, and Morpheus will use the C++ implementations by default. You can explicitly disable the use of C++ stage implementations by calling `morpheus.config.CppConfig.set_should_use_cpp(False)`: @@ -275,7 +287,7 @@ The Python interface itself defines a Python module named `morpheus_example` and namespace py = pybind11; // Define the pybind11 module m. -PYBIND11_MODULE(morpheus_example, m) +PYBIND11_MODULE(pass_thru_cpp, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); @@ -319,7 +331,7 @@ std::shared_ptr> PassThruStageInterfaceProxy namespace py = pybind11; // Define the pybind11 module m.
-PYBIND11_MODULE(morpheus_example, m) +PYBIND11_MODULE(pass_thru_cpp, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); @@ -353,10 +365,9 @@ As mentioned in the previous section, our `_build_single` method needs to be upd ```python def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: if self._build_cpp_node() and issubclass(self._input_type, MultiMessage): - from _lib import morpheus_example as morpheus_example_cpp + from ._lib import pass_thru_cpp - # pylint: disable=c-extension-no-member - node = morpheus_example_cpp.PassThruStage(builder, self.unique_name) + node = pass_thru_cpp.PassThruStage(builder, self.unique_name) else: node = builder.make_node(self.unique_name, ops.map(self.on_data)) @@ -408,9 +419,9 @@ class PassThruStage(PassThruTypeMixin, SinglePortStage): def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: if self._build_cpp_node() and issubclass(self._input_type, MultiMessage): - from _lib import morpheus_example as morpheus_example_cpp + from ._lib import pass_thru_cpp - node = morpheus_example_cpp.PassThruStage(builder, self.unique_name) + node = pass_thru_cpp.PassThruStage(builder, self.unique_name) else: node = builder.make_node(self.unique_name, ops.map(self.on_data)) @@ -420,10 +431,10 @@ class PassThruStage(PassThruTypeMixin, SinglePortStage): ## Testing the Stage To test the updated stage we will build a simple pipeline using the Morpheus command line tool. In order to illustrate the stage building a C++ node only when the input type is a `MultiMessage` we will insert the `pass-thru` stage in twice in the pipeline. In the first instance the input type will be `MessageMeta` and the stage will fallback to using a Python node, and in the second instance the input type will be a `MultiMessage` and the stage will build a C++ node. 
- + ```bash -PYTHONPATH="examples/developer_guide/3_simple_cpp_stage" \ -morpheus --log_level=debug --plugin "pass_thru" \ +PYTHONPATH="examples/developer_guide/3_simple_cpp_stage/src" \ +morpheus --log_level=debug --plugin "simple_cpp_stage.pass_thru" \ run pipeline-other \ from-file --filename=examples/data/email_with_addresses.jsonlines \ pass-thru \ @@ -432,5 +443,3 @@ morpheus --log_level=debug --plugin "pass_thru" \ pass-thru \ monitor ``` - -> **Note**: In the above example we set the `PYTHONPATH` environment variable this is to facilitate the relative import the stage performs of the `_lib` module. \ No newline at end of file diff --git a/docs/source/developer_guide/guides/4_source_cpp_stage.md b/docs/source/developer_guide/guides/4_source_cpp_stage.md index 58c0dbaf88..8bc17f1347 100644 --- a/docs/source/developer_guide/guides/4_source_cpp_stage.md +++ b/docs/source/developer_guide/guides/4_source_cpp_stage.md @@ -1,5 +1,5 @@ # Creating a C++ Source Stage -> **Note**: The code for this guide can be found in the `examples/developer_guide/4_rabbitmq_cpp_stage` directory of the Morpheus repository. To build the C++ examples, pass `-DMORPHEUS_BUILD_EXAMPLES=ON` to CMake when building Morpheus. Users building Morpheus with the provided `scripts/compile.sh` script can do do by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: -> ```bash -> CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +## Building the Example +The code for this guide can be found in the `examples/developer_guide/4_rabbitmq_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. 
The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to CMake. Users building Morpheus with the `scripts/compile.sh` script at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +```bash +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +``` + +The second method is to build the example as a standalone project. From the root of the Morpheus repo execute: +```bash +cd examples/developer_guide/4_rabbitmq_cpp_stage +./compile.sh + +# Optionally install the package into the current python environment +pip install ./ +``` +## Overview For this example, we are going to add a C++ implementation for the `RabbitMQSourceStage` we designed in the Python examples. The Python implementation of this stage emits messages of the type `MessageMeta`; as such, our C++ implementation must do the same. For communicating with [RabbitMQ](https://www.rabbitmq.com/) we will be using the [SimpleAmqpClient](https://github.com/alanxz/SimpleAmqpClient) library, and [libcudf](https://docs.rapids.ai/api/libcudf/stable/index.html) for constructing the `DataFrame`. @@ -199,8 +211,8 @@ RabbitMQSourceStage::RabbitMQSourceStage(const std::string& host, const std::string& queue_name, std::chrono::milliseconds poll_interval) : PythonSource(build()), - m_channel{AmqpClient::Channel::Create(host)}, - m_poll_interval{poll_interval} + m_poll_interval{poll_interval}, + m_channel{AmqpClient::Channel::Create(host)} { m_channel->DeclareExchange(exchange, exchange_type); m_queue_name = m_channel->DeclareQueue(queue_name); @@ -316,7 +328,7 @@ std::shared_ptr> RabbitMQSourceStageIn namespace py = pybind11; // Define the pybind11 module m.
-PYBIND11_MODULE(morpheus_rabbit, m) +PYBIND11_MODULE(rabbitmq_cpp_stage, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); @@ -358,8 +370,8 @@ RabbitMQSourceStage::RabbitMQSourceStage(const std::string& host, const std::string& queue_name, std::chrono::milliseconds poll_interval) : PythonSource(build()), - m_channel{AmqpClient::Channel::Create(host)}, - m_poll_interval{poll_interval} + m_poll_interval{poll_interval}, + m_channel{AmqpClient::Channel::Create(host)} { m_channel->DeclareExchange(exchange, exchange_type); m_queue_name = m_channel->DeclareQueue(queue_name); @@ -444,7 +456,7 @@ std::shared_ptr> RabbitMQSourceStageIn namespace py = pybind11; // Define the pybind11 module m. -PYBIND11_MODULE(morpheus_rabbit, m) +PYBIND11_MODULE(rabbitmq_cpp_stage, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); @@ -511,7 +523,7 @@ Lastly, our `_build_source` method needs to be updated to build a C++ node when ```python def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: if self._build_cpp_node(): - from _lib import morpheus_rabbit as morpheus_rabbit_cpp + from ._lib import rabbitmq_cpp_stage node = morpheus_rabbit_cpp.RabbitMQSourceStage(builder, self.unique_name, diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 2b6fcaa7c2..d7632f3325 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -13,7 +13,9 @@ # ============================================================================= list(APPEND CMAKE_MESSAGE_CONTEXT "examples") -add_subdirectory(developer_guide/3_simple_cpp_stage) -add_subdirectory(developer_guide/4_rabbitmq_cpp_stage) +# Add the binary dir before including examples so they can find the parent project +list(PREPEND CMAKE_PREFIX_PATH "${PROJECT_BINARY_DIR}") + +add_subdirectory(developer_guide) list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/examples/developer_guide/3_simple_cpp_stage/.gitattributes b/examples/developer_guide/3_simple_cpp_stage/.gitattributes new file mode 100644 index 0000000000..6402a165ec --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/.gitattributes @@ -0,0 +1 @@ +src/simple_cpp_stage/_version.py export-subst diff --git a/examples/developer_guide/3_simple_cpp_stage/CMakeLists.txt b/examples/developer_guide/3_simple_cpp_stage/CMakeLists.txt index 83dbbbe269..a5f88a892f 100644 --- a/examples/developer_guide/3_simple_cpp_stage/CMakeLists.txt +++ b/examples/developer_guide/3_simple_cpp_stage/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,52 +13,56 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-list(APPEND CMAKE_MESSAGE_CONTEXT "simple_cpp_stage") +list(APPEND CMAKE_MESSAGE_CONTEXT "3_simple_cpp_stage") -set(Python3_FIND_VIRTUALENV "FIRST") -set(Python3_FIND_STRATEGY "LOCATION") -find_package(Python3 REQUIRED COMPONENTS Development Interpreter NumPy) +cmake_minimum_required(VERSION 3.24 FATAL_ERROR) -pybind11_add_module(morpheus_example MODULE "${CMAKE_CURRENT_SOURCE_DIR}/_lib/pass_thru.cpp") +# Set the cache to be the same to allow for CCache to be used effectively +set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data") +mark_as_advanced(MORPHEUS_CACHE_DIR) -add_library(${PROJECT_NAME}::morpheus_example ALIAS morpheus_example) +# Add the Conda environment to the prefix path and add the CMake files +list(PREPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}") -target_link_libraries(morpheus_example - PUBLIC - morpheus - mrc::pymrc +project(3_simple_cpp_stage + VERSION 24.03.00 + LANGUAGES C CXX ) -target_include_directories(morpheus_example - PUBLIC - $ - $ +set(CMAKE_CXX_STANDARD 20) + +# Set the option prefix to match the outer project before including. 
Must be before find_package(morpheus) +set(OPTION_PREFIX "MORPHEUS") +find_package(morpheus REQUIRED) + +morpheus_utils_initialize_cpm(MORPHEUS_CACHE_DIR) + +# Ensure CPM is initialized +rapids_cpm_init() + +morpheus_utils_python_configure() + +rapids_find_package(CUDAToolkit REQUIRED) + +set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +morpheus_utils_create_python_package(simple_cpp_stage + PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/src" ) -set_target_properties(morpheus_example PROPERTIES CXX_VISIBILITY_PRESET hidden) +add_subdirectory(src/simple_cpp_stage/_lib) +# Complete the python package if(MORPHEUS_PYTHON_INPLACE_BUILD) - set_target_properties(morpheus_example - PROPERTIES - INSTALL_RPATH "${PROJECT_SOURCE_DIR}/morpheus/_lib" - ) -else() - # TODO: https://github.com/nv-morpheus/Morpheus/issues/331 - set_target_properties(morpheus_example - PROPERTIES - INSTALL_RPATH "${Python3_SITELIB}/morpheus/_lib" - ) + list(APPEND extra_args "IS_INPLACE") endif() -morpheus_utils_inplace_build_copy(morpheus_example "${CMAKE_CURRENT_SOURCE_DIR}/_lib") - -message(STATUS " Install dest: (morpheus_example) ${CMAKE_CURRENT_BINARY_DIR}/_lib") -install( - TARGETS - morpheus_example - LIBRARY DESTINATION - "${CMAKE_CURRENT_BINARY_DIR}/_lib" -) +if(TARGET morpheus-package-install) + list(APPEND extra_args "PYTHON_DEPENDENCIES" "morpheus-package-install") +endif() +morpheus_utils_build_python_package(simple_cpp_stage ${extra_args}) list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/examples/developer_guide/3_simple_cpp_stage/MANIFEST.in b/examples/developer_guide/3_simple_cpp_stage/MANIFEST.in new file mode 100644 index 0000000000..6f3094ee52 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/MANIFEST.in @@ -0,0 +1,2 @@ +include src/simple_cpp_stage/_version.py +recursive-include src *.so *.pyi diff --git a/examples/developer_guide/3_simple_cpp_stage/compile.sh 
b/examples/developer_guide/3_simple_cpp_stage/compile.sh new file mode 100755 index 0000000000..41c4640a49 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/compile.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +set -e + +# Optionally set BUILD_DIR to choose the CMake build directory (defaults to "build"); CMAKE_CONFIGURE_EXTRA_ARGS is appended to the configure step when set +BUILD_DIR=${BUILD_DIR:-"build"} + +echo "Running CMake configure..." +cmake -B "${BUILD_DIR}" -GNinja \ + -DCMAKE_MESSAGE_CONTEXT_SHOW=ON \ + -DMORPHEUS_PYTHON_INPLACE_BUILD:BOOL=ON \ + -DMORPHEUS_PYTHON_PERFORM_INSTALL:BOOL=ON `# Ensure all of the libraries are installed` \ + ${CMAKE_CONFIGURE_EXTRA_ARGS:+${CMAKE_CONFIGURE_EXTRA_ARGS}} . + +echo "Running CMake build..." +cmake --build "${BUILD_DIR}" -j "$@" diff --git a/examples/developer_guide/3_simple_cpp_stage/pyproject.toml b/examples/developer_guide/3_simple_cpp_stage/pyproject.toml new file mode 100644 index 0000000000..1ad4dec9eb --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/pyproject.toml @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools", "wheel", "versioneer[toml]==0.29"] + +[tool.versioneer] +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer setup' after changing this section, and commit the +# resulting files. +VCS = "git" +style = "pep440" +tag_prefix = "v" +versionfile_build = "src/simple_cpp_stage/_version.py" +versionfile_source = "src/simple_cpp_stage/_version.py" + +[tool.yapfignore] +ignore_patterns = [ + "**/_version.py", +] diff --git a/examples/developer_guide/3_simple_cpp_stage/setup.cfg b/examples/developer_guide/3_simple_cpp_stage/setup.cfg new file mode 100644 index 0000000000..e6a7282416 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/setup.cfg @@ -0,0 +1,35 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +[metadata] +name = simple_cpp_stage +version = attr: versioneer.get_version +description = Morpheus Example - Pass Thru C++ Stage +author = NVIDIA Corporation +license = Apache +classifiers = + Intended Audience :: Developers + Programming Language :: Python + Programming Language :: Python :: 3.10 + +[options] +zip_safe = False +include_package_data = True +packages = find: +package_dir = + =src + +[options.packages.find] +where = src diff --git a/examples/developer_guide/3_simple_cpp_stage/setup.py b/examples/developer_guide/3_simple_cpp_stage/setup.py new file mode 100644 index 0000000000..0e0fce75d5 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/setup.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup + +setup() diff --git a/examples/developer_guide/3_simple_cpp_stage/run.py b/examples/developer_guide/3_simple_cpp_stage/src/run.py similarity index 94% rename from examples/developer_guide/3_simple_cpp_stage/run.py rename to examples/developer_guide/3_simple_cpp_stage/src/run.py index d60ee264ed..9f00ff00a2 100755 --- a/examples/developer_guide/3_simple_cpp_stage/run.py +++ b/examples/developer_guide/3_simple_cpp_stage/src/run.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ import logging import os -from pass_thru import PassThruStage +from simple_cpp_stage.pass_thru import PassThruStage from morpheus.config import Config from morpheus.pipeline import LinearPipeline diff --git a/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/__init__.py b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/__init__.py new file mode 100644 index 0000000000..9614ddb8cd --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . 
import _version # pylint: disable=wrong-import-position + +__version__ = _version.get_versions()['version'] diff --git a/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/CMakeLists.txt b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/CMakeLists.txt new file mode 100644 index 0000000000..55f9082d4c --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/CMakeLists.txt @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +list(APPEND CMAKE_MESSAGE_CONTEXT "pass_thru_cpp_stage") + +morpheus_add_pybind11_module(pass_thru_cpp + SOURCE_FILES + "pass_thru.cpp" + INCLUDE_DIRS + $ + LINK_TARGETS + morpheus +) + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/examples/developer_guide/3_simple_cpp_stage/_lib/__init__.py b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/__init__.py similarity index 100% rename from examples/developer_guide/3_simple_cpp_stage/_lib/__init__.py rename to examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/__init__.py diff --git a/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.cpp b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.cpp similarity index 80% rename from examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.cpp rename to examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.cpp index 78fa64ccad..a639bc1a35 100644 --- a/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.cpp +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,10 +29,16 @@ PassThruStage::PassThruStage() : PythonNode(base_t::op_factory_from_sub_fn(build PassThruStage::subscribe_fn_t PassThruStage::build_operator() { return [this](rxcpp::observable input, rxcpp::subscriber output) { - return input.subscribe( - rxcpp::make_observer([this, &output](sink_type_t x) { output.on_next(std::move(x)); }, - [&](std::exception_ptr error_ptr) { output.on_error(error_ptr); }, - [&]() { output.on_completed(); })); + return input.subscribe(rxcpp::make_observer( + [this, &output](sink_type_t x) { + output.on_next(std::move(x)); + }, + [&](std::exception_ptr error_ptr) { + output.on_error(error_ptr); + }, + [&]() { + output.on_completed(); + })); }; } @@ -45,7 +51,7 @@ std::shared_ptr> PassThruStageInterfaceProxy namespace py = pybind11; // Define the pybind11 module m. -PYBIND11_MODULE(morpheus_example, m) +PYBIND11_MODULE(pass_thru_cpp, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); diff --git a/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.hpp b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.hpp similarity index 96% rename from examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.hpp rename to examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.hpp index 12db26362f..9670aab1d7 100644 --- a/examples/developer_guide/3_simple_cpp_stage/_lib/pass_thru.hpp +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru.hpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru_cpp/__init__.pyi b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru_cpp/__init__.pyi new file mode 100644 index 0000000000..566d38fa38 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_lib/pass_thru_cpp/__init__.pyi @@ -0,0 +1,14 @@ +from __future__ import annotations +import src.simple_cpp_stage._lib.pass_thru_cpp +import typing +import morpheus._lib.messages +import mrc.core.segment + +__all__ = [ + "PassThruStage" +] + + +class PassThruStage(mrc.core.segment.SegmentObject): + def __init__(self, builder: mrc.core.segment.Builder, name: str) -> None: ... + pass diff --git a/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_version.py b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_version.py new file mode 100644 index 0000000000..37b3a073b6 --- /dev/null +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/_version.py @@ -0,0 +1,683 @@ + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
+# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "src/simple_cpp_stage/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command( + commands: List[str], + args: 
List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions() -> 
Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/examples/developer_guide/3_simple_cpp_stage/pass_thru.py b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/pass_thru.py similarity index 88% rename from examples/developer_guide/3_simple_cpp_stage/pass_thru.py rename to examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/pass_thru.py index 22f246fbcc..3d22d25b8a 100644 --- a/examples/developer_guide/3_simple_cpp_stage/pass_thru.py +++ b/examples/developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/pass_thru.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -53,10 +53,9 @@ def on_data(self, message: typing.Any): def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: if self._build_cpp_node() and issubclass(self._input_type, MultiMessage): - from _lib import morpheus_example as morpheus_example_cpp + from ._lib import pass_thru_cpp - # pylint: disable=c-extension-no-member - node = morpheus_example_cpp.PassThruStage(builder, self.unique_name) + node = pass_thru_cpp.PassThruStage(builder, self.unique_name) else: node = builder.make_node(self.unique_name, ops.map(self.on_data)) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/.gitattributes b/examples/developer_guide/4_rabbitmq_cpp_stage/.gitattributes new file mode 100644 index 0000000000..f6976a3c01 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/.gitattributes @@ -0,0 +1 @@ +src/rabbitmq_cpp_stage/_version.py export-subst diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/CMakeLists.txt b/examples/developer_guide/4_rabbitmq_cpp_stage/CMakeLists.txt index 8d2d965126..5b5c2d0a19 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/CMakeLists.txt +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,59 +13,56 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus_rabbit") +list(APPEND CMAKE_MESSAGE_CONTEXT "4_rabbitmq_cpp_stage") +cmake_minimum_required(VERSION 3.24 FATAL_ERROR) + +# Set the cache to be the same to allow for CCache to be used effectively +set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data") +mark_as_advanced(MORPHEUS_CACHE_DIR) + +# Add the Conda environment to the prefix path and add the CMake files +list(PREPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -set(Python3_FIND_VIRTUALENV "FIRST") -set(Python3_FIND_STRATEGY "LOCATION") -find_package(Python3 REQUIRED COMPONENTS Development Interpreter NumPy) +project(4_rabbitmq_cpp_stage + VERSION 24.03.00 + LANGUAGES C CXX +) -include(cmake/dependencies.cmake) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -pybind11_add_module(morpheus_rabbit MODULE "${CMAKE_CURRENT_SOURCE_DIR}/_lib/rabbitmq_source.cpp") +# Set the option prefix to match the outer project before including. 
Must be before find_package(morpheus) +set(OPTION_PREFIX "MORPHEUS") +find_package(morpheus REQUIRED) -add_library(${PROJECT_NAME}::morpheus_rabbit ALIAS morpheus_rabbit) +morpheus_utils_initialize_cpm(MORPHEUS_CACHE_DIR) -target_link_libraries(morpheus_rabbit - PUBLIC - morpheus - rabbitmq - SimpleAmqpClient - mrc::pymrc -) +# Ensure CPM is initialized +rapids_cpm_init() -target_include_directories(morpheus_rabbit - PUBLIC - $ - $ - rabbitmq - ${SimpleAmqpClient_SOURCE_DIR}/src +morpheus_utils_python_configure() + +include(cmake/dependencies.cmake) + +morpheus_utils_create_python_package(morpheus_rabbit + PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/src" ) -set_target_properties(morpheus_rabbit PROPERTIES CXX_VISIBILITY_PRESET hidden) +add_subdirectory(src/rabbitmq_cpp_stage/_lib) +# Complete the python package if(MORPHEUS_PYTHON_INPLACE_BUILD) - set_target_properties(morpheus_rabbit - PROPERTIES - INSTALL_RPATH "${PROJECT_SOURCE_DIR}/morpheus/_lib" - ) -else() - # TODO: https://github.com/nv-morpheus/Morpheus/issues/331 - set_target_properties(morpheus_rabbit - PROPERTIES - INSTALL_RPATH "${Python3_SITELIB}/morpheus/_lib" - ) + list(APPEND extra_args "IS_INPLACE") endif() -morpheus_utils_inplace_build_copy(morpheus_rabbit "${CMAKE_CURRENT_SOURCE_DIR}/_lib") +if(TARGET morpheus-package-install) + list(APPEND extra_args "PYTHON_DEPENDENCIES" "morpheus-package-install") +endif() -message(STATUS " Install dest: (morpheus_rabbit) ${CMAKE_CURRENT_BINARY_DIR}/_lib") -install( - TARGETS - morpheus_rabbit - LIBRARY DESTINATION - "${CMAKE_CURRENT_BINARY_DIR}/_lib" -) +morpheus_utils_build_python_package(morpheus_rabbit ${extra_args}) list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/MANIFEST.in b/examples/developer_guide/4_rabbitmq_cpp_stage/MANIFEST.in new file mode 100644 index 0000000000..78b93bc2d2 --- /dev/null +++ 
b/examples/developer_guide/4_rabbitmq_cpp_stage/MANIFEST.in @@ -0,0 +1,2 @@ +include src/rabbitmq_cpp_stage/_version.py +recursive-include src *.so *.pyi diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index eb646e9209..2e3319b65e 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -1,5 +1,5 @@ # Example RabbitMQ stages -This example builds upon the `examples/developer_guide/2_2_rabbitmq` example adding a C++ implementation for the `RabbitMQSourceStage`. +This example builds upon the `examples/developer_guide/2_2_rabbitmq` example adding a C++ implementation for the `RabbitMQSourceStage` along with adding package install scripts. This example adds two flags to the `read_simple.py` script. A `--use_cpp` flag which defaults to `True` and a `--num_threads` flag which defaults to the number of cores on the system as returned by `os.cpu_count()`. +## Installing Pika +The `RabbitMQSourceStage` and `WriteToRabbitMQStage` stages use the [pika](https://pika.readthedocs.io/en/stable/#) RabbitMQ client for Python. To install this into the current env run: +```bash +pip install -r examples/developer_guide/4_rabbitmq_cpp_stage/requirements.txt +``` + +## Building the Example +There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to CMake. Users of the `scripts/compile.sh` script at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +```bash +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +``` + +The second is to build the example as a standalone project. 
From the root of the Morpheus repo execute: +```bash +cd examples/developer_guide/4_rabbitmq_cpp_stage +./compile.sh + +# Optionally install the package into the current python environment +pip install ./ +``` + ## Testing with a RabbitMQ container Testing can be performed locally with the RabbitMQ supplied docker image from the [RabbitMQ container registry](https://registry.hub.docker.com/_/rabbitmq/): ```bash @@ -31,7 +52,7 @@ The image can be verified with the web management console by opening http://loca ## Launch the reader In a second terminal from the root of the Morpheus repo execute: ```bash -python examples/developer_guide/4_rabbitmq_cpp_stage/read_simple.py +python examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py ``` This will read from a RabbitMQ exchange named 'logs', and write the results to `/tmp/results.json`. @@ -41,7 +62,7 @@ If no exchange named 'logs' exists in RabbitMQ it will be created. ## Launch the writer In a third terminal from the root of the Morpheus repo execute: ```bash -python examples/developer_guide/4_rabbitmq_cpp_stage/write_simple.py +python examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py ``` This will read JSON data from the `examples/data/email.jsonlines` file and publish the data into the 'logs' RabbitMQ exchange as a single message. 
diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/__init__.py b/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake index 7aa248e8e1..7ce13f7695 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake @@ -1,5 +1,5 @@ #============================================================================= -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,9 +25,9 @@ function(find_and_configure_SimpleAmqpClient version) GLOBAL_TARGETS SimpleAmqpClient BUILD_EXPORT_SET - ${PROJECT_NAME}-exports + ${PROJECT_NAME}-core-exports INSTALL_EXPORT_SET - ${PROJECT_NAME}-exports + ${PROJECT_NAME}-core-exports CPM_ARGS GIT_REPOSITORY https://github.com/alanxz/SimpleAmqpClient GIT_TAG "v${version}" diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_rabbitmq.cmake b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_rabbitmq.cmake index a1534f2e46..ecbcc2b0ed 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_rabbitmq.cmake +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_rabbitmq.cmake @@ -1,5 +1,5 @@ #============================================================================= -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,9 +26,9 @@ function(find_and_configure_rabbitmq version) GLOBAL_TARGETS rabbitmq rabbitmq::rabbitmq BUILD_EXPORT_SET - ${PROJECT_NAME}-exports + ${PROJECT_NAME}-core-exports INSTALL_EXPORT_SET - ${PROJECT_NAME}-exports + ${PROJECT_NAME}-core-exports CPM_ARGS GIT_REPOSITORY https://github.com/alanxz/rabbitmq-c GIT_SHALLOW TRUE diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/dependencies.cmake b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/dependencies.cmake index fa24f87b2e..56a692f7b4 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/dependencies.cmake +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/dependencies.cmake @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,8 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "dep") +rapids_find_package(CUDAToolkit REQUIRED) + set(RABBITMQ_VERSION "0.12.0" CACHE STRING "Version of RabbitMQ-C to use") include(Configure_rabbitmq) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/compile.sh b/examples/developer_guide/4_rabbitmq_cpp_stage/compile.sh new file mode 100755 index 0000000000..41c4640a49 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/compile.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x +set -e + +# Optionally set BUILD_DIR to choose the build directory (defaults to "build"); extra configure flags can be passed via CMAKE_CONFIGURE_EXTRA_ARGS +BUILD_DIR=${BUILD_DIR:-"build"} + +echo "Running CMake configure..." +cmake -B ${BUILD_DIR} -GNinja \ + -DCMAKE_MESSAGE_CONTEXT_SHOW=ON \ + -DMORPHEUS_PYTHON_INPLACE_BUILD:BOOL=ON \ + -DMORPHEUS_PYTHON_PERFORM_INSTALL:BOOL=ON `# Ensure all of the libraries are installed` \ + ${CMAKE_CONFIGURE_EXTRA_ARGS:+${CMAKE_CONFIGURE_EXTRA_ARGS}} . + +echo "Running CMake build..." +cmake --build ${BUILD_DIR} -j "$@" diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/pyproject.toml b/examples/developer_guide/4_rabbitmq_cpp_stage/pyproject.toml new file mode 100644 index 0000000000..ad85e7c003 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/pyproject.toml @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools", "wheel", "versioneer[toml]==0.29"] + +[tool.versioneer] +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer setup' after changing this section, and commit the +# resulting files. +VCS = "git" +style = "pep440" +tag_prefix = "v" +versionfile_build = "src/rabbitmq_cpp_stage/_version.py" +versionfile_source = "src/rabbitmq_cpp_stage/_version.py" + +[tool.yapfignore] +ignore_patterns = [ + "**/_version.py", +] diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/setup.cfg b/examples/developer_guide/4_rabbitmq_cpp_stage/setup.cfg new file mode 100644 index 0000000000..9f6be7db1a --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/setup.cfg @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[metadata] +name = rabbitmq_cpp_stage +version = attr: versioneer.get_version +description = Morpheus Example - RabbitMQ C++ Stage +author = NVIDIA Corporation +license = Apache +classifiers = + Intended Audience :: Developers + Programming Language :: Python + Programming Language :: Python :: 3.10 + +[options] +zip_safe = False +include_package_data = True +packages = find: +package_dir = + =src +python_requires = >=3.10 +install_requires = + pika ==1.2.0 + +[options.packages.find] +where = src diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/setup.py b/examples/developer_guide/4_rabbitmq_cpp_stage/setup.py new file mode 100644 index 0000000000..0e0fce75d5 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/setup.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from setuptools import setup + +setup() diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/__init__.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/__init__.py new file mode 100644 index 0000000000..acc168b4e5 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import _version + +__version__ = _version.get_versions()['version'] diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/CMakeLists.txt b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/CMakeLists.txt new file mode 100644 index 0000000000..ba768f7a2a --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/CMakeLists.txt @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +list(APPEND CMAKE_MESSAGE_CONTEXT "rabbitmq_cpp_stage") + +morpheus_add_pybind11_module(rabbitmq_cpp_stage + SOURCE_FILES + "rabbitmq_source.cpp" + INCLUDE_DIRS + $ + ${SimpleAmqpClient_SOURCE_DIR}/src + LINK_TARGETS + morpheus + rabbitmq + SimpleAmqpClient +) + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/__init__.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/__init__.py similarity index 100% rename from examples/developer_guide/4_rabbitmq_cpp_stage/__init__.py rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/__init__.py diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_cpp_stage/__init__.pyi b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_cpp_stage/__init__.pyi new file mode 100644 index 0000000000..93e02914b8 --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_cpp_stage/__init__.pyi @@ -0,0 +1,15 @@ +from __future__ import annotations +import src.rabbitmq_cpp_stage._lib.rabbitmq_cpp_stage +import typing +import datetime +import morpheus._lib.messages +import mrc.core.segment + +__all__ = [ + "RabbitMQSourceStage" +] + + +class RabbitMQSourceStage(mrc.core.segment.SegmentObject): + def __init__(self, builder: mrc.core.segment.Builder, name: str, host: str, exchange: str, exchange_type: str = 'fanout', queue_name: str = '', poll_interval: datetime.timedelta = datetime.timedelta(microseconds=100000)) -> None: ... 
+ pass diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp similarity index 96% rename from examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp index 4f0e6db7cc..bf7427b773 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,8 +36,8 @@ RabbitMQSourceStage::RabbitMQSourceStage(const std::string& host, const std::string& queue_name, std::chrono::milliseconds poll_interval) : PythonSource(build()), - m_channel{AmqpClient::Channel::Create(host)}, - m_poll_interval{poll_interval} + m_poll_interval{poll_interval}, + m_channel{AmqpClient::Channel::Create(host)} { m_channel->DeclareExchange(exchange, exchange_type); m_queue_name = m_channel->DeclareQueue(queue_name); @@ -122,7 +122,7 @@ std::shared_ptr> RabbitMQSourceStageIn namespace py = pybind11; // Define the pybind11 module m. 
-PYBIND11_MODULE(morpheus_rabbit, m) +PYBIND11_MODULE(rabbitmq_cpp_stage, m) { mrc::pymrc::import(m, "morpheus._lib.messages"); diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp similarity index 98% rename from examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp index 6f838b69d8..8a1b2ff462 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_lib/rabbitmq_source.hpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_version.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_version.py new file mode 100644 index 0000000000..a0573c335e --- /dev/null +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/_version.py @@ -0,0 +1,683 @@ + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
+# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools + + +def get_keywords() -> Dict[str, str]: + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool + + +def get_config() -> VersioneerConfig: + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "None" + cfg.versionfile_source = "src/rabbitmq_cpp_stage/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command( + commands: List[str], + 
args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None), **popen_kwargs) + break + except OSError as e: + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords: Dict[str, str] = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=not verbose) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces: Dict[str, Any] = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. 
+ branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. 
+ date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces: Dict[str, Any]) -> str: + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces: Dict[str, Any]) -> str: + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces: Dict[str, Any]) -> str: + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces: Dict[str, Any]) -> str: + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces: Dict[str, Any]) -> str: + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces: Dict[str, Any]) -> str: + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces: Dict[str, Any]) -> str: + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions() -> 
Dict[str, Any]: + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/rabbitmq_source_stage.py similarity index 85% rename from examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/rabbitmq_source_stage.py index 9557e4f6da..4516f7d87b 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/rabbitmq_source_stage.py @@ -1,4 +1,4 @@ 
-# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -87,16 +87,15 @@ def compute_schema(self, schema: StageSchema): def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: if self._build_cpp_node(): - # pylint: disable=c-extension-no-member,no-name-in-module - from _lib import morpheus_rabbit as morpheus_rabbit_cpp - - node = morpheus_rabbit_cpp.RabbitMQSourceStage(builder, - self.unique_name, - self._host, - self._exchange, - self._exchange_type, - self._queue_name, - self._poll_interval.to_pytimedelta()) + from ._lib import rabbitmq_cpp_stage + + node = rabbitmq_cpp_stage.RabbitMQSourceStage(builder, + self.unique_name, + self._host, + self._exchange, + self._exchange_type, + self._queue_name, + self._poll_interval.to_pytimedelta()) else: self.connect() node = builder.make_source(self.unique_name, self.source_generator) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/write_to_rabbitmq_stage.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/write_to_rabbitmq_stage.py similarity index 97% rename from examples/developer_guide/4_rabbitmq_cpp_stage/write_to_rabbitmq_stage.py rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/write_to_rabbitmq_stage.py index 3516580bda..401d8b785e 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/write_to_rabbitmq_stage.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/rabbitmq_cpp_stage/write_to_rabbitmq_stage.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/read_simple.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py similarity index 94% rename from examples/developer_guide/4_rabbitmq_cpp_stage/read_simple.py rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py index 7581cf38b1..1edec18e75 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/read_simple.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ import os import click -from rabbitmq_source_stage import RabbitMQSourceStage +from rabbitmq_cpp_stage.rabbitmq_source_stage import RabbitMQSourceStage from morpheus.common import FileTypes from morpheus.config import Config diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/write_simple.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py similarity index 92% rename from examples/developer_guide/4_rabbitmq_cpp_stage/write_simple.py rename to examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py index 239f205350..9cac7d7331 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/write_simple.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ import logging import os -from write_to_rabbitmq_stage import WriteToRabbitMQStage +from rabbitmq_cpp_stage.write_to_rabbitmq_stage import WriteToRabbitMQStage from morpheus.config import Config from morpheus.pipeline import LinearPipeline diff --git a/examples/developer_guide/CMakeLists.txt b/examples/developer_guide/CMakeLists.txt new file mode 100644 index 0000000000..16220563c7 --- /dev/null +++ b/examples/developer_guide/CMakeLists.txt @@ -0,0 +1,19 @@ +# ============================================================================= +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= +list(APPEND CMAKE_MESSAGE_CONTEXT "developer_guide") + +add_subdirectory(3_simple_cpp_stage) +add_subdirectory(4_rabbitmq_cpp_stage) + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/external/utilities b/external/utilities index c977a62a62..eb55e1acb7 160000 --- a/external/utilities +++ b/external/utilities @@ -1 +1 @@ -Subproject commit c977a62a62ab2ca219dd2a10900644434e250dde +Subproject commit eb55e1acb73df1dbf4c1b69f17c918c661921c3c diff --git a/morpheus/_lib/cmake/libmorpheus.cmake b/morpheus/_lib/cmake/libmorpheus.cmake index 69b8857631..a33a563d4c 100644 --- a/morpheus/_lib/cmake/libmorpheus.cmake +++ b/morpheus/_lib/cmake/libmorpheus.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -91,10 +91,9 @@ add_library(${PROJECT_NAME}::morpheus ALIAS morpheus) target_link_libraries(morpheus PRIVATE - ${cudf_helpers_target} matx::matx - PUBLIC + $ cudf::cudf CUDA::nvToolsExt mrc::pymrc @@ -109,6 +108,17 @@ target_include_directories(morpheus $ ) +# Add the include directories of the cudf_helpers_project since we dont want to link directly to it +get_target_property(cudf_helpers_include ${cudf_helpers_target} INTERFACE_INCLUDE_DIRECTORIES) + +target_include_directories(morpheus + PRIVATE + ${cudf_helpers_include} +) + +# Also add a dependency to the target so that the headers are generated before the target is built +add_dependencies(morpheus ${cudf_helpers_target}) + # In debug mode, dont allow missing symbols target_link_options(morpheus PUBLIC "$<$:-Wl,--no-allow-shlib-undefined>") @@ -133,6 +143,7 @@ target_sources(morpheus PUBLIC FILE_SET public_headers TYPE HEADERS + BASE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/include" FILES ${morpheus_public_headers} ) @@ -165,17 +176,73 @@ set_target_properties(morpheus CUDA_STANDARD_REQUIRED ON ) +if(MORPHEUS_PYTHON_INPLACE_BUILD) + morpheus_utils_inplace_build_copy(morpheus ${CMAKE_CURRENT_SOURCE_DIR}) +endif() + +# ################################################################################################## +# - install targets -------------------------------------------------------------------------------- + +# Get the library directory in a cross-platform way +rapids_cmake_install_lib_dir(lib_dir) + +include(CPack) +include(GNUInstallDirs) + install( - TARGETS - morpheus - EXPORT - ${PROJECT_NAME}-exports - FILE_SET - public_headers - COMPONENT - Wheel + TARGETS + morpheus + EXPORT + ${PROJECT_NAME}-core-exports + LIBRARY + DESTINATION ${lib_dir} + FILE_SET + public_headers ) -if(MORPHEUS_PYTHON_INPLACE_BUILD) - morpheus_utils_inplace_build_copy(morpheus ${CMAKE_CURRENT_SOURCE_DIR}) +# 
################################################################################################## +# - install export --------------------------------------------------------------------------------- +set(doc_string + [=[ +Provide targets for mrc. +]=]) + +set(rapids_project_version_compat SameMinorVersion) + +# Install cmake helper scripts needed to build examples +set(cmake_lib_dir "${lib_dir}/cmake/morpheus") + +install(DIRECTORY + "${MORPHEUS_UTILS_ROOT_PATH}" + DESTINATION "${cmake_lib_dir}" +) + +set(code_string + [=[ +if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/morpheus_utils/load.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/morpheus_utils/load.cmake") endif() +]=]) + +# Need to explicitly set VERSION ${PROJECT_VERSION} here since rapids_cmake gets +# confused with the `RAPIDS_VERSION` variable we use +rapids_export(INSTALL ${PROJECT_NAME} + EXPORT_SET ${PROJECT_NAME}-core-exports + GLOBAL_TARGETS morpheus + VERSION ${PROJECT_VERSION} + NAMESPACE morpheus:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string +) + +# ################################################################################################## +# - build export ---------------------------------------------------------------------------------- +rapids_export(BUILD ${PROJECT_NAME} + EXPORT_SET ${PROJECT_NAME}-core-exports + GLOBAL_TARGETS morpheus + VERSION ${PROJECT_VERSION} + LANGUAGES C CXX CUDA + NAMESPACE morpheus:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string +) diff --git a/morpheus/_lib/doca/CMakeLists.txt b/morpheus/_lib/doca/CMakeLists.txt index cf3cafa17f..2578ca02a3 100644 --- a/morpheus/_lib/doca/CMakeLists.txt +++ b/morpheus/_lib/doca/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -80,7 +80,7 @@ install( TARGETS morpheus_doca EXPORT - ${PROJECT_NAME}-exports + ${PROJECT_NAME}-core-exports FILE_SET public_headers COMPONENT diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt index 3f21202e82..32ed8379df 100644 --- a/morpheus/_lib/tests/CMakeLists.txt +++ b/morpheus/_lib/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -20,7 +20,6 @@ find_package(pybind11 REQUIRED) include(GoogleTest) # Cuda Test - add_executable(test_cuda test_cuda.cu ) @@ -41,16 +40,7 @@ set_target_properties(test_cuda CUDA_STANDARD_REQUIRED ON ) -install( - TARGETS - test_cuda - RUNTIME DESTINATION - "${MORPHEUS_LIB_INSTALL_DIR}/tests" - COMPONENT Wheel -) - # Morpheus Test Utilities - add_library( morpheus_test_utilities test_utils/common.cpp @@ -64,8 +54,7 @@ target_link_libraries( ) # Morpheus Tests - -function (add_morpheus_test) +function(add_morpheus_test) set(options) set(oneValueArgs NAME) set(multiValueArgs FILES) @@ -82,6 +71,7 @@ function (add_morpheus_test) GTest::gtest_main morpheus_test_utilities pybind11::embed + TritonClient::httpclient_static ) gtest_discover_tests(test_${MORPHEUS_TEST_NAME}) @@ -92,15 +82,6 @@ function (add_morpheus_test) CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON ) - - install( - TARGETS - test_${MORPHEUS_TEST_NAME} - RUNTIME DESTINATION - "${MORPHEUS_LIB_INSTALL_DIR}/tests" - COMPONENT Wheel - ) - endfunction() add_morpheus_test( diff --git a/pyproject.toml b/pyproject.toml index c33d4e0e88..79c189f7df 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ asyncio_mode = "auto" ignore_patterns = [ "**/*.pyx", "**/*.pxd", + "**/_version.py", ] [tool.mypy] diff --git a/setup.cfg b/setup.cfg index a6a79158d4..8b0fbaa4e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,6 +52,8 @@ default_section=THIRDPARTY sections=FUTURE,STDLIB,THIRDPARTY,DASK,RAPIDS,FIRSTPARTY,LOCALFOLDER skip= __init__.py + # Skip _version.py as it is auto-generated + _version.py .eggs .git .hg @@ -63,10 +65,7 @@ skip= build dist models - morpheus/_version.py thirdparty - # Skip versioneer.py as its third-party - versioneer.py # ===== flake8 Config ===== [flake8] @@ -83,8 +82,7 @@ exclude = docs, models/, morpheus/models/dfencoder/*.py, - thirdparty, - versioneer.py + thirdparty max-line-length = 120 max-doc-length = 120 @@ -131,7 +129,7 @@ per-file-ignores = D102, # D103: Missing docstring in public function D103 - + [yapf] based_on_style = pep8 column_limit = 120 diff --git a/setup.py b/setup.py index e26112b5cc..10e5421754 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,28 +12,16 @@ # limitations under the License. # See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the +# re-run 'versioneer setup' after changing this section, and commit the # resulting files. 
-import os -import sys - +import versioneer from setuptools import find_packages # noqa: E402 from setuptools import setup # noqa: E402 -try: - import versioneer -except ImportError: - # we have a versioneer.py file living in the same directory as this file, but - # if we're using pep 517/518 to build from pyproject.toml its not going to find it - # https://github.com/python-versioneer/python-versioneer/issues/193#issue-408237852 - # make this work by adding this directory to the python path - sys.path.append(os.path.dirname(os.path.realpath(__file__))) - import versioneer - setup( name="morpheus", - version=versioneer.get_version(), + version=versioneer.get_version(), # pylint: disable=no-member description="Morpheus", classifiers=[ "Development Status :: 3 - Alpha", @@ -56,7 +44,7 @@ ], license="Apache", python_requires='>=3.10, <4', - cmdclass=versioneer.get_cmdclass(), + cmdclass=versioneer.get_cmdclass(), # pylint: disable=no-member entry_points=''' [console_scripts] morpheus=morpheus.cli:run_cli diff --git a/tests/examples/developer_guide/test_pass_thru.py b/tests/examples/developer_guide/test_pass_thru.py index f3c1f7c411..e8ae2d0086 100644 --- a/tests/examples/developer_guide/test_pass_thru.py +++ b/tests/examples/developer_guide/test_pass_thru.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -52,7 +52,8 @@ def test_pass_thru_ex1_deco(config: Config, filter_probs_df: DataFrameType, impo _check_pass_thru(config, filter_probs_df, pass_thru.pass_thru_stage, on_data_fn_name='_on_data_fn') -@pytest.mark.import_mod(os.path.join(TEST_DIRS.examples_dir, 'developer_guide/3_simple_cpp_stage/pass_thru.py')) +@pytest.mark.import_mod( + os.path.join(TEST_DIRS.examples_dir, 'developer_guide/3_simple_cpp_stage/src/simple_cpp_stage/pass_thru.py')) def test_pass_thru_ex3(config: Config, filter_probs_df: DataFrameType, import_mod: types.ModuleType): pass_thru = import_mod _check_pass_thru(config, filter_probs_df, pass_thru.PassThruStage) diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index f5f276f846..0000000000 --- a/versioneer.py +++ /dev/null @@ -1,2155 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# Version: 0.22 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9, 3.10 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in distutils/setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere in your $PATH -* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) -* run `versioneer install` in your source tree, commit the results -* Verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. 
For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. 
From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. 
- -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. 
- -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . 
Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . 
- """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.ConfigParser() - with open(setup_cfg, "r") as cfg_file: - parser.read_file(cfg_file) - VCS = parser.get("versioneer", "VCS") # mandatory - - # Dict-like interface for non-mandatory entries - section = parser["versioneer"] - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - 
if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = 
sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. 
- # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - MATCH_ARGS = ["--match", "%%s*" %% tag_prefix] if tag_prefix else [] - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", *MATCH_ARGS], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. 
- # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", *MATCH_ARGS], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. 
- """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - my_path = __file__ - if my_path.endswith(".pyc") or my_path.endswith(".pyo"): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.22) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. 
- -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 
0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - 
print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools/distutils subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to both distutils and setuptools - try: - from setuptools import Command - except ImportError: - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? 
- - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] - elif "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] - elif "setuptools" in sys.modules: - from setuptools.command.build_ext import build_ext as _build_ext - else: - from distutils.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
- # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] - elif "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the 
old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . 
import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. 
- manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except OSError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). 
Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) From 9c226bbd826055080c77e3b9b04426c329ef8160 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 22 Jan 2024 10:44:46 -0800 Subject: [PATCH 4/5] Remove unused cmake script --- cmake/package_search/Finducx.cmake | 73 ------------------------------ 1 file changed, 73 deletions(-) delete mode 100644 cmake/package_search/Finducx.cmake diff --git a/cmake/package_search/Finducx.cmake b/cmake/package_search/Finducx.cmake deleted file mode 100644 index e6e72ecec5..0000000000 --- a/cmake/package_search/Finducx.cmake +++ /dev/null @@ -1,73 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -include(FindPackageHandleStandardArgs) - -set(components "ucx") - -find_package(PkgConfig QUIET) -pkg_check_modules(PC_UCX ${components}) - -# message(STATUS "PC_UCX_FOUND: ${PC_UCX_FOUND}") -# message(STATUS "PC_UCX_LIBRARIES: ${PC_UCX_LIBRARIES}") -# message(STATUS "PC_UCX_LINK_LIBRARIES: ${PC_UCX_LINK_LIBRARIES}") -# message(STATUS "PC_UCX_LIBRARY_DIRS: ${PC_UCX_LIBRARY_DIRS}") -# message(STATUS "PC_UCX_LDFLAGS: ${PC_UCX_LDFLAGS}") -# message(STATUS "PC_UCX_LDFLAGS_OTHER: ${PC_UCX_LDFLAGS_OTHER}") -# message(STATUS "PC_UCX_INCLUDE_DIRS: ${PC_UCX_INCLUDE_DIRS}") -# message(STATUS "PC_UCX_CFLAGS: ${PC_UCX_CFLAGS}") -# message(STATUS "PC_UCX_CFLAGS_OTHER: ${PC_UCX_CFLAGS_OTHER}") - -# set(mod_prefix "PC_UCX") - -# message(STATUS "${mod_prefix}_VERSION: ${${mod_prefix}_VERSION}") -# message(STATUS "${mod_prefix}_PREFIX: ${${mod_prefix}_PREFIX}") -# message(STATUS "${mod_prefix}_INCLUDEDIR: ${${mod_prefix}_INCLUDEDIR}") -# message(STATUS "${mod_prefix}_LIBDIR: ${${mod_prefix}_LIBDIR}") -set(ucx_VERSION ${PC_UCX_VERSION}) - -find_package_handle_standard_args(ucx - FOUND_VAR ucx_FOUND - REQUIRED_VARS - PC_UCX_FOUND - VERSION_VAR ucx_VERSION -) - -if(UCX_FOUND) - set(all_ucx_targets "") - - foreach(ucx_library IN ZIP_LISTS PC_UCX_LIBRARIES PC_UCX_LINK_LIBRARIES) - if(NOT TARGET ucx::${ucx_library_0}) - add_library(ucx::${ucx_library_0} UNKNOWN IMPORTED) - set_target_properties(ucx::${ucx_library_0} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${PC_UCX_INCLUDE_DIRS}" - INTERFACE_COMPILE_OPTIONS "${PC_UCX_CFLAGS_OTHER}" - INTERFACE_LINK_OPTIONS "${PC_UCX_LDFLAGS_OTHER}" - IMPORTED_LOCATION "${ucx_library_1}" - ) - endif() - - # Add to the list of child targets - list(APPEND all_ucx_targets "ucx::${ucx_library_0}") - endforeach() - - if(NOT TARGET ucx::ucx) - # Combined ucx::ucx target - add_library(ucx::ucx INTERFACE IMPORTED GLOBAL) - set_target_properties(ucx::ucx PROPERTIES - INTERFACE_LINK_LIBRARIES "${all_ucx_targets}" - ) - endif() -endif() From 
659e73535be494a01b30b21c1114f823de8ef162 Mon Sep 17 00:00:00 2001 From: Bhargav Suryadevara Date: Mon, 22 Jan 2024 15:13:43 -0500 Subject: [PATCH 5/5] Eliminate Redundant Fetches in RSS Controller (#1442) Addressed redundant feed fetches in the RSS Controller when parsing manually with BeautifulSoup and cache is enabled. Closes #1419 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Bhargav Suryadevara (https://github.com/bsuryadevara) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1442 --- docker/conda/environments/cuda11.8_dev.yml | 1 + morpheus/controllers/rss_controller.py | 78 +++++++++------------- tests/controllers/test_rss_controller.py | 62 ++++++++--------- 3 files changed, 63 insertions(+), 78 deletions(-) diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 5ee09141b0..0527e4277b 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -68,6 +68,7 @@ dependencies: - libgrpc>=1.49 - librdkafka=1.9.2 - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 + - lxml=4.9.1 - mlflow>=2.2.1,<3 - mrc=24.03 - networkx>=2.8 diff --git a/morpheus/controllers/rss_controller.py b/morpheus/controllers/rss_controller.py index cafefa17c8..c4c64876df 100644 --- a/morpheus/controllers/rss_controller.py +++ b/morpheus/controllers/rss_controller.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -102,16 +102,23 @@ def __init__(self, run_indefinitely = any(RSSController.is_url(f) for f in self._feed_input) self._run_indefinitely = run_indefinitely + self._enable_cache = enable_cache - self._session = None if enable_cache: self._session = requests_cache.CachedSession(os.path.join(cache_dir, "RSSController.sqlite"), backend="sqlite") + else: + self._session = requests.session() + + self._session.headers.update({ + "User-Agent": + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36" + }) self._feed_stats_dict = { - input: + url: FeedStats(failure_count=0, success_count=0, last_failure=-1, last_success=-1, last_try_result="Unknown") - for input in self._feed_input + for url in self._feed_input } @property @@ -119,14 +126,9 @@ def run_indefinitely(self): """Property that determines to run the source indefinitely""" return self._run_indefinitely - @property - def session_exist(self) -> bool: - """Property that indicates the existence of a session.""" - return bool(self._session) - def get_feed_stats(self, feed_url: str) -> FeedStats: """ - Get feed input stats. + Get feed url stats. Parameters ---------- @@ -141,30 +143,20 @@ def get_feed_stats(self, feed_url: str) -> FeedStats: Raises ------ ValueError - If the feed URL is not found in the feed input provided to the constructor. + If the feed URL is not found in the feed url provided to the constructor. 
""" if feed_url not in self._feed_stats_dict: - raise ValueError("The feed URL is not part of the feed input provided to the constructor.") + raise ValueError("The feed URL is not part of the feed url provided to the constructor.") return self._feed_stats_dict[feed_url] - def _get_response_text(self, url: str) -> str: - if self.session_exist: - response = self._session.get(url) - else: - response = requests.get(url, timeout=self._request_timeout) - - return response.text - def _read_file_content(self, file_path: str) -> str: with open(file_path, 'r', encoding="utf-8") as file: return file.read() - def _try_parse_feed_with_beautiful_soup(self, feed_input: str, is_url: bool) -> "feedparser.FeedParserDict": - - feed_input = self._get_response_text(feed_input) if is_url else self._read_file_content(feed_input) + def _try_parse_feed_with_beautiful_soup(self, feed_input: str) -> "feedparser.FeedParserDict": - soup = BeautifulSoup(feed_input, 'xml') + soup = BeautifulSoup(feed_input, 'lxml') # Verify whether the given feed has 'item' or 'entry' tags. if soup.find('item'): @@ -205,32 +197,28 @@ def _try_parse_feed(self, url: str) -> "feedparser.FeedParserDict": fallback = False cache_hit = False - is_url_with_session = is_url and self.session_exist - if is_url_with_session: - response = self._session.get(url) - cache_hit = response.from_cache + if is_url: + response = self._session.get(url, timeout=self._request_timeout) feed_input = response.text + if self._enable_cache: + cache_hit = response.from_cache else: feed_input = url feed = feedparser.parse(feed_input) if feed["bozo"]: - cache_hit = False - - if is_url_with_session: - fallback = True - logger.info("Failed to parse feed: %s. Trying to parse using feedparser directly.", url) - feed = feedparser.parse(url) - - if feed["bozo"]: - try: - logger.info("Failed to parse feed: %s, %s. 
Try parsing feed manually", url, feed['bozo_exception']) - feed = self._try_parse_feed_with_beautiful_soup(url, is_url) - except Exception: - logger.error("Failed to parse the feed manually: %s", url) - raise + fallback = True + try: + if not is_url: + # Read file content + feed_input = self._read_file_content(feed_input) + # Parse feed content with beautifulsoup + feed = self._try_parse_feed_with_beautiful_soup(feed_input) + except Exception: + logger.error("Failed to parse the feed manually: %s", url) + raise logger.debug("Parsed feed: %s. Cache hit: %s. Fallback: %s", url, cache_hit, fallback) @@ -312,17 +300,17 @@ def fetch_dataframes(self): @classmethod def is_url(cls, feed_input: str) -> bool: """ - Check if the provided input is a valid URL. + Check if the provided url is a valid URL. Parameters ---------- feed_input : str - The input string to be checked. + The url string to be checked. Returns ------- bool - True if the input is a valid URL, False otherwise. + True if the url is a valid URL, False otherwise. """ try: parsed_url = urlparse(feed_input) diff --git a/tests/controllers/test_rss_controller.py b/tests/controllers/test_rss_controller.py index e388783fbe..94726c4783 100644 --- a/tests/controllers/test_rss_controller.py +++ b/tests/controllers/test_rss_controller.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -78,10 +78,12 @@ def test_run_indefinitely_false(feed_input: list[str]): @pytest.mark.parametrize("feed_input", test_urls) -def test_parse_feed_valid_url(feed_input: list[str], mock_feed: feedparser.FeedParserDict): +def test_parse_feed_valid_url(feed_input: list[str], mock_feed: feedparser.FeedParserDict, mock_get_response: Mock): controller = RSSController(feed_input=feed_input) - with patch("morpheus.controllers.rss_controller.feedparser.parse") as mock_feedparser_parse: - mock_feedparser_parse.return_value = mock_feed + + patch("morpheus.controllers.rss_controller.feedparser.parse", return_value=mock_feed) + + with patch("requests.Session.get", return_value=mock_get_response): feed = list(controller.parse_feeds())[0] assert feed.entries @@ -112,11 +114,14 @@ def test_is_url_false(feed_input: list[str]): @pytest.mark.parametrize("feed_input", [test_urls, test_urls[0]]) -def test_fetch_dataframes_url(feed_input: str | list[str], mock_feed: feedparser.FeedParserDict): +def test_fetch_dataframes_url(feed_input: str | list[str], + mock_feed: feedparser.FeedParserDict, + mock_get_response: Mock): controller = RSSController(feed_input=feed_input) - with patch("morpheus.controllers.rss_controller.feedparser.parse") as mock_feedparser_parse: - mock_feedparser_parse.return_value = mock_feed + patch("morpheus.controllers.rss_controller.feedparser.parse", return_value=mock_feed) + + with patch("requests.Session.get", return_value=mock_get_response): dataframes_generator = controller.fetch_dataframes() dataframe = next(dataframes_generator, None) assert isinstance(dataframe, pd.DataFrame) @@ -142,26 +147,15 @@ def test_batch_size(feed_input: list[str], batch_size: int): assert len(df) <= batch_size -@pytest.mark.parametrize("feed_input, is_url, enable_cache", [(test_file_paths[0], False, False), - (test_urls[0], True, True), (test_urls[0], True, False)]) -def 
test_try_parse_feed_with_beautiful_soup(feed_input: str, is_url: bool, enable_cache: bool, mock_get_response: Mock): +@pytest.mark.parametrize("feed_input, enable_cache", [(test_file_paths[0], False), (test_urls[0], True), + (test_urls[0], False)]) +def test_try_parse_feed_with_beautiful_soup(feed_input: str, enable_cache: bool, mock_get_response: Mock): controller = RSSController(feed_input=feed_input, enable_cache=enable_cache) - if is_url: - if enable_cache: - with patch("morpheus.controllers.rss_controller.requests_cache.CachedSession.get") as mock_get: - mock_get.return_value = mock_get_response - feed_data = controller._try_parse_feed_with_beautiful_soup(feed_input, is_url) - else: - with patch("morpheus.controllers.rss_controller.requests.get") as mock_get: - mock_get.return_value = mock_get_response - feed_data = controller._try_parse_feed_with_beautiful_soup(feed_input, is_url) - - else: - feed_data = controller._try_parse_feed_with_beautiful_soup(feed_input, is_url) + # When enable_cache is set to 'True', the feed content is provided as input. 
+ feed_data = controller._try_parse_feed_with_beautiful_soup(mock_get_response.text) assert isinstance(feed_data, feedparser.FeedParserDict) - assert len(feed_data.entries) > 0 for entry in feed_data.entries: @@ -180,16 +174,6 @@ def test_try_parse_feed_with_beautiful_soup(feed_input: str, is_url: bool, enabl assert isinstance(feed_data["entries"], list) -@pytest.mark.parametrize("enable_cache", [True, False]) -def test_enable_disable_cache(enable_cache): - controller = RSSController(feed_input=test_urls, enable_cache=enable_cache) - - if enable_cache: - assert controller.session_exist - else: - assert not controller.session_exist - - def test_parse_feeds(mock_feed: feedparser.FeedParserDict): feed_input = test_urls[0] cooldown_interval = 620 @@ -239,3 +223,15 @@ def test_parse_feeds(mock_feed: feedparser.FeedParserDict): with pytest.raises(ValueError): controller.get_feed_stats("http://testfeed.com") + + +@pytest.mark.parametrize("feed_input", [test_urls[0]]) +def test_redundant_fetch(feed_input: str, mock_feed: feedparser.FeedParserDict, mock_get_response: Mock): + + controller = RSSController(feed_input=feed_input) + mock_feedparser_parse = patch("morpheus.controllers.rss_controller.feedparser.parse") + with mock_feedparser_parse, patch("requests.Session.get", return_value=mock_get_response) as mocked_session_get: + mock_feedparser_parse.return_value = mock_feed + dataframes_generator = controller.fetch_dataframes() + next(dataframes_generator, None) + assert mocked_session_get.call_count == 1