From 3c7d48da037d34bb480b8faea19417d5ceb1c421 Mon Sep 17 00:00:00 2001 From: Kory Draughn Date: Mon, 22 Apr 2024 09:07:44 -0400 Subject: [PATCH] [#94,#111] Allow removal of data objects with only stale replicas and/or single quotes in the data name. --- README.md | 16 ++++++ .../test_rule_engine_plugin_logical_quotas.py | 56 +++++++++++++++++++ src/handler.cpp | 24 +++++++- 3 files changed, 95 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b5d7a30..562e7ea 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ $ ils ``` ## Requirements + - iRODS v4.3.0 - irods-dev package - irods-runtime package @@ -50,6 +51,7 @@ $ ils - irods-externals-json package ## Compiling + ```bash $ git clone https://github.com/irods/irods_rule_engine_plugin_logical_quotas $ cd irods_rule_engine_plugin_logical_quotas @@ -64,6 +66,7 @@ irods-rule-engine-plugin-logical-quotas---. ``` ## Installing + Ubuntu: ```bash $ sudo dpkg -i irods-rule-engine-plugin-logical-quotas-*.deb @@ -79,6 +82,7 @@ should be similar to the following: ``` ## Configuration + To enable, prepend the following plugin configuration to the list of rule engines in `/etc/irods/server_config.json`. ```javascript "rule_engines": [ @@ -140,6 +144,7 @@ The _data_size_ specific query may result in an overcount of bytes on an activel data object having different sizes. For this situation, consider using slightly larger quota limits. ## How To Use + **IMPORTANT NOTE:** To invoke rules provided by the plugin, the only requirement is that the user be a *rodsadmin*. The *rodsadmin* user does not need permissions set on the target collection. @@ -158,6 +163,7 @@ The following operations are supported: - logical_quotas_unset_total_size_in_bytes ### Invoking operations via the Plugin + To invoke an operation through the plugin, JSON must be passed using the following structure: ```javascript { @@ -206,12 +212,14 @@ The JSON output will be printed to the terminal and have the following structure The **keys** are derived from the **namespace** and **metadata_attribute_names** defined by the plugin configuration. ### Invoking operations via the Native Rule Language + Here, we demonstrate how to start monitoring a collection just like in the section above. ```bash $ irule -r irods_rule_engine_plugin-irods_rule_language-instance 'logical_quotas_start_monitoring_collection(*col)' '*col=/tempZone/home/rods' ruleExecOut ``` ## Stream Operations + With previous iterations of this plugin, changes in data were tracked and checked for violations across all stream-based operations in real-time. However, with the introduction of intermediate replicas and logical locking in iRODS v4.2.9, maintaining this behavior became complex. Due to the complexity, the handling of quotas has been @@ -223,3 +231,11 @@ relaxed. The most important changes are as follows: These changes have the following effects: - The plugin allows stream-based writes to violate the maximum bytes quota once. - Subsequent stream-based creates and writes will be denied until the quotas are out of violation. + +## Questions and Answers + +### Sometimes, the total number of bytes for my collection doesn't change when I remove a data object. Why? + +When it comes to tracking the total number of bytes in use, only **good** replicas are considered. If the data object being removed has no **good** replicas, the plugin will leave the total number of bytes as is. The reason for this is due to there not being a clear path forward for determining which replica's data size should be used for the update. Therefore, the recommendation is for administrators to recalculate the quota totals periodically. + +Remember, the plugin is designed to track the totals of **good** replicas only. diff --git a/packaging/test_rule_engine_plugin_logical_quotas.py b/packaging/test_rule_engine_plugin_logical_quotas.py index 38ace1a..351c0af 100644 --- a/packaging/test_rule_engine_plugin_logical_quotas.py +++ b/packaging/test_rule_engine_plugin_logical_quotas.py @@ -831,6 +831,62 @@ def test_quotas_do_not_block_non_administrators_from_creating_or_writing_data_ob self.assert_quotas(sandbox, expected_number_of_objects = 1, expected_size_in_bytes = file_size) + @unittest.skipIf(test.settings.RUN_IN_TOPOLOGY, "Skip for Topology Testing") + def test_data_objects_containing_single_quotes_in_data_name_can_be_removed__issue_94(self): + config = IrodsConfig() + + with lib.file_backed_up(config.server_config_path): + self.enable_rule_engine_plugin(config) + + col = self.user.session_collection + self.logical_quotas_start_monitoring_collection(col) + + # Create a non-empty data object. + data_object = f"{col}/test_data_objects_containing_single_quote_can_be_removed__issue_94'" + contents = 'test_data_objects_containing_single_quote_can_be_removed__issue_94' + self.user.assert_icommand(['istream', 'write', data_object], input=contents) + self.user.assert_icommand(['ils', '-l', col], 'STDOUT') # Debugging. + self.assert_quotas(col, expected_number_of_objects=1, expected_size_in_bytes=len(contents)) + self.user.assert_icommand(['imeta', 'ls', '-C', col], 'STDOUT') # Debugging. + + # Show the data object was removed. + self.user.assert_icommand(['irm', '-f', data_object]) + self.user.assert_icommand(['ils', '-l', col], 'STDOUT') # Debugging. + self.assertFalse(lib.replica_exists(self.user, data_object, 0)) + # TODO(#113): expected_size_in_bytes will need to be updated once issue is resolved. + self.assert_quotas(col, expected_number_of_objects=0, expected_size_in_bytes=len(contents)) + self.user.assert_icommand(['imeta', 'ls', '-C', col], 'STDOUT') # Debugging. + + @unittest.skipIf(test.settings.RUN_IN_TOPOLOGY, "Skip for Topology Testing") + def test_data_objects_having_only_stale_replicas_can_be_removed__issue_111(self): + config = IrodsConfig() + + with lib.file_backed_up(config.server_config_path): + self.enable_rule_engine_plugin(config) + + col = self.user.session_collection + self.logical_quotas_start_monitoring_collection(col) + + # Create a non-empty data object. + data_object = f'{col}/test_data_objects_having_only_stale_replicas_can_be_removed__issue_111.txt' + contents = 'test_data_objects_having_only_stale_replicas_can_be_removed__issue_111' + self.user.assert_icommand(['istream', 'write', data_object], input=contents) + self.user.assert_icommand(['ils', '-l', col], 'STDOUT') # Debugging. + self.assert_quotas(col, expected_number_of_objects=1, expected_size_in_bytes=len(contents)) + self.user.assert_icommand(['imeta', 'ls', '-C', col], 'STDOUT') # Debugging. + + # Mark the replica stale. + lib.set_replica_status(self.admin1, data_object, 0, 0) + self.user.assert_icommand(['ils', '-l', col], 'STDOUT') # Debugging. + self.assertEqual(lib.get_replica_status(self.user, os.path.basename(data_object), 0), '0') + + # Show the data object was removed and the total data size was left as is. + self.user.assert_icommand(['irm', '-f', data_object]) + self.user.assert_icommand(['ils', '-l', col], 'STDOUT') # Debugging. + self.assertFalse(lib.replica_exists(self.user, data_object, 0)) + self.assert_quotas(col, expected_number_of_objects=0, expected_size_in_bytes=len(contents)) + self.user.assert_icommand(['imeta', 'ls', '-C', col], 'STDOUT') # Debugging. + # # Utility Functions # diff --git a/src/handler.cpp b/src/handler.cpp index ec7dd6e..65f8842 100644 --- a/src/handler.cpp +++ b/src/handler.cpp @@ -1229,7 +1229,29 @@ namespace irods::handler const auto& attrs = get_instance_config(_instance_configs, _instance_name).attributes(); if (auto collection = get_monitored_parent_collection(conn, attrs, input->objPath); collection) { - size_in_bytes_ = fs::server::data_object_size(conn, input->objPath); + try { + size_in_bytes_ = fs::server::data_object_size(conn, input->objPath); + } + catch (const fs::filesystem_error& e) { + // The filesystem library's data_object_size() function will throw an exception + // if the data object does not have any good replicas. Because the REP is designed + // to track the data size of good replicas, the only reasonable step is to leave + // the data size as is. + // + // Attempting to define rules for handling data objects without good replicas + // contains too many challenges. Relying on the admin to recalculate the totals is + // the best/safest approach. + if (e.code().value() == SYS_NO_GOOD_REPLICA) { + log::rule_engine::info("Logical Quotas: Removal of data object [{}] will not affect total " + "number of bytes. Data object does not have any good replicas.", + input->objPath); + } + else { + log::rule_engine::error(e.what()); + addRErrorMsg(&get_rei(_effect_handler).rsComm->rError, e.code().value(), e.what()); + return ERROR(e.code().value(), e.what()); + } + } } } catch (const irods::exception& e) {