From 37b498b053d547a0c4b47531979722ced6dfae2a Mon Sep 17 00:00:00 2001
From: Antonio Bellotta
Date: Wed, 18 Oct 2023 17:37:43 +0200
Subject: [PATCH 1/4] Added node suggestions and fixed negative memory values

---
 neurodamus/cell_distributor.py |  2 +-
 neurodamus/node.py             |  1 +
 neurodamus/utils/memory.py     | 36 ++++++++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/neurodamus/cell_distributor.py b/neurodamus/cell_distributor.py
index ed9a0520..2fd5f21c 100644
--- a/neurodamus/cell_distributor.py
+++ b/neurodamus/cell_distributor.py
@@ -313,7 +313,7 @@ def store_metype_stats(metype, n_cells):
             memory_allocated = end_memory - prev_memory
             log_all(logging.DEBUG, " * METype %s: %.1f KiB averaged over %d cells",
                     metype, memory_allocated/n_cells, n_cells)
-            memory_dict[metype] = memory_allocated / n_cells
+            memory_dict[metype] = max(0, memory_allocated / n_cells)
             prev_memory = end_memory
 
         for gid, cell_info in gid_info_items:
diff --git a/neurodamus/node.py b/neurodamus/node.py
index bccf243e..8ad19c15 100644
--- a/neurodamus/node.py
+++ b/neurodamus/node.py
@@ -1958,6 +1958,7 @@ def run(self):
         if SimConfig.dry_run:
             log_stage("============= DRY RUN (SKIP SIMULATION) =============")
             self._dry_run_stats.display_total()
+            self._dry_run_stats.display_node_suggestions()
             return
         if not SimConfig.simulate_model:
             self.sim_init()
diff --git a/neurodamus/utils/memory.py b/neurodamus/utils/memory.py
index 1efb9e76..b857b5ff 100644
--- a/neurodamus/utils/memory.py
+++ b/neurodamus/utils/memory.py
@@ -8,6 +8,7 @@
 import math
 import os
 import json
+import psutil
 
 from ..core import MPI, NeurodamusCore as Nd, run_only_rank0
 
@@ -184,6 +185,7 @@ def __init__(self) -> None:
         self.metype_memory = {}
         self.metype_counts = Counter()
         self.synapse_counts = Counter()
+        self.grand_total = 0
         _, _, self.base_memory, _ = get_task_level_mem_usage()
 
     @run_only_rank0
@@ -267,7 +269,37 @@ def display_total(self):
         logging.info("| {:<40s} | {:12.1f} |".format("Cells", self.cell_memory_total))
         logging.info("| {:<40s} | {:12.1f} |".format("Synapses", self.synapse_memory_total))
         logging.info("+{:-^57}+".format(""))
-        grand_total = full_overhead + self.cell_memory_total + self.synapse_memory_total
-        grand_total = pretty_printing_memory_mb(grand_total)
+        self.grand_total = full_overhead + self.cell_memory_total + self.synapse_memory_total
+        grand_total = pretty_printing_memory_mb(self.grand_total)
         logging.info("| {:<40s} | {:>12s} |".format("GRAND TOTAL", grand_total))
         logging.info("+{:-^57}+".format(""))
+
+    def total_memory_available():
+        """
+        Returns the total memory available in the system in MB
+        """
+        try:
+            virtual_memory = psutil.virtual_memory()
+            return virtual_memory.total / (1024 * 1024)  # Total available memory in MB
+        except Exception as e:
+            logging.error(f"Error: {e}")
+            return None
+
+    @run_only_rank0
+    def display_node_suggestions(self):
+        """
+        Display suggestions for how many nodes are approximately
+        necessary to run the simulation based on the memory available
+        on the current node.
+ """ + node_total_memory = DryRunStats.total_memory_available() + if node_total_memory is None: + logging.warning("Unable to get the total memory available on the current node.") + return + suggested_nodes = math.ceil(self.grand_total / node_total_memory) + logging.info(f"Based on the memory available on the current node, " + f"it is suggested to use at least {suggested_nodes} node(s).") + logging.info("This is just a suggestion and the actual number of nodes " + "needed to run the simulation may be different.") + logging.info(f"The calculation was based on a total memory available of " + f"{pretty_printing_memory_mb(node_total_memory)} on the current node.") From 47b486e808fdaa709500bd092c392df9734712ee Mon Sep 17 00:00:00 2001 From: Antonio Bellotta Date: Fri, 20 Oct 2023 17:22:33 +0200 Subject: [PATCH 2/4] Improved nodes calculation to take into account variable overhead --- neurodamus/utils/memory.py | 39 ++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/neurodamus/utils/memory.py b/neurodamus/utils/memory.py index b857b5ff..db4c56b3 100644 --- a/neurodamus/utils/memory.py +++ b/neurodamus/utils/memory.py @@ -9,6 +9,7 @@ import os import json import psutil +import multiprocessing from ..core import MPI, NeurodamusCore as Nd, run_only_rank0 @@ -185,7 +186,6 @@ def __init__(self) -> None: self.metype_memory = {} self.metype_counts = Counter() self.synapse_counts = Counter() - self.grand_total = 0 _, _, self.base_memory, _ = get_task_level_mem_usage() @run_only_rank0 @@ -269,8 +269,8 @@ def display_total(self): logging.info("| {:<40s} | {:12.1f} |".format("Cells", self.cell_memory_total)) logging.info("| {:<40s} | {:12.1f} |".format("Synapses", self.synapse_memory_total)) logging.info("+{:-^57}+".format("")) - self.grand_total = full_overhead + self.cell_memory_total + self.synapse_memory_total - grand_total = pretty_printing_memory_mb(self.grand_total) + grand_total = full_overhead + self.cell_memory_total + self.synapse_memory_total + grand_total = pretty_printing_memory_mb(grand_total) logging.info("| {:<40s} | {:>12s} |".format("GRAND TOTAL", grand_total)) logging.info("+{:-^57}+".format("")) @@ -285,6 +285,35 @@ def total_memory_available(): logging.error(f"Error: {e}") return None + @run_only_rank0 + def suggest_nodes(self, margin): + """ + A function to calculate the suggested number of nodes to run the simulation + The function takes into account the fact that the memory overhead is + variable with the amount of ranks the simulation it's ran with. + One can also specify a custom margin to add to the memory usage. 
+ """ + + try: + ranks_per_node = os.cpu_count() + except AttributeError: + ranks_per_node = multiprocessing.cpu_count() + + full_overhead = self.base_memory * ranks_per_node + + # initialize variable for iteration + est_nodes = 0 + prev_est_nodes = None + + while prev_est_nodes is None or est_nodes != prev_est_nodes: + prev_est_nodes = est_nodes + mem_usage_per_node = full_overhead + self.cell_memory_total + self.synapse_memory_total + mem_usage_with_margin = mem_usage_per_node * (1 + margin) + est_nodes = math.ceil(mem_usage_with_margin / DryRunStats.total_memory_available()) + full_overhead = self.base_memory * ranks_per_node * est_nodes + + return est_nodes + @run_only_rank0 def display_node_suggestions(self): """ @@ -296,10 +325,12 @@ def display_node_suggestions(self): if node_total_memory is None: logging.warning("Unable to get the total memory available on the current node.") return - suggested_nodes = math.ceil(self.grand_total / node_total_memory) + suggested_nodes = self.suggest_nodes(0.3) logging.info(f"Based on the memory available on the current node, " f"it is suggested to use at least {suggested_nodes} node(s).") logging.info("This is just a suggestion and the actual number of nodes " "needed to run the simulation may be different.") logging.info(f"The calculation was based on a total memory available of " f"{pretty_printing_memory_mb(node_total_memory)} on the current node.") + logging.info("Please remember that it is suggested to use the same class of nodes " + "for both the dryrun and the actual simulation.") From 3de919a0fd9b4ac5643b5fc0b6b22ba80525da74 Mon Sep 17 00:00:00 2001 From: Antonio Bellotta Date: Tue, 24 Oct 2023 17:26:25 +0200 Subject: [PATCH 3/4] Updated docs on dry run, providing explaination for node suggestion --- docs/architecture.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/architecture.rst b/docs/architecture.rst index 8d248432..6e8bc177 100644 --- a/docs/architecture.rst +++ b/docs/architecture.rst @@ -329,6 +329,16 @@ will get a summary of the estimated memory used for cells and synapses, includin memory necessary to load libraries and neurodamus data structures. A grand total is provided to the user as well as a per-cell type and per-synapse type breakdown. +At the end of the execution the user will also be provided with a suggestion on how many nodes +to use in order to run the simulation with the given circuit on the given machine. +Keep in mind that this is just a suggestion and the user is free to use a different number of nodes +if he/she wishes to do so. The suggestion is based on the assumption that the user wants to run +the simulation on the same kind of machine used to run the dry run. The suggestion is also based +on the assumption that the user wants to use all the available memory on each node for the simulation. +The node estimate takes into account the memory usage of the cells and synapses as well as the +variable usage of memory "overhead" that is fixed for each rank but varies depending on the number +of ranks used. + In this paragraph we will go a bit more into details on how the estimation is done. 
 Below you can see the workflow of the dry run mode:

From 006b91c2ceef93431fb6ee7ee60762a1f4225b3c Mon Sep 17 00:00:00 2001
From: Antonio Bellotta
Date: Wed, 25 Oct 2023 12:05:20 +0200
Subject: [PATCH 4/4] Set a maximum number of iterations for the node
 suggestion loop

---
 neurodamus/utils/memory.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/neurodamus/utils/memory.py b/neurodamus/utils/memory.py
index db4c56b3..b0ae5d4f 100644
--- a/neurodamus/utils/memory.py
+++ b/neurodamus/utils/memory.py
@@ -304,13 +304,16 @@ def suggest_nodes(self, margin):
         # initialize variables for the iteration
         est_nodes = 0
         prev_est_nodes = None
+        max_iter = 5
+        iter_count = 0
 
-        while prev_est_nodes is None or est_nodes != prev_est_nodes:
+        while (prev_est_nodes is None or est_nodes != prev_est_nodes) and iter_count < max_iter:
             prev_est_nodes = est_nodes
             mem_usage_per_node = full_overhead + self.cell_memory_total + self.synapse_memory_total
             mem_usage_with_margin = mem_usage_per_node * (1 + margin)
             est_nodes = math.ceil(mem_usage_with_margin / DryRunStats.total_memory_available())
            full_overhead = self.base_memory * ranks_per_node * est_nodes
+            iter_count += 1
 
         return est_nodes
 
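For reference, here is a standalone sketch of the node estimation loop that PATCH 2 introduces
in DryRunStats.suggest_nodes and PATCH 4 caps at five iterations. The estimate_nodes helper and
every input number below are illustrative assumptions, not part of neurodamus; the real code
takes the per-rank base memory, the cell and synapse totals, and the node memory from the dry
run itself and from psutil.

    import math

    def estimate_nodes(base_memory_mb, ranks_per_node, cell_mb, synapse_mb,
                       node_memory_mb, margin=0.3, max_iter=5):
        """Illustrative re-implementation of the suggest_nodes fixed-point loop."""
        # overhead of a single node: the per-rank base memory is a fixed cost
        full_overhead = base_memory_mb * ranks_per_node
        est_nodes, prev_est_nodes, iter_count = 0, None, 0
        while (prev_est_nodes is None or est_nodes != prev_est_nodes) and iter_count < max_iter:
            prev_est_nodes = est_nodes
            # total memory to place: overhead of all ranks + cells + synapses
            mem_usage = full_overhead + cell_mb + synapse_mb
            mem_usage_with_margin = mem_usage * (1 + margin)
            est_nodes = math.ceil(mem_usage_with_margin / node_memory_mb)
            # the overhead is fixed per rank, so in total it grows with the node count
            full_overhead = base_memory_mb * ranks_per_node * est_nodes
            iter_count += 1
        return est_nodes

    # Made-up inputs: 1.5 GiB base memory per rank, 72 ranks per node, 256 GiB nodes,
    # ~400 GB of cells and ~800 GB of synapses estimated by the dry run.
    print(estimate_nodes(1536, 72, cell_mb=400_000, synapse_mb=800_000,
                         node_memory_mb=262_144))

With these made-up inputs the estimate climbs 7 -> 10 -> 12 -> 13 -> 14 nodes and the loop stops
at the five-iteration cap introduced in PATCH 4 instead of iterating until the estimate
stabilises.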