Revert "lint dev (aidecentralized#23)" (aidecentralized#81)
This reverts commit e25e0a3.
gautamjajoo authored Sep 14, 2024
1 parent 6745a28 commit 927fd35
Showing 26 changed files with 323 additions and 522 deletions.
1 change: 0 additions & 1 deletion .vscode/settings.json
@@ -1,4 +1,3 @@
{
"python.REPL.enableREPLSmartSend": false,
"python.analysis.typeCheckingMode": "strict"
}
88 changes: 34 additions & 54 deletions src/algos/DisPFL.py
@@ -1,20 +1,25 @@
"""This module defines the DisPFL optimization strategy for distributed machine learning."""
"""
This module defines the DisPFLClient and DisPFLServer classes for distributed personalized federated learning.
"""

import copy
import math
import random
from collections import OrderedDict
from typing import Any, Dict, List

import numpy as np
import torch
from torch import Tensor, from_numpy, randperm, zeros_like # pylint: disable=no-name-in-module
from torch import nn
import torch.nn as nn
from torch import Tensor, from_numpy, numel, randperm, zeros_like

from algos.base_class import BaseClient, BaseServer


class CommProtocol:
"""
Communication protocol tags for the server and clients.
"""
# pylint: disable=too-few-public-methods

DONE = 0 # Used to signal the server that the client is done with local training
START = 1 # Used to signal by the server to start the current round
@@ -29,18 +34,11 @@ class DisPFLClient(BaseClient):
"""
Client class for DisPFL (Distributed Personalized Federated Learning).
"""

def __init__(self, config) -> None:
super().__init__(config)
self.params = None
self.mask = None
self.index = None
self.repr = None
self.config = config
self.tag = CommProtocol
self.model_save_path = (
f"{self.config['results_path']}/saved_models/node_{self.node_id}.pt"
)
self.model_save_path = f"{self.config['results_path']}/saved_models/node_{self.node_id}.pt"
self.dense_ratio = self.config["dense_ratio"]
self.anneal_factor = self.config["anneal_factor"]
self.dis_gradient_check = self.config["dis_gradient_check"]
@@ -66,7 +64,7 @@ def local_test(self, **kwargs):
test_loss, acc = self.model_utils.test(
self.model, self._test_loader, self.loss_fn, self.device
)
# save the model if the accuracy is better than the best accuracy so far
# TODO save the model if the accuracy is better than the best accuracy so far
# if acc > self.best_acc:
# self.best_acc = acc
# self.model_utils.save_model(self.model, self.model_save_path)
@@ -96,8 +94,7 @@ def fire_mask(self, masks, round_num, total_round):
"""
weights = self.get_representation()
drop_ratio = (
self.anneal_factor / 2 *
(1 + np.cos((round_num * np.pi) / total_round))
self.anneal_factor / 2 * (1 + np.cos((round_num * np.pi) / total_round))
)
new_masks = copy.deepcopy(masks)
num_remove = {}
@@ -109,7 +106,7 @@ def fire_mask(self, masks, round_num, total_round):
torch.abs(weights[name]),
100000 * torch.ones_like(weights[name]),
)
_, idx = torch.sort(temp_weights.view(-1).to(self.device))
x, idx = torch.sort(temp_weights.view(-1).to(self.device))
new_masks[name].view(-1)[idx[: num_remove[name]]] = 0
return new_masks, num_remove

@@ -147,8 +144,7 @@ def aggregate(self, nei_indexes, weights_lstrnd, masks_lstrnd):
"""
count_mask = copy.deepcopy(masks_lstrnd[self.index])
for k in count_mask.keys():
count_mask[k] = count_mask[k] - \
count_mask[k] # zero out by pruning
count_mask[k] = count_mask[k] - count_mask[k] # zero out by pruning
for clnt in nei_indexes:
count_mask[k] += masks_lstrnd[clnt][k].to(self.device) # mask
for k in count_mask.keys():
@@ -180,8 +176,7 @@ def send_representations(self, representation):
Set the model.
"""
for client_node in self.clients:
self.comm_utils.send_signal(
client_node, representation, self.tag.UPDATES)
self.comm_utils.send_signal(client_node, representation, self.tag.UPDATES)
print(f"Node 1 sent average weight to {len(self.clients)} nodes")

def calculate_sparsities(self, params, tabu=None, distribution="ERK", sparse=0.5):
@@ -221,8 +216,7 @@ def calculate_sparsities(self, params, tabu=None, distribution="ERK", sparse=0.5):
else:
rhs += n_ones
raw_probabilities[name] = (
np.sum(params[name].shape) /
np.prod(params[name].shape)
np.sum(params[name].shape) / np.prod(params[name].shape)
) ** self.config["erk_power_scale"]
divisor += raw_probabilities[name] * n_param
epsilon = rhs / divisor
@@ -232,8 +226,7 @@ def calculate_sparsities(self, params, tabu=None, distribution="ERK", sparse=0.5):
is_epsilon_valid = False
for mask_name, mask_raw_prob in raw_probabilities.items():
if mask_raw_prob == max_prob:
print(
f"Sparsity of var:{mask_name} had to be set to 0.")
print(f"Sparsity of var:{mask_name} had to be set to 0.")
dense_layers.add(mask_name)
else:
is_epsilon_valid = True
@@ -288,20 +281,19 @@ def hamming_distance(self, mask_a, mask_b):
total = 0
for key in mask_a:
dis += torch.sum(
mask_a[key].int().to(
self.device) ^ mask_b[key].int().to(self.device)
mask_a[key].int().to(self.device) ^ mask_b[key].int().to(self.device)
)
total += mask_a[key].numel()
return dis, total

def _benefit_choose(
self,
round_idx, # pylint: disable=unused-argument
round_idx,
cur_clnt,
client_num_in_total,
client_num_per_round,
dist_local, # pylint: disable=unused-argument
total_dist, # pylint: disable=unused-argument
dist_local,
total_dist,
cs=False,
active_ths_rnd=None,
):
@@ -339,8 +331,7 @@ def model_difference(self, model_a, model_b):
Calculate the difference between two models.
"""
diff = sum(
[torch.sum(torch.square(model_a[name] - model_b[name]))
for name in model_a]
[torch.sum(torch.square(model_a[name] - model_b[name])) for name in model_a]
)
return diff

@@ -355,7 +346,7 @@ def run_protocol(self):
self.params, sparse=self.dense_ratio
) # calculate sparsity to create masks
self.mask = self.init_masks(self.params, sparsities) # mask_per_local
dist_locals = np.zeros(shape=self.num_users)
dist_locals = np.zeros(shape=(self.num_users))
self.index = self.node_id - 1
masks_lstrnd = [self.mask for i in range(self.num_users)]
weights_lstrnd = [
@@ -366,8 +357,7 @@ def run_protocol(self):
]
for epoch in range(start_epochs, total_epochs):
# wait for signal to start round
active_ths_rnd = self.comm_utils.wait_for_signal(
src=0, tag=self.tag.START)
active_ths_rnd = self.comm_utils.wait_for_signal(src=0, tag=self.tag.START)
if epoch != 0:
[weights_lstrnd, masks_lstrnd] = self.comm_utils.wait_for_signal(
src=0, tag=self.tag.LAST_ROUND
@@ -395,7 +385,8 @@ def run_protocol(self):
if self.num_users != self.config["neighbors"]:
nei_indexes = np.append(nei_indexes, self.index)
print(
f"Node {self.index}'s neighbors index:{[i + 1 for i in nei_indexes]}")
f"Node {self.index}'s neighbors index:{[i + 1 for i in nei_indexes]}"
)

for tmp_idx in nei_indexes:
if tmp_idx != self.index:
@@ -421,8 +412,7 @@ def run_protocol(self):
)
else:
new_repr = copy.deepcopy(weights_lstrnd[self.index])
w_per_globals[self.index] = copy.deepcopy(
weights_lstrnd[self.index])
w_per_globals[self.index] = copy.deepcopy(weights_lstrnd[self.index])
model_diff = self.model_difference(new_repr, self.repr)
print(f"Node {self.node_id} model_diff{model_diff}")
self.comm_utils.send_signal(
@@ -434,42 +424,34 @@ def run_protocol(self):
# locally train
print(f"Node {self.node_id} local train")
self.local_train()
_, acc = self.local_test()
loss, acc = self.local_test()
print(f"Node {self.node_id} local test: {acc}")
repr = self.get_representation()
if not self.config["static"]:
if not self.dis_gradient_check:
gradient = self.screen_gradient()
self.mask, num_remove = self.fire_mask(
self.mask, epoch, total_epochs)
self.mask, num_remove = self.fire_mask(self.mask, epoch, total_epochs)
self.mask = self.regrow_mask(self.mask, num_remove, gradient)
self.comm_utils.send_signal(
dest=0, data=copy.deepcopy(repr), tag=self.tag.SHARE_WEIGHTS
)

# test updated model
self.set_representation(repr)
_, acc = self.local_test()
loss, acc = self.local_test()
self.comm_utils.send_signal(dest=0, data=acc, tag=self.tag.FINISH)


class DisPFLServer(BaseServer):
"""
Server class for DisPFL (Distributed Personalized Federated Learning).
"""

def __init__(self, config) -> None:
super().__init__(config)
self.best_acc = 0
self.round = 0
self.masks = 0
self.reprs = 0
self.config = config
self.set_model_parameters(config)
self.tag = CommProtocol
self.model_save_path = (
f"{self.config['results_path']}/saved_models/node_{self.node_id}.pt"
)
self.model_save_path = f"{self.config['results_path']}/saved_models/node_{self.node_id}.pt"
self.dense_ratio = self.config["dense_ratio"]
self.num_users = self.config["num_users"]

@@ -484,8 +466,7 @@ def send_representations(self, representations):
Set the model.
"""
for client_node in self.users:
self.comm_utils.send_signal(
client_node, representations, self.tag.UPDATES)
self.comm_utils.send_signal(client_node, representations, self.tag.UPDATES)
self.log_utils.log_console(
f"Server sent {len(representations)} representations to node {client_node}"
)
@@ -497,7 +478,7 @@ def test(self) -> float:
test_loss, acc = self.model_utils.test(
self.model, self._test_loader, self.loss_fn, self.device
)
# save the model if the accuracy is better than the best accuracy so far
# TODO save the model if the accuracy is better than the best accuracy so far
if acc > self.best_acc:
self.best_acc = acc
self.model_utils.save_model(self.model, self.model_save_path)
@@ -553,6 +534,5 @@ def run_protocol(self):

self.single_round(epoch, active_ths_rnd)

accs = self.comm_utils.wait_for_all_clients(
self.users, self.tag.FINISH)
accs = self.comm_utils.wait_for_all_clients(self.users, self.tag.FINISH)
self.log_utils.log_console(f"Round {epoch} done; acc {accs}")
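
A side note on the fire_mask hunk above: both the linted and the restored versions compute the same cosine-annealed drop ratio. Below is a minimal standalone sketch of that schedule (plain NumPy; the default anneal_factor value and the function name are illustrative, not taken from the repository):

import numpy as np

def drop_ratio(round_num: int, total_round: int, anneal_factor: float = 0.5) -> float:
    # Fraction of currently active weights to prune this round; decays
    # from anneal_factor at round 0 down to 0 at the final round.
    return anneal_factor / 2 * (1 + np.cos((round_num * np.pi) / total_round))

for r in (0, 25, 50, 75, 100):
    print(r, round(drop_ratio(r, total_round=100), 4))  # 0.5, 0.4268, 0.25, 0.0732, 0.0

Pruning is therefore aggressive in early rounds and tapers to zero as training converges, which is why fire_mask takes both the current round and the total round count as arguments.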
40 changes: 9 additions & 31 deletions src/algos/L2C.py
@@ -1,13 +1,11 @@
"""Module docstring: This module implements the L2C algorithm for federated learning."""
from collections import OrderedDict, defaultdict
from typing import Any, Dict, List, Optional, Union
from typing import Any, Optional, Union
from utils.communication.comm_utils import CommunicationManager
import torch
import numpy as np
import torch
from torch import Tensor, cat, tensor, optim
import torch.nn as nn
import torch.nn.functional as F
from torch import optim, Tensor
from torch.nn import Module, Linear, ReLU

from utils.stats_utils import from_round_stats_per_round_per_client_to_dict_arrays
from algos.base_class import BaseFedAvgClient, BaseFedAvgServer
@@ -102,16 +100,9 @@ def learn_collab_weights(self, models_update_wts: dict[int, dict[str, Tensor]]):
for key in grad_dict.keys():
if key not in self.model_keys_to_ignore:
if self.sharing_mode == "updates":
cw_grad -= (
models_update_wts[id][key] *
grad_dict[key].cpu()
).sum()
elif self.sharing_mode == "weights":
cw_grad += (
models_update_wts[id][key] *
grad_dict[key].cpu()
).sum()
cw_grad -= (models_update_wts[id][key] * grad_dict[key].cpu()).sum()
elif self.sharing_mode == "weights":
cw_grad += (models_update_wts[id][key] * grad_dict[key].cpu()).sum()
else:
raise ValueError("Unknown sharing mode")
collab_weights_grads.append(cw_grad)
@@ -199,15 +190,9 @@ def run_protocol(self) -> None:
self.set_model_weights(new_wts, self.model_keys_to_ignore)
round_stats["test_acc"] = self.local_test()
round_stats["validation_loss"], round_stats["validation_acc"] = self.learn_collab_weights(models_update_wts)
round_stats["validation_loss"], round_stats["validation_acc"] = (
self.learn_collab_weights(models_update_wts)
)
print("node {} weight: {}".format(
self.node_id, self.collab_weights))

# Lower the number of neighbors
print(f"node {self.node_id} weight: {self.collab_weights}")
if round == self.config["T_0"]:

self.filter_out_worse_neighbors(self.config["target_users_after_T_0"])

self.comm_utils.send(dest=self.server_node, data=round_stats, tag=self.tag.ROUND_STATS)
@@ -241,14 +226,8 @@ def single_round(self) -> list[dict[str, Any]]:

round_stats: list[dict[str, Any]] = self.comm_utils.all_gather(self.tag.ROUND_STATS)
self.log_utils.log_console("Server received all clients stats")

# Log the round stats on tensorboard
self.log_utils.log_tb_round_stats(
round_stats, ["collab_weights"], self.round)

self.log_utils.log_console(
f"Round test acc {[stats['test_acc'] for stats in round_stats]}"
)
self.log_utils.log_tb_round_stats(round_stats, ["collab_weights"], self.round)
self.log_utils.log_console(f"Round test acc {[stats['test_acc'] for stats in round_stats]}")

return round_stats

@@ -264,8 +243,7 @@ def run_protocol(self) -> None:
round_stats: list[dict[str, Any]] = self.single_round()
stats.append(round_stats)

stats_dict = from_round_stats_per_round_per_client_to_dict_arrays(
stats)
stats_dict = from_round_stats_per_round_per_client_to_dict_arrays(stats)
stats_dict["round_step"] = 1
self.log_utils.log_experiments_stats(stats_dict)
self.plot_utils.plot_experiments_stats(stats_dict)
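
A side note on the learn_collab_weights hunk above: in both versions, the gradient for each collaboration weight is a dot product between the local model's gradient and the tensors that client shared, with the sign set by the sharing mode. A minimal sketch of the pattern (all names illustrative; this is not the repository's API):

import torch

def collab_weight_grads(models_update_wts, grad_dict, sharing_mode, ignore_keys=()):
    # models_update_wts: {client_id: {param_name: Tensor}} received from peers
    # grad_dict: {param_name: Tensor} gradients of the locally mixed model
    grads = []
    for client_id, client_wts in models_update_wts.items():
        cw_grad = torch.tensor(0.0)
        for key, grad in grad_dict.items():
            if key in ignore_keys:
                continue
            term = (client_wts[key] * grad.cpu()).sum()
            if sharing_mode == "updates":
                cw_grad -= term  # peers shared deltas: contribution enters with a minus sign
            elif sharing_mode == "weights":
                cw_grad += term  # peers shared full weights: plus sign
            else:
                raise ValueError("Unknown sharing mode")
        grads.append(cw_grad)
    return grads

The two branches mirror the updates/weights cases visible in the diff; the surrounding optimizer logic stays in the repository's own code.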