raphael-group · anushka255 · Oct 22, 2024 · Oct 16, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/.github/workflows/test_pinned_deps.yml b/.github/workflows/test_pinned_deps.yml
@@ -51,7 +51,7 @@ jobs:
       - name: Pytest with coverage
         if: matrix.os == 'ubuntu-latest'
         # Note: Use of pytest -n .. (pytest-xdist) does not work with coverage
-        run: coverage run --source=src/paste3 -m pytest
+        run: coverage run --source=src/paste3 --omit="*/__main__.py" -m pytest
 
       - name: Upload coverage to Coveralls
         if: matrix.os == 'ubuntu-latest'

diff --git a/pyproject.toml b/pyproject.toml
@@ -30,7 +30,7 @@ dependencies = [
 dynamic = ["version"]
 
 [project.scripts]
-paste = "paste.__main__:main"
+paste = "paste3.__main__:main"
 
 [tool.setuptools]
 package-dir = {"" = "src"}

diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/paste3/__init__.py b/src/paste3/__init__.py
@@ -0,0 +1,32 @@
+import logging.config
+
+
+# The _version.py file is managed by setuptools-scm
+#   and is not in version control.
+try:
+    from paste3._version import version as __version__  # type: ignore
+except ModuleNotFoundError:
+    # We're likely running as a source package without installation
+    __version__ = "src"
+
+
+# Package-wide logging setup, applied once at import time: a single stream
+# handler writing to stdout is attached to the root logger, whose level is
+# set to INFO.
+logging.config.dictConfig(
+    {
+        "version": 1,
+        # dictConfig defaults this flag to True, which would disable every
+        # non-root logger that the host application created before importing
+        # this package. Importing a library must not silence its caller.
+        "disable_existing_loggers": False,
+        "formatters": {
+            "standard": {
+                "format": "(%(levelname)s) (%(filename)s) (%(asctime)s) %(message)s",
+                "datefmt": "%d-%b-%y %H:%M:%S",
+            }
+        },
+        "handlers": {
+            "default": {
+                # NOTSET on the handler: filtering is left to the logger level.
+                "level": "NOTSET",
+                "formatter": "standard",
+                "class": "logging.StreamHandler",
+                "stream": "ext://sys.stdout",
+            }
+        },
+        # The empty name addresses the root logger.
+        "loggers": {"": {"handlers": ["default"], "level": "INFO"}},
+    }
+)
diff --git a/src/paste3/__main__.py b/src/paste3/__main__.py
@@ -0,0 +1,41 @@
+import logging
+import argparse
+import os
+from paste3 import align
+import paste3
+
+logger = logging.getLogger("paste3")
+
+
+def main():
+    """Command-line entry point: parse arguments and run the chosen sub-command.
+
+    One sub-command is registered per entry in ``modules``. Its name is the
+    stem of the module's file name, its description is the module docstring,
+    and its options come from the module's ``add_args`` hook plus a shared
+    ``-v/--verbose`` flag. After parsing, the selected module's ``main`` is
+    called with the parsed namespace.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--version", action="version", version=paste3.__version__)
+
+    modules = [align]
+
+    # Give the sub-command group an explicit ``dest``: without one, some
+    # Python versions fail with a TypeError (instead of a usage message)
+    # while reporting that the required command is missing.
+    subparsers = parser.add_subparsers(title="Choose a command", dest="command")
+    subparsers.required = True
+
+    def get_str_name(module):
+        # File stem of the module, e.g. ".../align.py" -> "align".
+        return os.path.splitext(os.path.basename(module.__file__))[0]
+
+    for module in modules:
+        this_parser = subparsers.add_parser(
+            get_str_name(module), description=module.__doc__
+        )
+        this_parser.add_argument(
+            "-v", "--verbose", action="store_true", help="Increase verbosity"
+        )
+
+        module.add_args(this_parser)
+        # Remember which function to dispatch to once parsing is done.
+        this_parser.set_defaults(func=module.main)
+
+    args = parser.parse_args()
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/paste3/align.py b/src/paste3/align.py
@@ -0,0 +1,258 @@
+import ot.backend
+import numpy as np
+from pathlib import Path
+
+import pandas as pd
+
+from paste3.io import process_files
+import logging
+from paste3.paste import pairwise_align, center_align
+from paste3.visualization import stack_slices_pairwise, stack_slices_center
+
+logger = logging.getLogger(__name__)
+
+
+def align(
+    mode,
+    gene_fpath,
+    spatial_fpath=None,
+    output_directory="",
+    alpha=0.1,
+    cost="kl",
+    n_components=15,
+    lmbda=None,
+    initial_slice=1,
+    threshold=0.001,
+    coordinates=False,
+    weight_fpath=None,
+    overlap_fraction=None,
+    start=None,
+    seed=None,
+    cost_matrix=None,
+    max_iter=10,
+    norm=False,
+    numItermax=200,
+    use_gpu=False,
+    return_obj=False,
+    optimizeTheta=True,
+    eps=1e-4,
+    is_histology=False,
+    armijo=False,
+):
+    """Load slices from disk, align them, and write the results to disk.
+
+    Args:
+        mode: ``"pairwise"`` (align each slice to the next one) or
+            ``"center"`` (align every slice to a computed center slice).
+        gene_fpath: Forwarded to ``process_files`` together with
+            ``spatial_fpath`` and ``weight_fpath`` to load the slices.
+        spatial_fpath: See ``gene_fpath``.
+        output_directory: Directory receiving all output files; created
+            (including missing parents) when absent.
+        alpha: Alignment parameter, must lie in [0, 1]; forwarded to the
+            aligner.
+        cost: Forwarded to the aligner as ``dissimilarity``.
+        n_components: Forwarded to ``center_align`` (center mode only).
+        lmbda: One non-negative weight per slice. Defaults to uniform
+            weights; otherwise rescaled to sum to 1. Forwarded to
+            ``center_align`` (validated in both modes).
+        initial_slice: 1-based index of the slice whose copy is passed to
+            ``center_align`` as ``A``.
+        threshold: Forwarded to ``center_align`` (center mode only).
+        coordinates: When true, also stack the slices with the computed
+            mappings and write them as ``.h5ad`` files.
+        weight_fpath: See ``gene_fpath``.
+        overlap_fraction: Optional value in [0, 1], forwarded to
+            ``pairwise_align`` as ``s``; a non-``None`` value also makes the
+            pairwise stacking partial.
+        start: Optional paths to comma-delimited files holding initial
+            mappings. Pairwise mode needs exactly one per consecutive pair.
+        seed: Forwarded to ``center_align`` as ``random_seed``.
+        cost_matrix: Optional path to a comma-delimited float matrix,
+            forwarded to ``pairwise_align`` as ``M``.
+        max_iter: Forwarded as ``maxIter`` (pairwise) / ``max_iter`` (center).
+        norm: Forwarded to the aligner.
+        numItermax: Forwarded to ``pairwise_align`` only.
+        use_gpu: Forwarded to the aligner.
+        return_obj: Forwarded to ``pairwise_align`` only.
+        optimizeTheta: Forwarded to ``pairwise_align`` only.
+        eps: Forwarded to ``pairwise_align`` only.
+        is_histology: Forwarded to ``pairwise_align`` only.
+        armijo: Forwarded to ``pairwise_align`` only.
+
+    Raises:
+        ValueError: If ``mode``, ``alpha``, ``overlap_fraction``,
+            ``initial_slice``, ``lmbda`` or the number of ``start`` files is
+            invalid.
+
+    Output files (all inside ``output_directory``):
+        * pairwise: ``slice_{i}_{i+1}_pairwise.csv`` for each consecutive
+          pair, numbered from 1.
+        * center: ``center_slice.h5ad`` plus one
+          ``slice_{i}_{i+1}_pairwise.csv`` per returned mapping, with ``i``
+          starting at 0.
+        * with ``coordinates``: ``new_slices_{i}.h5ad`` (numbered from 1)
+          and, in center mode, ``new_center.h5ad``.
+    """
+    # Checks that need no data run first, so bad arguments fail before any
+    # file is read.
+    if mode not in ("pairwise", "center"):
+        raise ValueError("Please select either pairwise or center alignment mode.")
+
+    if alpha < 0 or alpha > 1:
+        raise ValueError("Alpha specified outside of 0-1 range.")
+
+    if overlap_fraction is not None and (overlap_fraction < 0 or overlap_fraction > 1):
+        raise ValueError("Overlap fraction specified outside of 0-1 range.")
+
+    slices = process_files(gene_fpath, spatial_fpath, weight_fpath)
+    n_slices = len(slices)
+
+    # initial_slice is 1-based; this check also rejects an empty slice list.
+    if initial_slice < 1 or initial_slice > n_slices:
+        raise ValueError("Initial slice specified outside of 1 - n range.")
+
+    if lmbda is None:
+        lmbda = n_slices * [1 / n_slices]
+    elif len(lmbda) != n_slices:
+        raise ValueError("Length of lambda doesn't equal number of files")
+    elif not all(weight >= 0 for weight in lmbda):
+        raise ValueError("lambda includes negative weights")
+    else:
+        total_weight = sum(lmbda)
+        if total_weight == 0:
+            # Report the real problem instead of a bare ZeroDivisionError.
+            raise ValueError("lambda weights must not all be zero")
+        logger.info("Normalizing lambda weights into probability vector.")
+        lmbda = [float(weight) / total_weight for weight in lmbda]
+
+    if cost_matrix:
+        cost_matrix = np.genfromtxt(cost_matrix, delimiter=",", dtype="float64")
+
+    if start is None:
+        # Pairwise mode indexes pis_init per pair, so it needs a list of Nones.
+        pis_init = [None] * (n_slices - 1) if mode == "pairwise" else None
+    elif mode == "pairwise" and len(start) != n_slices - 1:
+        raise ValueError(
+            f"Number of slices {n_slices} is not equal to number of start pi files {len(start)}"
+        )
+    else:
+        pis_init = [np.genfromtxt(pi_path, delimiter=",") for pi_path in start]
+
+    # make output directory (and any missing parents) if it doesn't exist
+    output_directory = Path(output_directory)
+    output_directory.mkdir(parents=True, exist_ok=True)
+
+    new_slices = None
+
+    if mode == "pairwise":
+        logger.info("Computing Pairwise Alignment ")
+        pis = []
+        for i in range(n_slices - 1):
+            slice_a, slice_b = slices[i], slices[i + 1]
+            # NOTE(review): with return_obj=True the aligner may return more
+            # than the mapping, which the DataFrame below does not expect;
+            # confirm against pairwise_align.
+            pi = pairwise_align(
+                sliceA=slice_a,
+                sliceB=slice_b,
+                s=overlap_fraction,
+                M=cost_matrix,
+                alpha=alpha,
+                dissimilarity=cost,
+                use_rep=None,
+                G_init=pis_init[i],
+                a_distribution=slice_a.obsm["weights"],
+                b_distribution=slice_b.obsm["weights"],
+                norm=norm,
+                numItermax=numItermax,
+                backend=ot.backend.NumpyBackend(),
+                use_gpu=use_gpu,
+                return_obj=return_obj,
+                maxIter=max_iter,
+                optimizeTheta=optimizeTheta,
+                eps=eps,
+                is_histology=is_histology,
+                armijo=armijo,
+            )
+            pis.append(pi)
+            pd.DataFrame(
+                pi, index=slice_a.obs.index, columns=slice_b.obs.index
+            ).to_csv(output_directory / f"slice_{i+1}_{i+2}_pairwise.csv")
+
+        if coordinates:
+            new_slices = stack_slices_pairwise(
+                slices, pis, is_partial=overlap_fraction is not None
+            )
+
+    else:  # mode == "center"
+        logger.info("Computing Center Alignment")
+        starting_slice = slices[initial_slice - 1].copy()
+
+        center_slice, pis = center_align(
+            A=starting_slice,
+            slices=slices,
+            lmbda=lmbda,
+            alpha=alpha,
+            n_components=n_components,
+            threshold=threshold,
+            max_iter=max_iter,
+            dissimilarity=cost,
+            norm=norm,
+            random_seed=seed,
+            pis_init=pis_init,
+            distributions=[one_slice.obsm["weights"] for one_slice in slices],
+            backend=ot.backend.NumpyBackend(),
+            use_gpu=use_gpu,
+        )
+
+        center_slice.write(output_directory / "center_slice.h5ad")
+        # Write every returned mapping; the previous bound of len(pis) - 1
+        # silently dropped the mapping for the last slice.
+        for i, pi in enumerate(pis):
+            pd.DataFrame(
+                pi, index=center_slice.obs.index, columns=slices[i].obs.index
+            ).to_csv(output_directory / f"slice_{i}_{i+1}_pairwise.csv")
+
+        if coordinates:
+            # Center stacking returns the transformed center plus the slices.
+            new_center, new_slices = stack_slices_center(center_slice, slices, pis)
+            new_center.write(output_directory / "new_center.h5ad")
+
+    if coordinates:
+        for i, stacked in enumerate(new_slices, start=1):
+            stacked.write(output_directory / f"new_slices_{i}.h5ad")
+
+
+def add_args(parser):
+    """Register the command-line options of the ``align`` command on ``parser``.
+
+    Args:
+        parser: An ``argparse`` parser (or sub-parser) to extend in place.
+
+    Returns:
+        The same ``parser`` object, to allow chaining.
+    """
+    # Restrict the mode at parse time, the same way --cost is restricted,
+    # so a typo yields a usage error rather than a failure after loading data.
+    parser.add_argument(
+        "mode",
+        type=str,
+        choices=["pairwise", "center"],
+        help="Alignment type: 'pairwise' or 'center'.",
+    )
+    # The gene expression files are the one input alignment cannot run
+    # without, so the option is mandatory.
+    parser.add_argument(
+        "--g_fpath",
+        type=str,
+        nargs="+",
+        required=True,
+        help="Paths to gene exp files (.csv/ .h5ad).",
+    )
+    parser.add_argument(
+        "--s_fpath", type=str, nargs="*", help="Paths to spatial data files (.csv)."
+    )
+    parser.add_argument(
+        "--w_fpath", type=str, nargs="*", help="Paths to spot weight files (.csv)."
+    )
+    parser.add_argument(
+        "--output_dir", default="./output", help="Directory to save output files."
+    )
+    parser.add_argument(
+        "--alpha", type=float, default=0.1, help="Alpha param for alignment (0 to 1)."
+    )
+    parser.add_argument(
+        "--cost",
+        choices=["kl", "euc", "gkl", "selection_kl", "pca", "glmpca"],
+        default="kl",
+        help="Expression dissimilarity cost",
+    )
+
+    parser.add_argument(
+        "--cost_mat", type=str, help="Paths to exp dissimilarity cost matrix."
+    )
+    parser.add_argument(
+        "--n_comp", type=int, default=15, help="Components for NMF in center alignment."
+    )
+    parser.add_argument(
+        "--lmbda", type=float, nargs="+", help="Weight vector for each slice."
+    )
+    parser.add_argument(
+        "--init_slice", type=int, default=1, help="First slice for alignment (1 to n)."
+    )
+    parser.add_argument(
+        "--thresh",
+        type=float,
+        default=1e-3,
+        help="Convergence threshold for alignment.",
+    )
+
+    parser.add_argument(
+        "--coor", action="store_true", help="Compute and save new coordinates."
+    )
+    parser.add_argument(
+        "--ovlp_frac", type=float, default=None, help="Overlap fraction (0-1)."
+    )
+    parser.add_argument(
+        "--start", type=str, nargs="+", help="Paths to initial alignment files."
+    )
+    parser.add_argument(
+        "--norm", action="store_true", help="Normalize expression data if True."
+    )
+    # No default on purpose: None means "flag not given".
+    parser.add_argument("--max_iter", type=int, help="Maximum number of iterations.")
+    parser.add_argument(
+        "--gpu", action="store_true", help="Use GPU for processing if True."
+    )
+    parser.add_argument("--r_info", action="store_true", help="Returns log if True.")
+    parser.add_argument(
+        "--hist", action="store_true", help="Use histological images if True."
+    )
+    parser.add_argument(
+        "--armijo", action="store_true", help="Run Armijo line search if True."
+    )
+    parser.add_argument(
+        "--seed", type=int, default=0, help="Random seed for reproducibility."
+    )
+    return parser
+
+
+def main(args):
+    """Run ``align`` with the options of a parsed command line.
+
+    Args:
+        args: Namespace produced by a parser that ``add_args`` was applied to.
+    """
+    options = dict(
+        mode=args.mode,
+        gene_fpath=args.g_fpath,
+        spatial_fpath=args.s_fpath,
+        output_directory=args.output_dir,
+        alpha=args.alpha,
+        cost=args.cost,
+        n_components=args.n_comp,
+        lmbda=args.lmbda,
+        initial_slice=args.init_slice,
+        threshold=args.thresh,
+        coordinates=args.coor,
+        weight_fpath=args.w_fpath,
+        overlap_fraction=args.ovlp_frac,
+        start=args.start,
+        seed=args.seed,
+        cost_matrix=args.cost_mat,
+        norm=args.norm,
+        use_gpu=args.gpu,
+        return_obj=args.r_info,
+        is_histology=args.hist,
+        armijo=args.armijo,
+    )
+    # --max_iter has no default, so it is None when the flag is omitted.
+    # Forward it only when given; otherwise align() keeps its own numItermax
+    # default instead of receiving None.
+    # NOTE(review): the flag feeds ``numItermax``, not ``max_iter`` - confirm
+    # that this mapping is the intended one.
+    if args.max_iter is not None:
+        options["numItermax"] = args.max_iter
+
+    align(**options)