diff --git a/DESCRIPTION b/DESCRIPTION
index 3bca396..3c32a34 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,7 +3,7 @@ Type: Package
 Title: clustSIGNAL: a spatial clustering method
 Version: 0.1.0
 Author: c(
-    person(given = "Pratibha", family = "Panwar", email = "pratibha.panwar@sydney.edu.au", role = c("cre", "aut")),
+    person(given = "Pratibha", family = "Panwar", email = "pratibha.panwar@sydney.edu.au", role = c("cre", "aut", "ctb")),
     person(given = "Boyi", family = "Guo", email = "", role = "aut")),
     person(given = "Haowen", family = "Zhao", email = "", role = "aut")),
     person(given = "Stephanie", family = "Hicks", email = "", role = "aut")),
@@ -18,9 +18,30 @@ Description: clustSIGNAL: clustering of Spatially Informed Gene expression with
 License: GPL-2
 Encoding: UTF-8
 LazyData: true
+LazyDataCompression: xz
+URL: https://sydneybiox.github.io/clustSIGNAL/, https://github.com/sydneybiox/clustSIGNAL
+BugReports: https://github.com/sydneybiox/clustSIGNAL/issues
+biocViews: Clustering, Software 
+Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
-Depends: R (>= 4.0.0), SpatialExperiment, doParallel
-Imports: BiocNeighbors, bluster, scater, aricode, distances, cluster, ggplot2, patchwork, BiocStyle
-Sugests: knitr, rmarkdown
-VignetteBuilder: knitr, rmarkdown
-URL: https://sydneybiox.github.io/clustSIGNAL/
+Depends: 
+    R (>= 4.0.0), 
+    SpatialExperiment, 
+    doParallel
+Imports: 
+    BiocNeighbors, 
+    bluster, 
+    scater, 
+    aricode, 
+    distances, 
+    cluster, 
+    ggplot2, 
+    patchwork, 
+    BiocStyle,
+    dplyr
+Suggests: 
+    knitr, 
+    rmarkdown
+VignetteBuilder: 
+    knitr, 
+    rmarkdown
diff --git a/R/adaptiveSmoothing.R b/R/adaptiveSmoothing.R
index 5c20671..a9f23ef 100644
--- a/R/adaptiveSmoothing.R
+++ b/R/adaptiveSmoothing.R
@@ -21,7 +21,7 @@
 #' @export
 
 #### Smoothing
-adaptiveSmoothing <- function(spe, nnCells, NN, kernel, spread) {
+adaptiveSmoothing <- function(spe, nnCells, NN = 30, kernel = "G", spread = 0.05) {
     ed = unique(spe$entropy)
     gXc = as(logcounts(spe), "sparseMatrix")
     if (kernel == "G") {
diff --git a/R/clustering.R b/R/clustering.R
index 692ef5c..7cbd2aa 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -4,7 +4,9 @@
 #' A function containing two steps used at different times in the clustSIGNAL workflow. An initial non-spatial clustering and sub-clustering step (reclust = FALSE) is used to generate groups of ‘putative cell types’, whereas a later non-spatial clustering step (reclust = TRUE) is used to cluster adaptively smoothed gene expression data.
 #'
 #' @param spe SpatialExperiment object. For reclust = FALSE, the object should contain logcounts and PCA, but for reculst = TRUE, the object should contain smoothed gene expression.
+#' @param dimRed a character indicating the name of the reduced dimensions to use from the SpatialExperiment object (i.e., from reducedDimNames(spe)). Default value is 'PCA'.
 #' @param reclust a logical parameter handled within the method.
+#' @param ... additional parameters for TwoStepParam clustering methods. Include parameters like k for number of nearest neighbours and cluster.fun for selecting community detection method. Default values k = 5, cluster.fun = "louvain".
 #'
 #' @return SpatialExperiment object containing 'putative cell type' group allotted to each cell (reclust = FALSE) or clusters generated from smoothed data (reclust = TRUE).
 #'
@@ -22,7 +24,7 @@
 #' @export
 
 #### Non-spatial clustering
-nsClustering <- function(spe, dimRed, reclust, ...) {
+nsClustering <- function(spe, dimRed = "PCA", reclust, ...) {
     # number of centers = 1/5th of total cells in sample
     clustVal <- min(as.integer(ncol(spe) / 5), 50000)
     if (reclust == FALSE) {
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..8f8e6f2
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,53 @@
+#' Mouse Embryo Data as SpatialExperiment object
+#'
+#' This dataset contains spatial transcriptomics data from 3 mouse embryos, with
+#' 351 genes and a total of 57,536 cells. For running vignettes and examples, we subset
+#' the data by selecting only embryo 2 and removed all cells that were annotated
+#' as 'low quality'. After subsetting, we have 14,185 cells from embryo 2 and 351
+#' genes.
+#'
+#'
+#' @name mEmbryo2
+#' @aliases nnCells me_data me_expr regXclust
+#' @docType data
+#' @format
+#' \code{me_expr} a gene expression matrix with normalised counts, where rows indicate
+#' genes and columns indicate cells.
+#' \code{me_data} a data frame of cell metadata including cell IDs, sample IDs,
+#' cell type annotations, and x-y coordinates of cells.
+#' \code{nnCells} a matrix where each row corresponds to a cell in spe object,
+#' and the columns correspond to the nearest neighbors.
+#' \code{regXclust} a list where each element corresponds to a cell in spe object,
+#' and contains the cluster composition proportions.
+#' @usage data(mEmbryo2)
+#' @source Integration of spatial and single-cell transcriptomic data elucidates mouse
+#' organogenesis, \emph{Nature Biotechnology}, 2022.
+#' Webpage: \url{https://www.nature.com/articles/s41587-021-01006-2}
+#' @keywords datasets
+NULL
+
+
+#' Mouse Hypothalamus Data as SpatialExperiment object
+#'
+#' This dataset contains spatial transcriptomics data from 181 mouse hypothalamus
+#' samples, with 155 genes and a total of 1,027,080 cells. For running the
+#' vignettes, we subset the data by selecting only 3 samples - Animal 1 Bregma -0.09
+#' and Animal 7 Bregmas 0.16 and -0.09, removed all cells that were annotated
+#' as 'ambiguous', and removed 20 genes that were assessed using a different technology.
+#' After subsetting, we have 15,848 cells from 3 mouse brain samples and 135 genes.
+#'
+#'
+#' @name mHypothal
+#' @aliases mh_data mh_expr
+#' @docType data
+#' @format
+#' \code{mh_expr} a gene expression matrix with normalised counts, where rows indicate
+#' genes and columns indicate cells.
+#' \code{mh_data} a data frame of cell metadata including cell IDs, sample IDs,
+#' cell type annotations, and x-y coordinates of cells.
+#' @usage data(mHypothal)
+#' @source Molecular, Spatial and Functional Single-Cell Profiling of the
+#' Hypothalamic Preoptic Region, \emph{Science}, 2018.
+#' Webpage: \url{https://www.science.org/doi/10.1126/science.aau5324}
+#' @keywords datasets
+NULL
diff --git a/R/entropyMeasure.R b/R/entropyMeasure.R
index 8a0bcb1..005a12e 100644
--- a/R/entropyMeasure.R
+++ b/R/entropyMeasure.R
@@ -21,7 +21,7 @@
 #' @export
 
 #### Domainness measure
-entropyMeasure <- function(spe, cells, regXclust, threads) {
+entropyMeasure <- function(spe, cells, regXclust, threads = 1) {
     cellsList <- as.vector(spe[[cells]])
     cl <- parallel::makeCluster(threads)
     doParallel::registerDoParallel(cl)
diff --git a/R/neighborDetect.R b/R/neighborDetect.R
index cd53e5e..528c2cb 100644
--- a/R/neighborDetect.R
+++ b/R/neighborDetect.R
@@ -24,7 +24,7 @@
 #' @export
 
 #### Region description + sorting
-neighbourDetect <- function(spe, samples, NN, cells, sort) {
+neighbourDetect <- function(spe, samples, NN = 30, cells, sort = TRUE) {
     samplesList <- unique(spe[[samples]])
     nnCells <- matrix(nrow = 0, ncol = NN + 1)
     nnClusts <- matrix(nrow = 0, ncol = NN)
diff --git a/data/mEmbryo2.RData b/data/mEmbryo2.RData
new file mode 100644
index 0000000..c1a4f75
Binary files /dev/null and b/data/mEmbryo2.RData differ
diff --git a/data/mHypothal.RData b/data/mHypothal.RData
new file mode 100644
index 0000000..11ee807
Binary files /dev/null and b/data/mHypothal.RData differ
diff --git a/data/mouseEmbryo2.rda b/data/mouseEmbryo2.rda
deleted file mode 100644
index 26dd553..0000000
Binary files a/data/mouseEmbryo2.rda and /dev/null differ
diff --git a/data/mousePH_subset.rda b/data/mousePH_subset.rda
deleted file mode 100644
index ea3f88e..0000000
Binary files a/data/mousePH_subset.rda and /dev/null differ
diff --git a/man/adaptiveSmoothing.Rd b/man/adaptiveSmoothing.Rd
index 81044e3..718a45d 100644
--- a/man/adaptiveSmoothing.Rd
+++ b/man/adaptiveSmoothing.Rd
@@ -4,7 +4,7 @@
 \alias{adaptiveSmoothing}
 \title{Adaptive smoothing}
 \usage{
-adaptiveSmoothing(spe, nnCells, NN, kernel, spread)
+adaptiveSmoothing(spe, nnCells, NN = 30, kernel = "G", spread = 0.05)
 }
 \arguments{
 \item{spe}{SpatialExperiment object with logcounts, PCA, 'putative cell type' groups, and entropy outputs included.}
diff --git a/man/clustSIGNAL.Rd b/man/clustSIGNAL.Rd
index afe1ad3..f9373c7 100644
--- a/man/clustSIGNAL.Rd
+++ b/man/clustSIGNAL.Rd
@@ -43,12 +43,11 @@ clustSIGNAL(
 }
 \value{
 a list of outputs
-
-1. clusters: a data frame of cell names and their cluster classification.
-
-2. neighbours: a matrix of cell names and the names of their NN nearest neighbour cells.
-
-3. spe_final: a SpatialExperiment object with initial 'putative cell type' groups, entropy values, smoothed gene expression, post-smoothing clusters, and silhouette widths included.
+\enumerate{
+\item clusters: a data frame of cell names and their cluster classification.
+\item neighbours: a matrix of cell names and the names of their NN nearest neighbour cells.
+\item spe_final: a SpatialExperiment object with initial 'putative cell type' groups, entropy values, smoothed gene expression, post-smoothing clusters, and silhouette widths included.
+}
 }
 \description{
 A clustering method for cell type classification of spatial transcriptomics data. The tool generates and uses an adaptively smoothed, spatially informed gene expression data for clustering.
diff --git a/man/entropyMeasure.Rd b/man/entropyMeasure.Rd
index 3331ce5..6690af5 100644
--- a/man/entropyMeasure.Rd
+++ b/man/entropyMeasure.Rd
@@ -4,7 +4,7 @@
 \alias{entropyMeasure}
 \title{Domainness measure}
 \usage{
-entropyMeasure(spe, cells, regXclust, threads)
+entropyMeasure(spe, cells, regXclust, threads = 1)
 }
 \arguments{
 \item{spe}{SpatialExperiment object with logcounts, PCA, and 'putative cell type' groups included.}
diff --git a/man/mEmbryo2.Rd b/man/mEmbryo2.Rd
new file mode 100644
index 0000000..2193258
--- /dev/null
+++ b/man/mEmbryo2.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{mEmbryo2}
+\alias{mEmbryo2}
+\alias{nnCells}
+\alias{me_data}
+\alias{me_expr}
+\alias{regXclust}
+\title{Mouse Embryo Data as SpatialExperiment object}
+\format{
+\code{me_expr} a gene expression matrix with normalised counts, where rows indicate
+genes and columns indicate cells.
+\code{me_data} a data frame of cell metadata including cell IDs, sample IDs,
+cell type annotations, and x-y coordinates of cells.
+\code{nnCells} a matrix where each row corresponds to a cell in spe object,
+and the columns correspond to the nearest neighbors.
+\code{regXclust} a list where each element corresponds to a cell in spe object,
+and contains the cluster composition proportions.
+}
+\source{
+Integration of spatial and single-cell transcriptomic data elucidates mouse
+organogenesis, \emph{Nature Biotechnology}, 2022.
+Webpage: \url{https://www.nature.com/articles/s41587-021-01006-2}
+}
+\usage{
+data(mEmbryo2)
+}
+\description{
+This dataset contains spatial transcriptomics data from 3 mouse embryos, with
+351 genes and a total of 57,536 cells. For running vignettes and examples, we subset
+the data by selecting only embryo 2 and removed all cells that were annotated
+as 'low quality'. After subsetting, we have 14,185 cells from embryo 2 and 351
+genes.
+}
+\keyword{datasets}
diff --git a/man/mHypothal.Rd b/man/mHypothal.Rd
new file mode 100644
index 0000000..3846301
--- /dev/null
+++ b/man/mHypothal.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{mHypothal}
+\alias{mHypothal}
+\alias{mh_data}
+\alias{mh_expr}
+\title{Mouse Hypothalamus Data as SpatialExperiment object}
+\format{
+\code{mh_expr} a gene expression matrix with normalised counts, where rows indicate
+genes and columns indicate cells.
+\code{mh_data} a data frame of cell metadata including cell IDs, sample IDs,
+cell type annotations, and x-y coordinates of cells.
+}
+\source{
+Molecular, Spatial and Functional Single-Cell Profiling of the
+Hypothalamic Preoptic Region, \emph{Science}, 2018.
+Webpage: \url{https://www.science.org/doi/10.1126/science.aau5324}
+}
+\usage{
+data(mHypothal)
+}
+\description{
+This dataset contains spatial transcriptomics data from 181 mouse hypothalamus
+samples, with 155 genes and a total of 1,027,080 cells. For running the
+vignettes, we subset the data by selecting only 3 samples - Animal 1 Bregma -0.09
+and Animal 7 Bregmas 0.16 and -0.09, removed all cells that were annotated
+as 'ambiguous', and removed 20 genes that were assessed using a different technology.
+After subsetting, we have 15,848 cells from 3 mouse brain samples and 135 genes.
+}
+\keyword{datasets}
diff --git a/man/neighbourDetect.Rd b/man/neighbourDetect.Rd
index a458a39..d215a54 100644
--- a/man/neighbourDetect.Rd
+++ b/man/neighbourDetect.Rd
@@ -4,7 +4,7 @@
 \alias{neighbourDetect}
 \title{Cell neighbourhood detection}
 \usage{
-neighbourDetect(spe, samples, NN, cells, sort)
+neighbourDetect(spe, samples, NN = 30, cells, sort = TRUE)
 }
 \arguments{
 \item{spe}{SpatialExperiment object with logcounts, PCA, and 'putative cell type' groups included.}
@@ -19,10 +19,10 @@ neighbourDetect(spe, samples, NN, cells, sort)
 }
 \value{
 a list containing two items:
-
-1. nnCells, a character matrix of NN nearest neighbours - rows are cells and columns are their nearest neighbours ranged from closest to farthest neighbour. For sort = TRUE, the neighbours belonging to the same 'putative cell type' group as the cell are moved closer to it.
-
-2. regXclust, a list of vectors for each cell's neighbourhood composition indicated by the proportion of 'putative cell type' groups it contains.
+\enumerate{
+\item nnCells, a character matrix of NN nearest neighbours - rows are cells and columns are their nearest neighbours ranged from closest to farthest neighbour. For sort = TRUE, the neighbours belonging to the same 'putative cell type' group as the cell are moved closer to it.
+\item regXclust, a list of vectors for each cell's neighbourhood composition indicated by the proportion of 'putative cell type' groups it contains.
+}
 }
 \description{
 A function to identify the neighbourhood of each cell. If sort = TRUE, the neighbourhoods are also sorted such that cells belonging to the same group as the central cell are arranged closer to it.
diff --git a/man/nsClustering.Rd b/man/nsClustering.Rd
index f171feb..c19e4f0 100644
--- a/man/nsClustering.Rd
+++ b/man/nsClustering.Rd
@@ -4,12 +4,16 @@
 \alias{nsClustering}
 \title{Non-spatial clustering}
 \usage{
-nsClustering(spe, dimRed, reclust, ...)
+nsClustering(spe, dimRed = "PCA", reclust, ...)
 }
 \arguments{
 \item{spe}{SpatialExperiment object. For reclust = FALSE, the object should contain logcounts and PCA, but for reculst = TRUE, the object should contain smoothed gene expression.}
 
+\item{dimRed}{a character indicating the name of the reduced dimensions to use from the SpatialExperiment object (i.e., from reducedDimNames(spe)). Default value is 'PCA'.}
+
 \item{reclust}{a logical parameter handled within the method.}
+
+\item{...}{additional parameters for TwoStepParam clustering methods. Include parameters like k for number of nearest neighbours and cluster.fun for selecting community detection method. Default values k = 5, cluster.fun = "louvain".}
 }
 \value{
 SpatialExperiment object containing 'putative cell type' group allotted to each cell (reclust = FALSE) or clusters generated from smoothed data (reclust = TRUE).
diff --git a/vignettes/MERFISH_mouseHypothalamus.Rmd b/vignettes/MERFISH_mouseHypothalamus.Rmd
index 8bdebe8..66000f6 100644
--- a/vignettes/MERFISH_mouseHypothalamus.Rmd
+++ b/vignettes/MERFISH_mouseHypothalamus.Rmd
@@ -37,12 +37,14 @@ library(patchwork)
 ```
 
 ```{r}
-data(mousePH_subset)
-spe2
+load("mHypothal.RData")
+spe = SpatialExperiment(assays = list(logcounts = mh_expr),
+                        colData = mh_data, spatialCoordsNames = c("X", "Y"))
+spe
 ```
 
 ```{r}
-names(colData(spe2))
+names(colData(spe))
 ```
 
 To run clustSIGNAL, we need the column names of sample and cell IDs in the colData dataframe of the spatial experiment object. Here, the cell IDs are in the column 'Cell_ID' and sample IDs are in 'samples' column.
@@ -53,38 +55,38 @@ To run clustSIGNAL, we need the column names of sample and cell IDs in the colDa
 set.seed(101)
 samples = "samples"
 cells = "Cell_ID"
-res_hyp = clustSIGNAL(spe2, samples, cells, outputs = "a")
+res_hyp = clustSIGNAL(spe, samples, cells, outputs = "a")
 ```
 
 ```{r}
-spe2 = res_hyp$spe_final
-spe2
+spe = res_hyp$spe_final
+spe
 ```
 
 # Calculating clustering metrics
 
 ```{r}
-samplesList <- levels(spe2[[samples]])
+samplesList <- levels(spe[[samples]])
 ```
 
 ```{r}
 # calculating silhouette width per sample
 silWidthRC <- matrix(nrow = 0, ncol = 3)
 for (s in samplesList) {
-  speX <- spe2[, spe2[[samples]] == s]
+  speX <- spe[, spe[[samples]] == s]
   clust_sub <- as.numeric(as.character(speX$reCluster))
   cXg <- t(as.matrix(logcounts(speX)))
   distMat <- distances(cXg)
   silCluster <- as.matrix(silhouette(clust_sub, distMat))
   silWidthRC <- rbind(silWidthRC, silCluster)
 }
-spe2$rcSil <- silWidthRC[, 3]
+spe$rcSil <- silWidthRC[, 3]
 ```
 
 ```{r}
 # for datasets with annotated cell type information, we can also calculate 
 # metrics like adjusted rand index (ARI) and normalised mutual information (NMI)
-as.data.frame(colData(spe2)) %>%
+as.data.frame(colData(spe)) %>%
   group_by(samples) %>%
   summarise(ARI = aricode::ARI(Cell_class, reCluster),
             NMI = aricode::NMI(Cell_class, reCluster),
@@ -111,7 +113,7 @@ colors = c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C",
 
 ```{r}
 # Histogram of entropy spread
-hst_ent <- as.data.frame(colData(spe2)) %>%
+hst_ent <- as.data.frame(colData(spe)) %>%
   ggplot(aes(entropy)) +
   geom_histogram(binwidth = 0.05) +
   ggtitle("A") +
@@ -121,13 +123,13 @@ hst_ent <- as.data.frame(colData(spe2)) %>%
   theme(text = element_text(size = 12))
 
 # Spatial plot showing sample entropy distribution
-spt_ent <- as.data.frame(colData(spe2)) %>%
-  ggplot(aes(x = spatialCoords(spe2)[, 1], 
-             y = -spatialCoords(spe2)[, 2])) +
+spt_ent <- as.data.frame(colData(spe)) %>%
+  ggplot(aes(x = spatialCoords(spe)[, 1], 
+             y = -spatialCoords(spe)[, 2])) +
   geom_point(size = 0.5, 
              aes(colour = entropy)) +
   scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
-  scale_size_continuous(range = c(0, max(spe2$entropy))) +
+  scale_size_continuous(range = c(0, max(spe$entropy))) +
   facet_wrap(vars(samples), scales = "free", nrow = 1) +
   ggtitle("B") +
   labs(x = "x-coordinate", y = "y-coordinate") +
@@ -143,12 +145,12 @@ In multisample analysis, the spread (A) and spatial distribution (B) of region e
 ## clustSIGNAL clusters visualisation
 
 ```{r}
-df_ent = as.data.frame(colData(spe2))
+df_ent = as.data.frame(colData(spe))
 
 # spatial plot
 spt_clust <- df_ent %>%
-  ggplot(aes(x = spatialCoords(spe2)[, 1], 
-             y = -spatialCoords(spe2)[, 2])) +
+  ggplot(aes(x = spatialCoords(spe)[, 1], 
+             y = -spatialCoords(spe)[, 2])) +
   geom_point(size = 0.5, aes(colour = reCluster)) +
   scale_color_manual(values = colors) +
   facet_wrap(vars(samples), scales = "free", nrow = 1) +
@@ -161,7 +163,7 @@ spt_clust <- df_ent %>%
 
 box_clust = list()
 for (s in samplesList) {
-  df_ent_sub = as.data.frame(colData(spe2)[spe2[[samples]] == s, ])
+  df_ent_sub = as.data.frame(colData(spe)[spe[[samples]] == s, ])
   # calculating median entropy of each cluster in a sample
   celltype_ent = df_ent_sub %>%
     group_by(as.character(reCluster)) %>%
diff --git a/vignettes/seqFISH_mouseEmbryo.Rmd b/vignettes/seqFISH_mouseEmbryo.Rmd
index f01aff7..6b077c9 100644
--- a/vignettes/seqFISH_mouseEmbryo.Rmd
+++ b/vignettes/seqFISH_mouseEmbryo.Rmd
@@ -37,7 +37,9 @@ library(patchwork)
 ```
 
 ```{r}
-data(mouseEmbryo2)
+load("mEmbryo2.RData")
+spe = SpatialExperiment(assays = list(logcounts = me_expr),
+                        colData = me_data, spatialCoordsNames = c("X", "Y"))
 spe
 ```