Skip to content

Commit

Permalink
test_that on fc_reorder
Browse files Browse the repository at this point in the history
  • Loading branch information
ds4ci committed Jun 21, 2015
1 parent b0c7195 commit 5859455
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 6 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ Version: 0.1
Date: 2015-06-13
Author: Jim Porzak
Maintainer: Jim Porzak <[email protected]>
Description: A set of tools for doing customer segmentatio with R. Initially focused on using flexclust package.
Description: A set of tools for doing customer segmentatio with R. Initially focused on
using flexclust package.
License: GPL-2
LazyData: TRUE
Imports:
Imports:
dplyr,
flexclust,
grid,
ggplot2,
MASS,
tidyr
Suggests: knitr
Suggests: knitr,
testthat
VignetteBuilder: knitr
8 changes: 5 additions & 3 deletions R/fc_rclust.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#' Generate a List of Random kcca Objects.
#'
#' For a given number of clusters, \code{k}, \code{nrep} kcca objects are generated.
#' Each is re-ordered so clusters are in decending size order. Cluster summary information is
#'
#' Each is kcca object is re-ordered so clusters are in decending size order. Cluster summary information is
#' pulled out of each object from the \code{clusinfo} slot. This includes the sizes of the clusters.
#'
#' To look for stable cluster solutions, each random run is characterized by the sizes of the first
Expand All @@ -15,10 +16,11 @@
#' @param k Integer. Number of clusters for this run.
#' @param fc_contol The flexclust control object for this run.
#' @param nrep Integer. Number of repititions to run.
#' @param verbose Logical. Override for fc_control@verbose.
#' @param verbose Logical. Override for fc_control slot verbose.
#' @param FUN flexclust function.
#' @param seed Integer. Starting set.seed value for this run.
#' @param plotme Logical. Should plot be produced as side-effect?
#' @return A list(best, sizes, peak_at, tries)
fc_rclust <- function(x, k, fc_cont, nrep=100, verbose=FALSE, FUN = kcca, seed=1234, plotme=TRUE){
fc_seed = seed
fc_tries <- NULL
Expand All @@ -27,7 +29,7 @@ fc_rclust <- function(x, k, fc_cont, nrep=100, verbose=FALSE, FUN = kcca, seed=1
set.seed(fc_seed)
cli <- flexclust::kcca(x, k, save.data = TRUE,
control = fc_cont, family = kccaFamily(fc_family))
cli.re <- fc_reorder(cli, orderby = "decending size")
cli.re <- CustSegs::fc_reorder(cli, orderby = "decending size")
cli_info <- cli.re@clusinfo %>%
dplyr::mutate(clust_num = row_number(),
clust_rank = min_rank(desc(size))) %>%
Expand Down
51 changes: 51 additions & 0 deletions man/fc_rclust.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/fc_rclust.R, R/fc_stable.R
\name{fc_rclust}
\alias{fc_rclust}
\title{Generate a List of Random kcca Objects.}
\usage{
fc_rclust(x, k, fc_cont, nrep = 100, verbose = FALSE, FUN = kcca,
seed = 1234, plotme = TRUE)

fc_rclust(x, k, fc_cont, nrep = 100, verbose = FALSE, FUN = kcca,
seed = 1234, plotme = TRUE)
}
\arguments{
\item{x}{Integer. matrix. Input to kcca.}

\item{k}{Integer. Number of clusters for this run.}

\item{nrep}{Integer. Number of repititions to run.}

\item{verbose}{Logical. Override for fc_control slot verbose.}

\item{FUN}{flexclust function.}

\item{seed}{Integer. Starting set.seed value for this run.}

\item{plotme}{Logical. Should plot be produced as side-effect?}

\item{fc_contol}{The flexclust control object for this run.}
}
\value{
A list(best, sizes, peak_at, tries)

tbl_df of k * nrep rows with cluster summary for k, seed, cluster #
}
\description{
For a given number of clusters, \code{k}, \code{nrep} kcca objects are generated.

Repeat flexclust runs
}
\details{
Each is kcca object is re-ordered so clusters are in decending size order. Cluster summary information is
pulled out of each object from the \code{clusinfo} slot. This includes the sizes of the clusters.

To look for stable cluster solutions, each random run is characterized by the sizes of the first
two clusters; which will be the largest after reording. \code{kde2d()} from the MASS package is
used to find density contours. The highest peak is determined and the distance of each solution
to the peak is retained for each cluster.

Optionally, the scatter plot of the sizes and corresponding coutour in plotted.
}

31 changes: 31 additions & 0 deletions man/fc_reorder.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/fc_reorder.R
\name{fc_reorder}
\alias{fc_reorder}
\title{Reorder clusters in a kcca object.}
\usage{
fc_reorder(x, orderby = "decending size")
}
\arguments{
\item{x}{A kcca object.}

\item{orderby}{A string. Specifying the method to order by. Currently only "decending size".}
}
\value{
The kcca object with clusters reordered.
}
\description{
Since running kcca with different seeds will result, at least, in equivalent
clusters having different cluster sequence numbers which makes interpretation
of repeated runs difficult.
}
\details{
\code{fc_reorder} simply rearranges the clusters within the kcca object according
to the requested method.
}
\examples{
\dontrun{
fc_reorder(kcca(x, k, save.data = TRUE, control = fc_cont, family = kccaFamily(fc_family)))
}
}

4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(CustSegs)

test_check("CustSegs")
Binary file added tests/testthat/VolClBre.rds
Binary file not shown.
53 changes: 53 additions & 0 deletions tests/testthat/test-reorder.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
library(CustSegs)
library(flexclust)
context("flexclust kcca object re-order")

data("volunteers")
vol_ch <- volunteers[-(1:2)]
vol.mat <- as.matrix(vol_ch)
fc_cont <- new("flexclustControl") ## flexclustControl object holds "hyperparameters"
fc_cont@tolerance <- 0.1 ## kcca only uses if classify == "weighted"
fc_cont@iter.max <- 30
fc_cont@verbose <- 0
fc_family <- "ejaccard" ## distance metric

#####
# check reorder on object already in order does nothing at al
##
num_clusters <- 3
fc_seed <- 577 ## magic seed to get result in order
set.seed(fc_seed)
vol.cl.A <- kcca(vol.mat, k = num_clusters, save.data = TRUE,
control = fc_cont, family = kccaFamily(fc_family))
summary(vol.cl.A)
sizes.A <- vol.cl.A@clusinfo[[1]]
vol.cl.A.re <- fc_reorder(vol.cl.A)
test_that("fc_reorder does not clobber already ordered kcca object", {
expect_is(vol.cl.A.re, "kcca")
expect_equal(sizes.A, c(1078, 258, 79))
expect_identical(vol.cl.A, vol.cl.A.re)
})

#####
# check reorder on object that needs reordering
##
fc_seed <- 243 ## magic seed to get result needing reordering
set.seed(fc_seed)
vol.cl.B <- kcca(vol.mat, k = num_clusters, save.data = TRUE,
control = fc_cont, family = kccaFamily(fc_family))
summary(vol.cl.B)
sizes.B <- vol.cl.B@clusinfo[[1]]
vol.cl.B.re <- fc_reorder(vol.cl.B)
sizes.B.re <- vol.cl.B.re@clusinfo[[1]]
test_that("fc_reorder does in-fact reorder clusters within kcca object", {
expect_is((vol.cl.B.re), "kcca")
expect_equal(sizes.B, c(260, 1080, 75))
expect_equal(sizes.B.re, c(1080, 260, 75))
expect_equal_to_reference(vol.cl.B.re, file = "VolClBre.rds")
})






0 comments on commit 5859455

Please sign in to comment.