From e7a2c7d7a548af9f86e49863169ab38b456b9418 Mon Sep 17 00:00:00 2001
From: David Michaels 
Date: Wed, 20 Nov 2024 10:22:23 -0500
Subject: [PATCH 01/78] Adding /browse view

---
 CHANGELOG.rst                                 |   6 +
 pyproject.toml                                |   2 +-
 src/encoded/browse.py                         |  54 ++++
 .../static/components/browse/BrowseView.js    | 242 ++++++++++++++++++
 src/encoded/static/components/index.js        |   5 +
 5 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 src/encoded/browse.py
 create mode 100644 src/encoded/static/components/browse/BrowseView.js

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 94441a01d..4f8150a20 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,12 @@ smaht-portal
 Change Log
 ----------
 
+0.113.0
+=======
+* 2024-11-20/dmichaels
+* Added module browse.py for /browse; adapted from fourfront/.../search.py/browse.
+
+
 0.112.3
 =======
 * 2024-11-08/dmichaels
diff --git a/pyproject.toml b/pyproject.toml
index 40c210c40..cacaa4d34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "encoded"
-version = "0.112.3"
+version = "0.112.4"
 description = "SMaHT Data Analysis Portal"
 authors = ["4DN-DCIC Team "]
 license = "MIT"
diff --git a/src/encoded/browse.py b/src/encoded/browse.py
new file mode 100644
index 000000000..d5bd028ea
--- /dev/null
+++ b/src/encoded/browse.py
@@ -0,0 +1,54 @@
+import structlog
+from pyramid.view import view_config
+from webob.multidict import MultiDict
+from pyramid.httpexceptions import HTTPFound
+from urllib.parse import urlencode
+from snovault.search.search import search
+from snovault.util import debug_log
+
+log = structlog.getLogger(__name__)
+
+# 2024-11-19/dmichaels: Adapted from fourfront for C4-1184.
+
+def includeme(config):
+    config.add_route('browse', '/browse{slash:/?}')
+    config.scan(__name__)
+
+
+# DEFAULT_BROWSE_TYPE = "FileSet"
+DEFAULT_BROWSE_TYPE = "OutputFile"
+DEFAULT_BROWSE_PARAM_LISTS = {
+    "type": [DEFAULT_BROWSE_TYPE],
+    "additional_facet": ["file_size"]
+}
+
+@view_config(route_name='browse', request_method='GET', permission='search')
+@debug_log
+def browse(context, request, search_type=DEFAULT_BROWSE_TYPE, return_generator=False):
+    """
+    Simply use search results for the browse view.
+    Redirect to the proper URL with the default params if needed.
+    """
+    orig_params = request.params
+    for k, vals in DEFAULT_BROWSE_PARAM_LISTS.items():
+        if k not in orig_params or orig_params[k] not in vals:
+            # Redirect to DEFAULT_BROWSE_PARAM_LISTS URL
+            next_qs = MultiDict()
+            for k2, v2list in DEFAULT_BROWSE_PARAM_LISTS.items():
+                for v2 in v2list:
+                    next_qs.add(k2, v2)
+            # Preserve other keys that aren't in DEFAULT_BROWSE_PARAM_LISTS
+            for k2, v2 in orig_params.items():
+                if k2 not in DEFAULT_BROWSE_PARAM_LISTS:
+                    next_qs.add(k2, v2)
+            # next_qs.add("redirected_from", str(request.path_qs))
+            return HTTPFound(
+                location=str(request.path) + '?' + urlencode(next_qs),
+                detail="Redirected from " + str(request.path_info)
+            )
+
+    # TODO
+    # Returning forced_type="Search" for now as there is not
+    # yet any "Browse" UI for /browse; only "Search" for /search.
+    # return search(context, request, search_type, return_generator, forced_type="Search")
+    return search(context, request, search_type, return_generator, forced_type="Browse")
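
A sketch of the resulting routing behavior, assuming the defaults above (hypothetical requests):

    # GET /browse?foo=bar
    #   -> 302 to /browse?type=OutputFile&additional_facet=file_size&foo=bar
    #      (keys not in DEFAULT_BROWSE_PARAM_LISTS are preserved)
    # GET /browse?additional_facet=assay
    #   -> 302 to /browse?type=OutputFile&additional_facet=file_size
    #      (additional_facet is a defaulted key, so the non-default value is dropped)
    # GET /browse?type=OutputFile&additional_facet=file_size
    #   -> no redirect; falls through to the search call above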
diff --git a/src/encoded/static/components/browse/BrowseView.js b/src/encoded/static/components/browse/BrowseView.js
new file mode 100644
index 000000000..08ce8ae84
--- /dev/null
+++ b/src/encoded/static/components/browse/BrowseView.js
@@ -0,0 +1,242 @@
+'use strict';
+
+import React from 'react';
+import memoize from 'memoize-one';
+import _ from 'underscore';
+import url from 'url';
+
+import {
+    memoizedUrlParse,
+    schemaTransforms,
+    analytics,
+} from '@hms-dbmi-bgm/shared-portal-components/es/components/util';
+import { SearchView as CommonSearchView } from '@hms-dbmi-bgm/shared-portal-components/es/components/browse/SearchView';
+import { DetailPaneStateCache } from '@hms-dbmi-bgm/shared-portal-components/es/components/browse/components/DetailPaneStateCache';
+import { columnExtensionMap } from './columnExtensionMap';
+import { Schemas } from './../util';
+import {
+    TitleAndSubtitleBeside,
+    PageTitleContainer,
+    TitleAndSubtitleUnder,
+    pageTitleViews,
+    EditingItemPageTitle,
+} from './../PageTitleSection';
+
+export default function BrowseView(props) {
+    const {
+        context: { '@type': searchPageType = ['ItemSearchResults'] },
+    } = props;
+    const isCaseSearch = searchPageType[0] === 'CaseSearchResults';
+
+    if (isCaseSearch) {
+        return (
+            <DetailPaneStateCache>
+                <BrowseViewBody {...props} {...{ isCaseSearch }} />
+            </DetailPaneStateCache>
+        );
+    }
+
+    return <BrowseViewBody {...props} />;
+}
+
+export class BrowseViewBody extends React.PureComponent {
+    /**
+     * Function which is passed into a `.filter()` call to
+     * filter context.facets down, usually in response to frontend-state.
+     *
+     * Currently is meant to filter out the type facet if we're in selection mode,
+     * as well as some fields from embedded 'experiment_set' which might
+     * give unexpected results.
+     *
+     * @todo Potentially get rid of this and do on backend.
+     *
+     * @param {{ field: string }} facet - Object representing a facet.
+     * @returns {boolean} Whether to keep or discard facet.
+     */
+    static filterFacet(facet, currentAction) {
+        // Set in backend or schema for facets which are under development or similar.
+        if (facet.hide_from_view) return false;
+
+        // Remove the @type facet while in selection mode.
+        if (facet.field === 'type' && currentAction === 'selection')
+            return false;
+
+        return true;
+    }
+
+    /** Filter the `@type` facet options down to abstract types only (if none selected) for Search. */
+    static transformedFacets(context, currentAction, schemas) {
+        // Clone/filter list of facets.
+        // We may filter out type facet completely at this step,
+        // in which case we can return out of func early.
+        const facets = context.facets.filter(function (facet) {
+            return BrowseViewBody.filterFacet(facet, currentAction);
+        });
+
+        // Find facet for '@type'
+        const searchItemTypes =
+            schemaTransforms.getAllSchemaTypesFromSearchContext(context); // "Item" is excluded
+
+        if (searchItemTypes.length > 0) {
+            console.info(
+                "A (non-'Item') type filter is present. Will skip filtering Item types in Facet."
+            );
+            // Keep all terms/leaf-types - backend should already filter down to only valid sub-types through
+            // nature of search itself.
+
+            if (searchItemTypes.length > 1) {
+                const errMsg =
+                    'More than one "type" filter is selected. This is not intended to occur, at least as a consequence of interacting with the UI. Perhaps multiple types were entered into the URL.';
+                analytics.exception('CGAP SearchView - ' + errMsg);
+                console.warn(errMsg);
+            }
+
+            return facets;
+        }
+
+        const typeFacetIndex = _.findIndex(facets, { field: 'type' });
+        if (typeFacetIndex === -1) {
+            console.error(
+                'Could not get type facet, though some filter for it is present.'
+            );
+            return facets; // Facet not present, return.
+        }
+
+        // Avoid modifying in place.
+        facets[typeFacetIndex] = _.clone(facets[typeFacetIndex]);
+
+        // Show only base types when searchItemTypes.length === 0 (aka 'type=Item').
+        facets[typeFacetIndex].terms = _.filter(
+            facets[typeFacetIndex].terms,
+            function (itemType) {
+                const parentType = schemaTransforms.getAbstractTypeForType(
+                    itemType.key,
+                    schemas
+                );
+                return !parentType || parentType === itemType.key;
+            }
+        );
+
+        return facets;
+    }
+
+    /** Not currently used. */
+    static filteredFilters(filters) {
+        const typeFilterCount = filters.reduce(function (m, { field }) {
+            if (field === 'type') return m + 1;
+            return m;
+        }, 0);
+        return filters.filter(function ({ field, term }) {
+            if (field === 'type') {
+                if (term === 'Item') {
+                    return false;
+                }
+                if (typeFilterCount === 1) {
+                    return false;
+                }
+            }
+            return true;
+        });
+    }
+
+    constructor(props) {
+        super(props);
+        this.memoized = {
+            transformedFacets: memoize(BrowseViewBody.transformedFacets),
+            filteredFilters: memoize(BrowseViewBody.filteredFilters),
+        };
+    }
+
+    render() {
+        const {
+            isCaseSearch = false,
+            context,
+            currentAction,
+            schemas,
+        } = this.props;
+
+        // We don't need full screen btn on CGAP as already full width.
+        const passProps = _.omit(
+            this.props,
+            'isFullscreen',
+            'toggleFullScreen',
+            'isCaseSearch'
+        );
+
+        // const filters = BrowseViewBody.filteredFilters(context.filters || []);
+        const facets = this.memoized.transformedFacets(
+            context,
+            currentAction,
+            schemas
+        );
+        const tableColumnClassName = 'results-column col';
+        const facetColumnClassName = 'facets-column col-auto';
+
+        return (
+            <div className="search-page-container">
+                <CommonSearchView
+                    {...passProps}
+                    {...{
+                        columnExtensionMap,
+                        facets,
+                        tableColumnClassName,
+                        facetColumnClassName,
+                    }}
+                    termTransformFxn={Schemas.Term.toName}
+                />
+                HELLO: THIS IS BROWSE-VIEW!
+            </div>
+        );
+    }
+}
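+
+// Page title for the Browse view. For now this intentionally mirrors the Search
+// page title (hence the literal "Search" title below), until a dedicated /browse
+// UI and title exist.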
+const BrowseViewPageTitle = React.memo(function BrowseViewPageTitle(props) {
+    const { context, schemas, currentAction, alerts } = props;
+
+    if (currentAction === 'add') {
+        // Fallback unless any custom PageTitles registered for @type=SearchResults & currentAction=add
+        return (
+            <EditingItemPageTitle
+                {...{ context, schemas, currentAction, alerts }}
+            />
+        );
+    }
+
+    if (currentAction === 'selection' || currentAction === 'multiselect') {
+        return (
+            <PageTitleContainer alerts={alerts}>
+                <TitleAndSubtitleUnder>
+                    Selecting
+                </TitleAndSubtitleUnder>
+            </PageTitleContainer>
+        );
+    }
+
+    const thisTypeTitle = schemaTransforms.getSchemaTypeFromSearchContext(
+        context,
+        schemas
+    );
+    const subtitle = thisTypeTitle ? (
+        <span>
+            for {thisTypeTitle}
+        </span>
+    ) : null;
+
+    return (
+        <PageTitleContainer alerts={alerts}>
+            <TitleAndSubtitleBeside subtitle={subtitle}>
+                Search
+            </TitleAndSubtitleBeside>
+        </PageTitleContainer>
+    );
+});
+
+pageTitleViews.register(BrowseViewPageTitle, 'Browse');
+pageTitleViews.register(BrowseViewPageTitle, 'Browse', 'selection');
+pageTitleViews.register(BrowseViewPageTitle, 'Browse', 'add');
diff --git a/src/encoded/static/components/index.js b/src/encoded/static/components/index.js
index 1227a21dd..e0d041ac9 100644
--- a/src/encoded/static/components/index.js
+++ b/src/encoded/static/components/index.js
@@ -27,6 +27,7 @@ import DocumentView from './item-pages/DocumentView';
 import StaticSectionView from './item-pages/StaticSectionView';
 import SMaHTSubmissionView from './forms/SMaHTSubmissionView';
 import SearchView from './browse/SearchView';
+import BrowseView from './browse/BrowseView';
 import FileView from './item-pages/FileView';
 
 /**
@@ -59,6 +60,10 @@ content_views.register(SearchView, 'Search');
 content_views.register(SearchView, 'Search', 'selection');
 content_views.register(SearchView, 'Search', 'multiselect');
 
+content_views.register(BrowseView, 'Browse');
+content_views.register(BrowseView, 'Browse', 'selection');
+content_views.register(BrowseView, 'Browse', 'multiselect');
+
 // Fallback for anything we haven't registered
 content_views.fallback = function () {
     return FallbackView;

From b7b6de6bea478bf7f8b21d52fb29b19b5f649e10 Mon Sep 17 00:00:00 2001
From: David Michaels 
Date: Wed, 20 Nov 2024 10:47:48 -0500
Subject: [PATCH 02/78] Adding /browse view

---
 src/encoded/__init__.py |  1 +
 src/encoded/browse.py   | 13 ++++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/encoded/__init__.py b/src/encoded/__init__.py
index 67a2a4fb6..9fa9f9453 100644
--- a/src/encoded/__init__.py
+++ b/src/encoded/__init__.py
@@ -313,6 +313,7 @@ def main(global_config, **local_config):
     if 'elasticsearch.server' in config.registry.settings:
         config.include('snovault.elasticsearch')
         config.include('snovault.search.search')
+        config.include('encoded.browse')
         config.include('snovault.search.compound_search')
 
     # this contains fall back url, so make sure it comes just before static_resoruces
diff --git a/src/encoded/browse.py b/src/encoded/browse.py
index d5bd028ea..b312d0ba7 100644
--- a/src/encoded/browse.py
+++ b/src/encoded/browse.py
@@ -16,10 +16,15 @@ def includeme(config):
 
 
 # DEFAULT_BROWSE_TYPE = "FileSet"
-DEFAULT_BROWSE_TYPE = "OutputFile"
+# DEFAULT_BROWSE_TYPE = "UnalignedReads"
+# DEFAULT_BROWSE_TYPE = "OutputFile"
+
+DEFAULT_BROWSE_TYPE = "File"
+DEFAULT_BROWSE_FACETS = ["file_size"]
+
 DEFAULT_BROWSE_PARAM_LISTS = {
     "type": [DEFAULT_BROWSE_TYPE],
-    "additional_facet": ["file_size"]
+    "additional_facet": DEFAULT_BROWSE_FACETS
 }
 
 @view_config(route_name='browse', request_method='GET', permission='search')
@@ -48,7 +53,5 @@ def browse(context, request, search_type=DEFAULT_BROWSE_TYPE, return_generator=F
     )
 
     # TODO
-    # Returning forced_type="Search" for now as there is not
-    # yet any "Browse" UI for /browse; only "Search" for /search.
-    # return search(context, request, search_type, return_generator, forced_type="Search")
+    # No real /browse-specific UI yet; initially just copied static/components/SearchView.js to BrowseView.js.
     return search(context, request, search_type, return_generator, forced_type="Browse")

From d39c93cb59d9622b96db4ad5cf66d72c25998f1a Mon Sep 17 00:00:00 2001
From: David Michaels 
Date: Wed, 20 Nov 2024 10:53:11 -0500
Subject: [PATCH 03/78] comments in CHANGELOG.rst

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4f8150a20..ad981bf1b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,7 @@ Change Log
 =======
 * 2024-11-20/dmichaels
 * Added module browse.py for /browse; adapted from fourfront/.../search.py/browse.
+  This is for ticket: https://hms-dbmi.atlassian.net/browse/C4-1184
 
 
 0.112.3

From a39073be5cce0287458e1c0d0b61c2cc80abb1e1 Mon Sep 17 00:00:00 2001
From: David Michaels 
Date: Wed, 20 Nov 2024 10:57:13 -0500
Subject: [PATCH 04/78] update version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index cacaa4d34..57240bdaf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "encoded"
-version = "0.112.4"
+version = "0.113.0"
 description = "SMaHT Data Analysis Portal"
 authors = ["4DN-DCIC Team "]
 license = "MIT"

From 566ff2a0fa6f5a3d799eee2d5900d154a41c8d3f Mon Sep 17 00:00:00 2001
From: David Michaels 
Date: Fri, 6 Dec 2024 14:03:31 -0500
Subject: [PATCH 05/78] changes for new /recent_files_summary endpoint for C4-1192

---
 CHANGELOG.rst                       |  22 ++-
 pyproject.toml                      |   2 +-
 src/encoded/browse.py               |  14 +-
 src/encoded/elasticsearch_utils.py  | 212 ++++++++++++++++++++++++++++
 src/encoded/endpoint_utils.py       | 178 +++++++++++++++++++++++
 src/encoded/item_utils/file.py      |  23 ++-
 src/encoded/recent_files_summary.py | 210 +++++++++++++++++++++++++++
 src/encoded/types/file.py           |  54 +++++++
 8 files changed, 698 insertions(+), 17 deletions(-)
 create mode 100644 src/encoded/elasticsearch_utils.py
 create mode 100644 src/encoded/endpoint_utils.py
 create mode 100644 src/encoded/recent_files_summary.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 71e4becac..a8649660f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,12 +7,30 @@ smaht-portal
 Change Log
 ----------
 
-0.115.0
+0.117.0
 =======
-* 2024-11-20/dmichaels
+* 2024-11-20/dmichaels - branch: dmichaels-20241119-browse-view (PR-295)
+
 * Added module browse.py for /browse; adapted from fourfront/.../search.py/browse.
   This is for ticket: https://hms-dbmi.atlassian.net/browse/C4-1184
 
+* New endpoint /recent_files_summary which, by default, returns info for files released
+  within the past three months grouped by release-date, cell-line or donor, and
+  file-description. The specific fields used for these groupings are:
+  - release-date: file_status_tracking.released
+  - cell-line: file_sets.libraries.analytes.samples.sample_sources.cell_line.code
+  - donor: donors.display_title
+  - file-description: release_tracker_description
+  Note that release_tracker_description is a newer (2024-12) calcprop (PR-298/sn_file_release_tracker);
+  and included in this branch are these files from the branch sn_file_release_tracker:
+  - src/encoded/item_utils/file.py
+  - src/encoded/types/file.py
+  Added these new modules to support this new endpoint:
+  - src/encoded/recent_files_summary.py
+  - src/encoded/elasticsearch_utils.py (maybe move to dcicutils eventually)
+  - src/encoded/endpoint_utils.py (maybe move to dcicutils eventually)
+  This is for ticket: https://hms-dbmi.atlassian.net/browse/C4-1192
+
 
 0.114.0
 =======
diff --git a/pyproject.toml b/pyproject.toml
index 5b006392c..4907ddc16 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "encoded"
-version = "0.115.0"
+version = "0.117.0"
 description = "SMaHT Data Analysis Portal"
 authors = ["4DN-DCIC Team "]
 license = "MIT"
diff --git a/src/encoded/browse.py b/src/encoded/browse.py
index b312d0ba7..3ad80270d 100644
--- a/src/encoded/browse.py
+++ b/src/encoded/browse.py
@@ -1,10 +1,12 @@
-import structlog
+from pyramid.httpexceptions import HTTPBadRequest, HTTPFound
+from pyramid.security import Authenticated
 from pyramid.view import view_config
+import structlog
 from webob.multidict import MultiDict
-from pyramid.httpexceptions import HTTPFound
 from urllib.parse import urlencode
 from snovault.search.search import search
 from snovault.util import debug_log
+from encoded.recent_files_summary import recent_files_summary
 
 log = structlog.getLogger(__name__)
 
@@ -12,6 +14,7 @@
 
 def includeme(config):
     config.add_route('browse', '/browse{slash:/?}')
+    config.add_route("recent_files_summary_endpoint", "/recent_files_summary")
     config.scan(__name__)
 
 
@@ -55,3 +58,10 @@ def browse(context, request, search_type=DEFAULT_BROWSE_TYPE, return_generator=F
 
     # TODO
     # No real /browse-specific UI yet; initially just copied static/components/SearchView.js to BrowseView.js.
return search(context, request, search_type, return_generator, forced_type="Browse") + + +@view_config(route_name="recent_files_summary_endpoint", request_method=["GET"], effective_principals=Authenticated) +@debug_log +def recent_files_summary_endpoint(context, request): + results = recent_files_summary(request) + return results diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py new file mode 100644 index 000000000..c45258d5f --- /dev/null +++ b/src/encoded/elasticsearch_utils.py @@ -0,0 +1,212 @@ +from copy import deepcopy +from typing import Any, Callable, List, Optional, Tuple + + +def create_elasticsearch_aggregation_query(fields: List[str], + aggregation_property_name: Optional[str] = None, + max_buckets: Optional[int] = None, + missing_value: Optional[str] = None, + create_field_aggregation: Optional[Callable] = None) -> dict: + + global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE + + if not (isinstance(fields, list) and fields and isinstance(field := fields[0], str) and field): + return {} + if not isinstance(missing_value, str): + missing_value = AGGREGATION_NO_VALUE + if not (isinstance(max_buckets, int) and (max_buckets > 0)): + max_buckets = AGGREGATION_MAX_BUCKETS + + if not (callable(create_field_aggregation) and + isinstance(field_aggregation := create_field_aggregation(field), dict)): + field_aggregation = { + "terms": { + "field": f"embedded.{field}.raw", + "missing": missing_value, + "size": max_buckets + } + } + + if not (isinstance(aggregation_property_name, str) and aggregation_property_name): + aggregation_property_name = field + aggregation = {aggregation_property_name: field_aggregation} + aggregation[aggregation_property_name]["meta"] = {"field_name": field} + + if nested_aggregation := create_elasticsearch_aggregation_query( + fields[1:], max_buckets=max_buckets, + missing_value=missing_value, + create_field_aggregation=create_field_aggregation): + aggregation[aggregation_property_name]["aggs"] = nested_aggregation + + return aggregation + + +def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bool = False) -> Optional[dict]: + + def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: + if isinstance(aggregation, dict) and isinstance(aggregation.get("buckets"), list): + if isinstance(field_name := aggregation.get("meta", {}).get("field_name"), str) and field_name: + if isinstance(aggregation_key, str) and aggregation_key: + if field_name != aggregation_key: + return None + return field_name + return None + + def get_nested_aggregation(aggregation: dict) -> Optional[dict]: + if isinstance(aggregation, dict): + for key in aggregation: + if get_aggregation_key(aggregation[key], key): + return aggregation[key] + return None + + def get_aggregation_bucket_value(aggregation_bucket: dict) -> Optional[Any]: + if isinstance(aggregation_bucket, dict): + return aggregation_bucket.get("key_as_string", aggregation_bucket.get("key")) + return None + + def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]: + if isinstance(aggregation_bucket, dict): + if isinstance(doc_count := aggregation_bucket.get("doc_count"), int): + return doc_count + return None + + def get_aggregation_buckets_doc_count(aggregation: dict): + buckets_doc_count = 0 + if get_aggregation_key(aggregation): + for aggregation_bucket in aggregation["buckets"]: + if (doc_count := get_aggregation_bucket_doc_count(aggregation_bucket)) is not None: + buckets_doc_count += doc_count + 
return buckets_doc_count
+
+    def find_aggregation_bucket(aggregation: dict, value: str) -> Optional[dict]:
+        if get_aggregation_key(aggregation):
+            for aggregation_bucket in aggregation["buckets"]:
+                if get_aggregation_bucket_value(aggregation_bucket) == value:
+                    return aggregation_bucket
+        return None
+
+    def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[int]]:
+        merged_item_count = 0
+        if not ((aggregation_key := get_aggregation_key(source)) and (get_aggregation_key(target) == aggregation_key)):
+            return None, None
+        for source_bucket in source["buckets"]:
+            if (((source_bucket_value := get_aggregation_bucket_value(source_bucket)) is None) or
+                ((source_bucket_item_count := get_aggregation_bucket_doc_count(source_bucket)) is None)):  # noqa
+                continue
+            if (target_bucket := find_aggregation_bucket(target, source_bucket_value)):
+                if source_nested_aggregation := get_nested_aggregation(source_bucket):
+                    if target_nested_aggregation := get_nested_aggregation(target_bucket):
+                        merged_item_count, _ = merge_results(target_nested_aggregation, source_nested_aggregation)
+                        if merged_item_count is None:
+                            if source_nested_aggregation_key := get_aggregation_key(source_nested_aggregation):
+                                target_bucket[source_nested_aggregation_key] = \
+                                    source_bucket[source_nested_aggregation_key]
+                                target_bucket["doc_count"] += \
+                                    get_aggregation_buckets_doc_count(source_bucket[source_nested_aggregation_key])
+                        elif merged_item_count > 0:
+                            target_bucket["doc_count"] += merged_item_count
+                elif get_aggregation_bucket_value(target_bucket) is not None:
+                    if get_aggregation_bucket_doc_count(target_bucket) is not None:
+                        target_bucket["doc_count"] += source_bucket_item_count
+                        merged_item_count += source_bucket_item_count
+            continue
+        return merged_item_count, target
+
+    if copy is True:
+        target = deepcopy(target)
+    return merge_results(target, source)[1]
+
+
+def normalize_elasticsearch_aggregation_results(aggregation: dict,
+                                                sort: bool = False,
+                                                additional_properties: Optional[dict] = None,
+                                                remove_empty_items: bool = True) -> dict:
+
+    def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]:
+        # TODO: same as in merge_elasticsearch_aggregation_results function
+        if isinstance(aggregation, dict) and isinstance(aggregation.get("buckets"), list):
+            if isinstance(field_name := aggregation.get("meta", {}).get("field_name"), str) and field_name:
+                if isinstance(aggregation_key, str) and aggregation_key:
+                    if field_name != aggregation_key:
+                        return None
+                return field_name
+        return None
+
+    def get_aggregation_bucket_value(aggregation_bucket: dict) -> Optional[Any]:
+        # TODO: same as in merge_elasticsearch_aggregation_results function
+        if isinstance(aggregation_bucket, dict):
+            return aggregation_bucket.get("key_as_string", aggregation_bucket.get("key"))
+        return None
+
+    def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]:
+        # TODO: same as in merge_elasticsearch_aggregation_results function
+        if isinstance(aggregation_bucket, dict):
+            if isinstance(doc_count := aggregation_bucket.get("doc_count"), int):
+                return doc_count
+        return None
+
+    def get_nested_aggregations(data: dict) -> List[dict]:
+        results = []
+        if isinstance(data, dict):
+            for key in data:
+                if get_aggregation_key(data[key]):
+                    results.append(data[key])
+            if (not results) and data.get("buckets", list):
+                results.append(data)
+        return results
+
+    def find_group_item(group_items: List[dict], value: Any) -> Optional[dict]:
+        if isinstance(group_items, list):
+            for group_item in group_items:
+                if isinstance(group_item, dict) and (value == group_item.get("value")):
+                    return group_item
+        return None
+
+    def normalize_results(aggregation: dict,
+                          key: Optional[str] = None, value: Optional[str] = None,
+                          additional_properties: Optional[dict] = None) -> dict:
+        nonlocal remove_empty_items
+        if not (aggregation_key := get_aggregation_key(aggregation)):
+            return {}
+        group_items = [] ; item_count = 0  # noqa
+        for bucket in aggregation["buckets"]:
+            if (((bucket_value := get_aggregation_bucket_value(bucket)) is None) or
+                ((bucket_item_count := get_aggregation_bucket_doc_count(bucket)) is None)):  # noqa
+                continue
+            item_count += bucket_item_count
+            if nested_aggregations := get_nested_aggregations(bucket):
+                for nested_aggregation in nested_aggregations:
+                    if normalized_aggregation := normalize_results(nested_aggregation, aggregation_key, bucket_value):
+                        if group_item := find_group_item(group_items, bucket_value):
+                            # group_item["items"].extend(normalized_aggregation["items"])
+                            for normalized_aggregation_item in normalized_aggregation["items"]:
+                                group_item["items"].append(normalized_aggregation_item)
+                                group_item["count"] += normalized_aggregation_item["count"]
+                        else:
+                            group_item = normalized_aggregation
+                            group_items.append(group_item)
+            else:
+                if (remove_empty_items is False) or (bucket_item_count > 0):
+                    group_item = {"name": aggregation_key, "value": bucket_value, "count": bucket_item_count}
+                    group_items.append(group_item)
+        if (remove_empty_items is not False) and (not group_items):
+            return {}
+        results = {"name": key, "value": value, "count": item_count, "items": group_items}
+        if isinstance(additional_properties, dict) and additional_properties:
+            results = {**additional_properties, **results}
+        if key is None:
+            del results["name"]
+        if value is None:
+            del results["value"]
+        return results
+
+    def sort_results(data: dict) -> None:
+        if isinstance(data, dict) and isinstance(items := data.get("items"), list):
+            items.sort(key=lambda item: (-item.get("count", 0), item.get("value", "")))
+            for item in items:
+                sort_results(item)
+
+    results = normalize_results(aggregation, additional_properties=additional_properties)
+    if sort is True:
+        sort_results(results)
+    return results
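
A sketch of how these helpers compose (field names as used by recent_files_summary.py below; shapes are illustrative):

    # create_elasticsearch_aggregation_query(
    #     ["donors.display_title", "release_tracker_description"],
    #     max_buckets=100, missing_value="No value")
    # produces, roughly:
    #   {"donors.display_title": {
    #       "terms": {"field": "embedded.donors.display_title.raw",
    #                 "missing": "No value", "size": 100},
    #       "meta": {"field_name": "donors.display_title"},
    #       "aggs": {"release_tracker_description": {
    #           "terms": {"field": "embedded.release_tracker_description.raw",
    #                     "missing": "No value", "size": 100},
    #           "meta": {"field_name": "release_tracker_description"}}}}}
    # merge_elasticsearch_aggregation_results then folds two such result trees
    # together bucket by bucket, and normalize_elasticsearch_aggregation_results
    # flattens the merged tree into nested {"name", "value", "count", "items"} records.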
diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py
new file mode 100644
index 000000000..f03d09328
--- /dev/null
+++ b/src/encoded/endpoint_utils.py
@@ -0,0 +1,178 @@
+import calendar
+from datetime import date, datetime
+from dateutil.relativedelta import relativedelta
+import pyramid
+from typing import Any, List, Optional, Tuple, Union
+from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string
+
+
+def request_arg(request: pyramid.request.Request, name: str, fallback: Optional[str] = None) -> Optional[str]:
+    return str(value).strip() if (value := request.params.get(name, None)) is not None else fallback
+
+
+def request_arg_int(request: pyramid.request.Request, name: str, fallback: Optional[int] = 0) -> Optional[Any]:
+    if (value := request_arg(request, name)) is not None:
+        try:
+            return int(value)
+        except Exception:
+            pass
+    return fallback
+
+
+def request_arg_bool(request: pyramid.request.Request, name: str, fallback: Optional[bool] = False) -> Optional[bool]:
+    return fallback if (value := request_arg(request, name)) is None else (value.lower() == "true")
+
+
+def request_args(request: pyramid.request.Request,
+                 name: str, fallback: Optional[str] = None, duplicates: bool = False) -> List[str]:
+    args = []
+    if isinstance(value := request.params.getall(name), list):
+        # Note that request.params.getall always returns a list,
+        # even if the named query parameter is not specified at all.
+        if value == []:
+            if request.params.get(name) is None:
+                # Only return the fallback if the named query parameter was not specified at all.
+                return fallback
+        for item in value:
+            if isinstance(item, str) and (item := item.strip()):
+                if (item not in args) or (duplicates is True):
+                    args.append(item)
+    return args
+
+
+def parse_date_range_related_arguments(
+        from_date: Optional[Union[str, datetime, date]],
+        thru_date: Optional[Union[str, datetime, date]],
+        nmonths: Optional[Union[str, int]] = None,
+        include_current_month: bool = True,
+        strings: bool = False) -> Tuple[Optional[Union[str, datetime]], Optional[Union[str, datetime]]]:
+
+    """
+    Returns from/thru dates based on the given from/thru date arguments and optional nmonths argument.
+    Given dates may be datetime or date objects or strings. Returned dates are datetime objects, or,
+    if the given strings argument is True, strings (formatted as YYYY-MM-DD).
+
+    If both of the given from/thru dates are specified/valid then those are returned
+    and the given nmonths argument is not used.
+
+    If only the given from date is specified then a None thru date is returned, UNLESS the given nmonths
+    argument represents a positive integer, in which case the returned thru date will be nmonths months
+    subsequent to the given from date; or if the given nmonths represents zero, in which case the
+    returned thru date will be the last date of the month of the given from date.
+
+    If only the given thru date is specified then a None from date is returned, UNLESS the given nmonths
+    argument represents a negative integer, in which case the returned from date will be nmonths months
+    previous to the given thru date; or if the given nmonths represents zero, in which case
+    the returned from date will be the first date of the month of the given thru date.
+
+    If neither of the given from/thru dates is specified then None is returned for both, UNLESS the given
+    nmonths argument represents a non-zero integer, in which case the returned from/thru dates will represent
+    the past (absolute value) nmonths months starting with the month previous to the month of "today"; however,
+    if include_current_month is True, it is rather the past nmonths starting with the month of "today".
+    """
+    from_date = parse_datetime_string(from_date, notz=True)
+    thru_date = parse_datetime_string(thru_date, last_day_of_month_if_no_day=True, notz=True)
+    if not isinstance(nmonths, int):
+        if isinstance(nmonths, str) and (nmonths := nmonths.strip()):
+            try:
+                nmonths = int(nmonths)
+            except Exception:
+                nmonths = 0
+        else:
+            nmonths = 0
+    if from_date:
+        if (not thru_date) and isinstance(nmonths, int):
+            if nmonths > 0:
+                thru_date = _add_months(from_date, nmonths)
+            elif nmonths == 0:
+                thru_date = _get_last_date_of_month(from_date)
+    elif thru_date:
+        if isinstance(nmonths, int):
+            if nmonths < 0:
+                from_date = _add_months(thru_date, nmonths)
+            elif nmonths == 0:
+                from_date = _get_first_date_of_month(thru_date)
+    elif isinstance(nmonths, int) and ((nmonths := abs(nmonths)) != 0):
+        # If no (valid) from/thru dates are given, but the absolute value of nmonths is a non-zero integer,
+        # then return from/thru dates for the last nmonths months, ending with the current month (or with
+        # the month previous to the current month if include_current_month is not True).
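+        # For example, for a hypothetical "today" of 2024-12-06 and nmonths=3:
+        #   include_current_month=True:  from_date=2024-09-30, thru_date=2024-12-31
+        #   include_current_month=False: from_date=2024-08-30, thru_date=2024-11-30
+        # (relativedelta month arithmetic lands on valid month-end days.)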
+        # thru_date = _add_months(_get_last_date_of_month(), -1)
+        thru_date = _get_last_date_of_month()
+        if include_current_month is not True:
+            thru_date = _add_months(thru_date, -1)
+        from_date = _add_months(thru_date, -nmonths)
+    if strings is True:
+        return (from_date.strftime("%Y-%m-%d") if from_date else None,
+                thru_date.strftime("%Y-%m-%d") if thru_date else None)
+    return from_date, thru_date
+
+
+def parse_datetime_string(value: Union[str, datetime, date],
+                          last_day_of_month_if_no_day: bool = False,
+                          notz: bool = False) -> Optional[datetime]:
+    """
+    Wrapper around dcicutils.datetime_utils.parse_datetime_string to handle a few special cases for convenience.
+    """
+    last_day_of_month = False
+    if not isinstance(value, datetime):
+        if isinstance(value, date):
+            value = datetime.combine(value, datetime.min.time())
+        elif isinstance(value, str):
+            if (len(value) == 8) and value.isdigit():
+                # Special case to accept for example "20241206" to mean "2024-12-06".
+                value = f"{value[0:4]}-{value[4:6]}-{value[6:8]}"
+            elif (len(value) == 7) and (value[4] == "-") and value[0:4].isdigit() and value[5:].isdigit():
+                # Special case to accept for example "2024-10" to mean "2024-10-01".
+                value = f"{value}-01"
+                last_day_of_month = last_day_of_month_if_no_day
+            elif (len(value) == 7) and (value[2] == "/") and value[0:2].isdigit() and value[3:].isdigit():
+                # Special case to accept for example "11/2024" to mean "2024-11-01".
+                value = f"{value[3:]}-{value[0:2]}-01"
+                last_day_of_month = last_day_of_month_if_no_day
+            elif (len(value) == 6) and (value[1] == "/") and value[0:1].isdigit() and value[2:].isdigit():
+                # Special case to accept for example "9/2024" to mean "2024-09-01".
+                value = f"{value[2:]}-0{value[0:1]}-01"
+                last_day_of_month = last_day_of_month_if_no_day
+            if not (value := dcicutils_parse_datetime_string(value)):
+                return None
+        else:
+            return None
+    value = value.replace(tzinfo=None) if notz is True else value
+    if last_day_of_month:
+        value = _get_last_date_of_month(value)
+    return value
+
+
+def _get_first_date_of_month(day: Optional[Union[datetime, date, str]] = None) -> datetime:
+    """
+    Returns a datetime object representing the first day of the month of the given date;
+    this given date may be a datetime or date object, or string representing a date or
+    datetime; if the given argument is unspecified or incorrect then assumes "today".
+    """
+    if not (day := parse_datetime_string(day, notz=True)):
+        day = datetime.today().replace(tzinfo=None)
+    return day.replace(day=1)
+
+
+def _get_last_date_of_month(day: Optional[Union[datetime, date, str]] = None) -> datetime:
+    """
+    Returns a datetime object representing the last day of the month of the given date;
+    this given date may be a datetime or date object, or string representing a date or
+    datetime; if the given argument is unspecified or incorrect then assumes "today".
+    """
+    if not (day := parse_datetime_string(day)):
+        day = datetime.today().replace(tzinfo=None)
+    return datetime(day.year, day.month, calendar.monthrange(day.year, day.month)[1])
+
+
+def _add_months(day: Optional[Union[datetime, date, str]] = None, nmonths: int = 0) -> datetime:
+    """
+    Returns a datetime object representing the given date with the given nmonths number of months
+    added (or subtracted if negative) to (or from) that given date; this given date may be a
+    datetime or date object, or a string representing a date or datetime; if the given argument
+    is unspecified or incorrect then assumes "today".
+    """
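+    # Illustrative examples (dateutil.relativedelta clamps to valid month-end days):
+    #   _add_months("2024-12-31", -1) -> datetime(2024, 11, 30)
+    #   _add_months(datetime(2024, 1, 31), 1) -> datetime(2024, 2, 29)  (leap year)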
+ """ + if not (day := parse_datetime_string(day, notz=True)): + day = datetime.today().replace(tzinfo=None) + if isinstance(nmonths, int) and (nmonths != 0): + return day + relativedelta(months=nmonths) + return day diff --git a/src/encoded/item_utils/file.py b/src/encoded/item_utils/file.py index 595bae82f..2305adec9 100644 --- a/src/encoded/item_utils/file.py +++ b/src/encoded/item_utils/file.py @@ -76,6 +76,11 @@ def get_reference_genome(properties: Dict[str, Any]) -> Union[str, Dict[str, Any return properties.get("reference_genome", "") +def get_gene_annotation(properties: Dict[str, Any]) -> Union[str, Dict[str, Any]]: + """Get gene annotation from properties.""" + return properties.get("gene_annotation", "") + + def get_file_sets(properties: Dict[str, Any]) -> List[Union[str, Dict[str, Any]]]: """Get file sets from properties.""" return properties.get("file_sets", []) @@ -412,17 +417,11 @@ def has_mobile_element_insertions(file: Dict[str, Any]) -> bool: return "MEI" in get_data_type(file) -def get_associated_files_status( - file: Dict[str, Any], request_handler: RequestHandler, at_id: str -) -> List[str]: - """Get associated files status from the FileSet.files_status calcprop""" - return get_property_values_from_identifiers( - request_handler, - get_file_sets(file), - partial(file_set.get_associated_files_status, request_handler, at_id) - ) - - def get_override_group_coverage(file: Dict[str, Any]) -> str: """Get override group coverage from properties.""" - return file.get("override_group_coverage","") \ No newline at end of file + return file.get("override_group_coverage","") + + +def get_release_tracker_description(file: Dict[str, Any]) -> str: + """Get release tracker description from properties.""" + return file.get("release_tracker_description","") diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py new file mode 100644 index 000000000..a51319692 --- /dev/null +++ b/src/encoded/recent_files_summary.py @@ -0,0 +1,210 @@ +import pyramid +from typing import List, Optional +from urllib.parse import urlencode +from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query +from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results +from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results +from encoded.endpoint_utils import parse_date_range_related_arguments +from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int +from snovault.search.search import search as snovault_search +from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq + +QUERY_FILE_TYPES = ["OutputFile"] +QUERY_FILE_STATUSES = ["released"] +QUERY_FILE_CATEGORIES = ["!Quality Control"] +QUERY_RECENT_MONTHS = 3 +QUERY_INCLUDE_CURRENT_MONTH = True + +AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released" +AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" +AGGREGATION_FIELD_DONOR = "donors.display_title" +AGGREGATION_FIELD_FILE_DESCRIPTOR = "release_tracker_description" + +AGGREGATION_MAX_BUCKETS = 100 +AGGREGATION_NO_VALUE = "No value" + + +def recent_files_summary(request: pyramid.request.Request) -> dict: + """ + This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, + by default, info for files released withing the past three months grouped by release-date, + cell-line or donor, and file-description. 
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
new file mode 100644
index 000000000..a51319692
--- /dev/null
+++ b/src/encoded/recent_files_summary.py
@@ -0,0 +1,210 @@
+import pyramid
+from typing import List, Optional
+from urllib.parse import urlencode
+from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query
+from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results
+from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results
+from encoded.endpoint_utils import parse_date_range_related_arguments
+from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int
+from snovault.search.search import search as snovault_search
+from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq
+
+QUERY_FILE_TYPES = ["OutputFile"]
+QUERY_FILE_STATUSES = ["released"]
+QUERY_FILE_CATEGORIES = ["!Quality Control"]
+QUERY_RECENT_MONTHS = 3
+QUERY_INCLUDE_CURRENT_MONTH = True
+
+AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released"
+AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"
+AGGREGATION_FIELD_DONOR = "donors.display_title"
+AGGREGATION_FIELD_FILE_DESCRIPTOR = "release_tracker_description"
+
+AGGREGATION_MAX_BUCKETS = 100
+AGGREGATION_NO_VALUE = "No value"
+
+
+def recent_files_summary(request: pyramid.request.Request) -> dict:
+    """
+    This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return,
+    by default, info for files released within the past three months grouped by release-date,
+    cell-line or donor, and file-description. The specific fields used for these groupings are:
+
+    - release-date: file_status_tracking.released
+    - cell-line: file_sets.libraries.analytes.samples.sample_sources.cell_line.code
+    - donor: donors.display_title
+    - file-description: release_tracker_description
+
+    Note that release_tracker_description is a newer (2024-12)
+    calculated property - see PR-298 (branch: sn_file_release_tracker).
+
+    By default the current (assuming partial) month IS included, so we really return info for
+    the past FULL three months plus whatever time has currently elapsed for the current month.
+    Pass the include_current_month=false query argument to NOT include the current month.
+
+    The number of months of data can be controlled using the nmonths query argument, e.g. nmonths=6.
+
+    A specific date range can also be passed in e.g. using from_date=2024-08-01 and thru_date=2024-10-31.
+
+    For testing purposes, a date field other than the default file_status_tracking.released can
+    also be specified using the date_property_name query argument. And file statuses other than
+    released can be queried for using one or more status query arguments, e.g. status=uploaded.
+    """
+
+    date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE)
+    max_buckets = request_arg_int(request, "max_buckets", AGGREGATION_MAX_BUCKETS)
+    nosort = request_arg_bool(request, "nosort")
+    debug = request_arg_bool(request, "debug")
+    debug_query = request_arg_bool(request, "debug_query")
+    raw = request_arg_bool(request, "raw")
+
+    def create_query(request: pyramid.request.Request) -> str:
+
+        global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES
+        nonlocal date_property_name
+
+        types = request_args(request, "type", QUERY_FILE_TYPES)
+        statuses = request_args(request, "status", QUERY_FILE_STATUSES)
+        categories = request_args(request, "category", QUERY_FILE_CATEGORIES)
+        recent_months = request_arg_int(request, "nmonths", request_arg_int(request, "months", QUERY_RECENT_MONTHS))
+        from_date = request_arg(request, "from_date")
+        thru_date = request_arg(request, "thru_date")
+        include_current_month = request_arg_bool(request, "include_current_month", QUERY_INCLUDE_CURRENT_MONTH)
+
+        from_date, thru_date = parse_date_range_related_arguments(from_date, thru_date, nmonths=recent_months,
+                                                                  include_current_month=include_current_month,
+                                                                  strings=True)
+        query_parameters = {
+            "type": types if types else None,
+            "status": statuses if statuses else None,
+            "data_category": categories if categories else None,
+            f"{date_property_name}.from": from_date if from_date else None,
+            f"{date_property_name}.to": thru_date if thru_date else None,
+            "from": 0,
+            "limit": 0
+        }
+        query_parameters = {key: value for key, value in query_parameters.items() if value is not None}
+        query_string = urlencode(query_parameters, True)
+        # Hackish: change "=%21" to "%21=" in the URL-encoded query string, e.g. to turn the
+        # "data_category": ["!Quality Control"] entry in query_parameters above into: data_category%21=Quality+Control
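+        # For example: "...&data_category=%21Quality+Control&..." becomes
+        # "...&data_category%21=Quality+Control&..." (i.e. data_category != Quality Control).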
+        query_string = query_string.replace("=%21", "%21=")
+        return f"/search/?{query_string}"
+
+    def create_aggregations_query(aggregation_fields: List[str]) -> dict:
+        global AGGREGATION_NO_VALUE
+        nonlocal date_property_name, max_buckets
+        aggregations = []
+        if not isinstance(aggregation_fields, list):
+            aggregation_fields = [aggregation_fields]
+        for item in aggregation_fields:
+            if isinstance(item, str) and (item := item.strip()) and (item not in aggregations):
+                aggregations.append(item)
+        if not aggregations:
+            return {}
+        def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
+            nonlocal date_property_name
+            if field == date_property_name:
+                return {
+                    "date_histogram": {
+                        "field": f"embedded.{field}",
+                        "calendar_interval": "month",
+                        "format": "yyyy-MM",
+                        "missing": "1970-01",
+                        "order": {"_key": "desc"}
+                    }
+                }
+        aggregation_query = create_elasticsearch_aggregation_query(
+            aggregations,
+            max_buckets=max_buckets,
+            missing_value=AGGREGATION_NO_VALUE,
+            create_field_aggregation=create_field_aggregation)
+        return aggregation_query[date_property_name]
+
+    def execute_query(request: pyramid.request.Request, query: str, aggregations_query: dict) -> dict:
+        request = snovault_make_search_subreq(request, path=query, method="GET")
+        results = snovault_search(None, request, custom_aggregations=aggregations_query)
+        return results
+
+    query = create_query(request)
+
+    aggregations_by_cell_line = [
+        date_property_name,
+        AGGREGATION_FIELD_CELL_LINE,
+        AGGREGATION_FIELD_FILE_DESCRIPTOR
+    ]
+
+    aggregations_by_donor = [
+        date_property_name,
+        AGGREGATION_FIELD_DONOR,
+        AGGREGATION_FIELD_FILE_DESCRIPTOR
+    ]
+
+    aggregations_query = {
+        "group_by_cell_line": create_aggregations_query(aggregations_by_cell_line),
+        "group_by_donor": create_aggregations_query(aggregations_by_donor)
+    }
+
+    if debug_query:
+        return {"query": query, "aggregations_query": aggregations_query}
+
+    raw_results = execute_query(request, query, aggregations_query)
+
+    # Note that the doc_count values returned by ElasticSearch do actually seem to be for unique items,
+    # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3
+    # has file_sets.libraries.analytes.samples.sample_sources.cell_line.code values for both HG00438 and HG005),
+    # then its doc_count will not count it twice. This creates a situation where it might look like the counts
+    # are wrong in this returned merged/normalized result set, where the outer item count is less than the sum of
+    # the individual counts within each sub-group. For example, the below result shows a top-level doc_count of 1
+    # even though there are 2 documents, 1 in the HG00438 group and the other in the HG005 group; this would be
+    # because the same unique file has a cell_line.code of both HG00438 and HG005.
+ # { + # "meta": { "field_name": "file_status_tracking.released" }, + # "buckets": [ + # { + # "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 1, + # "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + # "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + # "buckets": [ + # { "key": "HG00438", "doc_count": 1, + # "release_tracker_description": { + # "meta": { "field_name": "release_tracker_description" }, + # "buckets": [ + # { "key": "WGS Illumina NovaSeq X bam", "doc_count": 1 }, + # ] + # } + # }, + # { "key": "HG005", "doc_count": 1, + # "release_tracker_description": { + # "meta": { "field_name": "release_tracker_description" }, + # "buckets": [ + # { "key": "Fiber-seq PacBio Revio bam", "doc_count": 1 } + # ] + # } + # } + # ] + # } + # } + # ] + # } + + if raw: + # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. + if debug: + raw_results = {"query": query, "aggregations_query": aggregations_query, "raw_results": raw_results} + elif "@id" in raw_results: + # Unless we do this we get redirect to the URL in this field, for example + # to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control + # &file_status_tracking.released.from=2024-09-30 + # &file_status_tracking.released.to=2024-12-31&from=0&limit=0' + del raw_results["@id"] + return raw_results + + if not (raw_results := raw_results.get("aggregations")): + return {} + + raw_results_by_cell_line = raw_results.get("group_by_cell_line") + raw_results_by_donor = raw_results.get("group_by_donor") + merged_results = merge_elasticsearch_aggregation_results(raw_results_by_cell_line, raw_results_by_donor) + additional_properties = {"query": query, "aggregations_query": aggregations_query} if debug else None + return normalize_elasticsearch_aggregation_results(merged_results, sort=not nosort, + additional_properties=additional_properties) diff --git a/src/encoded/types/file.py b/src/encoded/types/file.py index 1c7f456b8..bb0f50245 100644 --- a/src/encoded/types/file.py +++ b/src/encoded/types/file.py @@ -62,6 +62,7 @@ from ..item_utils.utils import ( get_property_value_from_identifier, get_property_values_from_identifiers, + get_unique_values, RequestHandler, ) @@ -252,6 +253,10 @@ class CalcPropConstants: } }, } + RELEASE_TRACKER_DESCRIPTION = { + "title": "Release Tracker Description", + "type": "string", + } SAMPLE_SUMMARY_DONOR_IDS = "donor_ids" SAMPLE_SUMMARY_TISSUES = "tissues" SAMPLE_SUMMARY_SAMPLE_NAMES = "sample_names" @@ -695,6 +700,22 @@ def analysis_summary( reference_genome=reference_genome, ) + @calculated_property(schema=CalcPropConstants.RELEASE_TRACKER_DESCRIPTION) + def release_tracker_description( + self, + request: Request, + file_sets: Optional[List[str]] = None + ) -> Union[str, None]: + """Get file release tracker description for display on home page.""" + result = None + if file_sets: + request_handler = RequestHandler(request=request) + result = self._get_release_tracker_description( + request_handler, + file_properties=self.properties + ) + return result + def _get_libraries( self, request: Request, file_sets: Optional[List[str]] = None ) -> List[str]: @@ -979,6 +1000,39 @@ def _get_analysis_summary_fields( ), } return {key: value for key, value in to_include.items() if value} + + def _get_release_tracker_description( + self, + request_handler: RequestHandler, + file_properties: Dict[str, Any], + ) -> Union[str, None]: + """Get release tracker description for display on the 
home page.""" + assay_title= get_unique_values( + request_handler.get_items(file_utils.get_assays(file_properties, request_handler)), + item_utils.get_display_title, + ) + sequencer_title = get_unique_values( + request_handler.get_items( + file_utils.get_sequencers(file_properties, request_handler)), + item_utils.get_display_title, + ) + file_format_title = get_property_value_from_identifier( + request_handler, + file_utils.get_file_format(file_properties), + item_utils.get_display_title, + ) + if len(assay_title) > 1 or len(sequencer_title) > 1: + # More than one unique assay or sequencer + return "" + elif len(assay_title) == 0 or len(sequencer_title) == 0: + # No assay or sequencer + return "" + to_include = [ + assay_title[0], + sequencer_title[0], + file_format_title + ] + return " ".join(to_include) @view_config(name='drs', context=File, request_method='GET', From 6148956e3a4079175de1ce94a87306cc264dc908 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 7 Dec 2024 00:38:26 -0500 Subject: [PATCH 06/78] debugging --- src/encoded/elasticsearch_utils.py | 25 ++++++++---- src/encoded/recent_files_summary.py | 61 ++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 9 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index c45258d5f..09a0884ea 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -109,7 +109,12 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ if get_aggregation_bucket_doc_count(target_bucket) is not None: target_bucket["doc_count"] += source_bucket_item_count merged_item_count += source_bucket_item_count - continue + else: + target["buckets"].append(source_bucket) + if isinstance(target.get("doc_count"), int): + target["doc_count"] += source_bucket_item_count + else: + target["doc_count"] = source_bucket_item_count return merged_item_count, target if copy is True: @@ -149,10 +154,12 @@ def get_nested_aggregations(data: dict) -> List[dict]: results = [] if isinstance(data, dict): for key in data: - if get_aggregation_key(data[key]): + if get_aggregation_key(data[key]) and data[key]["buckets"]: results.append(data[key]) - if (not results) and data.get("buckets", list): - results.append(data) + if not results: + if ((isinstance(data.get("buckets"), list) and data["buckets"]) or + (isinstance(data.get("key"), str) and isinstance(data.get("doc_count"), int))): + results.append(data) return results def find_group_item(group_items: List[dict], value: Any) -> Optional[dict]: @@ -178,7 +185,6 @@ def normalize_results(aggregation: dict, for nested_aggregation in nested_aggregations: if normalized_aggregation := normalize_results(nested_aggregation, aggregation_key, bucket_value): if group_item := find_group_item(group_items, bucket_value): - # group_item["items"].extend(normalized_aggregation["items"]) for normalized_aggregation_item in normalized_aggregation["items"]: group_item["items"].append(normalized_aggregation_item) group_item["count"] += normalized_aggregation_item["count"] @@ -200,11 +206,14 @@ def normalize_results(aggregation: dict, del results["value"] return results - def sort_results(data: dict) -> None: + def sort_results(data: dict, _level: int = 0) -> None: if isinstance(data, dict) and isinstance(items := data.get("items"), list): - items.sort(key=lambda item: (-item.get("count", 0), item.get("value", ""))) + if _level == 0: # TODO: hack/parameterize + items.sort(key=lambda item: item.get("value", ""), reverse=True) + else: + 
items.sort(key=lambda item: (-item.get("count", 0), item.get("value", ""))) for item in items: - sort_results(item) + sort_results(item, _level=_level + 1) results = normalize_results(aggregation, additional_properties=additional_properties) if sort is True: diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index a51319692..b0aeee24e 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,4 +1,5 @@ import pyramid +from copy import deepcopy from typing import List, Optional from urllib.parse import urlencode from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query @@ -144,6 +145,42 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que "group_by_donor": create_aggregations_query(aggregations_by_donor) } + if False: + aggregations_query["group_by_cell_line"]["filter"] = { + "bool": { + "must": [{ + "exists": { + "field": f"embedded.{AGGREGATION_FIELD_CELL_LINE}.raw" + } + }] + } + } + aggregations_query["group_by_donor"]["filter"] = { + "bool": { + "must": [{ + "exists": { + "field": f"embedded.{AGGREGATION_FIELD_DONOR}.raw" + } + }] + } + } + # aggregations_query["group_by_cell_line"]["aggs"] = {"date_histogram": aggregations_query["group_by_cell_line"]["aggs"]} + # aggregations_query["group_by_donor"]["aggs"] = {"date_histogram": aggregations_query["group_by_donor"]["aggs"]} + aggregations_query["group_by_cell_line"]["aggs"] = { + "date_histogram": { + "date_histogram": aggregations_query["group_by_cell_line"]["date_histogram"], + "aggs": aggregations_query["group_by_cell_line"]["aggs"] + } + } + del aggregations_query["group_by_cell_line"]["date_histogram"] + aggregations_query["group_by_donor"]["aggs"] = { + "date_histogram": { + "date_histogram": aggregations_query["group_by_donor"]["date_histogram"], + "aggs": aggregations_query["group_by_donor"]["aggs"] + } + } + del aggregations_query["group_by_donor"]["date_histogram"] + if debug_query: return {"query": query, "aggregations_query": aggregations_query} @@ -202,9 +239,31 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que if not (raw_results := raw_results.get("aggregations")): return {} + if debug: + raw_results_original = deepcopy(raw_results) + raw_results_by_cell_line = raw_results.get("group_by_cell_line") raw_results_by_donor = raw_results.get("group_by_donor") + + if False: + raw_results_by_cell_line["buckets"] = raw_results_by_cell_line["date_histogram"]["buckets"] + del raw_results_by_cell_line["date_histogram"] + raw_results_by_donor["buckets"] = raw_results_by_donor["date_histogram"]["buckets"] + del raw_results_by_donor["date_histogram"] + pass + merged_results = merge_elasticsearch_aggregation_results(raw_results_by_cell_line, raw_results_by_donor) - additional_properties = {"query": query, "aggregations_query": aggregations_query} if debug else None + additional_properties = None + if debug: + additional_properties = { + "debug": { + "query": query, + "aggregations_query": aggregations_query, + "raw_results": raw_results_original, + "raw_results_by_cell_line": deepcopy(raw_results_by_cell_line), + "raw_results_by_donor": deepcopy(raw_results_by_donor), + "merged_results": deepcopy(merged_results) + } + } return normalize_elasticsearch_aggregation_results(merged_results, sort=not nosort, additional_properties=additional_properties) From fdcca3d25b7631e15a064fc786c691a8d7cc2de5 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 7 Dec 2024 09:17:56 -0500 
Subject: [PATCH 07/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 75 ++++++++++++++++++++++------- src/encoded/recent_files_summary.py | 46 +++++++++++------- 2 files changed, 86 insertions(+), 35 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 09a0884ea..37bd4e2df 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -1,12 +1,15 @@ from copy import deepcopy -from typing import Any, Callable, List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple, Union + +AGGREGATION_MAX_BUCKETS = 100 +AGGREGATION_NO_VALUE = "No value" def create_elasticsearch_aggregation_query(fields: List[str], - aggregation_property_name: Optional[str] = None, - max_buckets: Optional[int] = None, - missing_value: Optional[str] = None, - create_field_aggregation: Optional[Callable] = None) -> dict: + aggregation_property_name: Optional[str] = None, + max_buckets: Optional[int] = None, + missing_value: Optional[str] = None, + create_field_aggregation: Optional[Callable] = None) -> dict: global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE @@ -122,10 +125,11 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ return merge_results(target, source)[1] -def normalize_elasticsearch_aggregation_results(aggregation: dict, - sort: bool = False, - additional_properties: Optional[dict] = None, - remove_empty_items: bool = True) -> dict: +def normalize_elasticsearch_aggregation_results( + aggregation: dict, + sort: Union[bool, str, Callable, List[Union[bool, str, Callable]]] = False, + additional_properties: Optional[dict] = None, + remove_empty_items: bool = True) -> dict: def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: # TODO: same as in merge_elasticsearch_aggregation_results function @@ -158,7 +162,7 @@ def get_nested_aggregations(data: dict) -> List[dict]: results.append(data[key]) if not results: if ((isinstance(data.get("buckets"), list) and data["buckets"]) or - (isinstance(data.get("key"), str) and isinstance(data.get("doc_count"), int))): + (isinstance(data.get("key"), str) and isinstance(data.get("doc_count"), int))): # noqa results.append(data) return results @@ -172,9 +176,12 @@ def find_group_item(group_items: List[dict], value: Any) -> Optional[dict]: def normalize_results(aggregation: dict, key: Optional[str] = None, value: Optional[str] = None, additional_properties: Optional[dict] = None) -> dict: + nonlocal remove_empty_items + if not (aggregation_key := get_aggregation_key(aggregation)): return {} + group_items = [] ; item_count = 0 # noqa for bucket in aggregation["buckets"]: if (((bucket_value := get_aggregation_bucket_value(bucket)) is None) or @@ -195,27 +202,59 @@ def normalize_results(aggregation: dict, if (remove_empty_items is False) or (bucket_item_count > 0): group_item = {"name": aggregation_key, "value": bucket_value, "count": bucket_item_count} group_items.append(group_item) + if (remove_empty_items is not False) and (not group_items): return {} results = {"name": key, "value": value, "count": item_count, "items": group_items} + if isinstance(additional_properties, dict) and additional_properties: results = {**additional_properties, **results} + if key is None: del results["name"] if value is None: del results["value"] + return results - def sort_results(data: dict, _level: int = 0) -> None: - if isinstance(data, dict) and isinstance(items := data.get("items"), 
list): - if _level == 0: # TODO: hack/parameterize - items.sort(key=lambda item: item.get("value", ""), reverse=True) + def sort_results(data: dict) -> None: + + nonlocal sort + + def sort_items(items: List[dict], sort: Union[bool, str, Callable]) -> None: + sort_function_default = lambda item: (-item.get("count", 0), item.get("value", "")) # noqa + if (sort is True) or (isinstance(sort, str) and (sort.strip().lower() == "default")): + items.sort(key=sort_function_default) + elif isinstance(sort, str) and (sort := sort.strip().lower()): + if sort.startswith("-"): + sort_reverse = True + sort = sort[1:] + else: + sort_reverse = False + if (sort in ["default"]): + items.sort(key=sort_function_default, reverse=sort_reverse) + elif (sort in ["key", "value"]): + items.sort(key=lambda item: item.get("value", ""), reverse=sort_reverse) + elif callable(sort): + items.sort(key=lambda item: sort(item)) + + def sort_results_nested(data: dict, level: int = 0) -> None: + nonlocal sort + if isinstance(sort, list) and sort: + if level < len(sort): + sort_level = sort[level] + else: + sort_level = sort[len(sort) - 1] else: - items.sort(key=lambda item: (-item.get("count", 0), item.get("value", ""))) - for item in items: - sort_results(item, _level=_level + 1) + sort_level = sort + if isinstance(data, dict) and isinstance(items := data.get("items"), list): + sort_items(items, sort=sort_level) + for item in items: + sort_results_nested(item, level=level + 1) + + sort_results_nested(data) results = normalize_results(aggregation, additional_properties=additional_properties) - if sort is True: + if sort: sort_results(results) return results diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index b0aeee24e..81af07fa1 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -52,6 +52,8 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: released can be queried for using one or more status query arguments, e.g. status=uploaded. """ + hack_filter_date_histogram = True + date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) nosort = request_arg_bool(request, "nosort") @@ -145,7 +147,11 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que "group_by_donor": create_aggregations_query(aggregations_by_donor) } - if False: + if hack_filter_date_histogram: + # TODO + # Late-breaking hack with addition of per-aggregation filter to disregard items not part + # of a group; when using the date_histogram # grouping specifier must be elevated to an + # actual additional aggregation grouping. Also see below (hack_filter_date_histrgram). 
aggregations_query["group_by_cell_line"]["filter"] = { "bool": { "must": [{ @@ -164,8 +170,6 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que }] } } - # aggregations_query["group_by_cell_line"]["aggs"] = {"date_histogram": aggregations_query["group_by_cell_line"]["aggs"]} - # aggregations_query["group_by_donor"]["aggs"] = {"date_histogram": aggregations_query["group_by_donor"]["aggs"]} aggregations_query["group_by_cell_line"]["aggs"] = { "date_histogram": { "date_histogram": aggregations_query["group_by_cell_line"]["date_histogram"], @@ -226,9 +230,7 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que if raw: # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. - if debug: - raw_results = {"query": query, "aggregations_query": aggregations_query, "raw_results": raw_results} - elif "@id" in raw_results: + if "@id" in raw_results: # Unless we do this we get redirect to the URL in this field, for example # to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control # &file_status_tracking.released.from=2024-09-30 @@ -239,31 +241,41 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que if not (raw_results := raw_results.get("aggregations")): return {} - if debug: - raw_results_original = deepcopy(raw_results) - raw_results_by_cell_line = raw_results.get("group_by_cell_line") raw_results_by_donor = raw_results.get("group_by_donor") - if False: + if hack_filter_date_histogram: + if debug: + raw_results = deepcopy(raw_results) # otherwise overwritten by below raw_results_by_cell_line["buckets"] = raw_results_by_cell_line["date_histogram"]["buckets"] del raw_results_by_cell_line["date_histogram"] raw_results_by_donor["buckets"] = raw_results_by_donor["date_histogram"]["buckets"] del raw_results_by_donor["date_histogram"] - pass merged_results = merge_elasticsearch_aggregation_results(raw_results_by_cell_line, raw_results_by_donor) - additional_properties = None + if debug: additional_properties = { "debug": { "query": query, "aggregations_query": aggregations_query, - "raw_results": raw_results_original, - "raw_results_by_cell_line": deepcopy(raw_results_by_cell_line), - "raw_results_by_donor": deepcopy(raw_results_by_donor), + "raw_results": raw_results, "merged_results": deepcopy(merged_results) } } - return normalize_elasticsearch_aggregation_results(merged_results, sort=not nosort, - additional_properties=additional_properties) + else: + additional_properties = None + + if nosort is not True: + # We can sort on the aggregations by level; outermost/left to innermost/right. + # In our case the outermost is the date aggregation so sort taht by the key value, + # e.g. 2014-12, descending; and the rest of the inner levels by the default + # sorting which is by aggregation count descending and secondarily by the key value. 
+ sort = ["-key", "default"] + else: + sort = False + + normalized_results = normalize_elasticsearch_aggregation_results(merged_results, + sort=sort, + additional_properties=additional_properties) + return normalized_results From 300ae14f2b20982c695c833f25e025bfb81bfb87 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 7 Dec 2024 15:53:54 -0500 Subject: [PATCH 08/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 138 ++++++++++++++++++---------- src/encoded/recent_files_summary.py | 89 +++++------------- 2 files changed, 114 insertions(+), 113 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 37bd4e2df..9b00818e7 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -6,14 +6,16 @@ def create_elasticsearch_aggregation_query(fields: List[str], - aggregation_property_name: Optional[str] = None, + property_name: Optional[str] = None, max_buckets: Optional[int] = None, missing_value: Optional[str] = None, - create_field_aggregation: Optional[Callable] = None) -> dict: + include_missing: bool = False, + create_field_aggregation: Optional[Callable] = None, + _toplevel: bool = True) -> dict: global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE - if not (isinstance(fields, list) and fields and isinstance(field := fields[0], str) and field): + if not (isinstance(fields, list) and fields and isinstance(field := fields[0], str) and (field := field.strip())): return {} if not isinstance(missing_value, str): missing_value = AGGREGATION_NO_VALUE @@ -30,20 +32,60 @@ def create_elasticsearch_aggregation_query(fields: List[str], } } - if not (isinstance(aggregation_property_name, str) and aggregation_property_name): - aggregation_property_name = field - aggregation = {aggregation_property_name: field_aggregation} - aggregation[aggregation_property_name]["meta"] = {"field_name": field} + if not (isinstance(property_name, str) and (property_name := property_name.strip())): + property_name = field + + aggregation = {property_name: {"meta": {"field_name": field}}} + + if (include_missing is not True) and (_toplevel is True): + # Filtering out items which are not in any of the aggregations; this introduces complication if + # using date_histogram rather than simple terms, which we need add another level of aggregation + # just for the date_histogram; then the caller will need deal with (remove) it later. 
+ extra_nesting_for_date_histogram_and_filter = "date_histogram" in field_aggregation + for field in fields: + if isinstance(field, str) and (field := field.strip()): + if not aggregation[property_name].get("filter"): + aggregation[property_name]["filter"] = {"bool": {"must": []}} + aggregation[property_name]["filter"]["bool"]["must"].append({ + "exists": { + "field": f"embedded.{field}.raw" + } + }) + else: + extra_nesting_for_date_histogram_and_filter = False + + if not extra_nesting_for_date_histogram_and_filter: + aggregation[property_name].update(field_aggregation) if nested_aggregation := create_elasticsearch_aggregation_query( fields[1:], max_buckets=max_buckets, missing_value=missing_value, - create_field_aggregation=create_field_aggregation): - aggregation[aggregation_property_name]["aggs"] = nested_aggregation - + create_field_aggregation=create_field_aggregation, _toplevel=False): + if extra_nesting_for_date_histogram_and_filter: + aggregation[property_name]["aggs"] = {"dummy_date_histogram": {**field_aggregation, "aggs": nested_aggregation}} + else: + aggregation[property_name]["aggs"] = nested_aggregation return aggregation +def prune_elasticsearch_aggregation_results(results: dict) -> None: + """ + This removes any extra level(s) of aggregation that may have been introduces in + the create_elasticsearch_aggregation_query function (above), for when/if both + a filter and a date_histogram are used together. + """ + if isinstance(results, dict): + for key in list(results.keys()): + if (key == "dummy_date_histogram") and isinstance(buckets := results[key].get("buckets"), list): + results["buckets"] = buckets + del results[key] + else: + prune_elasticsearch_aggregation_results(results[key]) + elif isinstance(results, list): + for element in results: + prune_elasticsearch_aggregation_results(element) + + def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bool = False) -> Optional[dict]: def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: @@ -217,44 +259,44 @@ def normalize_results(aggregation: dict, return results - def sort_results(data: dict) -> None: - - nonlocal sort - - def sort_items(items: List[dict], sort: Union[bool, str, Callable]) -> None: - sort_function_default = lambda item: (-item.get("count", 0), item.get("value", "")) # noqa - if (sort is True) or (isinstance(sort, str) and (sort.strip().lower() == "default")): - items.sort(key=sort_function_default) - elif isinstance(sort, str) and (sort := sort.strip().lower()): - if sort.startswith("-"): - sort_reverse = True - sort = sort[1:] - else: - sort_reverse = False - if (sort in ["default"]): - items.sort(key=sort_function_default, reverse=sort_reverse) - elif (sort in ["key", "value"]): - items.sort(key=lambda item: item.get("value", ""), reverse=sort_reverse) - elif callable(sort): - items.sort(key=lambda item: sort(item)) - - def sort_results_nested(data: dict, level: int = 0) -> None: - nonlocal sort - if isinstance(sort, list) and sort: - if level < len(sort): - sort_level = sort[level] - else: - sort_level = sort[len(sort) - 1] - else: - sort_level = sort - if isinstance(data, dict) and isinstance(items := data.get("items"), list): - sort_items(items, sort=sort_level) - for item in items: - sort_results_nested(item, level=level + 1) - - sort_results_nested(data) - results = normalize_results(aggregation, additional_properties=additional_properties) if sort: - sort_results(results) + sort_elasticsearch_aggregation_results(results) return 
results + + +def sort_elasticsearch_aggregation_results(data: dict, sort: Union[bool, str, Callable, + List[Union[bool, str, Callable]]] = False) -> None: + + def sort_items(items: List[dict], sort: Union[bool, str, Callable]) -> None: + sort_function_default = lambda item: (-item.get("count", 0), item.get("value", "")) # noqa + if (sort is True) or (isinstance(sort, str) and (sort.strip().lower() == "default")): + items.sort(key=sort_function_default) + elif isinstance(sort, str) and (sort := sort.strip().lower()): + if sort.startswith("-"): + sort_reverse = True + sort = sort[1:] + else: + sort_reverse = False + if (sort in ["default"]): + items.sort(key=sort_function_default, reverse=sort_reverse) + elif (sort in ["key", "value"]): + items.sort(key=lambda item: item.get("value", ""), reverse=sort_reverse) + elif callable(sort): + items.sort(key=lambda item: sort(item)) + + def sort_results(data: dict, level: int = 0) -> None: + nonlocal sort + if isinstance(sort, list) and sort: + if level < len(sort): + sort_level = sort[level] + else: + sort_level = sort[len(sort) - 1] + else: + sort_level = sort + if isinstance(data, dict) and isinstance(items := data.get("items"), list): + sort_items(items, sort=sort_level) + for item in items: + sort_results(item, level=level + 1) + + sort_results(data) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 81af07fa1..17bb3b2ea 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -5,6 +5,8 @@ from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results +from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results +from encoded.elasticsearch_utils import sort_elasticsearch_aggregation_results from encoded.endpoint_utils import parse_date_range_related_arguments from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int from snovault.search.search import search as snovault_search @@ -24,7 +26,6 @@ AGGREGATION_MAX_BUCKETS = 100 AGGREGATION_NO_VALUE = "No value" - def recent_files_summary(request: pyramid.request.Request) -> dict: """ This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, @@ -52,10 +53,9 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: released can be queried for using one or more status query arguments, e.g. status=uploaded. 
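For example, a hypothetical request (argument names per the request_arg parsing below; the values here are illustrative only): /recent_files_summary?status=uploaded&novalues=true&nosort=true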
""" - hack_filter_date_histogram = True - date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) + include_missing = request_arg_bool(request, "novalues", request_arg_bool(request, "include_missing")) nosort = request_arg_bool(request, "nosort") debug = request_arg_bool(request, "debug") debug_query = request_arg_bool(request, "debug_query") @@ -95,7 +95,7 @@ def create_query(request: pyramid.request.Request) -> str: def create_aggregations_query(aggregation_fields: List[str]) -> dict: global AGGREGATION_NO_VALUE - nonlocal date_property_name, max_buckets + nonlocal date_property_name, max_buckets, include_missing aggregations = [] if not isinstance(aggregation_fields, list): aggregation_fields = [aggregation_fields] @@ -120,6 +120,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa aggregations, max_buckets=max_buckets, missing_value=AGGREGATION_NO_VALUE, + include_missing=include_missing, create_field_aggregation=create_field_aggregation) return aggregation_query[date_property_name] @@ -142,49 +143,14 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que AGGREGATION_FIELD_FILE_DESCRIPTOR ] + aggregate_by_cell_line_property_name = "aggregate_by_cell_line" + aggregate_by_donor_property_name = "aggregate_by_donor" + aggregations_query = { - "group_by_cell_line": create_aggregations_query(aggregations_by_cell_line), - "group_by_donor": create_aggregations_query(aggregations_by_donor) + aggregate_by_cell_line_property_name: create_aggregations_query(aggregations_by_cell_line), + aggregate_by_donor_property_name: create_aggregations_query(aggregations_by_donor) } - if hack_filter_date_histogram: - # TODO - # Late-breaking hack with addition of per-aggregation filter to disregard items not part - # of a group; when using the date_histogram # grouping specifier must be elevated to an - # actual additional aggregation grouping. Also see below (hack_filter_date_histrgram). - aggregations_query["group_by_cell_line"]["filter"] = { - "bool": { - "must": [{ - "exists": { - "field": f"embedded.{AGGREGATION_FIELD_CELL_LINE}.raw" - } - }] - } - } - aggregations_query["group_by_donor"]["filter"] = { - "bool": { - "must": [{ - "exists": { - "field": f"embedded.{AGGREGATION_FIELD_DONOR}.raw" - } - }] - } - } - aggregations_query["group_by_cell_line"]["aggs"] = { - "date_histogram": { - "date_histogram": aggregations_query["group_by_cell_line"]["date_histogram"], - "aggs": aggregations_query["group_by_cell_line"]["aggs"] - } - } - del aggregations_query["group_by_cell_line"]["date_histogram"] - aggregations_query["group_by_donor"]["aggs"] = { - "date_histogram": { - "date_histogram": aggregations_query["group_by_donor"]["date_histogram"], - "aggs": aggregations_query["group_by_donor"]["aggs"] - } - } - del aggregations_query["group_by_donor"]["date_histogram"] - if debug_query: return {"query": query, "aggregations_query": aggregations_query} @@ -230,29 +196,24 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que if raw: # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. 
+ # And note that unless we remove the @id property we get redirected to the URL in this field, + # for example to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control + # &file_status_tracking.released.from=2024-09-30 + # &file_status_tracking.released.to=2024-12-31&from=0&limit=0' if "@id" in raw_results: - # Unless we do this we get redirect to the URL in this field, for example - # to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control - # &file_status_tracking.released.from=2024-09-30 - # &file_status_tracking.released.to=2024-12-31&from=0&limit=0' del raw_results["@id"] return raw_results if not (raw_results := raw_results.get("aggregations")): return {} - raw_results_by_cell_line = raw_results.get("group_by_cell_line") - raw_results_by_donor = raw_results.get("group_by_donor") - - if hack_filter_date_histogram: - if debug: - raw_results = deepcopy(raw_results) # otherwise overwritten by below - raw_results_by_cell_line["buckets"] = raw_results_by_cell_line["date_histogram"]["buckets"] - del raw_results_by_cell_line["date_histogram"] - raw_results_by_donor["buckets"] = raw_results_by_donor["date_histogram"]["buckets"] - del raw_results_by_donor["date_histogram"] + if debug: + raw_results = deepcopy(raw_results) # otherwise may be overwritten by below - merged_results = merge_elasticsearch_aggregation_results(raw_results_by_cell_line, raw_results_by_donor) + prune_elasticsearch_aggregation_results(raw_results) + merged_results = merge_elasticsearch_aggregation_results( + raw_results.get(aggregate_by_cell_line_property_name), + raw_results.get(aggregate_by_donor_property_name)) if debug: additional_properties = { "debug": { "query": query, "aggregations_query": aggregations_query, "raw_results": raw_results, "merged_results": deepcopy(merged_results) } } else: additional_properties = None + normalized_results = normalize_elasticsearch_aggregation_results( + merged_results, additional_properties=additional_properties) + if nosort is not True: # We can sort on the aggregations by level; outermost/left to innermost/right. # In our case the outermost is the date aggregation so sort that by the key value, # e.g. 2024-12, descending; and the rest of the inner levels by the default # sorting which is by aggregation count descending and secondarily by the key value.
- sort = ["-key", "default"] - else: - sort = False + sort_elasticsearch_aggregation_results(normalized_results, ["-key", "default"]) - normalized_results = normalize_elasticsearch_aggregation_results(merged_results, - sort=sort, - additional_properties=additional_properties) return normalized_results From 0650d8d627a8fde0e4d2ab0f09df3f9fe1ea4835 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 7 Dec 2024 16:00:01 -0500 Subject: [PATCH 09/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 9 ++------- src/encoded/recent_files_summary.py | 10 +++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 9b00818e7..b7a32ab9d 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -167,11 +167,8 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ return merge_results(target, source)[1] -def normalize_elasticsearch_aggregation_results( - aggregation: dict, - sort: Union[bool, str, Callable, List[Union[bool, str, Callable]]] = False, - additional_properties: Optional[dict] = None, - remove_empty_items: bool = True) -> dict: +def normalize_elasticsearch_aggregation_results(aggregation: dict, additional_properties: Optional[dict] = None, + remove_empty_items: bool = True) -> dict: def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: # TODO: same as in merge_elasticsearch_aggregation_results function @@ -260,8 +257,6 @@ def normalize_results(aggregation: dict, return results results = normalize_results(aggregation, additional_properties=additional_properties) - if sort: - sort_elasticsearch_aggregation_results(results) return results diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 17bb3b2ea..a21515470 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -55,7 +55,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) - include_missing = request_arg_bool(request, "novalues", request_arg_bool(request, "include_missing")) + include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "inovalues")) nosort = request_arg_bool(request, "nosort") debug = request_arg_bool(request, "debug") debug_query = request_arg_bool(request, "debug_query") @@ -63,7 +63,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: def create_query(request: pyramid.request.Request) -> str: - global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES + global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES, QUERY_RECENT_MONTHS nonlocal date_property_name types = request_args(request, "type", QUERY_FILE_TYPES) @@ -93,7 +93,7 @@ def create_query(request: pyramid.request.Request) -> str: query_string = query_string.replace("=%21", "%21=") return f"/search/?{query_string}" - def create_aggregations_query(aggregation_fields: List[str]) -> dict: + def create_aggregation_query(aggregation_fields: List[str]) -> dict: global AGGREGATION_NO_VALUE nonlocal date_property_name, max_buckets, include_missing aggregations = [] @@ -147,8 +147,8 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que aggregate_by_donor_property_name 
= "aggregate_by_donor" aggregations_query = { - aggregate_by_cell_line_property_name: create_aggregations_query(aggregations_by_cell_line), - aggregate_by_donor_property_name: create_aggregations_query(aggregations_by_donor) + aggregate_by_cell_line_property_name: create_aggregation_query(aggregations_by_cell_line), + aggregate_by_donor_property_name: create_aggregation_query(aggregations_by_donor) } if debug_query: From f598c3fb2671f9eb6ce92bbeec4f35c014aa02df Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 7 Dec 2024 16:08:18 -0500 Subject: [PATCH 10/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 9 +++++++-- src/encoded/recent_files_summary.py | 16 ++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index b7a32ab9d..adc446c2b 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -260,9 +260,14 @@ def normalize_results(aggregation: dict, return results -def sort_elasticsearch_aggregation_results(data: dict, sort: Union[bool, str, Callable, +def sort_normalized_aggregation_results(data: dict, sort: Union[bool, str, Callable, List[Union[bool, str, Callable]]] = False) -> None: + """ + Sorts the given *normalized* (see above) ElasticSearch aggregation results. + By default, this is by item (doc) count descending and secondarily by key value. + """ + def sort_items(items: List[dict], sort: Union[bool, str, Callable]) -> None: sort_function_default = lambda item: (-item.get("count", 0), item.get("value", "")) # noqa if (sort is True) or (isinstance(sort, str) and (sort.strip().lower() == "default")): @@ -273,7 +278,7 @@ def sort_items(items: List[dict], sort: Union[bool, str, Callable]) -> None: sort = sort[1:] else: sort_reverse = False - if (sort in ["default"]): + if sort == "default": items.sort(key=sort_function_default, reverse=sort_reverse) elif (sort in ["key", "value"]): items.sort(key=lambda item: item.get("value", ""), reverse=sort_reverse) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index a21515470..551001bcf 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -6,7 +6,7 @@ from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results -from encoded.elasticsearch_utils import sort_elasticsearch_aggregation_results +from encoded.elasticsearch_utils import sort_normalized_aggregation_results from encoded.endpoint_utils import parse_date_range_related_arguments from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int from snovault.search.search import search as snovault_search @@ -124,9 +124,9 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa create_field_aggregation=create_field_aggregation) return aggregation_query[date_property_name] - def execute_query(request: pyramid.request.Request, query: str, aggregations_query: dict) -> str: + def execute_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: request = snovault_make_search_subreq(request, path=query, method="GET") - results = snovault_search(None, request, custom_aggregations=aggregations_query) + results = snovault_search(None, request, custom_aggregations=aggregation_query) return results 
query = create_query(request) @@ -146,15 +146,15 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que aggregate_by_cell_line_property_name = "aggregate_by_cell_line" aggregate_by_donor_property_name = "aggregate_by_donor" - aggregations_query = { + aggregation_query = { aggregate_by_cell_line_property_name: create_aggregation_query(aggregations_by_cell_line), aggregate_by_donor_property_name: create_aggregation_query(aggregations_by_donor) } if debug_query: - return {"query": query, "aggregations_query": aggregations_query} + return {"query": query, "aggregation_query": aggregation_query} - raw_results = execute_query(request, query, aggregations_query) + raw_results = execute_query(request, query, aggregation_query) # Note that the doc_count values returned by ElasticSearch do actually seem to be for unique items, # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3 @@ -219,7 +219,7 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que additional_properties = { "debug": { "query": query, - "aggregations_query": aggregations_query, + "aggregation_query": aggregation_query, "raw_results": raw_results, "merged_results": deepcopy(merged_results) } } @@ -235,6 +235,6 @@ def execute_query(request: pyramid.request.Request, query: str, aggregations_que # In our case the outermost is the date aggregation so sort that by the key value, # e.g. 2024-12, descending; and the rest of the inner levels by the default # sorting which is by aggregation count descending and secondarily by the key value. - sort_elasticsearch_aggregation_results(normalized_results, ["-key", "default"]) + sort_normalized_aggregation_results(normalized_results, ["-key", "default"]) return normalized_results From b30dd873af929c392b587c6f07b674af8f604f35 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 00:47:48 -0500 Subject: [PATCH 11/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 178 +++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index adc446c2b..28825bf78 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -13,8 +13,59 @@ def create_elasticsearch_aggregation_query(fields: List[str], create_field_aggregation: Optional[Callable] = None, _toplevel: bool = True) -> dict: + """ + Returns a dictionary representing an ElasticSearch aggregation query for the field names.
+ If more than one is given the aggregation will be nested, one within another, for example, + given ["date_created", "donors.display_title", "release_tracker_description"] we my return + something like this: + + { + "aggregate_by_donor": { + "meta": { "field_name": "date_created" }, + "filter": { + "bool": { + "must": [ + {"exists": {"field": "embedded.date_created.raw"}}, + {"exists": {"field": "embedded.donors.display_title.raw"}}, + {"exists": {"field": "embedded.release_tracker_description.raw"}} + ] + } + }, + "aggs": { + "dummy_date_histogram": { + "date_histogram": { + "field": "embedded.date_created", + "calendar_interval": "month", + "format": "yyyy-MM", "missing": "1970-01", + "order": { "_key": "desc"} + }, + "aggs": { + "donors.display_title": { + "meta": {"field_name": "donors.display_title"}, + "terms": { + "field": "embedded.donors.display_title.raw", + "missing": "No value", "size": 100 + }, + "aggs": { + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "terms": { + "field": "embedded.release_tracker_description.raw", + "missing": "No value", "size": 100 + } + } + } + } + } + } + } + } + } + """ global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE + if isinstance(fields, str): + fields = [fields] if not (isinstance(fields, list) and fields and isinstance(field := fields[0], str) and (field := field.strip())): return {} if not isinstance(missing_value, str): missing_value = AGGREGATION_NO_VALUE @@ -87,6 +138,95 @@ def prune_elasticsearch_aggregation_results(results: dict) -> None: def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bool = False) -> Optional[dict]: + """ + Merges the given second (source) argument into the given first (target) argument (in place), recursively, both + of which are assumed to be ElasticSearch aggregation query results; doc_count values are updated as expected. + If the given copy argument is True then the merge is not done to the given target in-place, rather a copy + of it is made and the merge done to it. In either case the resultant merged target is returned.
For example: + + target = { + "meta": { "field_name": "date_created" }, "doc_count": 15, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 13, + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + "buckets": [ + { + "key": "COLO829T", "doc_count": 7, + "release_tracker_description": { + "meta": { "field_name": "release_tracker_description" }, + "buckets": [ + { "key": "WGS ONT PromethION 24 bam", "doc_count": 1 } + ] + } + } + ] + } + } + ] + } + + source = { + "meta": { "field_name": "date_created" }, "doc_count": 16, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 14, + "donors.display_title": { + "meta": { "field_name": "donors.display_title" }, + "buckets": [ + { + "key": "DAC_DONOR_COLO829", "doc_count": 12, + "release_tracker_description": { + "meta": { "field_name": "release_tracker_description" }, + "buckets": [ + { "key": "Fiber-seq PacBio Revio bam", "doc_count": 4 } + ] + } + } + ] + } + } + ] + } + + merge_elasticsearch_aggregation_results(target, source) == { + "meta": { "field_name": "date_created" }, "doc_count": 15, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 25, + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + "buckets": [ + { + "key": "COLO829T", "doc_count": 7, + "release_tracker_description": { + "meta": { "field_name": "release_tracker_description" }, + "buckets": [ + { "key": "WGS ONT PromethION 24 bam", "doc_count": 1 } + ] + } + } + ] + }, + "donors.display_title": { + "meta": { "field_name": "donors.display_title" }, + "buckets": [ + { + "key": "DAC_DONOR_COLO829", "doc_count": 12, + "release_tracker_description": { + "meta": { "field_name": "release_tracker_description" }, + "buckets": [ + { "key": "Fiber-seq PacBio Revio bam", "doc_count": 4 } + ] + } + } + ] + } + } + ] + } + """ def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: if isinstance(aggregation, dict) and isinstance(aggregation.get("buckets"), list): @@ -170,6 +310,44 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ def normalize_elasticsearch_aggregation_results(aggregation: dict, additional_properties: Optional[dict] = None, remove_empty_items: bool = True) -> dict: + """ + Normalizes the given result of an ElasticSearch aggregation query into a more readable/consumable format. 
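+ Each node of the normalized result takes roughly this shape (a sketch of the structure built by normalize_results below; the name and value properties are omitted at the outermost level): + {"name": <field-name>, "value": <bucket-key>, "count": <doc-count>, "items": [...]}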
+ For example, given the result of the example for merge_elasticsearch_aggregation_results above as input, + this function would return something like this: + + normalize_elasticsearch_aggregation_results(aggregation_results) == { + "count": 25, + "items": [ + { + "name": "date_created", + "value": "2024-12", "count": 11, + "items": [ + { + "name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code", + "value": "COLO829T", "count": 1, + "items": [ + { + "name": "release_tracker_description", + "value": "WGS ONT PromethION 24 bam", "count": 1 + } + ] + }, + { + "name": "donors.display_title", + "value": "DAC_DONOR_COLO829", "count": 4, + "items": [ + { + "name": "release_tracker_description", + "value": "Fiber-seq PacBio Revio bam", "count": 4 + } + ] + } + ] + } + ] + } + """ + def get_aggregation_key(aggregation: dict, aggregation_key: Optional[str] = None) -> Optional[str]: # TODO: same as in merge_elasticsearch_aggregation_results function if isinstance(aggregation, dict) and isinstance(aggregation.get("buckets"), list): From 6da89610ed7d7fcb27bfee0468131127a840897d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 00:56:48 -0500 Subject: [PATCH 12/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 28825bf78..282c95a01 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -16,7 +16,7 @@ def create_elasticsearch_aggregation_query(fields: List[str], """ Returns a dictionary representing an ElasticSearch aggregation query for the field names. If more than one is given the aggregation will be nested, one within another, for example, - given ["date_created", "donors.display_title", "release_tracker_description"] we my return + given ["date_created", "donors.display_title", "release_tracker_description"] we would return something like this: @@ -61,6 +61,25 @@ def create_elasticsearch_aggregation_query(fields: List[str], } } } + + The above example assumes that a create_field_aggregation function callable was passed as an argument + and that if/when its argument is date_created then it would have returned something like this + + { + "date_histogram": { + "field": f"embedded.date_created", + "calendar_interval": "month", + "format": "yyyy-MM", + "missing": "1970-01", + "order": {"_key": "desc"} + } + } + + And further, that the include_missing was the default of False, in which case items which were not part of any + of the aggregation fields specified would be filtered out. This demonstrates a slight complication dealt with + in this particular case where an extra level of aggregation needs to be introduced (dummy_date_histogram). + This extra bit of cruft necessary to get the ElasticSearch query to work as expected, manifests itself in the + query result as well and is dispensed with using the prune_elasticsearch_aggregation_results function below. """ global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE @@ -121,8 +140,8 @@ def create_elasticsearch_aggregation_query(fields: List[str], def prune_elasticsearch_aggregation_results(results: dict) -> None: """ - This removes any extra level(s) of aggregation that may have been introduces in - the create_elasticsearch_aggregation_query function (above), for when/if both + This removes any extra level(s) of aggregation (i.e.
dummy_date_histogram) that may have been + introduced in the create_elasticsearch_aggregation_query function (above), for when/if both a filter and a date_histogram are used together. """ From c5078356cfb12977a75bcd795c1b92be3f68a268 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 12:40:16 -0500 Subject: [PATCH 13/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 13 +- src/encoded/tests/test_elasticsearch_utils.py | 122 ++++++++++++++++++ 2 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 src/encoded/tests/test_elasticsearch_utils.py diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 282c95a01..1bb64481b 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -75,11 +75,11 @@ def create_elasticsearch_aggregation_query(fields: List[str], } } - And further, that the include_missing was the default of False, in which case items which were not part of any - of the aggregation fields specified would be filtered out. This demonstrates a slight complication dealt with - in this particular case where an extra level of aggregation needs to be introduced (dummy_date_histogram). - This extra bit of cruft necessary to get the ElasticSearch query to work as expected, manifests itself in the - query result as well and is dispensed with using the prune_elasticsearch_aggregation_results function below. + It further assumes that the include_missing argument is False (default), in which case items not part of + any of the specified aggregation fields would be filtered out. This demonstrates a slight complication with + this particular case where an extra level of aggregation needs to be introduced (dummy_date_histogram). + This extra bit of cruft, necessary to get the ElasticSearch query to work as expected, manifests itself in + the query result as well and is dispensed with using the prune_elasticsearch_aggregation_results function below.
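+ For instance (an illustrative sketch of that pruning): a result fragment such as + {"dummy_date_histogram": {"buckets": [...]}} is collapsed in place to just {"buckets": [...]} + within its enclosing aggregation.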
""" global AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE @@ -132,7 +132,8 @@ def create_elasticsearch_aggregation_query(fields: List[str], missing_value=missing_value, create_field_aggregation=create_field_aggregation, _toplevel=False): if extra_nesting_for_date_histogram_and_filter: - aggregation[property_name]["aggs"] = {"dummy_date_histogram": {**field_aggregation, "aggs": nested_aggregation}} + aggregation[property_name]["aggs"] = \ + {"dummy_date_histogram": {**field_aggregation, "aggs": nested_aggregation}} else: aggregation[property_name]["aggs"] = nested_aggregation return aggregation diff --git a/src/encoded/tests/test_elasticsearch_utils.py b/src/encoded/tests/test_elasticsearch_utils.py new file mode 100644 index 000000000..7ed6ac11e --- /dev/null +++ b/src/encoded/tests/test_elasticsearch_utils.py @@ -0,0 +1,122 @@ +from hms_utils.misc_utils import dj +import pytest +from typing import Optional +from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query +from encoded.recent_files_summary import (AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR) + +def test_create_elasticsearch_aggregation_query_a(): + + def create_field_aggregation(field: str) -> Optional[dict]: + if field == AGGREGATION_FIELD_RELEASE_DATE: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", "format": "yyyy-MM", + "missing": "1970-01", "order": {"_key": "desc"} + } + } + + aggregations = [ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + + aggregation_query = create_elasticsearch_aggregation_query( + aggregations, create_field_aggregation=create_field_aggregation) + + assert aggregation_query == { + "file_status_tracking.released": { + "meta": {"field_name": "file_status_tracking.released"}, + "filter": { + "bool": { + "must": [ + {"exists": {"field": "embedded.file_status_tracking.released.raw"}}, + {"exists": {"field": "embedded.file_sets.libraries.analytes.samples.sample_sources.cell_line.code.raw"}}, + {"exists": {"field": "embedded.release_tracker_description.raw"}} + ] + } + }, + "aggs": { + "dummy_date_histogram": { + "date_histogram": { + "field": "embedded.file_status_tracking.released", + "calendar_interval": "month", "format": "yyyy-MM", + "missing": "1970-01", "order": { "_key": "desc" } + }, + "aggs": { + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, + "terms": { + "field": "embedded.file_sets.libraries.analytes.samples.sample_sources.cell_line.code.raw", + "missing": "No value", "size": 100 + }, + "aggs": { + "release_tracker_description": { + "meta": { "field_name": "release_tracker_description" }, + "terms": { + "field": "embedded.release_tracker_description.raw", + "missing": "No value", "size": 100 + } + } + } + } + } + } + } + } + } + + +def test_create_elasticsearch_aggregation_query_b(): + + def create_field_aggregation(field: str) -> Optional[dict]: + if field == AGGREGATION_FIELD_RELEASE_DATE: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", "format": "yyyy-MM", + "missing": "1970-01", "order": {"_key": "desc"} + } + } + + aggregations = [ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + + # Same as previous tests but with include_missing=True (no date_histogram complication). 
+ aggregation_query = create_elasticsearch_aggregation_query( + aggregations, create_field_aggregation=create_field_aggregation, include_missing=True) + + aggregation_query == { + "file_status_tracking.released": { + "meta": {"field_name": "file_status_tracking.released"}, + "date_histogram": { + "field": "embedded.file_status_tracking.released", + "calendar_interval": "month", "format": "yyyy-MM", + "missing": "1970-01", "order": {"_key": "desc"} + }, + "aggs": { + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, + "terms": { + "field": "embedded.file_sets.libraries.analytes.samples.sample_sources.cell_line.code.raw", + "missing": "No value", "size": 100 + }, + "aggs": { + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "terms": { + "field": "embedded.release_tracker_description.raw", + "missing": "No value", "size": 100 + } + } + } + } + } + } + } From b36eb83fb0f8da5270665859b89aa5ef45c48967 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 12:51:41 -0500 Subject: [PATCH 14/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 30 +-- src/encoded/tests/test_elasticsearch_utils.py | 171 +++++++++++++++++- 2 files changed, 180 insertions(+), 21 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 1bb64481b..49d158484 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -165,19 +165,19 @@ def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bo of it is made and the merge done to it. In eiter case the resultant merged target is returned. 
For example: target = { - "meta": { "field_name": "date_created" }, "doc_count": 15, + "meta": {"field_name": "date_created"}, "doc_count": 15, "buckets": [ { "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 13, "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { - "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, "buckets": [ { "key": "COLO829T", "doc_count": 7, "release_tracker_description": { - "meta": { "field_name": "release_tracker_description" }, + "meta": {"field_name": "release_tracker_description"}, "buckets": [ - { "key": "WGS ONT PromethION 24 bam", "doc_count": 1 } + {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} ] } } @@ -188,19 +188,19 @@ def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bo } source = { - "meta": { "field_name": "date_created" }, "doc_count": 16, + "meta": {"field_name": "date_created"}, "doc_count": 16, "buckets": [ { "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 14, "donors.display_title": { - "meta": { "field_name": "donors.display_title" }, + "meta": {"field_name": "donors.display_title"}, "buckets": [ { "key": "DAC_DONOR_COLO829", "doc_count": 12, "release_tracker_description": { - "meta": { "field_name": "release_tracker_description" }, + "meta": {"field_name": "release_tracker_description"}, "buckets": [ - { "key": "Fiber-seq PacBio Revio bam", "doc_count": 4 } + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} ] } } @@ -211,33 +211,33 @@ def merge_elasticsearch_aggregation_results(target: dict, source: dict, copy: bo } merge_elasticsearch_aggregation_results(target, source) == { - "meta": { "field_name": "date_created" }, "doc_count": 15, + "meta": {"field_name": "date_created"}, "doc_count": 15, "buckets": [ { "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 25, "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { - "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, "buckets": [ { "key": "COLO829T", "doc_count": 7, "release_tracker_description": { - "meta": { "field_name": "release_tracker_description" }, + "meta": {"field_name": "release_tracker_description"}, "buckets": [ - { "key": "WGS ONT PromethION 24 bam", "doc_count": 1 } + {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} ] } } ] }, "donors.display_title": { - "meta": { "field_name": "donors.display_title" }, + "meta": {"field_name": "donors.display_title"}, "buckets": [ { "key": "DAC_DONOR_COLO829", "doc_count": 12, "release_tracker_description": { - "meta": { "field_name": "release_tracker_description" }, + "meta": {"field_name": "release_tracker_description"}, "buckets": [ - { "key": "Fiber-seq PacBio Revio bam", "doc_count": 4 } + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} ] } } diff --git a/src/encoded/tests/test_elasticsearch_utils.py b/src/encoded/tests/test_elasticsearch_utils.py index 7ed6ac11e..d44fdc089 100644 --- a/src/encoded/tests/test_elasticsearch_utils.py +++ b/src/encoded/tests/test_elasticsearch_utils.py @@ -1,10 +1,11 @@ -from hms_utils.misc_utils import dj import pytest from typing import Optional from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query -from encoded.recent_files_summary import (AGGREGATION_FIELD_RELEASE_DATE, - 
AGGREGATION_FIELD_CELL_LINE, - AGGREGATION_FIELD_FILE_DESCRIPTOR) +from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results +from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results +from encoded.recent_files_summary import AGGREGATION_FIELD_RELEASE_DATE +from encoded.recent_files_summary import AGGREGATION_FIELD_CELL_LINE +from encoded.recent_files_summary import AGGREGATION_FIELD_FILE_DESCRIPTOR def test_create_elasticsearch_aggregation_query_a(): @@ -55,7 +56,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: }, "aggs": { "release_tracker_description": { - "meta": { "field_name": "release_tracker_description" }, + "meta": {"field_name": "release_tracker_description"}, "terms": { "field": "embedded.release_tracker_description.raw", "missing": "No value", "size": 100 @@ -92,7 +93,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: aggregation_query = create_elasticsearch_aggregation_query( aggregations, create_field_aggregation=create_field_aggregation, include_missing=True) - aggregation_query == { + assert aggregation_query == { "file_status_tracking.released": { "meta": {"field_name": "file_status_tracking.released"}, "date_histogram": { @@ -120,3 +121,161 @@ def create_field_aggregation(field: str) -> Optional[dict]: } } } + + +def test_merge_elasticsearch_aggregation_results_a(): + + target = { + "meta": {"field_name": "date_created"}, "doc_count": 15, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 13, + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, + "buckets": [ + { + "key": "COLO829T", "doc_count": 7, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} + ] + } + } + ] + } + } + ] + } + + source = { + "meta": {"field_name": "date_created"}, "doc_count": 16, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 14, + "donors.display_title": { + "meta": {"field_name": "donors.display_title"}, + "buckets": [ + { + "key": "DAC_DONOR_COLO829", "doc_count": 12, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} + ] + } + } + ] + } + } + ] + } + + assert merge_elasticsearch_aggregation_results(target, source) == { + "meta": {"field_name": "date_created"}, "doc_count": 15, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 25, + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, + "buckets": [ + { + "key": "COLO829T", "doc_count": 7, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} + ] + } + } + ] + }, + "donors.display_title": { + "meta": {"field_name": "donors.display_title"}, + "buckets": [ + { + "key": "DAC_DONOR_COLO829", "doc_count": 12, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} + ] + } + } + ] + } + } + ] + } + + +def test_normalize_elasticsearch_aggregation_results_a(): + + results = { + "meta": {"field_name": "date_created"}, 
"doc_count": 15, + "buckets": [ + { + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 25, + "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, + "buckets": [ + { + "key": "COLO829T", "doc_count": 7, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} + ] + } + } + ] + }, + "donors.display_title": { + "meta": {"field_name": "donors.display_title"}, + "buckets": [ + { + "key": "DAC_DONOR_COLO829", "doc_count": 12, + "release_tracker_description": { + "meta": {"field_name": "release_tracker_description"}, + "buckets": [ + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} + ] + } + } + ] + } + } + ] + } + + assert normalize_elasticsearch_aggregation_results(results) == { + "count": 25, + "items": [ + { + "name": "date_created", + "value": "2024-12", "count": 11, + "items": [ + { + "name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code", + "value": "COLO829T", "count": 1, + "items": [ + { + "name": "release_tracker_description", + "value": "WGS ONT PromethION 24 bam", "count": 1 + } + ] + }, + { + "name": "donors.display_title", + "value": "DAC_DONOR_COLO829", "count": 4, + "items": [ + { + "name": "release_tracker_description", + "value": "Fiber-seq PacBio Revio bam", "count": 4 + } + ] + } + ] + } + ] + } From 911808bfc49a0fce1951f204f20f4bd9c25ab66a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 13:41:37 -0500 Subject: [PATCH 15/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 9 ++++++++- src/encoded/tests/test_elasticsearch_utils.py | 20 +++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 49d158484..b01e76231 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -324,7 +324,14 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ if copy is True: target = deepcopy(target) - return merge_results(target, source)[1] + + _, target = merge_results(target, source) + + if (((source_item_count := get_aggregation_bucket_doc_count(source)) is not None) and + (get_aggregation_bucket_doc_count(target) is not None)): # noqa + target["doc_count"] += source_item_count + + return target def normalize_elasticsearch_aggregation_results(aggregation: dict, additional_properties: Optional[dict] = None, diff --git a/src/encoded/tests/test_elasticsearch_utils.py b/src/encoded/tests/test_elasticsearch_utils.py index d44fdc089..97d690500 100644 --- a/src/encoded/tests/test_elasticsearch_utils.py +++ b/src/encoded/tests/test_elasticsearch_utils.py @@ -126,10 +126,10 @@ def create_field_aggregation(field: str) -> Optional[dict]: def test_merge_elasticsearch_aggregation_results_a(): target = { - "meta": {"field_name": "date_created"}, "doc_count": 15, + "meta": {"field_name": "date_created"}, "doc_count": 7, "buckets": [ { - "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 13, + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 7, "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, "buckets": [ @@ -138,7 +138,7 @@ def test_merge_elasticsearch_aggregation_results_a(): 
"release_tracker_description": { "meta": {"field_name": "release_tracker_description"}, "buckets": [ - {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} + {"key": "WGS ONT PromethION 24 bam", "doc_count": 7} ] } } @@ -149,10 +149,10 @@ def test_merge_elasticsearch_aggregation_results_a(): } source = { - "meta": {"field_name": "date_created"}, "doc_count": 16, + "meta": {"field_name": "date_created"}, "doc_count": 12, "buckets": [ { - "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 14, + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 12, "donors.display_title": { "meta": {"field_name": "donors.display_title"}, "buckets": [ @@ -161,7 +161,7 @@ def test_merge_elasticsearch_aggregation_results_a(): "release_tracker_description": { "meta": {"field_name": "release_tracker_description"}, "buckets": [ - {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 12} ] } } @@ -172,10 +172,10 @@ def test_merge_elasticsearch_aggregation_results_a(): } assert merge_elasticsearch_aggregation_results(target, source) == { - "meta": {"field_name": "date_created"}, "doc_count": 15, + "meta": {"field_name": "date_created"}, "doc_count": 19, "buckets": [ { - "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 25, + "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 19, "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { "meta": {"field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"}, "buckets": [ @@ -184,7 +184,7 @@ def test_merge_elasticsearch_aggregation_results_a(): "release_tracker_description": { "meta": {"field_name": "release_tracker_description"}, "buckets": [ - {"key": "WGS ONT PromethION 24 bam", "doc_count": 1} + {"key": "WGS ONT PromethION 24 bam", "doc_count": 7} ] } } @@ -198,7 +198,7 @@ def test_merge_elasticsearch_aggregation_results_a(): "release_tracker_description": { "meta": {"field_name": "release_tracker_description"}, "buckets": [ - {"key": "Fiber-seq PacBio Revio bam", "doc_count": 4} + {"key": "Fiber-seq PacBio Revio bam", "doc_count": 12} ] } } From 7bde707eaa310c76336398b7e99fc7308515aa87 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 15:02:47 -0500 Subject: [PATCH 16/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index b01e76231..df24a6e0f 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -1,3 +1,4 @@ +from hms_utils.misc_utils import dj from copy import deepcopy from typing import Any, Callable, List, Optional, Tuple, Union @@ -275,7 +276,7 @@ def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]: return doc_count return None - def get_aggregation_buckets_doc_count(aggregation: dict): + def get_aggregation_buckets_doc_count(aggregation: dict) -> int: buckets_doc_count = 0 if get_aggregation_key(aggregation): for aggregation_bucket in aggregation["buckets"]: @@ -293,7 +294,7 @@ def find_aggregation_bucket(aggregation: dict, value: str) -> Optional[dict]: def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[int]]: merged_item_count = 0 if not ((aggregation_key := get_aggregation_key(source)) and (get_aggregation_key(target) == aggregation_key)): - return None, None + return 0, None for source_bucket in 
source["buckets"]: if (((source_bucket_value := get_aggregation_bucket_value(source_bucket)) is None) or ((source_bucket_item_count := get_aggregation_bucket_doc_count(source_bucket)) is None)): # noqa @@ -301,13 +302,15 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ if (target_bucket := find_aggregation_bucket(target, source_bucket_value)): if source_nested_aggregation := get_nested_aggregation(source_bucket): if target_nested_aggregation := get_nested_aggregation(target_bucket): - merged_item_count, _ = merge_results(target_nested_aggregation, source_nested_aggregation) - if merged_item_count is None: + merged_item_count, merged_results = merge_results(target_nested_aggregation, source_nested_aggregation) + if merged_results is None: if source_nested_aggregation_key := get_aggregation_key(source_nested_aggregation): - target_bucket[source_nested_aggregation_key] = \ - source_bucket[source_nested_aggregation_key] - target_bucket["doc_count"] += \ - get_aggregation_buckets_doc_count(source_bucket[source_nested_aggregation_key]) + target_bucket[source_nested_aggregation_key] = ( + source_nested_bucket := source_bucket[source_nested_aggregation_key]) + if (source_nested_bucket_item_count := + get_aggregation_buckets_doc_count(source_nested_bucket)) > 0: + target_bucket["doc_count"] += source_nested_bucket_item_count + merged_item_count += source_nested_bucket_item_count elif merged_item_count > 0: target_bucket["doc_count"] += merged_item_count elif get_aggregation_bucket_value(target_bucket) is not None: @@ -320,16 +323,15 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ target["doc_count"] += source_bucket_item_count else: target["doc_count"] = source_bucket_item_count + merged_item_count += source_bucket_item_count return merged_item_count, target if copy is True: target = deepcopy(target) - _, target = merge_results(target, source) - - if (((source_item_count := get_aggregation_bucket_doc_count(source)) is not None) and - (get_aggregation_bucket_doc_count(target) is not None)): # noqa - target["doc_count"] += source_item_count + merged_item_count, target = merge_results(target, source) + if (merged_item_count > 0) and (get_aggregation_bucket_doc_count(target) is not None): + target["doc_count"] += merged_item_count return target From e59840a929a52868c3676dee2e0ee5cb00177d25 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 8 Dec 2024 15:09:15 -0500 Subject: [PATCH 17/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index df24a6e0f..ad9c5f92a 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -1,4 +1,3 @@ -from hms_utils.misc_utils import dj from copy import deepcopy from typing import Any, Callable, List, Optional, Tuple, Union @@ -276,7 +275,7 @@ def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]: return doc_count return None - def get_aggregation_buckets_doc_count(aggregation: dict) -> int: + def get_aggregation_total_buckets_doc_count(aggregation: dict) -> int: buckets_doc_count = 0 if get_aggregation_key(aggregation): for aggregation_bucket in aggregation["buckets"]: @@ -308,15 +307,14 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ target_bucket[source_nested_aggregation_key] = ( source_nested_bucket := 
source_bucket[source_nested_aggregation_key])
                                 if (source_nested_bucket_item_count :=
-                                        get_aggregation_buckets_doc_count(source_nested_bucket)) > 0:
+                                        get_aggregation_total_buckets_doc_count(source_nested_bucket)) > 0:
                                     target_bucket["doc_count"] += source_nested_bucket_item_count
                                     merged_item_count += source_nested_bucket_item_count
                             elif merged_item_count > 0:
                                 target_bucket["doc_count"] += merged_item_count
-                            elif get_aggregation_bucket_value(target_bucket) is not None:
-                                if get_aggregation_bucket_doc_count(target_bucket) is not None:
-                                    target_bucket["doc_count"] += source_bucket_item_count
-                                    merged_item_count += source_bucket_item_count
+                            elif get_aggregation_bucket_doc_count(target_bucket) is not None:
+                                target_bucket["doc_count"] += source_bucket_item_count
+                                merged_item_count += source_bucket_item_count
             else:
                 target["buckets"].append(source_bucket)
             if isinstance(target.get("doc_count"), int):

From d772ec1caff7a5c6c08838fac0176ceacd4cf6a1 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 8 Dec 2024 20:33:29 -0500
Subject: [PATCH 18/78] refactoring /recent_files_summary endpoint

---
 src/encoded/elasticsearch_utils.py  |  8 +++++
 src/encoded/recent_files_summary.py | 52 ++++++++++++++---------------
 2 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py
index ad9c5f92a..b501a05f0 100644
--- a/src/encoded/elasticsearch_utils.py
+++ b/src/encoded/elasticsearch_utils.py
@@ -1,3 +1,4 @@
+from hms_utils.misc_utils import dj
 from copy import deepcopy
 from typing import Any, Callable, List, Optional, Tuple, Union
 
@@ -435,6 +436,13 @@ def normalize_results(aggregation: dict,
         if nested_aggregations := get_nested_aggregations(bucket):
             for nested_aggregation in nested_aggregations:
                 if normalized_aggregation := normalize_results(nested_aggregation, aggregation_key, bucket_value):
+                    if normalized_aggregation["count"] != bucket_item_count:
+                        # Record the original doc_count value from the raw result;
+                        # this may be different (lesser) than the result we aggregate here
+                        # because ElasticSearch aggregations actually are based on unique values.
+                        # TODO: Should we use this as the real count value though it may look wrong.
+                        normalized_aggregation["count_original"] = bucket_item_count
+                        # normalized_aggregation["count"] = bucket_item_count
                     if group_item := find_group_item(group_items, bucket_value):
                         for normalized_aggregation_item in normalized_aggregation["items"]:
                             group_item["items"].append(normalized_aggregation_item)
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index 551001bcf..0d5dd7213 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -156,13 +156,34 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer
 
     raw_results = execute_query(request, query, aggregation_query)
 
-    # Note that the doc_count values returned by ElasticSearch do actually seem to be for unique items,
+    if raw:
+        # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results.
+        # And note that unless we remove the @id property we get redirected to the URL in this field,
+        # for example to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control
+        # &file_status_tracking.released.from=2024-09-30
+        # &file_status_tracking.released.to=2024-12-31&from=0&limit=0'
+        if "@id" in raw_results:
+            del raw_results["@id"]
+        return raw_results
+
+    if not (raw_results := raw_results.get("aggregations")):
+        return {}
+
+    if debug:
+        raw_results = deepcopy(raw_results)  # otherwise may be overwritten by below
+
+    prune_elasticsearch_aggregation_results(raw_results)
+    merged_results = merge_elasticsearch_aggregation_results(
+        raw_results.get(aggregate_by_cell_line_property_name),
+        raw_results.get(aggregate_by_donor_property_name))
+
+    # Note that the doc_count values returned by ElasticSearch DO actually seem to be for UNIQUE items,
     # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3
     # has file_sets.libraries.analytes.samples.sample_sources.cell_line.code values for both HG00438 and HG005),
-    # then it its doc_count will not count it twice. This creates a situation where it might look like the counts
-    # are wrong in this returned merged/normalized result set where the outer item count is less than the sum of
-    # the individual counts withni each sub-group. For example, the below result shows a top-level doc_count of 1
-    # even though there are 2 documents, 1 in the HG00438 group and the other in the HG005 it would be because
+    # then its doc_count will NOT be counted TWICE. This creates a situation where it might LOOK like the counts
+    # are WRONG in the MERGED (via returned merge_elasticsearch_aggregation_results) result set, where the outer
+    # item count may be less than the sum of the individual counts within each sub-group. For example, the below result shows
+    # a top-level doc_count of 1, even though there are 2 documents, 1 in the HG00438 group and the other in the HG005 it would be because
     # the same unique file has a cell_line.code of both HG00438 and HG005.
     # {
    #     "meta": { "field_name": "file_status_tracking.released" },
    #     "buckets": [
    #         {
    #             "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 1,
    #             "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": {
    #                 "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" },
    #                 "buckets": [
    #                     {   "key": "HG00438", "doc_count": 1,
    #                         "release_tracker_description": {
    #                             "meta": { "field_name": "release_tracker_description" },
    #                             "buckets": [ { "key": "WGS ONT PromethION 24 bam", "doc_count": 1 } ]
    #                         }
    #                     },
    #                     {   "key": "HG005", "doc_count": 1,
    #                         "release_tracker_description": {
    #                             "meta": { "field_name": "release_tracker_description" },
    #                             "buckets": [ { "key": "Fiber-seq PacBio Revio bam", "doc_count": 1 } ]
    #                         }
    #                     }
    #                 ]
    #             }
    #         }
    #     ]
    # }
@@ -194,27 +215,6 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer
 
-    if raw:
-        # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results.
- # And note that unless we remove teh @id property we get redirected to the URL in this field, - # for example to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control - # &file_status_tracking.released.from=2024-09-30 - # &file_status_tracking.released.to=2024-12-31&from=0&limit=0' - if "@id" in raw_results: - del raw_results["@id"] - return raw_results - - if not (raw_results := raw_results.get("aggregations")): - return {} - - if debug: - raw_results = deepcopy(raw_results) # otherwise may be overwritten by below - - prune_elasticsearch_aggregation_results(raw_results) - merged_results = merge_elasticsearch_aggregation_results( - raw_results.get(aggregate_by_cell_line_property_name), - raw_results.get(aggregate_by_donor_property_name)) - if debug: additional_properties = { "debug": { From 8ddcd24e4649938e73919c4bd94a3b088e7ae9e9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 9 Dec 2024 17:01:57 -0500 Subject: [PATCH 19/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 31 ++++--- src/encoded/recent_files_summary.py | 132 ++++++++++++++++++++++------ 2 files changed, 124 insertions(+), 39 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index b501a05f0..17e139d8c 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -12,6 +12,7 @@ def create_elasticsearch_aggregation_query(fields: List[str], missing_value: Optional[str] = None, include_missing: bool = False, create_field_aggregation: Optional[Callable] = None, + create_field_filter: Optional[Callable] = None, _toplevel: bool = True) -> dict: """ @@ -115,13 +116,15 @@ def create_elasticsearch_aggregation_query(fields: List[str], extra_nesting_for_date_histogram_and_filter = "date_histogram" in field_aggregation for field in fields: if isinstance(field, str) and (field := field.strip()): + if not (callable(create_field_filter) and isinstance(filter := create_field_filter(field), dict)): + filter = { + "exists": { + "field": f"embedded.{field}.raw" + } + } if not aggregation[property_name].get("filter"): aggregation[property_name]["filter"] = {"bool": {"must": []}} - aggregation[property_name]["filter"]["bool"]["must"].append({ - "exists": { - "field": f"embedded.{field}.raw" - } - }) + aggregation[property_name]["filter"]["bool"]["must"].append(filter) else: extra_nesting_for_date_histogram_and_filter = False @@ -308,7 +311,7 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ target_bucket[source_nested_aggregation_key] = ( source_nested_bucket := source_bucket[source_nested_aggregation_key]) if (source_nested_bucket_item_count := - get_aggregation_total_buckets_doc_count(source_nested_bucket)) > 0: + get_aggregation_total_buckets_doc_count(source_nested_bucket)) > 0: # noqa target_bucket["doc_count"] += source_nested_bucket_item_count merged_item_count += source_nested_bucket_item_count elif merged_item_count > 0: @@ -336,7 +339,8 @@ def merge_results(target: dict, source: dict) -> Tuple[Optional[dict], Optional[ def normalize_elasticsearch_aggregation_results(aggregation: dict, additional_properties: Optional[dict] = None, - remove_empty_items: bool = True) -> dict: + remove_empty_items: bool = True, + retain_original_item_count: bool = False) -> dict: """ Normalizes the given result of an ElasticSearch aggregation query into a more readable/consumable format. 
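A quick aside for readers following patches 15 through 19: the merge these commits keep refining only applies to two aggregation results keyed on the same field; bucket doc_count values are summed when a key occurs on both sides, and source buckets with new keys are adopted wholesale. The following is a minimal, self-contained sketch of just that idea, not the module code itself (which also walks nested aggregations and missing-value buckets); the donor key and the 7/12 counts are taken from the test data above.

    from typing import Optional

    def merge_terms_aggregations(target: dict, source: dict) -> Optional[dict]:
        # Only aggregation results over the same field are mergeable.
        if target.get("meta", {}).get("field_name") != source.get("meta", {}).get("field_name"):
            return None
        target_buckets = {bucket["key"]: bucket for bucket in target["buckets"]}
        for bucket in source["buckets"]:
            if (target_bucket := target_buckets.get(bucket["key"])):
                target_bucket["doc_count"] += bucket["doc_count"]  # same key: sum the counts
            else:
                target["buckets"].append(bucket)  # new key: adopt the source bucket
        return target

    a = {"meta": {"field_name": "donors.display_title"},
         "buckets": [{"key": "DAC_DONOR_COLO829", "doc_count": 7}]}
    b = {"meta": {"field_name": "donors.display_title"},
         "buckets": [{"key": "DAC_DONOR_COLO829", "doc_count": 12}]}
    assert merge_terms_aggregations(a, b)["buckets"][0]["doc_count"] == 19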
@@ -422,7 +426,7 @@ def normalize_results(aggregation: dict,
                           key: Optional[str] = None,
                           value: Optional[str] = None,
                           additional_properties: Optional[dict] = None) -> dict:
-        nonlocal remove_empty_items
+        nonlocal remove_empty_items, retain_original_item_count
 
         if not (aggregation_key := get_aggregation_key(aggregation)):
             return {}
@@ -437,12 +441,11 @@ def normalize_results(aggregation: dict,
             for nested_aggregation in nested_aggregations:
                 if normalized_aggregation := normalize_results(nested_aggregation, aggregation_key, bucket_value):
                     if normalized_aggregation["count"] != bucket_item_count:
-                        # Record the original doc_count value from the raw result;
-                        # this may be different (lesser) than the result we aggregate here
-                        # because ElasticSearch aggregations actually are based on unique values.
-                        # TODO: Should we use this as the real count value though it may look wrong.
-                        normalized_aggregation["count_original"] = bucket_item_count
-                        # normalized_aggregation["count"] = bucket_item_count
+                        if retain_original_item_count is True:
+                            # The original doc_count value from the raw result may be different from (less than)
+                            # the result we aggregate here because ElasticSearch aggregations actually are based
+                            # on unique values. Should we use this as the real count value though it may look wrong?
+                            normalized_aggregation["count"] = bucket_item_count
                     if group_item := find_group_item(group_items, bucket_value):
                         for normalized_aggregation_item in normalized_aggregation["items"]:
                             group_item["items"].append(normalized_aggregation_item)
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index 0d5dd7213..fea7e5811 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -1,7 +1,9 @@
+from hms_utils.misc_utils import dj
 import pyramid
 from copy import deepcopy
 from typing import List, Optional
 from urllib.parse import urlencode
+from dcicutils.misc_utils import normalize_spaces
 from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query
 from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results
 from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results
@@ -55,8 +57,9 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
 
     date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE)
     max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS)
-    include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "inovalues"))
+    include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues"))
     nosort = request_arg_bool(request, "nosort")
+    simplified = request_arg_bool(request, "simplified")
     debug = request_arg_bool(request, "debug")
     debug_query = request_arg_bool(request, "debug_query")
     raw = request_arg_bool(request, "raw")
@@ -124,6 +127,68 @@ def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
             create_field_aggregation=create_field_aggregation)
         return aggregation_query[date_property_name]
 
+    def create_aggregation_query_simplified(aggregation_fields: List[str]) -> dict:
+        global AGGREGATION_NO_VALUE
+        nonlocal date_property_name, max_buckets, include_missing
+        aggregations = []
+        if not isinstance(aggregation_fields, list):
+            aggregation_fields = [aggregation_fields]
+        for item in aggregation_fields:
+            if isinstance(item, str) and (item := item.strip()) and (item not in aggregations):
+                aggregations.append(item)
+        if not aggregations:
+ return {} + def create_field_aggregation(field: str) -> Optional[dict]: # noqa + nonlocal date_property_name + if field == date_property_name: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", + "format": "yyyy-MM", + "missing": "1970-01", + "order": {"_key": "desc"} + } + } + elif field == AGGREGATION_FIELD_CELL_LINE: + script = normalize_spaces(f""" + if (doc['embedded.{AGGREGATION_FIELD_CELL_LINE}.raw'].size() > 0) {{ + return doc['embedded.{AGGREGATION_FIELD_CELL_LINE}.raw'].value; + }} else if (doc['embedded.{AGGREGATION_FIELD_DONOR}.raw'].size() > 0) {{ + return doc['embedded.{AGGREGATION_FIELD_DONOR}.raw'].value; + }} else {{ + return 'unknown'; + }} + """) + return { + "terms": { + "script": { + "source": script, + "lang": "painless" + }, + "size": max_buckets + } + } + def create_field_filter(field: str) -> Optional[dict]: # noqa + if field == AGGREGATION_FIELD_CELL_LINE: + return { + "bool": { + "should": [ + {"exists": { "field": f"embedded.{AGGREGATION_FIELD_CELL_LINE}.raw"}}, + {"exists": { "field": f"embedded.{AGGREGATION_FIELD_DONOR}.raw"}} + ], + "minimum_should_match": 1 + } + } + aggregation_query = create_elasticsearch_aggregation_query( + aggregations, + max_buckets=max_buckets, + missing_value=AGGREGATION_NO_VALUE, + include_missing=include_missing, + create_field_aggregation=create_field_aggregation, + create_field_filter=create_field_filter) + return aggregation_query[date_property_name] + def execute_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: request = snovault_make_search_subreq(request, path=query, method="GET") results = snovault_search(None, request, custom_aggregations=aggregation_query) @@ -131,30 +196,44 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer query = create_query(request) - aggregations_by_cell_line = [ - date_property_name, - AGGREGATION_FIELD_CELL_LINE, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - - aggregations_by_donor = [ - date_property_name, - AGGREGATION_FIELD_DONOR, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - - aggregate_by_cell_line_property_name = "aggregate_by_cell_line" - aggregate_by_donor_property_name = "aggregate_by_donor" - - aggregation_query = { - aggregate_by_cell_line_property_name: create_aggregation_query(aggregations_by_cell_line), - aggregate_by_donor_property_name: create_aggregation_query(aggregations_by_donor) - } + if simplified: + aggregate_by_cell_line_property_name = "aggregate_by_cell_line" + aggregate_by_cell_line = [ + date_property_name, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregation_query = { + aggregate_by_cell_line_property_name: create_aggregation_query_simplified(aggregate_by_cell_line) + } + else: + aggregate_by_cell_line_property_name = "aggregate_by_cell_line" + aggregate_by_cell_line = [ + date_property_name, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregate_by_donor_property_name = "aggregate_by_donor" + aggregate_by_donor = [ + date_property_name, + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregation_query = { + aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line), + aggregate_by_donor_property_name: create_aggregation_query(aggregate_by_donor) + } if debug_query: return {"query": query, "aggregation_query": aggregation_query} + dj(aggregation_query) + import pdb ; pdb.set_trace() # noqa + pass raw_results = execute_query(request, query, 
aggregation_query) + dj(raw_results) + import pdb ; pdb.set_trace() # noqa + pass if raw: # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. @@ -173,9 +252,12 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer raw_results = deepcopy(raw_results) # otherwise may be overwritten by below prune_elasticsearch_aggregation_results(raw_results) - merged_results = merge_elasticsearch_aggregation_results( - raw_results.get(aggregate_by_cell_line_property_name), - raw_results.get(aggregate_by_donor_property_name)) + + if simplified: + merged_results = raw_results.get(aggregate_by_cell_line_property_name) + else: + merged_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), + raw_results.get(aggregate_by_donor_property_name)) # Note that the doc_count values returned by ElasticSearch DO actually seem to be for UNIQUE items, # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3 @@ -227,8 +309,8 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer else: additional_properties = None - normalized_results = normalize_elasticsearch_aggregation_results( - merged_results, additional_properties=additional_properties) + normalized_results = normalize_elasticsearch_aggregation_results(merged_results, + additional_properties=additional_properties) if nosort is not True: # We can sort on the aggregations by level; outermost/left to innermost/right. From 81ae204555eb6f7c3a0e6b7cb46a6e25c9b29f5f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 9 Dec 2024 17:52:22 -0500 Subject: [PATCH 20/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 62 ++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index fea7e5811..2decf5929 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -58,8 +58,9 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) + favor_donor = request_arg_bool(request, "favor_donor") nosort = request_arg_bool(request, "nosort") - simplified = request_arg_bool(request, "simplified") + legacy = request_arg_bool(request, "legacy") debug = request_arg_bool(request, "debug") debug_query = request_arg_bool(request, "debug_query") raw = request_arg_bool(request, "raw") @@ -96,7 +97,7 @@ def create_query(request: pyramid.request.Request) -> str: query_string = query_string.replace("=%21", "%21=") return f"/search/?{query_string}" - def create_aggregation_query(aggregation_fields: List[str]) -> dict: + def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: global AGGREGATION_NO_VALUE nonlocal date_property_name, max_buckets, include_missing aggregations = [] @@ -127,9 +128,9 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa create_field_aggregation=create_field_aggregation) return aggregation_query[date_property_name] - def create_aggregation_query_simplified(aggregation_fields: List[str]) -> dict: + def create_aggregation_query(aggregation_fields: List[str]) -> dict: global 
AGGREGATION_NO_VALUE - nonlocal date_property_name, max_buckets, include_missing + nonlocal date_property_name, max_buckets, include_missing, favor_donor aggregations = [] if not isinstance(aggregation_fields, list): aggregation_fields = [aggregation_fields] @@ -151,11 +152,23 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa } } elif field == AGGREGATION_FIELD_CELL_LINE: + # This specializes the aggregation query to group first by the cell-line field, + # and then alternatively (if a cell-line field does not exist) by the donor field. + # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively + # look first for the donor field and then secondarily for the cell-line field. + if favor_donor: + field_one = AGGREGATION_FIELD_DONOR + field_two = AGGREGATION_FIELD_CELL_LINE + else: + field_one = AGGREGATION_FIELD_CELL_LINE + field_two = AGGREGATION_FIELD_DONOR + # Note how we prefix the result with the aggregation field name; + # this is so later we can tell which grouping/field was matched. script = normalize_spaces(f""" - if (doc['embedded.{AGGREGATION_FIELD_CELL_LINE}.raw'].size() > 0) {{ - return doc['embedded.{AGGREGATION_FIELD_CELL_LINE}.raw'].value; - }} else if (doc['embedded.{AGGREGATION_FIELD_DONOR}.raw'].size() > 0) {{ - return doc['embedded.{AGGREGATION_FIELD_DONOR}.raw'].value; + if (doc['embedded.{field_one}.raw'].size() > 0) {{ + return '{field_one}:' + doc['embedded.{field_one}.raw'].value; + }} else if (doc['embedded.{field_two}.raw'].size() > 0) {{ + return '{field_two}:' + doc['embedded.{field_two}.raw'].value; }} else {{ return 'unknown'; }} @@ -194,9 +207,24 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer results = snovault_search(None, request, custom_aggregations=aggregation_query) return results + def fixup_names_values(normalized_results: dict) -> None: + if isinstance(normalized_results, dict): + if (isinstance(name := normalized_results.get("name"), str) and + isinstance(value := normalized_results.get("value"), str)): + if (colon := value.find(":")) > 0: + if (prefix := value[0:colon]) == AGGREGATION_FIELD_CELL_LINE: + normalized_results["name"] = AGGREGATION_FIELD_CELL_LINE + normalized_results["value"] = value[colon + 1:] + elif prefix == AGGREGATION_FIELD_DONOR: + normalized_results["name"] = AGGREGATION_FIELD_DONOR + normalized_results["value"] = value[colon + 1:] + if isinstance(items := normalized_results.get("items"), list): + for element in items: + fixup_names_values(element) + query = create_query(request) - if simplified: + if not legacy: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" aggregate_by_cell_line = [ date_property_name, @@ -204,7 +232,7 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer AGGREGATION_FIELD_FILE_DESCRIPTOR ] aggregation_query = { - aggregate_by_cell_line_property_name: create_aggregation_query_simplified(aggregate_by_cell_line) + aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line) } else: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" @@ -220,20 +248,14 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer AGGREGATION_FIELD_FILE_DESCRIPTOR ] aggregation_query = { - aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line), - aggregate_by_donor_property_name: create_aggregation_query(aggregate_by_donor) + aggregate_by_cell_line_property_name: 
create_aggregation_query_legacy(aggregate_by_cell_line), + aggregate_by_donor_property_name: create_aggregation_query_legacy(aggregate_by_donor) } if debug_query: return {"query": query, "aggregation_query": aggregation_query} - dj(aggregation_query) - import pdb ; pdb.set_trace() # noqa - pass raw_results = execute_query(request, query, aggregation_query) - dj(raw_results) - import pdb ; pdb.set_trace() # noqa - pass if raw: # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. @@ -253,7 +275,7 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer prune_elasticsearch_aggregation_results(raw_results) - if simplified: + if not legacy: merged_results = raw_results.get(aggregate_by_cell_line_property_name) else: merged_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), @@ -311,6 +333,8 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer normalized_results = normalize_elasticsearch_aggregation_results(merged_results, additional_properties=additional_properties) + if not legacy: + fixup_names_values(normalized_results) if nosort is not True: # We can sort on the aggregations by level; outermost/left to innermost/right. From 021f0b052e1e8176f5321c02cb173f35a0141a66 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 9 Dec 2024 17:53:17 -0500 Subject: [PATCH 21/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 1 - src/encoded/recent_files_summary.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 17e139d8c..55a03ddf2 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -1,4 +1,3 @@ -from hms_utils.misc_utils import dj from copy import deepcopy from typing import Any, Callable, List, Optional, Tuple, Union diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 2decf5929..d76f8e7f4 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,4 +1,3 @@ -from hms_utils.misc_utils import dj import pyramid from copy import deepcopy from typing import List, Optional From 099b525f7cc65ef2f97c40a202fc1e14e13d3172 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 11:46:02 -0500 Subject: [PATCH 22/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 14 +++ src/encoded/recent_files_summary.py | 157 ++++++++++++++++++---------- 2 files changed, 116 insertions(+), 55 deletions(-) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index f03d09328..146a45ee3 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -3,6 +3,7 @@ from dateutil.relativedelta import relativedelta import pyramid from typing import Any, List, Optional, Tuple, Union +from urllib.parse import urlencode from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string @@ -176,3 +177,16 @@ def _add_months(day: Optional[Union[datetime, date, str]] = None, nmonths: int = if isinstance(nmonths, int) and (nmonths != 0): return day + relativedelta(months=nmonths) return day + + +def create_query_string(query_arguments: dict, base: Optional[str] = None) -> str: + query_string = "" + if isinstance(query_arguments, dict): + if query_arguments := {key: value for key, value in query_arguments.items() if value is not None}: + query_string = 
urlencode(query_arguments, True)
+            # Hackishness to change "=!" to "!=" in the query_string value, e.g. to turn this:
+            # {"data_category": ["!Quality Control"]} into this: data_category%21=Quality+Control
+            query_string = query_string.replace("=%21", "%21=")
+    if isinstance(base, str) and base:
+        query_string = f"{base}?{query_string}" if query_string else base
+    return query_string
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index d76f8e7f4..c3983c07d 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -1,14 +1,13 @@
 import pyramid
 from copy import deepcopy
 from typing import List, Optional
-from urllib.parse import urlencode
 from dcicutils.misc_utils import normalize_spaces
 from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query
 from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results
 from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results
 from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results
 from encoded.elasticsearch_utils import sort_normalized_aggregation_results
-from encoded.endpoint_utils import parse_date_range_related_arguments
+from encoded.endpoint_utils import create_query_string, parse_date_range_related_arguments
 from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int
 from snovault.search.search import search as snovault_search
 from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq
@@ -27,6 +26,8 @@
 AGGREGATION_MAX_BUCKETS = 100
 AGGREGATION_NO_VALUE = "No value"
 
+BASE_SEARCH_QUERY = "/search/"
+
 def recent_files_summary(request: pyramid.request.Request) -> dict:
     """
     This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return,
@@ -59,11 +66,13 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
 
     date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE)
     max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS)
+    include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True))
     include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues"))
     favor_donor = request_arg_bool(request, "favor_donor")
     nosort = request_arg_bool(request, "nosort")
     legacy = request_arg_bool(request, "legacy")
     debug = request_arg_bool(request, "debug")
     debug_query = request_arg_bool(request, "debug_query")
     raw = request_arg_bool(request, "raw")
 
-    def create_query(request: pyramid.request.Request) -> str:
+    def create_base_query_arguments(request: pyramid.request.Request) -> dict:
 
-        global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES, QUERY_RECENT_MONTHS
-        nonlocal date_property_name
+        global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES
 
         types = request_args(request, "type", QUERY_FILE_TYPES)
         statuses = request_args(request, "status", QUERY_FILE_STATUSES)
         categories = request_args(request, "category", QUERY_FILE_CATEGORIES)
+
+        base_query_arguments = {
+            "type": types if types else None,
+            "status": statuses if statuses else None,
+            "data_category": categories if categories else None
+        }
+
+        return {key: value for key, value in base_query_arguments.items() if value is not None}
+
+    def create_query(request: pyramid.request.Request, base_query_arguments: Optional[dict] = None) -> str:
+
+        global BASE_SEARCH_QUERY,
QUERY_RECENT_MONTHS, QUERY_INCLUDE_CURRENT_MONTH + nonlocal date_property_name + recent_months = request_arg_int(request, "nmonths", request_arg_int(request, "months", QUERY_RECENT_MONTHS)) from_date = request_arg(request, "from_date") thru_date = request_arg(request, "thru_date") @@ -80,56 +95,23 @@ def create_query(request: pyramid.request.Request) -> str: from_date, thru_date = parse_date_range_related_arguments(from_date, thru_date, nmonths=recent_months, include_current_month=include_current_month, strings=True) - query_parameters = { - "type": types if types else None, - "status": statuses if statuses else None, - "data_category": categories if categories else None, + query_arguments = { f"{date_property_name}.from": from_date if from_date else None, f"{date_property_name}.to": thru_date if from_date else None, "from": 0, "limit": 0 } - query_parameters = {key: value for key, value in query_parameters.items() if value is not None} - query_string = urlencode(query_parameters, True) - # Hackishness to change "=!" to "!=" in search_param_lists value for e.g. to turn this in the - # query_parameters above "data_category": ["!Quality Control"] into: data_category&21=Quality+Control - query_string = query_string.replace("=%21", "%21=") - return f"/search/?{query_string}" - def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: - global AGGREGATION_NO_VALUE - nonlocal date_property_name, max_buckets, include_missing - aggregations = [] - if not isinstance(aggregation_fields, list): - aggregation_fields = [aggregation_fields] - for item in aggregation_fields: - if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): - aggregations.append(item) - if not aggregations: - return {} - def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal date_property_name - if field == date_property_name: - return { - "date_histogram": { - "field": f"embedded.{field}", - "calendar_interval": "month", - "format": "yyyy-MM", - "missing": "1970-01", - "order": {"_key": "desc"} - } - } - aggregation_query = create_elasticsearch_aggregation_query( - aggregations, - max_buckets=max_buckets, - missing_value=AGGREGATION_NO_VALUE, - include_missing=include_missing, - create_field_aggregation=create_field_aggregation) - return aggregation_query[date_property_name] + if isinstance(base_query_arguments, dict): + query_arguments = {**base_query_arguments, **query_arguments} + + return f"{BASE_SEARCH_QUERY}?{create_query_string(query_arguments)}" def create_aggregation_query(aggregation_fields: List[str]) -> dict: + global AGGREGATION_NO_VALUE nonlocal date_property_name, max_buckets, include_missing, favor_donor + aggregations = [] if not isinstance(aggregation_fields, list): aggregation_fields = [aggregation_fields] @@ -138,6 +120,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict: aggregations.append(item) if not aggregations: return {} + def create_field_aggregation(field: str) -> Optional[dict]: # noqa nonlocal date_property_name if field == date_property_name: @@ -162,7 +145,8 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa field_one = AGGREGATION_FIELD_CELL_LINE field_two = AGGREGATION_FIELD_DONOR # Note how we prefix the result with the aggregation field name; - # this is so later we can tell which grouping/field was matched. + # this is so later we can tell which grouping/field was matched; + # see fixup_names_values_for_normalized_results for this fixup. 
script = normalize_spaces(f""" if (doc['embedded.{field_one}.raw'].size() > 0) {{ return '{field_one}:' + doc['embedded.{field_one}.raw'].value; @@ -181,6 +165,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa "size": max_buckets } } + def create_field_filter(field: str) -> Optional[dict]: # noqa if field == AGGREGATION_FIELD_CELL_LINE: return { @@ -192,6 +177,7 @@ def create_field_filter(field: str) -> Optional[dict]: # noqa "minimum_should_match": 1 } } + aggregation_query = create_elasticsearch_aggregation_query( aggregations, max_buckets=max_buckets, @@ -199,6 +185,43 @@ def create_field_filter(field: str) -> Optional[dict]: # noqa include_missing=include_missing, create_field_aggregation=create_field_aggregation, create_field_filter=create_field_filter) + + return aggregation_query[date_property_name] + + def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: + + global AGGREGATION_NO_VALUE + nonlocal date_property_name, max_buckets, include_missing + + aggregations = [] + if not isinstance(aggregation_fields, list): + aggregation_fields = [aggregation_fields] + for item in aggregation_fields: + if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): + aggregations.append(item) + if not aggregations: + return {} + + def create_field_aggregation(field: str) -> Optional[dict]: # noqa + nonlocal date_property_name + if field == date_property_name: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", + "format": "yyyy-MM", + "missing": "1970-01", + "order": {"_key": "desc"} + } + } + + aggregation_query = create_elasticsearch_aggregation_query( + aggregations, + max_buckets=max_buckets, + missing_value=AGGREGATION_NO_VALUE, + include_missing=include_missing, + create_field_aggregation=create_field_aggregation) + return aggregation_query[date_property_name] def execute_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: @@ -206,10 +229,10 @@ def execute_query(request: pyramid.request.Request, query: str, aggregation_quer results = snovault_search(None, request, custom_aggregations=aggregation_query) return results - def fixup_names_values(normalized_results: dict) -> None: + def fixup_names_values_for_normalized_results(normalized_results: dict) -> None: + nonlocal include_queries if isinstance(normalized_results, dict): - if (isinstance(name := normalized_results.get("name"), str) and - isinstance(value := normalized_results.get("value"), str)): + if isinstance(value := normalized_results.get("value"), str): if (colon := value.find(":")) > 0: if (prefix := value[0:colon]) == AGGREGATION_FIELD_CELL_LINE: normalized_results["name"] = AGGREGATION_FIELD_CELL_LINE @@ -219,9 +242,31 @@ def fixup_names_values(normalized_results: dict) -> None: normalized_results["value"] = value[colon + 1:] if isinstance(items := normalized_results.get("items"), list): for element in items: - fixup_names_values(element) + fixup_names_values_for_normalized_results(element) + + def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None: + global BASE_SEARCH_QUERY + nonlocal date_property_name + if isinstance(normalized_results, dict): + if not (name := normalized_results.get("name")): + normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) + elif value := normalized_results.get("value"): + if name == date_property_name: + # Special case for date value which is just year/month (e.g. 
2024-12);
+                    # we want to turn this into a date range query for the month.
+                    from_date, thru_date = parse_date_range_related_arguments(value, None, strings=True)
+                    if from_date and thru_date:
+                        base_query_arguments = {**base_query_arguments,
+                                                f"{name}.from": from_date, f"{name}.to": thru_date}
+                else:
+                    base_query_arguments = {**base_query_arguments, name: value}
+            normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY)
+        if isinstance(items := normalized_results.get("items"), list):
+            for element in items:
+                add_queries_to_normalized_results(element, base_query_arguments)
 
-    query = create_query(request)
+    base_query_arguments = create_base_query_arguments(request)
+    query = create_query(request, base_query_arguments)
 
     if not legacy:
         aggregate_by_cell_line_property_name = "aggregate_by_cell_line"
@@ -299,7 +330,9 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
     # has file_sets.libraries.analytes.samples.sample_sources.cell_line.code values for both HG00438 and HG005),
     # then its doc_count will NOT be counted TWICE. This creates a situation where it might LOOK like the counts
     # are WRONG in the MERGED (via returned merge_elasticsearch_aggregation_results) result set, where the outer
-    # item count may be less than the sum of the individual counts within each sub-group. For example, the below result shows
-    # a top-level doc_count of 1, even though there are 2 documents, 1 in the HG00438 group and the other in the HG005 it would be because
-    # the same unique file has a cell_line.code of both HG00438 and HG005.
+    # item count may be less than the sum of the individual counts within each sub-group. For example, the below result
+    # shows a top-level doc_count of 1, even though there are 2 documents, 1 in the HG00438 group and the other
+    # in the HG005 group; this would be because the same unique file has a cell_line.code of both HG00438 and HG005.
     # {
     #   "meta": { "field_name": "file_status_tracking.released" },
@@ -333,7 +378,9 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
 
     normalized_results = normalize_elasticsearch_aggregation_results(merged_results,
                                                                      additional_properties=additional_properties)
     if not legacy:
-        fixup_names_values(normalized_results)
+        fixup_names_values_for_normalized_results(normalized_results)
+    if include_queries:
+        add_queries_to_normalized_results(normalized_results, base_query_arguments)
+    normalized_results["query"] = query
 
     if nosort is not True:
         # We can sort on the aggregations by level; outermost/left to innermost/right.
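For reference while reading patch 22: the custom aggregation that create_aggregation_query assembles at this point in the series is, roughly, a monthly date_histogram whose nested terms aggregation is driven by a painless script that prefixes each value with the name of the field it came from; that prefix is exactly what fixup_names_values_for_normalized_results later splits back apart. A sketch of the assembled shape follows (field names from the constants above; a max_buckets of 100 is assumed, and the real builder may add filters and extra nesting):

    CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code"
    DONOR = "donors.display_title"

    # Painless script: prefer the cell-line field, fall back to the donor field,
    # tagging the returned value with the name of the field that matched.
    script = (f"if (doc['embedded.{CELL_LINE}.raw'].size() > 0)"
              f" {{ return '{CELL_LINE}:' + doc['embedded.{CELL_LINE}.raw'].value; }}"
              f" else if (doc['embedded.{DONOR}.raw'].size() > 0)"
              f" {{ return '{DONOR}:' + doc['embedded.{DONOR}.raw'].value; }}"
              f" else {{ return 'unknown'; }}")

    aggregation_query = {
        "aggregate_by_cell_line": {
            "date_histogram": {
                "field": "embedded.file_status_tracking.released",
                "calendar_interval": "month", "format": "yyyy-MM",
                "missing": "1970-01", "order": {"_key": "desc"}
            },
            "aggs": {
                CELL_LINE: {
                    "terms": {"script": {"source": script, "lang": "painless"},
                              "size": 100}
                }
            }
        }
    }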
From eac4384965bfccc0cb5422621d3350e82728a81c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 12:52:37 -0500 Subject: [PATCH 23/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 33 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index c3983c07d..96958c05b 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -97,9 +97,7 @@ def create_query(request: pyramid.request.Request, base_query_arguments: Optiona strings=True) query_arguments = { f"{date_property_name}.from": from_date if from_date else None, - f"{date_property_name}.to": thru_date if from_date else None, - "from": 0, - "limit": 0 + f"{date_property_name}.to": thru_date if from_date else None } if isinstance(base_query_arguments, dict): @@ -224,7 +222,8 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa return aggregation_query[date_property_name] - def execute_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: + def execute_aggregation_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: + query += "&limit=0" # needed for aggregation query to not return the actual/individual item results. request = snovault_make_search_subreq(request, path=query, method="GET") results = snovault_search(None, request, custom_aggregations=aggregation_query) return results @@ -248,18 +247,17 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum global BASE_SEARCH_QUERY nonlocal date_property_name if isinstance(normalized_results, dict): - if not (name := normalized_results.get("name")): - normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) - elif value := normalized_results.get("value"): - if name == date_property_name: - # Special case for date value which is just year/month (e.g. 2024-12); - # we want to turn this into a date range query for the month. - from_date, thru_date = parse_date_range_related_arguments(value, None, strings=True) - if from_date and thru_date: - base_query_arguments = {**base_query_arguments, - f"{name}.from": from_date, f"{name}.to": thru_date} - else: - base_query_arguments = {**base_query_arguments, name: value} + if name := normalized_results.get("name"): + if value := normalized_results.get("value"): + if name == date_property_name: + # Special case for date value which is just year/month (e.g. 2024-12); + # we want to turn this into a date range query for the month. + from_date, thru_date = parse_date_range_related_arguments(value, None, strings=True) + if from_date and thru_date: + base_query_arguments = {**base_query_arguments, + f"{name}.from": from_date, f"{name}.to": thru_date} + else: + base_query_arguments = {**base_query_arguments, name: value} normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) if isinstance(items := normalized_results.get("items"), list): for element in items: @@ -299,7 +297,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum if debug_query: return {"query": query, "aggregation_query": aggregation_query} - raw_results = execute_query(request, query, aggregation_query) + raw_results = execute_aggregation_query(request, query, aggregation_query) if raw: # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. 
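A small worked illustration of the month-to-range special case in add_queries_to_normalized_results above; the widening of 2024-12 to its first and last day is the expected behavior of parse_date_range_related_arguments, shown here as assumed literals rather than a computed result:

    name, value = "file_status_tracking.released", "2024-12"
    base_query_arguments = {"type": "OutputFile", "status": "released"}
    # parse_date_range_related_arguments("2024-12", None, strings=True) is expected
    # to expand the single month to its first and last day:
    from_date, thru_date = "2024-12-01", "2024-12-31"
    query_arguments = {**base_query_arguments,
                       f"{name}.from": from_date, f"{name}.to": thru_date}
    # create_query_string(query_arguments, "/search/") then yields:
    # /search/?type=OutputFile&status=released&file_status_tracking.released.from=2024-12-01
    #         &file_status_tracking.released.to=2024-12-31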
@@ -381,6 +379,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum fixup_names_values_for_normalized_results(normalized_results) if include_queries: add_queries_to_normalized_results(normalized_results, base_query_arguments) + normalized_results["query"] = query if nosort is not True: # We can sort on the aggregations by level; outermost/left to innermost/right. From 6f244875b1f23ebeca4d820b16b7e3da71841ede Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 18:46:17 -0500 Subject: [PATCH 24/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 34 +++++++ src/encoded/recent_files_summary.py | 137 +++++++++++++++++++++++++++- 2 files changed, 168 insertions(+), 3 deletions(-) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 146a45ee3..83566a5d5 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -190,3 +190,37 @@ def create_query_string(query_arguments: dict, base: Optional[str] = None) -> st if isinstance(base, str) and base: query_string = f"{base}?{query_string}" if query_string else base return query_string + + +def get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: bool = False) -> List[Any]: + """ + TODO: Move this to dcicutils. Maybe much of the above too. + Returns the values of the given property name within the given dictionary as a list, where the + given property name can be a dot-separated list of property names, which indicate a path into + nested dictionaries within the given dictionary; and - where if any of the elements within + the path are lists then we iterate through each, collecting the values for each and including + each within the list of returned values. + """ + if isinstance(data, dict) and isinstance(name, str) and name: + if keys := name.split("."): + nkeys = len(keys) ; key_index_max = nkeys - 1 # noqa + for key_index in range(nkeys): + if (value := data.get(keys[key_index], None)) is not None: + if key_index == key_index_max: + return [value] + elif isinstance(value, dict): + data = value + continue + elif isinstance(value, list) and value and ((sub_key_index := key_index + 1) < nkeys): + sub_key = ".".join(keys[sub_key_index:]) + values = [] + for element in value: + if isinstance(element_value := get_properties(element, sub_key), list): + for element_value_item in element_value: + if (element_value_item is not None) and (element_value_item not in values): + values.append(element_value_item) + elif (element_value is not None) and (element_value not in values): + values.append(element_value) + return sorted(values) if (sort is True) else values + break + return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback]) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 96958c05b..13b4ed3a1 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -19,10 +19,17 @@ QUERY_INCLUDE_CURRENT_MONTH = True AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released" +AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code" AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" AGGREGATION_FIELD_DONOR = "donors.display_title" AGGREGATION_FIELD_FILE_DESCRIPTOR = "release_tracker_description" +AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR = [ + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_CELL_LINE, + 
AGGREGATION_FIELD_DONOR +] + AGGREGATION_MAX_BUCKETS = 100 AGGREGATION_NO_VALUE = "No value" @@ -59,11 +66,13 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True)) include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) + nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) favor_donor = request_arg_bool(request, "favor_donor") nosort = request_arg_bool(request, "nosort") legacy = request_arg_bool(request, "legacy") debug = request_arg_bool(request, "debug") debug_query = request_arg_bool(request, "debug_query") + troubleshoot = request_arg_bool(request, "troubleshoot") raw = request_arg_bool(request, "raw") def create_base_query_arguments(request: pyramid.request.Request) -> dict: @@ -120,7 +129,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict: return {} def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal date_property_name + nonlocal date_property_name, nomixtures if field == date_property_name: return { "date_histogram": { @@ -132,6 +141,44 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa } } elif field == AGGREGATION_FIELD_CELL_LINE: + # This specializes the aggregation query to group first by the cell-line field, + # and then alternatively (if a cell-line field does not exist) by the donor field. + # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively + # look first for the donor field and then secondarily for the cell-line field. + aggregation_field_grouping = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR) + if nomixtures: + aggregation_field_grouping.remove(AGGREGATION_FIELD_CELL_MIXTURE) + if favor_donor: + aggregation_field_grouping.remove(AGGREGATION_FIELD_DONOR) + aggregation_field_grouping.insert(0, AGGREGATION_FIELD_DONOR) + # Note how we prefix the result with the aggregation field name; + # this is so later we can tell which grouping/field was matched; + # see fixup_names_values_for_normalized_results for this fixup. + script = "" + for aggregation_field_grouping_index in range(len(aggregation_field_grouping)): + aggregation_field = aggregation_field_grouping[aggregation_field_grouping_index] + if_or_else_if = "if" if aggregation_field_grouping_index == 0 else "else if" + script += f""" + {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ + return '{aggregation_field}:' + doc['embedded.{aggregation_field}.raw'].value; + }} + """ + script += f""" + else {{ + return 'unknown'; + }} + """ + return { + "terms": { + "script": { + "source": script, + "lang": "painless" + }, + "size": max_buckets + } + } + elif False and (field == AGGREGATION_FIELD_CELL_LINE): + # OBSOLETE: See above. # This specializes the aggregation query to group first by the cell-line field, # and then alternatively (if a cell-line field does not exist) by the donor field. 
# For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively
             # look first for the donor field and then secondarily for the cell-line field.
@@ -165,6 +212,13 @@ def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
             }
 
     def create_field_filter(field: str) -> Optional[dict]:  # noqa
+        if field == AGGREGATION_FIELD_CELL_LINE:
+            filter = {"bool": {"should": [], "minimum_should_match": 1}}
+            for aggregation_field in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR:
+                filter["bool"]["should"].append({"exists": { "field": f"embedded.{aggregation_field}.raw"}})
+            return filter
+
+    def obsolete_create_field_filter(field: str) -> Optional[dict]:  # noqa
         if field == AGGREGATION_FIELD_CELL_LINE:
             return {
                 "bool": {
@@ -229,7 +283,19 @@ def execute_aggregation_query(request: pyramid.request.Request, query: str, aggr
         return results
 
     def fixup_names_values_for_normalized_results(normalized_results: dict) -> None:
-        nonlocal include_queries
+        global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR
+        if isinstance(normalized_results, dict):
+            if isinstance(value := normalized_results.get("value"), str):
+                if ((separator_index := value.find(":")) > 0) and (value_prefix := value[0:separator_index]):
+                    if value_prefix in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR:
+                        if value := value[separator_index + 1:]:
+                            normalized_results["name"] = value_prefix
+                            normalized_results["value"] = value
+        if isinstance(items := normalized_results.get("items"), list):
+            for element in items:
+                fixup_names_values_for_normalized_results(element)
+
+    def obsolete_fixup_names_values_for_normalized_results(normalized_results: dict) -> None:
         if isinstance(normalized_results, dict):
             if isinstance(value := normalized_results.get("value"), str):
                 if (colon := value.find(":")) > 0:
@@ -381,11 +447,76 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
             add_queries_to_normalized_results(normalized_results, base_query_arguments)
         normalized_results["query"] = query
 
-    if nosort is not True:
+    if not nosort:
         # We can sort on the aggregations by level; outermost/left to innermost/right.
         # In our case the outermost is the date aggregation so sort that by the key value,
         # e.g. 2024-12, descending; and the rest of the inner levels by the default
         # sorting which is by aggregation count descending and secondarily by the key value.
sort_normalized_aggregation_results(normalized_results, ["-key", "default"]) + if troubleshoot: + add_info_for_troubleshooting(normalized_results, request) + + return normalized_results + + +def add_info_for_troubleshooting(normalized_results: dict, request: pyramid.request.Request) -> None: + + from encoded.endpoint_utils import get_properties, parse_datetime_string + + def get_files(files, property_name, property_value, map_property_value = None): + found = [] + for file in files: + if properties := get_properties(file, property_name): + if callable(map_property_value): + mapped_properties = [] + for value in properties: + mapped_properties.append(map_property_value(value)) + properties = mapped_properties + if property_value in properties: + found.append(file) + return found + + def map_date_property_value(value): + if date_value := parse_datetime_string(value): + return f"{date_value.year}-{date_value.month:02}" + return value + + def annotate_with_uuids(normalized_results: dict): + aggregation_fields = [ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + query = normalized_results.get("query") + files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"] + for first_item in normalized_results["items"]: + first_property_name = first_item["name"] + first_property_value = first_item["value"] + for second_item in first_item["items"]: + second_property_name = second_item["name"] + second_property_value = second_item["value"] + for third_item in second_item["items"]: + third_property_name = third_item["name"] + third_property_value = third_item["value"] + if first_files := get_files(files, first_property_name, first_property_value, + map_property_value=map_date_property_value): + if second_files := get_files(first_files, second_property_name, second_property_value): + if third_files := get_files(second_files, third_property_name, third_property_value): + for file in third_files: + if isinstance(uuid := file.get("uuid"), str): + if not third_item.get("uuids"): + third_item["uuids"] = [] + uuid_record = {"uuid": uuid} + for aggregation_field in aggregation_fields: + uuid_record[aggregation_field] = \ + ", ".join(get_properties(file, aggregation_field)) + third_item["uuids"].append(uuid_record) + + try: + annotate_with_uuids(normalized_results) + except Exception: + pass From 1a362632dd8276681294f922995069d684befe42 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 18:53:45 -0500 Subject: [PATCH 25/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 13b4ed3a1..1469ec213 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -488,6 +488,8 @@ def annotate_with_uuids(normalized_results: dict): AGGREGATION_FIELD_RELEASE_DATE, AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, + "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", + "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_FILE_DESCRIPTOR ] From 828ae52bd37b9e8aa62dd9c49c73bb0151bf27b1 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 19:04:34 -0500 Subject: [PATCH 26/78] refactoring /recent_files_summary endpoint --- 
src/encoded/recent_files_summary.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 1469ec213..d7e92d314 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -483,6 +483,13 @@ def map_date_property_value(value): return f"{date_value.year}-{date_value.month:02}" return value + def contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> bool: + for uuid_record in uuid_records: + if id(uuid_record) != ignore_uuid_record_id: + if uuid_record.get("uuid") == uuid: + return True + return False + def annotate_with_uuids(normalized_results: dict): aggregation_fields = [ AGGREGATION_FIELD_RELEASE_DATE, @@ -493,6 +500,7 @@ def annotate_with_uuids(normalized_results: dict): AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_FILE_DESCRIPTOR ] + uuid_records = [] query = normalized_results.get("query") files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"] for first_item in normalized_results["items"]: @@ -517,6 +525,11 @@ def annotate_with_uuids(normalized_results: dict): uuid_record[aggregation_field] = \ ", ".join(get_properties(file, aggregation_field)) third_item["uuids"].append(uuid_record) + uuid_records.append(uuid_record) + + for uuid_record in uuid_records: + if contains_uuid(uuid_records, uuid_record["uuid"], id(uuid_record)): + uuid_record["duplicative"] = True try: annotate_with_uuids(normalized_results) From df7c33cd19aad9cd253e3863bd46979cd4398a1c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 19:09:25 -0500 Subject: [PATCH 27/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index d7e92d314..fadde3edf 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -483,13 +483,21 @@ def map_date_property_value(value): return f"{date_value.year}-{date_value.month:02}" return value - def contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> bool: + def obsolete_contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> bool: for uuid_record in uuid_records: if id(uuid_record) != ignore_uuid_record_id: if uuid_record.get("uuid") == uuid: return True return False + def contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> int: + count = 0 + for uuid_record in uuid_records: + if id(uuid_record) != ignore_uuid_record_id: + if uuid_record.get("uuid") == uuid: + count += 1 + return count + def annotate_with_uuids(normalized_results: dict): aggregation_fields = [ AGGREGATION_FIELD_RELEASE_DATE, @@ -528,8 +536,8 @@ def annotate_with_uuids(normalized_results: dict): uuid_records.append(uuid_record) for uuid_record in uuid_records: - if contains_uuid(uuid_records, uuid_record["uuid"], id(uuid_record)): - uuid_record["duplicative"] = True + if (count := contains_uuid(uuid_records, uuid_record["uuid"], id(uuid_record))) > 0: + uuid_record["duplicative"] = count try: annotate_with_uuids(normalized_results) From fdbe871a710a304f21f0d0129f1a88e51884936a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 19:13:50 -0500 Subject: [PATCH 28/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) 
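A note on the duplicate-uuid bookkeeping refined over the next few patches: the
same file may legitimately land in more than one aggregation bucket, so its
uuid record can appear multiple times. The count_uuid/duplicative logic this
converges on could equivalently be done in one pass with collections.Counter;
a hedged sketch, with mark_duplicates as a hypothetical name:

    from collections import Counter

    def mark_duplicates(uuid_records: list) -> None:
        # Count every uuid once up front (one pass), then annotate each record
        # whose uuid occurs more than once with the total occurrence count.
        counts = Counter(record["uuid"] for record in uuid_records)
        for record in uuid_records:
            if counts[record["uuid"]] > 1:
                record["duplicative"] = counts[record["uuid"]]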
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index fadde3edf..16b535779 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -483,19 +483,11 @@ def map_date_property_value(value): return f"{date_value.year}-{date_value.month:02}" return value - def obsolete_contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> bool: - for uuid_record in uuid_records: - if id(uuid_record) != ignore_uuid_record_id: - if uuid_record.get("uuid") == uuid: - return True - return False - - def contains_uuid(uuid_records: List[dict], uuid: str, ignore_uuid_record_id: int) -> int: + def count_uuid(uuid_records: List[dict], uuid: str) -> int: count = 0 for uuid_record in uuid_records: - if id(uuid_record) != ignore_uuid_record_id: - if uuid_record.get("uuid") == uuid: - count += 1 + if uuid_record.get("uuid") == uuid: + count += 1 return count def annotate_with_uuids(normalized_results: dict): @@ -530,13 +522,13 @@ def annotate_with_uuids(normalized_results: dict): third_item["uuids"] = [] uuid_record = {"uuid": uuid} for aggregation_field in aggregation_fields: - uuid_record[aggregation_field] = \ - ", ".join(get_properties(file, aggregation_field)) + aggregation_values = ", ".join(get_properties(file, aggregation_field)) + uuid_record[aggregation_field] = aggregation_values or None third_item["uuids"].append(uuid_record) uuid_records.append(uuid_record) for uuid_record in uuid_records: - if (count := contains_uuid(uuid_records, uuid_record["uuid"], id(uuid_record))) > 0: + if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1: uuid_record["duplicative"] = count try: From e21a5c70457d766ef5b1f1f595468913efb54262 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 10 Dec 2024 22:33:19 -0500 Subject: [PATCH 29/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 16b535779..62e6475ec 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -171,7 +171,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa return { "terms": { "script": { - "source": script, + "source": normalize_spaces(script), "lang": "painless" }, "size": max_buckets @@ -277,7 +277,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa return aggregation_query[date_property_name] def execute_aggregation_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: - query += "&limit=0" # needed for aggregation query to not return the actual/individual item results. + query += "&from=0&limit=0" # needed for aggregation query to not return the actual/individual item results. 
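# (Aside: limit=0 plays the same role here as "size": 0 in a raw Elasticsearch
# request, i.e. return aggregation buckets only, with no individual document
# hits. A hedged sketch of an equivalent raw query body, with illustrative
# field and interval values:
#
#     aggregation_only_query = {
#         "size": 0,  # suppress the individual item results
#         "aggs": {
#             "file_status_tracking.released": {
#                 "date_histogram": {
#                     "field": "embedded.file_status_tracking.released",
#                     "calendar_interval": "month",
#                     "order": {"_key": "desc"}
#                 }
#             }
#         }
#     }
# )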
request = snovault_make_search_subreq(request, path=query, method="GET") results = snovault_search(None, request, custom_aggregations=aggregation_query) return results From 4ed1041a9f56ffc921833e6a6c4e6681a0582c18 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 11 Dec 2024 08:56:28 -0500 Subject: [PATCH 30/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 15 ++++++++ src/encoded/recent_files_summary.py | 54 ++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 55a03ddf2..293dc6a24 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -402,6 +402,18 @@ def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]: return doc_count return None + def get_aggregation_bucket_debug_hits(aggregation_bucket: dict) -> List[str]: + debug_hits = [] + if isinstance(aggregation_bucket, dict): + if isinstance(doc_count := aggregation_bucket.get("doc_count"), int): + if (isinstance(top_hits_debug := aggregation_bucket.get("top_hits_debug"), dict) and + isinstance(hits := top_hits_debug.get("hits"), dict) and + isinstance(hits := hits.get("hits"), list)): # noqa + for hit in hits: + if isinstance(hit, dict) and isinstance(hit := hit.get("_id"), str): + debug_hits.append(hit) + return debug_hits + def get_nested_aggregations(data: dict) -> List[dict]: results = [] if isinstance(data, dict): @@ -436,6 +448,7 @@ def normalize_results(aggregation: dict, ((bucket_item_count := get_aggregation_bucket_doc_count(bucket)) is None)): # noqa continue item_count += bucket_item_count + debug_hits = get_aggregation_bucket_debug_hits(bucket) if nested_aggregations := get_nested_aggregations(bucket): for nested_aggregation in nested_aggregations: if normalized_aggregation := normalize_results(nested_aggregation, aggregation_key, bucket_value): @@ -455,6 +468,8 @@ def normalize_results(aggregation: dict, else: if (remove_empty_items is False) or (bucket_item_count > 0): group_item = {"name": aggregation_key, "value": bucket_value, "count": bucket_item_count} + if debug_hits: + group_item["debug_elasticsearch_hits"] = debug_hits group_items.append(group_item) if (remove_empty_items is not False) and (not group_items): diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 62e6475ec..16e915a9d 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -73,6 +73,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: debug = request_arg_bool(request, "debug") debug_query = request_arg_bool(request, "debug_query") troubleshoot = request_arg_bool(request, "troubleshoot") + troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch") raw = request_arg_bool(request, "raw") def create_base_query_arguments(request: pyramid.request.Request) -> dict: @@ -117,7 +118,7 @@ def create_query(request: pyramid.request.Request, base_query_arguments: Optiona def create_aggregation_query(aggregation_fields: List[str]) -> dict: global AGGREGATION_NO_VALUE - nonlocal date_property_name, max_buckets, include_missing, favor_donor + nonlocal date_property_name, max_buckets, include_missing, favor_donor, troubleshoot_elasticsearch aggregations = [] if not isinstance(aggregation_fields, list): @@ -238,6 +239,23 @@ def obsolete_create_field_filter(field: str) -> Optional[dict]: # noqa 
create_field_aggregation=create_field_aggregation, create_field_filter=create_field_filter) + if troubleshoot_elasticsearch: + def add_debug_query_to_elasticsearch_aggregation_query(aggregation: dict) -> None: # noqa + top_hits_debug = {"aggs": {"top_hits_debug": {"top_hits": {"_source": False, + "docvalue_fields": ["_id"], "size": 100 }}}} + def add_debug_query(aggs: dict) -> None: # noqa + if "aggs" in aggs: + for key, sub_agg in aggs["aggs"].items(): + add_debug_query(sub_agg) + else: + aggs.update(top_hits_debug) + for agg in aggregation["aggs"].values(): + add_debug_query(agg) + try: + add_debug_query_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) + except Exception: + pass + return aggregation_query[date_property_name] def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: @@ -342,6 +360,21 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum aggregation_query = { aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line) } + # print(aggregation_query) + # import json + # print(json.dumps(aggregation_query, indent=4)) + # import pdb ; pdb.set_trace() # noqa + # xxx = { "top_hits_debug": { "top_hits": { "_source": False, "docvalue_fields": ["_id"], "size": 10 } } } + # aggregation_query["aggregate_by_cell_line"]["aggs"]["file_sets.libraries.analytes.samples.sample_sources.cell_line.code"]["aggs"]["release_tracker_description"]["aggs"] = xxx +# "aggs": { +# "top_hits_debug": { +# "top_hits": { +# "_source": false, +# "docvalue_fields": ["_id"], +# "size": 10 +# } +# } +# } else: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" aggregate_by_cell_line = [ @@ -512,20 +545,33 @@ def annotate_with_uuids(normalized_results: dict): for third_item in second_item["items"]: third_property_name = third_item["name"] third_property_value = third_item["value"] + if debug_elasticsearch_hits := third_item.get("debug_elasticsearch_hits"): + if not third_item.get("debug"): + third_item["debug"] = {} + third_item["debug"]["elasticsearch_hits"] = debug_elasticsearch_hits + third_item["debug"]["elasticsearch_hits"].sort() + del third_item["debug_elasticsearch_hits"] if first_files := get_files(files, first_property_name, first_property_value, map_property_value=map_date_property_value): if second_files := get_files(first_files, second_property_name, second_property_value): if third_files := get_files(second_files, third_property_name, third_property_value): for file in third_files: if isinstance(uuid := file.get("uuid"), str): - if not third_item.get("uuids"): - third_item["uuids"] = [] + if not third_item.get("debug"): + third_item["debug"] = {} + if not third_item["debug"].get("uuids"): + third_item["debug"]["uuids"] = [] uuid_record = {"uuid": uuid} for aggregation_field in aggregation_fields: aggregation_values = ", ".join(get_properties(file, aggregation_field)) uuid_record[aggregation_field] = aggregation_values or None - third_item["uuids"].append(uuid_record) + if third_item["debug"].get("elasticsearch_hits"): + uuid_record["elasticsearch_counted"] = \ + uuid in third_item["debug"]["elasticsearch_hits"] + third_item["debug"]["uuids"].append(uuid_record) uuid_records.append(uuid_record) + if third_item.get("debug", {}).get("uuids"): + third_item["debug"]["uuids"].sort(key=lambda item: item.get("uuid")) for uuid_record in uuid_records: if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1: From b5c99ce5ee38e3a32afeabcd0983e8da084bd59a Mon Sep 17 00:00:00 2001 From: David 
Michaels Date: Wed, 11 Dec 2024 12:50:08 -0500 Subject: [PATCH 31/78] refactoring /recent_files_summary endpoint --- src/encoded/elasticsearch_utils.py | 16 ++- src/encoded/recent_files_summary.py | 158 ++++++++-------------------- 2 files changed, 58 insertions(+), 116 deletions(-) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py index 293dc6a24..b8ab8f332 100644 --- a/src/encoded/elasticsearch_utils.py +++ b/src/encoded/elasticsearch_utils.py @@ -142,6 +142,20 @@ def create_elasticsearch_aggregation_query(fields: List[str], return aggregation +def add_debugging_to_elasticsearch_aggregation_query(aggregation_query: dict) -> None: # noqa + top_hits_debug = {"aggs": {"top_hits_debug": {"top_hits": {"_source": False, + "docvalue_fields": ["_id"], "size": 100 }}}} + def add_debug_query(aggs: dict) -> None: # noqa + if "aggs" in aggs: + for _, agg in aggs["aggs"].items(): + add_debug_query(agg) + else: + aggs.update(top_hits_debug) + if isinstance(aggregation_query, dict) and isinstance(aggs := aggregation_query.get("aggs"), dict): + for agg in aggs.values(): + add_debug_query(agg) + + def prune_elasticsearch_aggregation_results(results: dict) -> None: """ This removes any extra level(s) of aggregation (i.e. dummy_date_histogram) that may have been @@ -405,7 +419,7 @@ def get_aggregation_bucket_doc_count(aggregation_bucket: dict) -> Optional[int]: def get_aggregation_bucket_debug_hits(aggregation_bucket: dict) -> List[str]: debug_hits = [] if isinstance(aggregation_bucket, dict): - if isinstance(doc_count := aggregation_bucket.get("doc_count"), int): + if isinstance(aggregation_bucket.get("doc_count"), int): if (isinstance(top_hits_debug := aggregation_bucket.get("top_hits_debug"), dict) and isinstance(hits := top_hits_debug.get("hits"), dict) and isinstance(hits := hits.get("hits"), list)): # noqa diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 16e915a9d..7c6f0d43d 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -2,11 +2,13 @@ from copy import deepcopy from typing import List, Optional from dcicutils.misc_utils import normalize_spaces +from encoded.elasticsearch_utils import add_debugging_to_elasticsearch_aggregation_query from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results from encoded.elasticsearch_utils import sort_normalized_aggregation_results +from encoded.elasticsearch_utils import AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE from encoded.endpoint_utils import create_query_string, parse_date_range_related_arguments from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int from snovault.search.search import search as snovault_search @@ -30,9 +32,6 @@ AGGREGATION_FIELD_DONOR ] -AGGREGATION_MAX_BUCKETS = 100 -AGGREGATION_NO_VALUE = "No value" - BASE_SEARCH_QUERY = "/search/" def recent_files_summary(request: pyramid.request.Request) -> dict: @@ -62,10 +61,13 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: released can be queried for using one or more status query arguments, e.g. status=uploaded. 
""" + global AGGREGATION_FIELD_RELEASE_DATE + date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True)) include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) + nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell")) nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) favor_donor = request_arg_bool(request, "favor_donor") nosort = request_arg_bool(request, "nosort") @@ -75,6 +77,24 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: troubleshoot = request_arg_bool(request, "troubleshoot") troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch") raw = request_arg_bool(request, "raw") + willrfix = request_arg_bool(request, "willrfix") + + def get_aggregation_field_grouping_cell_or_donor(): + # This specializes the aggregation query to group first by the cell-line field, + # and then alternatively (if a cell-line field does not exist) by the donor field. + # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively + # look first for the donor field and then secondarily for the cell-line field. + global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR + nonlocal nocells, nomixtures, favor_donor + aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR) + if nocells: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_LINE) + if nomixtures: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_MIXTURE) + if favor_donor: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR) + aggregation_field_grouping_cell_or_donor.insert(0, AGGREGATION_FIELD_DONOR) + return aggregation_field_grouping_cell_or_donor def create_base_query_arguments(request: pyramid.request.Request) -> dict: @@ -117,7 +137,6 @@ def create_query(request: pyramid.request.Request, base_query_arguments: Optiona def create_aggregation_query(aggregation_fields: List[str]) -> dict: - global AGGREGATION_NO_VALUE nonlocal date_property_name, max_buckets, include_missing, favor_donor, troubleshoot_elasticsearch aggregations = [] @@ -130,7 +149,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict: return {} def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal date_property_name, nomixtures + nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, nocells, nomixtures if field == date_property_name: return { "date_histogram": { @@ -142,22 +161,12 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa } } elif field == AGGREGATION_FIELD_CELL_LINE: - # This specializes the aggregation query to group first by the cell-line field, - # and then alternatively (if a cell-line field does not exist) by the donor field. - # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively - # look first for the donor field and then secondarily for the cell-line field. 
- aggregation_field_grouping = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR) - if nomixtures: - aggregation_field_grouping.remove(AGGREGATION_FIELD_CELL_MIXTURE) - if favor_donor: - aggregation_field_grouping.remove(AGGREGATION_FIELD_DONOR) - aggregation_field_grouping.insert(0, AGGREGATION_FIELD_DONOR) # Note how we prefix the result with the aggregation field name; # this is so later we can tell which grouping/field was matched; # see fixup_names_values_for_normalized_results for this fixup. script = "" - for aggregation_field_grouping_index in range(len(aggregation_field_grouping)): - aggregation_field = aggregation_field_grouping[aggregation_field_grouping_index] + for aggregation_field_grouping_index in range(len(aggregation_field_grouping_cell_or_donor)): + aggregation_field = aggregation_field_grouping_cell_or_donor[aggregation_field_grouping_index] if_or_else_if = "if" if aggregation_field_grouping_index == 0 else "else if" script += f""" {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ @@ -178,59 +187,15 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa "size": max_buckets } } - elif False and (field == AGGREGATION_FIELD_CELL_LINE): - # OBSOLETE: See above. - # This specializes the aggregation query to group first by the cell-line field, - # and then alternatively (if a cell-line field does not exist) by the donor field. - # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively - # look first for the donor field and then secondarily for the cell-line field. - if favor_donor: - field_one = AGGREGATION_FIELD_DONOR - field_two = AGGREGATION_FIELD_CELL_LINE - else: - field_one = AGGREGATION_FIELD_CELL_LINE - field_two = AGGREGATION_FIELD_DONOR - # Note how we prefix the result with the aggregation field name; - # this is so later we can tell which grouping/field was matched; - # see fixup_names_values_for_normalized_results for this fixup. 
- script = normalize_spaces(f""" - if (doc['embedded.{field_one}.raw'].size() > 0) {{ - return '{field_one}:' + doc['embedded.{field_one}.raw'].value; - }} else if (doc['embedded.{field_two}.raw'].size() > 0) {{ - return '{field_two}:' + doc['embedded.{field_two}.raw'].value; - }} else {{ - return 'unknown'; - }} - """) - return { - "terms": { - "script": { - "source": script, - "lang": "painless" - }, - "size": max_buckets - } - } def create_field_filter(field: str) -> Optional[dict]: # noqa + nonlocal aggregation_field_grouping_cell_or_donor if field == AGGREGATION_FIELD_CELL_LINE: filter = {"bool": {"should": [], "minimum_should_match": 1}} - for aggregation_field in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: + for aggregation_field in aggregation_field_grouping_cell_or_donor: filter["bool"]["should"].append({"exists": { "field": f"embedded.{aggregation_field}.raw"}}) return filter - def obsolete_create_field_filter(field: str) -> Optional[dict]: # noqa - if field == AGGREGATION_FIELD_CELL_LINE: - return { - "bool": { - "should": [ - {"exists": { "field": f"embedded.{AGGREGATION_FIELD_CELL_LINE}.raw"}}, - {"exists": { "field": f"embedded.{AGGREGATION_FIELD_DONOR}.raw"}} - ], - "minimum_should_match": 1 - } - } - aggregation_query = create_elasticsearch_aggregation_query( aggregations, max_buckets=max_buckets, @@ -240,27 +205,12 @@ def obsolete_create_field_filter(field: str) -> Optional[dict]: # noqa create_field_filter=create_field_filter) if troubleshoot_elasticsearch: - def add_debug_query_to_elasticsearch_aggregation_query(aggregation: dict) -> None: # noqa - top_hits_debug = {"aggs": {"top_hits_debug": {"top_hits": {"_source": False, - "docvalue_fields": ["_id"], "size": 100 }}}} - def add_debug_query(aggs: dict) -> None: # noqa - if "aggs" in aggs: - for key, sub_agg in aggs["aggs"].items(): - add_debug_query(sub_agg) - else: - aggs.update(top_hits_debug) - for agg in aggregation["aggs"].values(): - add_debug_query(agg) - try: - add_debug_query_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) - except Exception: - pass + add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) return aggregation_query[date_property_name] def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: - global AGGREGATION_NO_VALUE nonlocal date_property_name, max_buckets, include_missing aggregations = [] @@ -301,11 +251,11 @@ def execute_aggregation_query(request: pyramid.request.Request, query: str, aggr return results def fixup_names_values_for_normalized_results(normalized_results: dict) -> None: - global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR + nonlocal aggregation_field_grouping_cell_or_donor if isinstance(normalized_results, dict): if isinstance(value := normalized_results.get("value"), str): if ((separator_index := value.find(":")) > 0) and (value_prefix := value[0:separator_index]): - if value_prefix in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: + if value_prefix in aggregation_field_grouping_cell_or_donor: if value := value[separator_index + 1:]: normalized_results["name"] = value_prefix normalized_results["value"] = value @@ -313,23 +263,9 @@ def fixup_names_values_for_normalized_results(normalized_results: dict) -> None: for element in items: fixup_names_values_for_normalized_results(element) - def obsolete_fixup_names_values_for_normalized_results(normalized_results: dict) -> None: - if isinstance(normalized_results, dict): - if isinstance(value := normalized_results.get("value"), str): - if (colon := value.find(":")) 
> 0: - if (prefix := value[0:colon]) == AGGREGATION_FIELD_CELL_LINE: - normalized_results["name"] = AGGREGATION_FIELD_CELL_LINE - normalized_results["value"] = value[colon + 1:] - elif prefix == AGGREGATION_FIELD_DONOR: - normalized_results["name"] = AGGREGATION_FIELD_DONOR - normalized_results["value"] = value[colon + 1:] - if isinstance(items := normalized_results.get("items"), list): - for element in items: - fixup_names_values_for_normalized_results(element) - def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None: global BASE_SEARCH_QUERY - nonlocal date_property_name + nonlocal date_property_name, willrfix if isinstance(normalized_results, dict): if name := normalized_results.get("name"): if value := normalized_results.get("value"): @@ -342,11 +278,18 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum f"{name}.from": from_date, f"{name}.to": thru_date} else: base_query_arguments = {**base_query_arguments, name: value} + if willrfix: + if name == AGGREGATION_FIELD_CELL_LINE: + base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE + elif name == AGGREGATION_FIELD_DONOR: + base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE + base_query_arguments[AGGREGATION_FIELD_CELL_LINE] = AGGREGATION_NO_VALUE normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) if isinstance(items := normalized_results.get("items"), list): for element in items: add_queries_to_normalized_results(element, base_query_arguments) + aggregation_field_grouping_cell_or_donor = get_aggregation_field_grouping_cell_or_donor() base_query_arguments = create_base_query_arguments(request) query = create_query(request, base_query_arguments) @@ -360,21 +303,6 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum aggregation_query = { aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line) } - # print(aggregation_query) - # import json - # print(json.dumps(aggregation_query, indent=4)) - # import pdb ; pdb.set_trace() # noqa - # xxx = { "top_hits_debug": { "top_hits": { "_source": False, "docvalue_fields": ["_id"], "size": 10 } } } - # aggregation_query["aggregate_by_cell_line"]["aggs"]["file_sets.libraries.analytes.samples.sample_sources.cell_line.code"]["aggs"]["release_tracker_description"]["aggs"] = xxx -# "aggs": { -# "top_hits_debug": { -# "top_hits": { -# "_source": false, -# "docvalue_fields": ["_id"], -# "size": 10 -# } -# } -# } else: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" aggregate_by_cell_line = [ @@ -417,9 +345,9 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum prune_elasticsearch_aggregation_results(raw_results) if not legacy: - merged_results = raw_results.get(aggregate_by_cell_line_property_name) + aggregation_results = raw_results.get(aggregate_by_cell_line_property_name) else: - merged_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), + aggregation_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), raw_results.get(aggregate_by_donor_property_name)) # Note that the doc_count values returned by ElasticSearch DO actually seem to be for UNIQUE items, @@ -466,13 +394,13 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum "query": query, "aggregation_query": aggregation_query, "raw_results": raw_results, 
- "merged_results": deepcopy(merged_results) + "aggregation_results": deepcopy(aggregation_results) } } else: additional_properties = None - normalized_results = normalize_elasticsearch_aggregation_results(merged_results, + normalized_results = normalize_elasticsearch_aggregation_results(aggregation_results, additional_properties=additional_properties) if not legacy: fixup_names_values_for_normalized_results(normalized_results) From e7420cc8e1080a59bf0c98fbd5835783093c6c9b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 11 Dec 2024 12:57:48 -0500 Subject: [PATCH 32/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 7c6f0d43d..f99134aed 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -487,8 +487,8 @@ def annotate_with_uuids(normalized_results: dict): if isinstance(uuid := file.get("uuid"), str): if not third_item.get("debug"): third_item["debug"] = {} - if not third_item["debug"].get("uuids"): - third_item["debug"]["uuids"] = [] + if not third_item["debug"].get("portal_hits"): + third_item["debug"]["portal_hits"] = [] uuid_record = {"uuid": uuid} for aggregation_field in aggregation_fields: aggregation_values = ", ".join(get_properties(file, aggregation_field)) @@ -496,10 +496,10 @@ def annotate_with_uuids(normalized_results: dict): if third_item["debug"].get("elasticsearch_hits"): uuid_record["elasticsearch_counted"] = \ uuid in third_item["debug"]["elasticsearch_hits"] - third_item["debug"]["uuids"].append(uuid_record) + third_item["debug"]["portal_hits"].append(uuid_record) uuid_records.append(uuid_record) - if third_item.get("debug", {}).get("uuids"): - third_item["debug"]["uuids"].sort(key=lambda item: item.get("uuid")) + if third_item.get("debug", {}).get("portal_hits"): + third_item["debug"]["portal_hits"].sort(key=lambda item: item.get("uuid")) for uuid_record in uuid_records: if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1: From da2a211cb3cc015a271a76d4b18764999eb7ab73 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 11 Dec 2024 18:41:04 -0500 Subject: [PATCH 33/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index f99134aed..a774dee1f 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -69,6 +69,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict: include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell")) nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) + nodonors = request_arg_bool(request, "nodonors", request_arg_bool(request, "nodonor")) favor_donor = request_arg_bool(request, "favor_donor") nosort = request_arg_bool(request, "nosort") legacy = request_arg_bool(request, "legacy") @@ -85,12 +86,14 @@ def get_aggregation_field_grouping_cell_or_donor(): # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively # look first for the donor field and then secondarily for the cell-line field. 
         global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR
-        nonlocal nocells, nomixtures, favor_donor
+        nonlocal nocells, nomixtures, nodonors, favor_donor
         aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR)
         if nocells:
             aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_LINE)
         if nomixtures:
             aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_MIXTURE)
+        if nodonors:
+            aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR)
         if favor_donor:
             aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR)
             aggregation_field_grouping_cell_or_donor.insert(0, AGGREGATION_FIELD_DONOR)
@@ -149,7 +152,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict:
             return {}

         def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
-            nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, nocells, nomixtures
+            nonlocal aggregation_field_grouping_cell_or_donor, date_property_name
             if field == date_property_name:
                 return {
                     "date_histogram": {
@@ -401,7 +404,8 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
         additional_properties = None

     normalized_results = normalize_elasticsearch_aggregation_results(aggregation_results,
-                                                                     additional_properties=additional_properties)
+                                                                     additional_properties=additional_properties,
+                                                                     remove_empty_items=not include_missing)
     if not legacy:
         fixup_names_values_for_normalized_results(normalized_results)
     if include_queries:

From 8be133ed4e5cd87958353674ad195d01ca56dfd5 Mon Sep 17 00:00:00 2001
From: David Michaels <david_michaels@hms.harvard.edu>
Date: Wed, 11 Dec 2024 22:00:37 -0500
Subject: [PATCH 34/78] refactoring /recent_files_summary endpoint

---
 src/encoded/recent_files_summary.py | 114 +++++++++++++++++++++++++++-
 1 file changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index a774dee1f..c3f6a0c12 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -80,7 +80,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
     raw = request_arg_bool(request, "raw")
     willrfix = request_arg_bool(request, "willrfix")

-    def get_aggregation_field_grouping_cell_or_donor():
+    def get_aggregation_field_grouping_cell_or_donor() -> List[str]:
         # This specializes the aggregation query to group first by the cell-line field,
         # and then alternatively (if a cell-line field does not exist) by the donor field.
         # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively
         # look first for the donor field and then secondarily for the cell-line field.
@@ -395,6 +395,11 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
         additional_properties = {
             "debug": {
                 "query": query,
+                "aggregation_query_fields": [
+                    AGGREGATION_FIELD_RELEASE_DATE,
+                    *get_aggregation_field_grouping_cell_or_donor(),
+                    AGGREGATION_FIELD_FILE_DESCRIPTOR
+                ],
                 "aggregation_query": aggregation_query,
                 "raw_results": raw_results,
                 "aggregation_results": deepcopy(aggregation_results)
             }
         }
@@ -513,3 +518,110 @@ def annotate_with_uuids(normalized_results: dict):
         annotate_with_uuids(normalized_results)
     except Exception:
         pass
+
+
+def print_normalized_aggregation_results(data: dict,
+                                         title: Optional[str] = None,
+                                         parent_grouping_name: Optional[str] = None,
+                                         parent_grouping_value: Optional[str] = None,
+                                         uuids: bool = False,
+                                         uuid_details: bool = False,
+                                         nobold: bool = False,
+                                         verbose: bool = False) -> None:
+
+    """
+    For development/troubleshooting only ...
+ """ + + from hms_utils.chars import chars + from hms_utils.terminal_utils import terminal_color + + def get_aggregation_fields(data: dict) -> List[str]: + if not isinstance(aggregation_fields := data.get("debug", {}).get("aggregation_query_fields"), list): + aggregation_fields = [] + return aggregation_fields + + def print_results(data: dict, + parent_grouping_name: Optional[str] = None, + parent_grouping_value: Optional[str] = None, + indent: int = 0) -> None: + + nonlocal title, uuids, uuid_details, nobold, verbose + nonlocal aggregation_fields, red, green, gray, bold + + def get_hits(data: dict) -> List[str]: + hits = [] + if isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): + for portal_hit in portal_hits: + if isinstance(portal_hit, dict) and isinstance(uuid := portal_hit.get("uuid"), str) and uuid: + hits.append(portal_hit) + return hits + + def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]: + nonlocal parent_grouping_name, parent_grouping_value + if property_value := hit.get(property_name): + if property_name == parent_grouping_name: + property_values = [] + for property_value in property_value.split(","): + if (property_value := property_value.strip()) == parent_grouping_value: + property_values.append(green(property_value)) + else: + property_values.append(property_value) + property_value = ", ".join(property_values) + return property_value + + def print_hit_property_values(hit: dict, property_name: str, + label: Optional[str] = None, prefix: Optional[str] = None) -> None: + nonlocal aggregation_fields + if property_values := format_hit_property_values(hit, property_name): + if not label: + label = property_name + property_description = f"{prefix or ""}{chars.dot_hollow} {label}: {property_values}" + if property_name not in aggregation_fields: + property_description = gray(property_description) + print(property_description) + + if not (isinstance(data, dict) and data): + return + if not (isinstance(indent, int) and (indent > 0)): + indent = 0 + spaces = (" " * indent) if indent > 0 else "" + grouping_name = data.get("name") + if isinstance(grouping_value := data.get("value"), str) and grouping_value: + grouping = bold(grouping_value) + if (verbose is True) and isinstance(grouping_name, str) and grouping_name: + grouping = f"{grouping_name} {chars.dot} {grouping}" + elif not (isinstance(grouping := title, str) and grouping): + grouping = "RESULTS" + grouping = f"{chars.diamond} {grouping}" + hits = get_hits(data) if (uuids is True) else [] + if isinstance(count := data.get("count"), int): + note = "" + if len(hits) > count: + note = red(f" {chars.rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") + print(f"{spaces}{grouping}: {count}{note}") + for hit in hits: + if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: + note = "" + if hit.get("elasticsearch_counted") is False: + print(red(f"{spaces} {chars.dot} {uuid} {chars.xmark} UNCOUNTED")) + else: + print(f"{spaces} {chars.dot} {uuid} {chars.check}") + if uuid_details is True: + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", f"{spaces} ") + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", f"{spaces} ") + print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", f"{spaces} ") + if isinstance(items := data.get("items"), list): + for element in items: + print_results(element, + parent_grouping_name=grouping_name, + parent_grouping_value=grouping_value, + indent=indent + 2) + + 
    aggregation_fields = get_aggregation_fields(data)
    red = lambda text: terminal_color(text, "red")  # noqa
    green = lambda text: terminal_color(text, "green")  # noqa
    gray = lambda text: terminal_color(text, "grey")  # noqa
    bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text)

    print_results(data)

From 713f50f23523b39efc6f0677375b9b7203fe2346 Mon Sep 17 00:00:00 2001
From: David Michaels <david_michaels@hms.harvard.edu>
Date: Wed, 11 Dec 2024 22:07:55 -0500
Subject: [PATCH 35/78] refactoring /recent_files_summary endpoint

---
 src/encoded/recent_files_summary.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index c3f6a0c12..a37da7b64 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -10,6 +10,7 @@ from encoded.elasticsearch_utils import sort_normalized_aggregation_results
 from encoded.elasticsearch_utils import AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE
 from encoded.endpoint_utils import create_query_string, parse_date_range_related_arguments
+from encoded.endpoint_utils import get_properties, parse_datetime_string
 from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int
 from snovault.search.search import search as snovault_search
 from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq
@@ -67,7 +68,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
     max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS)
     include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True))
     include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues"))
-    nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell"))
+    nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell", True))  # N.B. default True
     nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture"))
     nodonors = request_arg_bool(request, "nodonors", request_arg_bool(request, "nodonor"))
     favor_donor = request_arg_bool(request, "favor_donor")
@@ -433,8 +434,6 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum

 def add_info_for_troubleshooting(normalized_results: dict, request: pyramid.request.Request) -> None:

-    from encoded.endpoint_utils import get_properties, parse_datetime_string
-
     def get_files(files, property_name, property_value, map_property_value = None):
         found = []
         for file in files:
@@ -532,7 +531,6 @@ def print_normalized_aggregation_results(data: dict,
                                          uuid_details: bool = False,
                                          nobold: bool = False,
                                          verbose: bool = False) -> None:
-
+
    """
    For development/troubleshooting only ...
    """

From cc4f8f12523eed849fbf6ba680958bc56c5cc6e4 Mon Sep 17 00:00:00 2001
From: David Michaels <david_michaels@hms.harvard.edu>
Date: Wed, 11 Dec 2024 22:18:39 -0500
Subject: [PATCH 36/78] refactoring /recent_files_summary endpoint

---
 src/encoded/recent_files_summary.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index a37da7b64..bccf7312f 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -531,7 +531,6 @@ def print_normalized_aggregation_results(data: dict,
    """
    For development/troubleshooting only ...
""" - from hms_utils.chars import chars from hms_utils.terminal_utils import terminal_color def get_aggregation_fields(data: dict) -> List[str]: @@ -546,6 +545,7 @@ def print_results(data: dict, nonlocal title, uuids, uuid_details, nobold, verbose nonlocal aggregation_fields, red, green, gray, bold + nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark def get_hits(data: dict) -> List[str]: hits = [] @@ -570,11 +570,11 @@ def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]: def print_hit_property_values(hit: dict, property_name: str, label: Optional[str] = None, prefix: Optional[str] = None) -> None: - nonlocal aggregation_fields + nonlocal aggregation_fields, chars_dot_hollow if property_values := format_hit_property_values(hit, property_name): if not label: label = property_name - property_description = f"{prefix or ""}{chars.dot_hollow} {label}: {property_values}" + property_description = f"{prefix or ""}{chars_dot_hollow} {label}: {property_values}" if property_name not in aggregation_fields: property_description = gray(property_description) print(property_description) @@ -588,23 +588,23 @@ def print_hit_property_values(hit: dict, property_name: str, if isinstance(grouping_value := data.get("value"), str) and grouping_value: grouping = bold(grouping_value) if (verbose is True) and isinstance(grouping_name, str) and grouping_name: - grouping = f"{grouping_name} {chars.dot} {grouping}" + grouping = f"{grouping_name} {chars_dot} {grouping}" elif not (isinstance(grouping := title, str) and grouping): grouping = "RESULTS" - grouping = f"{chars.diamond} {grouping}" + grouping = f"{chars_diamond} {grouping}" hits = get_hits(data) if (uuids is True) else [] if isinstance(count := data.get("count"), int): note = "" if len(hits) > count: - note = red(f" {chars.rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") + note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") print(f"{spaces}{grouping}: {count}{note}") for hit in hits: if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: note = "" if hit.get("elasticsearch_counted") is False: - print(red(f"{spaces} {chars.dot} {uuid} {chars.xmark} UNCOUNTED")) + print(red(f"{spaces} {chars_dot} {uuid} {chars_xmark} UNCOUNTED")) else: - print(f"{spaces} {chars.dot} {uuid} {chars.check}") + print(f"{spaces} {chars_dot} {uuid} {chars_check}") if uuid_details is True: print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", f"{spaces} ") print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", f"{spaces} ") @@ -621,5 +621,11 @@ def print_hit_property_values(hit: dict, property_name: str, green = lambda text: terminal_color(text, "green") # noqa gray = lambda text: terminal_color(text, "grey") # noqa bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text) + chars_check = "✓" + chars_xmark = "✗" + chars_dot = "•" + chars_dot_hollow = "◦" + chars_diamond = "❖" + chars_rarrow_hollow = "▷" print_results(data) From c8f3ba9c82e078a59db149d6659cd508c02c7074 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 11 Dec 2024 22:25:29 -0500 Subject: [PATCH 37/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index bccf7312f..f4e6b9d56 100644 --- a/src/encoded/recent_files_summary.py +++ 
b/src/encoded/recent_files_summary.py
@@ -574,7 +574,7 @@ def print_hit_property_values(hit: dict, property_name: str,
             if property_values := format_hit_property_values(hit, property_name):
                 if not label:
                     label = property_name
-                property_description = f"{prefix or ""}{chars_dot_hollow} {label}: {property_values}"
+                property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}"
                 if property_name not in aggregation_fields:
                     property_description = gray(property_description)
                 print(property_description)

From a854653d1d43a6234f733b49c2504a9c3272556f Mon Sep 17 00:00:00 2001
From: David Michaels <david_michaels@hms.harvard.edu>
Date: Wed, 11 Dec 2024 22:36:10 -0500
Subject: [PATCH 38/78] refactoring /recent_files_summary endpoint

---
 src/encoded/elasticsearch_utils.py  |  2 +-
 src/encoded/recent_files_summary.py | 15 +++++++--------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/elasticsearch_utils.py
index b8ab8f332..daf4ca2e5 100644
--- a/src/encoded/elasticsearch_utils.py
+++ b/src/encoded/elasticsearch_utils.py
@@ -64,7 +64,7 @@ def create_elasticsearch_aggregation_query(fields: List[str],
     }

     The above example assumes that a create_field_aggregation function callable was passed as an argument
-    and that if/when its argument is date_created then it would have returned something like this
+    and that if/when its argument is date_created then it would have returned something like this:

     {
         "date_histogram": {
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index f4e6b9d56..0511d6ca6 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -50,7 +50,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
     calculated property - see PR-298 (branch: sn_file_release_tracker).

     By default the current (assuming partial) month IS included, so we really return info for
-    the past FULL three months plus for whatever time has currently elapsed for the current month.
+    the past FULL three months plus for whatever time has currently elapsed for the current month.
     Pass the include_current_month=false query argument to NOT include the current month.

     The number of months of data can be controlled using the nmonths query argument, e.g. nmonths=6.
@@ -59,7 +59,7 @@ def recent_files_summary(request: pyramid.request.Request) -> dict:
     For testing purposes, a date field other than the default file_status_tracking.released can
     also be specified using the date_property_name query argument. And file statuses other than
-    released can be queried for using one or more status query arguments, e.g. status=uploaded.
+    released can be queried for using one or more status query arguments, e.g. status=uploaded.
     """
@@ -85,7 +85,7 @@ def get_aggregation_field_grouping_cell_or_donor() -> List[str]:
         # This specializes the aggregation query to group first by the cell-line field,
         # and then alternatively (if a cell-line field does not exist) by the donor field.
         # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively
-        # look first for the donor field and then secondarily for the cell-line field.
+        # look first for the donor field and then secondarily for the cell-line field.
         global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR
         nonlocal nocells, nomixtures, nodonors, favor_donor
         aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR)
@@ -427,7 +427,6 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
     if troubleshoot:
         add_info_for_troubleshooting(normalized_results, request)
-
     return normalized_results

@@ -439,7 +438,7 @@ def get_files(files, property_name, property_value, map_property_value = None):
         for file in files:
             if properties := get_properties(file, property_name):
                 if callable(map_property_value):
-                    mapped_properties = []
+                    mapped_properties = []
                     for value in properties:
                         mapped_properties.append(map_property_value(value))
                     properties = mapped_properties
@@ -527,7 +526,7 @@ def print_normalized_aggregation_results(data: dict,
                                          uuid_details: bool = False,
                                          nobold: bool = False,
                                          verbose: bool = False) -> None:
-
+
    """
    For development/troubleshooting only ...
    """
@@ -554,7 +553,7 @@ def get_hits(data: dict) -> List[str]:
                     hits.append(portal_hit)
             return hits
-
+
         def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]:
@@ -567,7 +566,7 @@ def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]:
                     property_value = ", ".join(property_values)
             return property_value
-
+
         def print_hit_property_values(hit: dict, property_name: str,

From 986067439e5c7f8342ed3df4408133c79556febf Mon Sep 17 00:00:00 2001
From: David Michaels <david_michaels@hms.harvard.edu>
Date: Wed, 11 Dec 2024 22:46:59 -0500
Subject: [PATCH 39/78] refactoring /recent_files_summary endpoint

---
 src/encoded/recent_files_summary.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index 0511d6ca6..59e6e95cf 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -22,6 +22,8 @@
 QUERY_INCLUDE_CURRENT_MONTH = True

 AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released"
+# FYI: Note that there is also file_sets.libraries.analytes.samples.sample_sources.display_title
+# and that sometimes file_sets.libraries.analytes.samples.sample_sources.code does not exist.
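# (Aside: get_properties, imported from encoded.endpoint_utils, is used
# throughout to pull values out of nested file metadata by dotted paths like
# these. Its implementation is not shown in this series; a minimal sketch of
# the assumed behavior, flattening intermediate lists along the path:
#
#     def get_properties(item, path):
#         values = [item]
#         for key in path.split("."):
#             next_values = []
#             for value in values:
#                 if isinstance(value, dict) and (key in value):
#                     value = value[key]
#                     next_values.extend(value if isinstance(value, list) else [value])
#             values = next_values
#         return values
# )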
AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code" AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" AGGREGATION_FIELD_DONOR = "donors.display_title" From 56a72f40342b1fb77b33e3d78b0ce9a5e6eeadcf Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 11:08:09 -0500 Subject: [PATCH 40/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 59e6e95cf..afedeb365 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -22,7 +22,7 @@ QUERY_INCLUDE_CURRENT_MONTH = True AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released" -# FYI: Note there there is also file_sets.libraries.analytes.samples.sample_sources.display_title +# FYI FWIW: There is also file_sets.libraries.analytes.samples.sample_sources.display_title; # and that sometimes file_sets.libraries.analytes.samples.sample_sources.code does not exist. AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code" AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" @@ -465,8 +465,10 @@ def annotate_with_uuids(normalized_results: dict): AGGREGATION_FIELD_RELEASE_DATE, AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, + # Some extra properties for troublehooting (as this whole thing is). "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", + "file_sets.libraries.analytes.samples.sample_sources.display_title", AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_FILE_DESCRIPTOR ] @@ -545,7 +547,7 @@ def print_results(data: dict, indent: int = 0) -> None: nonlocal title, uuids, uuid_details, nobold, verbose - nonlocal aggregation_fields, red, green, gray, bold + nonlocal aggregation_fields, red, green_bold, gray, bold nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark def get_hits(data: dict) -> List[str]: @@ -563,7 +565,7 @@ def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]: property_values = [] for property_value in property_value.split(","): if (property_value := property_value.strip()) == parent_grouping_value: - property_values.append(green(property_value)) + property_values.append(green_bold(property_value)) else: property_values.append(property_value) property_value = ", ".join(property_values) @@ -607,9 +609,13 @@ def print_hit_property_values(hit: dict, property_name: str, else: print(f"{spaces} {chars_dot} {uuid} {chars_check}") if uuid_details is True: - print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", f"{spaces} ") - print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", f"{spaces} ") - print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", f"{spaces} ") + prefix = f"{spaces} " + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", prefix) + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", prefix) + # Some extra for troubleshooting (as this whole thing is). 
+ print_hit_property_values(hit, "file_sets.libraries.analytes.samples.sample_sources.display_title", + "sample-sources-title", prefix) + print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", prefix) if isinstance(items := data.get("items"), list): for element in items: print_results(element, @@ -620,6 +626,7 @@ def print_hit_property_values(hit: dict, property_name: str, aggregation_fields = get_aggregation_fields(data) red = lambda text: terminal_color(text, "red") # noqa green = lambda text: terminal_color(text, "green") # noqa + green_bold = lambda text: terminal_color(text, "green", bold=True) # noqa gray = lambda text: terminal_color(text, "grey") # noqa bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text) chars_check = "✓" From 1fe7fbfc79280d57b0e9056ceefe579f67cf078d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 11:19:59 -0500 Subject: [PATCH 41/78] minor /recent_files_summary refactor mostly for troubleshooting --- src/encoded/recent_files_summary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index afedeb365..cad61c995 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -573,9 +573,9 @@ def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]: def print_hit_property_values(hit: dict, property_name: str, label: Optional[str] = None, prefix: Optional[str] = None) -> None: - nonlocal aggregation_fields, chars_dot_hollow + nonlocal verbose, aggregation_fields, chars_dot_hollow, verbose if property_values := format_hit_property_values(hit, property_name): - if not label: + if (verbose is True) or (not label): label = property_name property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}" if property_name not in aggregation_fields: From f84340aae650e72a96cf4ccfb976f71cd236508e Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 13:03:07 -0500 Subject: [PATCH 42/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 69 +++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index cad61c995..99cd2b266 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,6 +1,6 @@ import pyramid from copy import deepcopy -from typing import List, Optional +from typing import Callable, List, Optional, Tuple from dcicutils.misc_utils import normalize_spaces from encoded.elasticsearch_utils import add_debugging_to_elasticsearch_aggregation_query from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query @@ -522,7 +522,7 @@ def annotate_with_uuids(normalized_results: dict): pass -def print_normalized_aggregation_results(data: dict, +def print_normalized_aggregation_results(normalized_results: dict, title: Optional[str] = None, parent_grouping_name: Optional[str] = None, parent_grouping_value: Optional[str] = None, @@ -558,23 +558,63 @@ def get_hits(data: dict) -> List[str]: hits.append(portal_hit) return hits - def format_hit_property_values(hit: dict, property_name: str) -> Optional[str]: - nonlocal parent_grouping_name, parent_grouping_value + def format_hit_property_values(hit: dict, property_name: str, + color: Optional[Callable] = None) -> Optional[str]: + nonlocal parent_grouping_name, parent_grouping_value, 
green, green_bold
            if property_value := hit.get(property_name):
                if property_name == parent_grouping_name:
                    property_values = []
                    for property_value in property_value.split(","):
                        if (property_value := property_value.strip()) == parent_grouping_value:
-                            property_values.append(green_bold(property_value))
+                            property_value = color(property_value) if callable(color) else green_bold(property_value)
+                            property_values.append(property_value)
                        else:
                            property_values.append(property_value)
                    property_value = ", ".join(property_values)
+            elif hit.get("elasticsearch_counted") is False:
+                counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid"))
+                if (counted_grouping_name == property_name) and (counted_grouping_value == property_value):
+                    property_value = green(property_value)
             return property_value

+        def find_where_aggregated_and_counted(uuid: str) -> Tuple[str, str]:
+
+            nonlocal normalized_results
+
+            def find_where(data: dict, uuid: str,
+                           parent_grouping_name: Optional[str] = None,
+                           parent_grouping_value: Optional[str] = None) -> List[Tuple[str, str]]:
+                found_uuid_grouping_names_and_values = set()
+                if isinstance(data, dict):
+                    grouping_name = data.get("name")
+                    grouping_value = data.get("value")
+                    if isinstance(items := data.get("items"), list):
+                        for item in items:
+                            if found := find_where(item, uuid,
+                                                   parent_grouping_name=grouping_name,
+                                                   parent_grouping_value=grouping_value):
+                                found_uuid_grouping_names_and_values.update(found)
+                    elif isinstance(hits := data.get("debug", {}).get("portal_hits"), list):
+                        for hit in hits:
+                            if hit.get("uuid") == uuid:
+                                if hit.get("elasticsearch_counted") is True:
+                                    found_uuid_grouping_names_and_values.add((parent_grouping_name, parent_grouping_value))
+                return found_uuid_grouping_names_and_values
+
+            if found_uuid_grouping_names_and_values := list(find_where(normalized_results, uuid)):
+                if len(found_uuid_grouping_names_and_values) > 0:
+                    if len(found_uuid_grouping_names_and_values) > 1:
+                        # Something is wrong; should only be at most one item with elasticsearch_counted set to True.
+ pass + return found_uuid_grouping_names_and_values[0] + return None, None + def print_hit_property_values(hit: dict, property_name: str, - label: Optional[str] = None, prefix: Optional[str] = None) -> None: + label: Optional[str] = None, + prefix: Optional[str] = None, + color: Optional[Callable] = None) -> None: nonlocal verbose, aggregation_fields, chars_dot_hollow, verbose - if property_values := format_hit_property_values(hit, property_name): + if property_values := format_hit_property_values(hit, property_name, color=color): if (verbose is True) or (not label): label = property_name property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}" @@ -606,16 +646,18 @@ def print_hit_property_values(hit: dict, property_name: str, note = "" if hit.get("elasticsearch_counted") is False: print(red(f"{spaces} {chars_dot} {uuid} {chars_xmark} UNCOUNTED")) + color = red_bold else: print(f"{spaces} {chars_dot} {uuid} {chars_check}") + color = green_bold if uuid_details is True: prefix = f"{spaces} " - print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", prefix) - print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", prefix) + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", prefix=prefix, color=color) + print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", prefix=prefix, color=color) # Some extra for troubleshooting (as this whole thing is). print_hit_property_values(hit, "file_sets.libraries.analytes.samples.sample_sources.display_title", - "sample-sources-title", prefix) - print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", prefix) + "sample-sources-title", prefix=prefix, color=color) + print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", prefix=prefix, color=color) if isinstance(items := data.get("items"), list): for element in items: print_results(element, @@ -623,8 +665,9 @@ def print_hit_property_values(hit: dict, property_name: str, parent_grouping_value=grouping_value, indent=indent + 2) - aggregation_fields = get_aggregation_fields(data) + aggregation_fields = get_aggregation_fields(normalized_results) red = lambda text: terminal_color(text, "red") # noqa + red_bold = lambda text: terminal_color(text, "red", bold=True) # noqa green = lambda text: terminal_color(text, "green") # noqa green_bold = lambda text: terminal_color(text, "green", bold=True) # noqa gray = lambda text: terminal_color(text, "grey") # noqa @@ -636,4 +679,4 @@ def print_hit_property_values(hit: dict, property_name: str, chars_diamond = "❖" chars_rarrow_hollow = "▷" - print_results(data) + print_results(normalized_results) From 45917f5a6c4ce91eaf5f93d56880c1f5cab94be7 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 14:09:31 -0500 Subject: [PATCH 43/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 99cd2b266..ddb5daea3 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -560,7 +560,7 @@ def get_hits(data: dict) -> List[str]: def format_hit_property_values(hit: dict, property_name: str, color: Optional[Callable] = None) -> Optional[str]: - nonlocal parent_grouping_name, parent_grouping_value, green, green_bold + nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow if property_value := 
hit.get(property_name): if property_name == parent_grouping_name: property_values = [] @@ -574,7 +574,7 @@ def format_hit_property_values(hit: dict, property_name: str, elif hit.get("elasticsearch_counted") is False: counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) if (counted_grouping_name == property_name) and (counted_grouping_value == property_value): - property_value = green(property_value) + property_value = green(f"{property_value} {chars_larrow_hollow} COUNTED HERE") return property_value def find_where_aggregated_and_counted(uuid: str) -> Tuple[str, str]: @@ -678,5 +678,6 @@ def print_hit_property_values(hit: dict, property_name: str, chars_dot_hollow = "◦" chars_diamond = "❖" chars_rarrow_hollow = "▷" + chars_larrow_hollow = "◁" print_results(normalized_results) From 975f297a8bd5f37c0ced69c94cee5c033919615b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 16:43:17 -0500 Subject: [PATCH 44/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index ddb5daea3..f6f9e5804 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -529,6 +529,7 @@ def print_normalized_aggregation_results(normalized_results: dict, uuids: bool = False, uuid_details: bool = False, nobold: bool = False, + checks: bool = True, verbose: bool = False) -> None: """ @@ -536,6 +537,8 @@ def print_normalized_aggregation_results(normalized_results: dict, """ from hms_utils.terminal_utils import terminal_color + global AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, AGGREGATION_FIELD_DONOR + def get_aggregation_fields(data: dict) -> List[str]: if not isinstance(aggregation_fields := data.get("debug", {}).get("aggregation_query_fields"), list): aggregation_fields = [] @@ -550,7 +553,7 @@ def print_results(data: dict, nonlocal aggregation_fields, red, green_bold, gray, bold nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark - def get_hits(data: dict) -> List[str]: + def get_portal_hits(data: dict) -> List[dict]: hits = [] if isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): for portal_hit in portal_hits: @@ -635,11 +638,20 @@ def print_hit_property_values(hit: dict, property_name: str, elif not (isinstance(grouping := title, str) and grouping): grouping = "RESULTS" grouping = f"{chars_diamond} {grouping}" - hits = get_hits(data) if (uuids is True) else [] + hits = get_portal_hits(data) if (uuids is True) else [] if isinstance(count := data.get("count"), int): note = "" if len(hits) > count: note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") + elif checks is True: + if isinstance(items := data.get("items"), list): + subcount = 0 + for item in items: + if isinstance(subcount_item := item.get("count"), int): + subcount += subcount_item + note = f" {chars_check}" if subcount == count else f" {chars_xmark}" + else: + note = f" {chars_check}" print(f"{spaces}{grouping}: {count}{note}") for hit in hits: if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: @@ -652,9 +664,10 @@ def print_hit_property_values(hit: dict, property_name: str, color = green_bold if uuid_details is True: prefix = f"{spaces} " + # Show property values for troubleshooting (as this whole thing is); + # see 
add_info_for_troubleshooting.annotate_with_uuids. print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", prefix=prefix, color=color) print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", prefix=prefix, color=color) - # Some extra for troubleshooting (as this whole thing is). print_hit_property_values(hit, "file_sets.libraries.analytes.samples.sample_sources.display_title", "sample-sources-title", prefix=prefix, color=color) print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", prefix=prefix, color=color) From 6eba0a8eb167390dcbd02c2bfda6d0c87a059a70 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 12 Dec 2024 18:13:54 -0500 Subject: [PATCH 45/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 83566a5d5..1a51a6db0 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -125,6 +125,10 @@ def parse_datetime_string(value: Union[str, datetime, date], # Special case to accept for example "2024-10" to mean "2024-10-01". value = f"{value}-01" last_day_of_month = last_day_of_month_if_no_day + elif (len(value) == 6) and value[0:4].isdigit() and value[4:].isdigit(): + # Special case to accept for example "202410" to mean "2024-10-01". + value = f"{value[0:4]}-{value[4:]}-01" + last_day_of_month = last_day_of_month_if_no_day elif (len(value) == 7) and (value[2] == "/") and value[0:2].isdigit() and value[3:].isdigit(): # Special case to accept for example "11/2024" to mean "2024-11-01". value = f"{value[3:]}-{value[0:2]}-01" From 77a01712716ddd6159f4778ecd54fc046d034bac Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 13 Dec 2024 00:08:43 -0500 Subject: [PATCH 46/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 31 ++++++++++++++++++++++------- src/encoded/recent_files_summary.py | 20 +++++++++++-------- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 1a51a6db0..3a026722c 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -1,17 +1,17 @@ import calendar from datetime import date, datetime from dateutil.relativedelta import relativedelta -import pyramid +from pyramid.request import Request as PyramidRequest from typing import Any, List, Optional, Tuple, Union from urllib.parse import urlencode from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string -def request_arg(request: pyramid.request.Request, name: str, fallback: Optional[str] = None) -> Optional[str]: +def request_arg(request: PyramidRequest, name: str, fallback: Optional[str] = None) -> Optional[str]: return str(value).strip() if (value := request.params.get(name, None)) is not None else fallback -def request_arg_int(request: pyramid.request.Request, name: str, fallback: Optional[int] = 0) -> Optional[Any]: +def request_arg_int(request: PyramidRequest, name: str, fallback: Optional[int] = 0) -> Optional[Any]: if (value := request_arg(request, name)) is not None: try: return int(value) @@ -20,11 +20,11 @@ def request_arg_int(request: pyramid.request.Request, name: str, fallback: Optio return fallback -def request_arg_bool(request: pyramid.request.Request, name: str, fallback: Optional[bool] = False) -> Optional[bool]: +def request_arg_bool(request: PyramidRequest, name: str, fallback: Optional[bool] = False) -> 
Optional[bool]:
    return fallback if (value := request_arg(request, name)) is None else (value.lower() == "true")


-def request_args(request: pyramid.request.Request,
+def request_args(request: PyramidRequest,
                  name: str, fallback: Optional[str] = None, duplicates: bool = False) -> List[str]:
     args = []
     if isinstance(value := request.params.getall(name), list):
@@ -70,7 +70,22 @@ def parse_date_range_related_arguments(
     nmonths arguments represents a non-zero integer, in which case the returned from/thru dates will represent
     the past (absolute value) nmonths months starting with the month previous to the month of "today"; however
     if the include_current_month is True it is rather the past nmonths starting with the month of "today".
+
+    FYI WRT smaht-portal/elasticsearch behavior and dates, when using a query like date_created.from=2024-10-31
+    and date_created.to=2024-12-31, what is actually passed to the elasticsearch filter/range query looks like:
+
+        "range": {
+            "date_created": {
+                "gte": "2024-10-31 00:00",
+                "lte": "2024-12-31 23:59"
+            }
+        }
+
+    I.e. the "from" date is from the very BEGINNING of the date/day (00:00) and matched greater-than-or-EQUAL,
+    and the "thru" date is thru the very END of the date/day (23:59). This is actually done by the method
+    snovault.search.lucene_builder.LuceneBuilder.handle_range_filters.
     """
+    include_current_month = include_current_month is True
     from_date = parse_datetime_string(from_date, notz=True)
     thru_date = parse_datetime_string(thru_date, last_day_of_month_if_no_day=True, notz=True)
     if not isinstance(nmonths, int):
@@ -93,14 +108,16 @@ def parse_date_range_related_arguments(
         from_date = _add_months(thru_date, nmonths)
     elif nmonths == 0:
         from_date = _get_first_date_of_month(thru_date)
-    elif isinstance(nmonths, int) and ((nmonths := abs(nmonths)) != 0):
+    elif ((nmonths := abs(nmonths)) != 0) or include_current_month:
         # If no (valid) from/thru dates given, but the absolute value of nmonths is a non-zero integer, then returns
         # from/thru dates for the last nmonths month ending with the last day of month previous to the current month.
# thru_date = _add_months(_get_last_date_of_month(), -1) thru_date = _get_last_date_of_month() - if include_current_month is not True: + if not include_current_month: thru_date = _add_months(thru_date, -1) + nmonths -= 1 from_date = _add_months(thru_date, -nmonths) + from_date = _get_first_date_of_month(from_date) if strings is True: return (from_date.strftime(f"%Y-%m-%d") if from_date else None, thru_date.strftime(f"%Y-%m-%d") if thru_date else None) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index f6f9e5804..7e230e874 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,4 +1,4 @@ -import pyramid +from pyramid.request import Request as PyramidRequest from copy import deepcopy from typing import Callable, List, Optional, Tuple from dcicutils.misc_utils import normalize_spaces @@ -37,7 +37,7 @@ BASE_SEARCH_QUERY = "/search/" -def recent_files_summary(request: pyramid.request.Request) -> dict: +def recent_files_summary(request: PyramidRequest) -> dict: """ This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, by default, info for files released withing the past three months grouped by release-date, @@ -102,7 +102,7 @@ def get_aggregation_field_grouping_cell_or_donor() -> List[str]: aggregation_field_grouping_cell_or_donor.insert(0, AGGREGATION_FIELD_DONOR) return aggregation_field_grouping_cell_or_donor - def create_base_query_arguments(request: pyramid.request.Request) -> dict: + def create_base_query_arguments(request: PyramidRequest) -> dict: global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES @@ -118,7 +118,7 @@ def create_base_query_arguments(request: pyramid.request.Request) -> dict: return {key: value for key, value in base_query_arguments.items() if value is not None} - def create_query(request: pyramid.request.Request, base_query_arguments: Optional[dict] = None) -> str: + def create_query_arguments(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str: global BASE_SEARCH_QUERY, QUERY_RECENT_MONTHS, QUERY_INCLUDE_CURRENT_MONTH nonlocal date_property_name @@ -138,7 +138,9 @@ def create_query(request: pyramid.request.Request, base_query_arguments: Optiona if isinstance(base_query_arguments, dict): query_arguments = {**base_query_arguments, **query_arguments} + return query_arguments + def create_query(query_arguments: Optional[dict] = None) -> str: return f"{BASE_SEARCH_QUERY}?{create_query_string(query_arguments)}" def create_aggregation_query(aggregation_fields: List[str]) -> dict: @@ -250,7 +252,7 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa return aggregation_query[date_property_name] - def execute_aggregation_query(request: pyramid.request.Request, query: str, aggregation_query: dict) -> str: + def execute_aggregation_query(request: PyramidRequest, query: str, aggregation_query: dict) -> str: query += "&from=0&limit=0" # needed for aggregation query to not return the actual/individual item results. 
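    # FYI: at the elasticsearch level this amounts to the usual aggregation-only request shape,
    # i.e. (a sketch, with the query/aggs bodies elided) something like:
    #     {"from": 0, "size": 0, "query": {...}, "aggs": {...}}
    # so from=0&limit=0 suppresses only the individual hits, not the aggregation buckets.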
request = snovault_make_search_subreq(request, path=query, method="GET") results = snovault_search(None, request, custom_aggregations=aggregation_query) @@ -297,7 +299,8 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum aggregation_field_grouping_cell_or_donor = get_aggregation_field_grouping_cell_or_donor() base_query_arguments = create_base_query_arguments(request) - query = create_query(request, base_query_arguments) + query_arguments = create_query_arguments(request, base_query_arguments) + query = create_query(query_arguments) if not legacy: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" @@ -328,7 +331,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum } if debug_query: - return {"query": query, "aggregation_query": aggregation_query} + return {"query": query, "query_arguments": query_arguments, "aggregation_query": aggregation_query} raw_results = execute_aggregation_query(request, query, aggregation_query) @@ -398,6 +401,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum additional_properties = { "debug": { "query": query, + "query_arguments": query_arguments, "aggregation_query_fields": [ AGGREGATION_FIELD_RELEASE_DATE, *get_aggregation_field_grouping_cell_or_donor(), @@ -433,7 +437,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum return normalized_results -def add_info_for_troubleshooting(normalized_results: dict, request: pyramid.request.Request) -> None: +def add_info_for_troubleshooting(normalized_results: dict, request: PyramidRequest) -> None: def get_files(files, property_name, property_value, map_property_value = None): found = [] From 2b0ad4b9797cf574769ffafdc045d47ed66eaa67 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 13 Dec 2024 11:10:59 -0500 Subject: [PATCH 47/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 45 ++++++++------ src/encoded/recent_files_summary.py | 25 +++++--- src/encoded/tests/test_endpoint_utils.py | 78 ++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 28 deletions(-) create mode 100644 src/encoded/tests/test_endpoint_utils.py diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 3a026722c..c1f6a8c08 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -45,7 +45,7 @@ def parse_date_range_related_arguments( from_date: Optional[Union[str, datetime, date]], thru_date: Optional[Union[str, datetime, date]], nmonths: Optional[Union[str, int]] = None, - include_current_month: bool = True, + include_current_month: Optional[bool] = True, strings: bool = False) -> Tuple[Optional[Union[str, datetime]], Optional[Union[str, datetime]]]: """ @@ -53,8 +53,11 @@ def parse_date_range_related_arguments( Given dates may be datetime or date objects or strings. Returned dates are datetime objects, or if the the given strings arguments is True, then strings (formatted as YYYY-MM-DD). - If both of the given from/thru dates are specified/valid then those are returned - and the given nmonths argument is not used. + If BOTH of the given from/thru dates are specified/valid then those are parsed and returned; + and the given nmonths and include_current_month arguments are NOT used in this case. + + Note that the include_current_month argument is used ONLY if NEITHER from NOR thru date + are specified; and note that its default value is True. 
If only the given from date is specified then a None thru date is returned, UNLESS the given nmonths argument represents a positive integer, in which case the returned thru date will be nmonths months @@ -85,42 +88,48 @@ def parse_date_range_related_arguments( to and the "thru" date is thru the very END of the date/day (23:59). This is actually done by the method snovault.search.lucene_builder.LuceneBuilder.handle_range_filters. """ - include_current_month = include_current_month is True from_date = parse_datetime_string(from_date, notz=True) thru_date = parse_datetime_string(thru_date, last_day_of_month_if_no_day=True, notz=True) - if not isinstance(nmonths, int): - if isinstance(nmonths, str) and (nmonths := nmonths.strip()): - try: - nmonths = int(nmonths) - except Exception: + if nmonths is None: + nmonths = 0 + nmonths_none = True + else: + nmonths_none = False + if not isinstance(nmonths, int): + if isinstance(nmonths, str) and (nmonths := nmonths.strip()): + try: + nmonths = int(nmonths) + except Exception: + nmonths = 0 + else: nmonths = 0 - else: - nmonths = 0 if from_date: if (not thru_date) and isinstance(nmonths, int): if nmonths > 0: thru_date = _add_months(from_date, nmonths) - elif nmonths == 0: + elif (nmonths == 0) and (not nmonths_none): thru_date = _get_last_date_of_month(from_date) elif thru_date: if isinstance(nmonths, int): if nmonths < 0: from_date = _add_months(thru_date, nmonths) - elif nmonths == 0: + elif (nmonths == 0) and (not nmonths_none): from_date = _get_first_date_of_month(thru_date) - elif ((nmonths := abs(nmonths)) != 0) or include_current_month: + elif ((nmonths := abs(nmonths)) != 0) or (include_current_month is not False): # If no (valid) from/thru dates given, but the absolute value of nmonths is a non-zero integer, then returns # from/thru dates for the last nmonths month ending with the last day of month previous to the current month. 
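        # For example (assuming "today" is 2024-11-06, per the tests added below): nmonths=3 with
        # include_current_month=False yields 2024-08-01 thru 2024-10-31, and with
        # include_current_month=True yields 2024-08-01 thru 2024-11-30.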
# thru_date = _add_months(_get_last_date_of_month(), -1) thru_date = _get_last_date_of_month() - if not include_current_month: - thru_date = _add_months(thru_date, -1) + if include_current_month is False: + thru_date = _get_last_date_of_month(_add_months(thru_date, -1)) nmonths -= 1 from_date = _add_months(thru_date, -nmonths) from_date = _get_first_date_of_month(from_date) if strings is True: - return (from_date.strftime(f"%Y-%m-%d") if from_date else None, - thru_date.strftime(f"%Y-%m-%d") if thru_date else None) + from_date = from_date.strftime(f"%Y-%m-%d") if from_date else None + thru_date = thru_date.strftime(f"%Y-%m-%d") if thru_date else None + if from_date and thru_date and thru_date < from_date: + from_date, thru_date = thru_date, from_date return from_date, thru_date diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 7e230e874..a1a71fe69 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -533,7 +533,8 @@ def print_normalized_aggregation_results(normalized_results: dict, uuids: bool = False, uuid_details: bool = False, nobold: bool = False, - checks: bool = True, + checks: bool = False, + query: bool = False, verbose: bool = False) -> None: """ @@ -553,7 +554,7 @@ def print_results(data: dict, parent_grouping_value: Optional[str] = None, indent: int = 0) -> None: - nonlocal title, uuids, uuid_details, nobold, verbose + nonlocal title, uuids, uuid_details, nobold, query, verbose nonlocal aggregation_fields, red, green_bold, gray, bold nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark @@ -647,16 +648,20 @@ def print_hit_property_values(hit: dict, property_name: str, note = "" if len(hits) > count: note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") - elif checks is True: - if isinstance(items := data.get("items"), list): - subcount = 0 - for item in items: - if isinstance(subcount_item := item.get("count"), int): - subcount += subcount_item - note = f" {chars_check}" if subcount == count else f" {chars_xmark}" - else: + elif isinstance(items := data.get("items"), list): + subcount = 0 + for item in items: + if isinstance(subcount_item := item.get("count"), int): + subcount += subcount_item + if subcount != count: + note = f" {chars_xmark}" + elif checks is True: note = f" {chars_check}" + elif checks: + note = f" {chars_check}" print(f"{spaces}{grouping}: {count}{note}") + if (query is True) and (query_string := data.get("query")): + print(f"{spaces} {query_string}") for hit in hits: if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: note = "" diff --git a/src/encoded/tests/test_endpoint_utils.py b/src/encoded/tests/test_endpoint_utils.py new file mode 100644 index 000000000..b877a8b62 --- /dev/null +++ b/src/encoded/tests/test_endpoint_utils.py @@ -0,0 +1,78 @@ +from contextlib import contextmanager +import datetime +from typing import Optional, Union +from unittest.mock import patch as mock_patch +from encoded.endpoint_utils import parse_date_range_related_arguments, parse_datetime_string + +DEFAULT_MOCK_DATETIME_TODAY_VALUE = "2024-11-06 07:54:16" + + +def test_parse_date_range_related_arguments_sans_from_thru_dates(): + + def testf(nmonths, include_current_month): + # Note that include_current_month used ONLY if NEITHER from_date NOR thru_date are specified (this case). 
+ return parse_date_range_related_arguments(None, None, nmonths=nmonths, + include_current_month=include_current_month, strings=True) + + with mocked_datetime_today(DEFAULT_MOCK_DATETIME_TODAY_VALUE): + assert testf(nmonths=3, include_current_month=False) == ("2024-08-01", "2024-10-31") + assert testf(nmonths=3, include_current_month=False) == ("2024-08-01", "2024-10-31") + assert testf(nmonths=-3, include_current_month=True) == ("2024-08-01", "2024-11-30") + assert testf(nmonths=-3, include_current_month=False) == ("2024-08-01", "2024-10-31") + assert testf(nmonths=1, include_current_month=False) == ("2024-10-01", "2024-10-31") + assert testf(nmonths=1, include_current_month=True) == ("2024-10-01", "2024-11-30") + assert testf(nmonths=0, include_current_month=False) == (None, None) + assert testf(nmonths=0, include_current_month=True) == ("2024-11-01", "2024-11-30") + + +def test_parse_date_range_related_arguments_with_from_thru_dates(): + + def testf(from_date, thru_date): + # Note that include_current_month used ONLY if NEITHER from_date NOR thru_date are specified. + return parse_date_range_related_arguments(from_date, thru_date, nmonths=None, + include_current_month=None, strings=True) + + with mocked_datetime_today(DEFAULT_MOCK_DATETIME_TODAY_VALUE): + assert testf("2024-05-16", "2024-08-29") == ("2024-05-16", "2024-08-29") + assert testf("2024-08-29", "2024-05-16") == ("2024-05-16", "2024-08-29") + assert testf("2024-11-04", "2035-10-06") == ("2024-11-04", "2035-10-06") + + +def test_parse_date_range_related_arguments_with_from_date(): + + def testf(from_date, nmonths): + # Note that include_current_month used ONLY if NEITHER from_date NOR thru_date are specified. + return parse_date_range_related_arguments(from_date, None, nmonths=nmonths, + include_current_month=None, strings=True) + + with mocked_datetime_today(DEFAULT_MOCK_DATETIME_TODAY_VALUE): + assert testf("2024-06-24", nmonths=None) == ("2024-06-24", None) + assert testf("2024-06-24", nmonths=0) == ("2024-06-24", "2024-06-30") + assert testf("2024-06-24", nmonths=1) == ("2024-06-24", "2024-07-24") + + +def test_parse_date_range_related_arguments_with_thru_date(): + + def testf(thru_date, nmonths): + # Note that include_current_month used ONLY if NEITHER from_date NOR thru_date are specified. 
+ return parse_date_range_related_arguments(None, thru_date, nmonths=nmonths, + include_current_month=None, strings=True) + + with mocked_datetime_today(DEFAULT_MOCK_DATETIME_TODAY_VALUE): + assert testf("2024-06-24", nmonths=None) == (None, "2024-06-24") + assert testf("2024-06-24", nmonths=0) == ("2024-06-01", "2024-06-24") + assert testf("2024-06-24", nmonths=-1) == ("2024-05-24", "2024-06-24") + + +@contextmanager +def mocked_datetime_today(value: Optional[Union[str, datetime.datetime]] = DEFAULT_MOCK_DATETIME_TODAY_VALUE): + if isinstance(value, str): + value = parse_datetime_string(value) + if not isinstance(value, datetime.datetime): + raise Exception("Error using mocked_datetime_today function!") + class MockDateTime(datetime.datetime): # noqa + @classmethod + def today(cls): + nonlocal value ; return value # noqa + with (mock_patch("encoded.endpoint_utils.datetime", MockDateTime), mock_patch("datetime.datetime", MockDateTime)): + yield From 66cec6ea51226a2d886e1e55f166e108057adead Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 13 Dec 2024 12:17:15 -0500 Subject: [PATCH 48/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 25 ++++++++++++++++++++- src/encoded/recent_files_summary.py | 34 +++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index c1f6a8c08..2f22f6717 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -3,7 +3,7 @@ from dateutil.relativedelta import relativedelta from pyramid.request import Request as PyramidRequest from typing import Any, List, Optional, Tuple, Union -from urllib.parse import urlencode +from urllib.parse import parse_qs, urlencode from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string @@ -173,6 +173,20 @@ def parse_datetime_string(value: Union[str, datetime, date], return value +def get_date_range_for_month( + date: Union[str, datetime, date], + strings: bool = False) -> Tuple[Optional[Union[str, datetime]], Optional[Union[str, datetime]]]: + if date := parse_datetime_string(date, notz=True): + from_date = _get_first_date_of_month(date) + thru_date = _get_last_date_of_month(date) + if strings is True: + from_date = from_date.strftime(f"%Y-%m-%d") if from_date else None + thru_date = thru_date.strftime(f"%Y-%m-%d") if thru_date else None + else: + from_date = thru_date = None + return from_date, thru_date + + def _get_first_date_of_month(day: Optional[Union[datetime, date, str]] = None) -> datetime: """ Returns a datetime object representing the first day of the month of the given date; @@ -222,6 +236,15 @@ def create_query_string(query_arguments: dict, base: Optional[str] = None) -> st return query_string +def deconstruct_query_string(query_string: str) -> dict: + if isinstance(query_string, str): + if (question_mark_index := query_string.find("?")) >= 0: + query_string = query_string[question_mark_index + 1:] + query_string = query_string.replace("%21=", "=%21") + return {key: value[0] if len(value) == 1 else value for key, value in parse_qs(query_string).items()} + return {} + + def get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: bool = False) -> List[Any]: """ TODO: Move this to dcicutils. Maybe much of the above too. 
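FYI, a minimal sanity sketch of the two helpers added above (the example values are
illustrative assumptions, not taken from the patch):

    from encoded.endpoint_utils import deconstruct_query_string, get_date_range_for_month

    # get_date_range_for_month expands a year/month (or any date within that month)
    # into the month's first/last days:
    assert get_date_range_for_month("2024-12", strings=True) == ("2024-12-01", "2024-12-31")

    # deconstruct_query_string inverts create_query_string; single-valued parameters
    # collapse to plain strings while repeated parameters stay lists:
    assert deconstruct_query_string("/search/?type=OutputFile&status=released&status=public") == {
        "type": "OutputFile", "status": ["released", "public"]}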
diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py
index a1a71fe69..7613608dc 100644
--- a/src/encoded/recent_files_summary.py
+++ b/src/encoded/recent_files_summary.py
@@ -9,7 +9,8 @@
 from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results
 from encoded.elasticsearch_utils import sort_normalized_aggregation_results
 from encoded.elasticsearch_utils import AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE
-from encoded.endpoint_utils import create_query_string, parse_date_range_related_arguments
+from encoded.endpoint_utils import create_query_string, deconstruct_query_string
+from encoded.endpoint_utils import get_date_range_for_month, parse_date_range_related_arguments
 from encoded.endpoint_utils import get_properties, parse_datetime_string
 from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int
 from snovault.search.search import search as snovault_search
@@ -140,8 +141,10 @@
             query_arguments = {**base_query_arguments, **query_arguments}
         return query_arguments

-    def create_query(query_arguments: Optional[dict] = None) -> str:
-        return f"{BASE_SEARCH_QUERY}?{create_query_string(query_arguments)}"
+    def create_query(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str:
+        query_arguments = create_query_arguments(request, base_query_arguments)
+        query_string = create_query_string(query_arguments)
+        return f"{BASE_SEARCH_QUERY}?{query_string}"

     def create_aggregation_query(aggregation_fields: List[str]) -> dict:

@@ -279,8 +282,10 @@
         if value := normalized_results.get("value"):
             if name == date_property_name:
                 # Special case for date value which is just year/month (e.g. 2024-12);
-                # we want to turn this into a date range query for the month.
-                from_date, thru_date = parse_date_range_related_arguments(value, None, strings=True)
+                # we want to turn this into a date range query for the month; actually
+                # this is not a special case, this is the NORMAL case we are dealing with.
+                # from_date, thru_date = parse_date_range_related_arguments(value, None, nmonths=0, strings=True)
+                from_date, thru_date = get_date_range_for_month(value, strings=True)
                 if from_date and thru_date:
                     base_query_arguments = {**base_query_arguments,
                                             f"{name}.from": from_date, f"{name}.to": thru_date}
@@ -298,9 +303,11 @@
                 add_queries_to_normalized_results(element, base_query_arguments)

     aggregation_field_grouping_cell_or_donor = get_aggregation_field_grouping_cell_or_donor()
+    # The base_query_arguments does not contain the from/thru dates, as it is used to construct
+    # the query-string for the individually grouped items, which will have from/thru dates
+    # specifically representing their place within the group.
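+    # E.g. (hypothetical values) a grouped item for the month 2024-12 ends up with a query-string like:
+    #     /search/?type=OutputFile&file_status_tracking.released.from=2024-12-01&file_status_tracking.released.to=2024-12-31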
base_query_arguments = create_base_query_arguments(request) - query_arguments = create_query_arguments(request, base_query_arguments) - query = create_query(query_arguments) + query = create_query(request, base_query_arguments) if not legacy: aggregate_by_cell_line_property_name = "aggregate_by_cell_line" @@ -331,7 +338,16 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum } if debug_query: - return {"query": query, "query_arguments": query_arguments, "aggregation_query": aggregation_query} + return { + "query": query, + "query_arguments": deconstruct_query_string(query), + "aggregation_query_fields": [ + AGGREGATION_FIELD_RELEASE_DATE, + *get_aggregation_field_grouping_cell_or_donor(), + AGGREGATION_FIELD_FILE_DESCRIPTOR + ], + "aggregation_query": aggregation_query + } raw_results = execute_aggregation_query(request, query, aggregation_query) @@ -401,7 +417,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum additional_properties = { "debug": { "query": query, - "query_arguments": query_arguments, + "query_arguments": deconstruct_query_string(query), "aggregation_query_fields": [ AGGREGATION_FIELD_RELEASE_DATE, *get_aggregation_field_grouping_cell_or_donor(), From 0c1195fd48474f19399db7fe0dc186b2e8e020f2 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 13 Dec 2024 12:32:53 -0500 Subject: [PATCH 49/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 7613608dc..0f98133c1 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -598,7 +598,7 @@ def format_hit_property_values(hit: dict, property_name: str, elif hit.get("elasticsearch_counted") is False: counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) if (counted_grouping_name == property_name) and (counted_grouping_value == property_value): - property_value = green(f"{property_value} {chars_larrow_hollow} COUNTED HERE") + property_value = green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE") return property_value def find_where_aggregated_and_counted(uuid: str) -> Tuple[str, str]: From c546ca21076a474a9e8d195b6b2f2c9753a6437f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 13 Dec 2024 16:47:19 -0500 Subject: [PATCH 50/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 48 ++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 0f98133c1..4c7de06ef 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -75,6 +75,7 @@ def recent_files_summary(request: PyramidRequest) -> dict: nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) nodonors = request_arg_bool(request, "nodonors", request_arg_bool(request, "nodonor")) favor_donor = request_arg_bool(request, "favor_donor") + multi = request_arg_bool(request, "multi") nosort = request_arg_bool(request, "nosort") legacy = request_arg_bool(request, "legacy") debug = request_arg_bool(request, "debug") @@ -160,7 +161,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict: return {} def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal 
aggregation_field_grouping_cell_or_donor, date_property_name + nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, multi if field == date_property_name: return { "date_histogram": { @@ -179,11 +180,34 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa for aggregation_field_grouping_index in range(len(aggregation_field_grouping_cell_or_donor)): aggregation_field = aggregation_field_grouping_cell_or_donor[aggregation_field_grouping_index] if_or_else_if = "if" if aggregation_field_grouping_index == 0 else "else if" - script += f""" - {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ - return '{aggregation_field}:' + doc['embedded.{aggregation_field}.raw'].value; - }} - """ + # Note that if there are multiple values for the aggregation field just the "first" one will be chosen; + # where "first" means which was indexed first, which from an application POV is kind of arbitrary. + # If we want to make it more deterministic we could order the results (say) alphabetically like so: + # def value = doc['embedded.{aggregation_field}.raw'].stream().min((a, b) -> a.compareTo(b)).get(); + # return '{aggregation_field}:' + value; + # OR, if we actually want to aggregation on ALL values we could collect the results and return all like so: + # def values = []; + # for (value in doc['embedded.{aggregation_field}.raw']) { + # values.add('{aggregation_field}:' + value); + # } + # return values; + # But then we'd get double counting and so on. We are told in any case that these groups should be distinct. + if not multi: + script += f""" + {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ + return '{aggregation_field}:' + doc['embedded.{aggregation_field}.raw'].value; + }} + """ + else: + script += f""" + {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ + def values = []; + for (value in doc['embedded.{aggregation_field}.raw']) {{ + values.add('{aggregation_field}:' + value); + }} + return values; + }} + """ script += f""" else {{ return 'unknown'; @@ -585,6 +609,10 @@ def get_portal_hits(data: dict) -> List[dict]: def format_hit_property_values(hit: dict, property_name: str, color: Optional[Callable] = None) -> Optional[str]: nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow + if hit.get("elasticsearch_counted") is False: + counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) + else: + counted_grouping_name, counted_grouping_value = (None, None) if property_value := hit.get(property_name): if property_name == parent_grouping_name: property_values = [] @@ -593,7 +621,11 @@ def format_hit_property_values(hit: dict, property_name: str, property_value = color(property_value) if callable(color) else green_bold(property_value) property_values.append(property_value) else: - property_values.append(property_value) + if (counted_grouping_name, counted_grouping_value) == (property_name, property_value): + property_values.append(green_bold(f"{property_value} {chars_larrow_hollow}") + + green(" COUNTED HERE")) + else: + property_values.append(property_value) property_value = ", ".join(property_values) elif hit.get("elasticsearch_counted") is False: counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) @@ -670,7 +702,7 @@ def print_hit_property_values(hit: dict, property_name: str, if isinstance(subcount_item := item.get("count"), int): subcount += subcount_item if subcount != count: - 
note = f" {chars_xmark}" + note = red(f" {chars_xmark} ACTUAL COUNT: {subcount}") elif checks is True: note = f" {chars_check}" elif checks: From 77f0537586c9be07e93b7c45cba558e2b5361fcc Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 12:04:19 -0500 Subject: [PATCH 51/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 87 ++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 19 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 4c7de06ef..6ec3e6de7 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -277,6 +277,9 @@ def create_field_aggregation(field: str) -> Optional[dict]: # noqa include_missing=include_missing, create_field_aggregation=create_field_aggregation) + if troubleshoot_elasticsearch: + add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) + return aggregation_query[date_property_name] def execute_aggregation_query(request: PyramidRequest, query: str, aggregation_query: dict) -> str: @@ -509,7 +512,7 @@ def annotate_with_uuids(normalized_results: dict): AGGREGATION_FIELD_RELEASE_DATE, AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, - # Some extra properties for troublehooting (as this whole thing is). + # Store some extra properties for troublehooting (as this whole thing is). "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", "file_sets.libraries.analytes.samples.sample_sources.display_title", @@ -582,21 +585,60 @@ def print_normalized_aggregation_results(normalized_results: dict, """ from hms_utils.terminal_utils import terminal_color - global AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, AGGREGATION_FIELD_DONOR - - def get_aggregation_fields(data: dict) -> List[str]: - if not isinstance(aggregation_fields := data.get("debug", {}).get("aggregation_query_fields"), list): + def get_aggregation_fields(normalized_results: dict) -> List[str]: + # Returns all noted/important aggregation fields which ARE actually being used by the query; + # we only are interested in ones that are in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, + # which is all of the possible sample-source/cell-line/donor aggregations. + global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR + if not isinstance(aggregation_fields := + normalized_results.get("debug", {}).get("aggregation_query_fields"), list): aggregation_fields = [] + for aggregation_field in aggregation_fields: + # Remove the ones we are not interested in reporting on. + if aggregation_field not in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: + aggregation_fields.remove(aggregation_field) return aggregation_fields + def get_unused_aggregation_fields(normalized_results: dict) -> List[str]: + # Returns all noted/important aggregation fields which are NOT actually being used by the query; + # we only are interested in ones that are in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, + # which is all of the possible sample-source/cell-line/donor aggregations. 
+ global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR + unused_aggregation_fields = [] + aggregation_fields = get_aggregation_fields(normalized_results) + for aggregation_field in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: + if aggregation_field not in aggregation_fields: + unused_aggregation_fields.append(aggregation_field) + unused_aggregation_fields.append( + "file_sets.libraries.analytes.samples.sample_sources.display_title") + return unused_aggregation_fields + + def get_aggregation_field_labels() -> dict: + global AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, AGGREGATION_FIELD_DONOR + return { + AGGREGATION_FIELD_CELL_MIXTURE: "sample-sources", + AGGREGATION_FIELD_CELL_LINE: "cell-lines", + AGGREGATION_FIELD_DONOR: "donors", + "file_sets.libraries.analytes.samples.sample_sources.display_title": "sample-sources-title" + } + + def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: + aggregation_fields = get_aggregation_fields(normalized_results) + unused_aggregation_fields = get_unused_aggregation_fields(normalized_results) + aggregation_fields_to_print = aggregation_fields + unused_aggregation_fields + for aggregation_field_label in get_aggregation_field_labels(): + if aggregation_field_label not in aggregation_fields_to_print: + aggregation_field_labels.append(aggregation_field_label) + return aggregation_fields_to_print + def print_results(data: dict, parent_grouping_name: Optional[str] = None, parent_grouping_value: Optional[str] = None, indent: int = 0) -> None: nonlocal title, uuids, uuid_details, nobold, query, verbose - nonlocal aggregation_fields, red, green_bold, gray, bold - nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark + nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green_bold, gray, bold + nonlocal aggregation_fields, aggregation_fields_to_print, aggregation_field_labels def get_portal_hits(data: dict) -> List[dict]: hits = [] @@ -669,14 +711,20 @@ def print_hit_property_values(hit: dict, property_name: str, label: Optional[str] = None, prefix: Optional[str] = None, color: Optional[Callable] = None) -> None: - nonlocal verbose, aggregation_fields, chars_dot_hollow, verbose - if property_values := format_hit_property_values(hit, property_name, color=color): - if (verbose is True) or (not label): - label = property_name + nonlocal aggregation_fields, aggregation_field_labels, chars_dot_hollow, chars_null, verbose + if not label: + label = aggregation_field_labels.get(property_name) + if (verbose is True) or (not label): + label = property_name + property_values = format_hit_property_values(hit, property_name, color=color) + if not property_values: + property_values = chars_null + if property_name not in aggregation_fields: property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}" - if property_name not in aggregation_fields: - property_description = gray(property_description) - print(property_description) + property_description = gray(property_description) + else: + property_description = f"{prefix or ''}{chars_dot} {label}: {property_values}" + print(property_description) if not (isinstance(data, dict) and data): return @@ -723,11 +771,8 @@ def print_hit_property_values(hit: dict, property_name: str, prefix = f"{spaces} " # Show property values for troubleshooting (as this whole thing is); # see add_info_for_troubleshooting.annotate_with_uuids. 
- print_hit_property_values(hit, AGGREGATION_FIELD_CELL_MIXTURE, "sample-sources", prefix=prefix, color=color) - print_hit_property_values(hit, AGGREGATION_FIELD_CELL_LINE, "cell-lines", prefix=prefix, color=color) - print_hit_property_values(hit, "file_sets.libraries.analytes.samples.sample_sources.display_title", - "sample-sources-title", prefix=prefix, color=color) - print_hit_property_values(hit, AGGREGATION_FIELD_DONOR, "donors", prefix=prefix, color=color) + for aggregation_field in aggregation_fields_to_print: + print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) if isinstance(items := data.get("items"), list): for element in items: print_results(element, @@ -736,6 +781,9 @@ def print_hit_property_values(hit: dict, property_name: str, indent=indent + 2) aggregation_fields = get_aggregation_fields(normalized_results) + aggregation_fields_to_print = get_aggregation_fields_to_print(normalized_results) + aggregation_field_labels = get_aggregation_field_labels() + red = lambda text: terminal_color(text, "red") # noqa red_bold = lambda text: terminal_color(text, "red", bold=True) # noqa green = lambda text: terminal_color(text, "green") # noqa @@ -749,5 +797,6 @@ def print_hit_property_values(hit: dict, property_name: str, chars_diamond = "❖" chars_rarrow_hollow = "▷" chars_larrow_hollow = "◁" + chars_null = "∅" print_results(normalized_results) From 3448c9a3a00606611f07f0826cb850bd69a3c1d2 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 12:08:00 -0500 Subject: [PATCH 52/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 6ec3e6de7..0e33314c1 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -614,6 +614,7 @@ def get_unused_aggregation_fields(normalized_results: dict) -> List[str]: return unused_aggregation_fields def get_aggregation_field_labels() -> dict: + # Shorter/nicer names for aggregation fields of interest to print. global AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, AGGREGATION_FIELD_DONOR return { AGGREGATION_FIELD_CELL_MIXTURE: "sample-sources", @@ -626,6 +627,7 @@ def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: aggregation_fields = get_aggregation_fields(normalized_results) unused_aggregation_fields = get_unused_aggregation_fields(normalized_results) aggregation_fields_to_print = aggregation_fields + unused_aggregation_fields + # Look at get_aggregation_field_labels above for other/miscellaneous fields we want to print. 
for aggregation_field_label in get_aggregation_field_labels(): if aggregation_field_label not in aggregation_fields_to_print: aggregation_field_labels.append(aggregation_field_label) @@ -638,7 +640,7 @@ def print_results(data: dict, nonlocal title, uuids, uuid_details, nobold, query, verbose nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green_bold, gray, bold - nonlocal aggregation_fields, aggregation_fields_to_print, aggregation_field_labels + nonlocal aggregation_fields_to_print def get_portal_hits(data: dict) -> List[dict]: hits = [] From 414dfdec71eff2ea717bb41f4a7fcda29bcef816 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 12:29:30 -0500 Subject: [PATCH 53/78] refactoring /recent_files_summary endpoint --- src/encoded/endpoint_utils.py | 2 +- src/encoded/recent_files_summary.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 2f22f6717..832868f46 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -260,7 +260,7 @@ def get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: for key_index in range(nkeys): if (value := data.get(keys[key_index], None)) is not None: if key_index == key_index_max: - return [value] + return [value] if not isinstance(value, list) else value elif isinstance(value, dict): data = value continue diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 0e33314c1..c9299446a 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -630,7 +630,7 @@ def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: # Look at get_aggregation_field_labels above for other/miscellaneous fields we want to print. 
for aggregation_field_label in get_aggregation_field_labels(): if aggregation_field_label not in aggregation_fields_to_print: - aggregation_field_labels.append(aggregation_field_label) + aggregation_fields_to_print.append(aggregation_field_label) return aggregation_fields_to_print def print_results(data: dict, From c6e595dad1b631529e85850d1ae398e45dd0bf30 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 16:38:25 -0500 Subject: [PATCH 54/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 70 +++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index c9299446a..a3107b463 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,6 +1,6 @@ from pyramid.request import Request as PyramidRequest from copy import deepcopy -from typing import Callable, List, Optional, Tuple +from typing import Callable, List, Optional, Tuple, Union from dcicutils.misc_utils import normalize_spaces from encoded.elasticsearch_utils import add_debugging_to_elasticsearch_aggregation_query from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query @@ -639,7 +639,7 @@ def print_results(data: dict, indent: int = 0) -> None: nonlocal title, uuids, uuid_details, nobold, query, verbose - nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green_bold, gray, bold + nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green, green_bold, gray, bold nonlocal aggregation_fields_to_print def get_portal_hits(data: dict) -> List[dict]: @@ -651,8 +651,9 @@ def get_portal_hits(data: dict) -> List[dict]: return hits def format_hit_property_values(hit: dict, property_name: str, - color: Optional[Callable] = None) -> Optional[str]: + color: Optional[Callable] = None) -> Tuple[Optional[str], List[Tuple[str, str]]]: nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow + counted_elsewhere = [] if hit.get("elasticsearch_counted") is False: counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) else: @@ -668,6 +669,7 @@ def format_hit_property_values(hit: dict, property_name: str, if (counted_grouping_name, counted_grouping_value) == (property_name, property_value): property_values.append(green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE")) + counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) else: property_values.append(property_value) property_value = ", ".join(property_values) @@ -675,9 +677,14 @@ def format_hit_property_values(hit: dict, property_name: str, counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) if (counted_grouping_name == property_name) and (counted_grouping_value == property_value): property_value = green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE") - return property_value + counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) + return property_value, counted_elsewhere - def find_where_aggregated_and_counted(uuid: str) -> Tuple[str, str]: + def find_where_aggregated_and_counted( + uuid: str, + multiple: bool = False, + ignore: Optional[Union[List[Tuple[str, str]], + Tuple[str, str]]] = None) -> Union[Tuple[str, str], List[Tuple[str, str]]]: nonlocal normalized_results @@ -702,23 +709,30 @@ def find_where(data: dict, uuid: str, return 
found_uuid_grouping_names_and_values if found_uuid_grouping_names_and_values := list(find_where(normalized_results, uuid)): - if len(found_uuid_grouping_names_and_values) > 0: - if len(found_uuid_grouping_names_and_values) > 1: - # Something is wrong; should only be at most one iterm with elasticsearch_counted set to True. - pass - return found_uuid_grouping_names_and_values[0] - return None, None + if isinstance(ignore, tuple) and (len(ignore) == 2) and (ignore in found_uuid_grouping_names_and_values): + found_uuid_grouping_names_and_values.remove(ignore) + elif isinstance(ignore, list): + for ignore_item in ignore: + if isinstance(ignore_item, tuple) and (len(ignore_item) == 2) and (ignore_item in found_uuid_grouping_names_and_values): + found_uuid_grouping_names_and_values.remove(ignore_item) + if multiple is True: + return found_uuid_grouping_names_and_values + if len(found_uuid_grouping_names_and_values) > 1: + # Normally should only be at most one item with elasticsearch_counted set to True. + pass + return found_uuid_grouping_names_and_values[0] + return [(None, None)] if multiple is True else (None, None) def print_hit_property_values(hit: dict, property_name: str, label: Optional[str] = None, prefix: Optional[str] = None, - color: Optional[Callable] = None) -> None: + color: Optional[Callable] = None) -> List[Tuple[str, str]]: nonlocal aggregation_fields, aggregation_field_labels, chars_dot_hollow, chars_null, verbose if not label: label = aggregation_field_labels.get(property_name) if (verbose is True) or (not label): label = property_name - property_values = format_hit_property_values(hit, property_name, color=color) + property_values, counted_elsewhere = format_hit_property_values(hit, property_name, color=color) if not property_values: property_values = chars_null if property_name not in aggregation_fields: @@ -727,6 +741,7 @@ def print_hit_property_values(hit: dict, property_name: str, else: property_description = f"{prefix or ''}{chars_dot} {label}: {property_values}" print(property_description) + return counted_elsewhere if not (isinstance(data, dict) and data): return @@ -771,10 +786,37 @@ def print_hit_property_values(hit: dict, property_name: str, color = green_bold if uuid_details is True: prefix = f"{spaces} " + counted_elsewhere = [] # Show property values for troubleshooting (as this whole thing is); # see add_info_for_troubleshooting.annotate_with_uuids. for aggregation_field in aggregation_fields_to_print: - print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) + hit_counted_elsewhere = \ + print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) + if False and hit_counted_elsewhere: + counted_elsewhere.extend(hit_counted_elsewhere) + # See if also grouped elsewhere for our FYI. 
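To make the new ignore argument concrete, here is a minimal sketch of the filtering it performs; the (grouping-name, grouping-value) tuples below are made up purely for illustration:

    found = [("donors.display_title", "SOME_DONOR"),
             ("file_sets.libraries.analytes.samples.sample_sources.cell_line.code", "SOME_CELL_LINE")]
    ignore = ("donors.display_title", "SOME_DONOR")
    if isinstance(ignore, tuple) and (len(ignore) == 2) and (ignore in found):
        found.remove(ignore)
    # found now contains only the cell-line grouping tuple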
+ duplicative = hit.get("duplicative") + duplicates = duplicative - 1 if isinstance(duplicative, int) else 0 + counted_groupings = find_where_aggregated_and_counted( + hit.get("uuid"), multiple=True, + ignore=counted_elsewhere + [(parent_grouping_name, parent_grouping_value)]) + if counted_groupings: + message = f"{spaces} {green(chars_rarrow_hollow)} {green('ALSO COUNTED HERE')}:" + if verbose is True: + if duplicates > 0: + message += f" {duplicates}" + if duplicates != len(counted_groupings): + message += red_bold(f" {chars_xmark} vs {len(counted_groupings)}") + print(message) + for counted_grouping in counted_groupings: + print(f"{spaces} - {counted_grouping[0]} {green(counted_grouping[1])}") + else: + counted_grouping_values = [green(counted_grouping[1]) for counted_grouping in counted_groupings] + message = f"{message} {', '.join(counted_grouping_values)}" + if duplicates > 0: + if duplicates != len(counted_groupings): + message += red_bold(f" {chars_xmark} {duplicates} vs {len(counted_grouping_values)}") + print(message) if isinstance(items := data.get("items"), list): for element in items: print_results(element, From b87d83a478949ac67c125c2d3b645eb50a48b44f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 16:38:54 -0500 Subject: [PATCH 55/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index a3107b463..b0e6170d7 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -792,7 +792,7 @@ def print_hit_property_values(hit: dict, property_name: str, for aggregation_field in aggregation_fields_to_print: hit_counted_elsewhere = \ print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) - if False and hit_counted_elsewhere: + if hit_counted_elsewhere: counted_elsewhere.extend(hit_counted_elsewhere) # See if also grouped elsewhere for our FYI. duplicative = hit.get("duplicative") From c4f97f914a5b88609987b4845406b5f00d531e63 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 14 Dec 2024 19:33:15 -0500 Subject: [PATCH 56/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 72 +++++++++++++++-------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index b0e6170d7..6a7ea52d5 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -507,20 +507,28 @@ def count_uuid(uuid_records: List[dict], uuid: str) -> int: count += 1 return count + def dedup_list(data: list) -> list: # noqa + return list(dict.fromkeys(data)) if isinstance(data, list) else [] + + aggregation_fields_for_troubleshooting = dedup_list([ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_CELL_LINE, + # Store some extra properties for troublehooting (as this whole thing is). 
+# "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", +# "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", + "file_sets.libraries.analytes.samples.sample_sources.display_title", + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ]) + def annotate_with_uuids(normalized_results: dict): - aggregation_fields = [ - AGGREGATION_FIELD_RELEASE_DATE, - AGGREGATION_FIELD_CELL_MIXTURE, - AGGREGATION_FIELD_CELL_LINE, - # Store some extra properties for troublehooting (as this whole thing is). - "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", - "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", - "file_sets.libraries.analytes.samples.sample_sources.display_title", - AGGREGATION_FIELD_DONOR, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] + nonlocal aggregation_fields_for_troubleshooting uuid_records = [] query = normalized_results.get("query") + if isinstance(debug := normalized_results.get("debug"), dict): + normalized_results["debug"]["aggregation_fields_for_troubleshooting"] = ( + aggregation_fields_for_troubleshooting) files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"] for first_item in normalized_results["items"]: first_property_name = first_item["name"] @@ -548,7 +556,7 @@ def annotate_with_uuids(normalized_results: dict): if not third_item["debug"].get("portal_hits"): third_item["debug"]["portal_hits"] = [] uuid_record = {"uuid": uuid} - for aggregation_field in aggregation_fields: + for aggregation_field in aggregation_fields_for_troubleshooting: aggregation_values = ", ".join(get_properties(file, aggregation_field)) uuid_record[aggregation_field] = aggregation_values or None if third_item["debug"].get("elasticsearch_hits"): @@ -593,25 +601,29 @@ def get_aggregation_fields(normalized_results: dict) -> List[str]: if not isinstance(aggregation_fields := normalized_results.get("debug", {}).get("aggregation_query_fields"), list): aggregation_fields = [] + else: + aggregation_fields = deepcopy(aggregation_fields) for aggregation_field in aggregation_fields: # Remove the ones we are not interested in reporting on. if aggregation_field not in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: aggregation_fields.remove(aggregation_field) return aggregation_fields - def get_unused_aggregation_fields(normalized_results: dict) -> List[str]: - # Returns all noted/important aggregation fields which are NOT actually being used by the query; - # we only are interested in ones that are in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, - # which is all of the possible sample-source/cell-line/donor aggregations. 
- global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR - unused_aggregation_fields = [] - aggregation_fields = get_aggregation_fields(normalized_results) - for aggregation_field in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: - if aggregation_field not in aggregation_fields: - unused_aggregation_fields.append(aggregation_field) - unused_aggregation_fields.append( - "file_sets.libraries.analytes.samples.sample_sources.display_title") - return unused_aggregation_fields + def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: + aggregation_fields_to_print = get_aggregation_fields(normalized_results) + if isinstance(aggregation_fields_for_troubleshooting := + normalized_results.get("debug", {}).get("aggregation_fields_for_troubleshooting"), list): + for aggregation_field_for_troubleshooting in aggregation_fields_for_troubleshooting: + if aggregation_field_for_troubleshooting not in aggregation_fields_to_print: + aggregation_fields_to_print.append(aggregation_field_for_troubleshooting) + aggregation_fields_to_not_print = [ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + for aggregation_field_to_not_print in aggregation_fields_to_not_print: + if aggregation_field_to_not_print in aggregation_fields_to_print: + aggregation_fields_to_print.remove(aggregation_field_to_not_print) + return aggregation_fields_to_print def get_aggregation_field_labels() -> dict: # Shorter/nicer names for aggregation fields of interest to print. @@ -623,16 +635,6 @@ def get_aggregation_field_labels() -> dict: "file_sets.libraries.analytes.samples.sample_sources.display_title": "sample-sources-title" } - def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: - aggregation_fields = get_aggregation_fields(normalized_results) - unused_aggregation_fields = get_unused_aggregation_fields(normalized_results) - aggregation_fields_to_print = aggregation_fields + unused_aggregation_fields - # Look at get_aggregation_field_labels above for other/miscellaneous fields we want to print. 
- for aggregation_field_label in get_aggregation_field_labels(): - if aggregation_field_label not in aggregation_fields_to_print: - aggregation_fields_to_print.append(aggregation_field_label) - return aggregation_fields_to_print - def print_results(data: dict, parent_grouping_name: Optional[str] = None, parent_grouping_value: Optional[str] = None, From 18ceaa032c5bd248d28f1b181d6cc3420a71611c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 00:05:25 -0500 Subject: [PATCH 57/78] refactoring /recent_files_summary endpoint --- src/encoded/browse.py | 64 ++++++++++++++++++++++++++++- src/encoded/recent_files_summary.py | 8 +++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/encoded/browse.py b/src/encoded/browse.py index 3ad80270d..07935ab96 100644 --- a/src/encoded/browse.py +++ b/src/encoded/browse.py @@ -63,5 +63,67 @@ def browse(context, request, search_type=DEFAULT_BROWSE_TYPE, return_generator=F @view_config(route_name="recent_files_summary_endpoint", request_method=["GET"], effective_principals=Authenticated) @debug_log def recent_files_summary_endpoint(context, request): - results = recent_files_summary(request) + from encoded.endpoint_utils import request_arg_bool + text = request_arg_bool(request, "text") + results = recent_files_summary(request, troubleshooting=text) + if text: + import json + import os + from pyramid.response import Response + import sys + from encoded.recent_files_summary import print_normalized_aggregation_results + with capture_output_to_html_string() as captured_output: + print_normalized_aggregation_results(results, uuids=True, uuid_details=True) + text = captured_output.getvalue() + text = ansi_to_html(text) + return Response(f"
<pre>{text}</pre>
", content_type='text/html') return results + + +from contextlib import contextmanager +@contextmanager +def capture_output_to_html_string(): + from io import StringIO + from unittest.mock import patch as patch + print_original = print + captured_output = StringIO() + def captured_print(*args, **kwargs): + nonlocal captured_output + print_original(*args, **kwargs, file=captured_output) + with patch("builtins.print", captured_print): + yield captured_output + + +def ansi_to_html(text): + import re + ANSI_ESCAPE_RE = re.compile(r'\x1b\[(\d+)m') + ANSI_COLOR_MAP = { + '30': 'black', + '31': 'red', + '32': 'green', + '33': 'yellow', + '34': 'blue', + '35': 'magenta', + '36': 'cyan', + '37': 'white', + '90': 'bright_black', + '91': 'bright_red', + '92': 'bright_green', + '93': 'bright_yellow', + '94': 'bright_blue', + '95': 'bright_magenta', + '96': 'bright_cyan', + '97': 'bright_white', + } + def replace_ansi(match): + code = match.group(1) # Extract ANSI code + color = ANSI_COLOR_MAP.get(code) + if color: + return f'' + elif code == '0': # Reset code + return '' + return '' # Ignore unsupported codes + html_text = ANSI_ESCAPE_RE.sub(replace_ansi, text) + if html_text.count(' html_text.count(''): + html_text += '' + return f'
<pre>{html_text}</pre>
' diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 6a7ea52d5..640a6e959 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -38,7 +38,7 @@ BASE_SEARCH_QUERY = "/search/" -def recent_files_summary(request: PyramidRequest) -> dict: +def recent_files_summary(request: PyramidRequest, troubleshooting: bool = True) -> dict: """ This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, by default, info for files released withing the past three months grouped by release-date, @@ -65,6 +65,7 @@ def recent_files_summary(request: PyramidRequest) -> dict: released can be queried for using one or more status query arguments, e.g. status=uploaded. """ + global AGGREGATION_FIELD_RELEASE_DATE date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) @@ -85,6 +86,11 @@ def recent_files_summary(request: PyramidRequest) -> dict: raw = request_arg_bool(request, "raw") willrfix = request_arg_bool(request, "willrfix") + if troubleshooting is True: + debug = True + troubleshoot = True + troubleshoot_elasticsearch = True + def get_aggregation_field_grouping_cell_or_donor() -> List[str]: # This specializes the aggregation query to group first by the cell-line field, # and then alternatively (if a cell-line field does not exist) by the donor field. From 567f6857cdd1df0bbbd09021af3585c2d326d8d5 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 00:24:24 -0500 Subject: [PATCH 58/78] refactoring /recent_files_summary endpoint --- src/encoded/browse.py | 61 ++------------------------ src/encoded/recent_files_summary.py | 67 ++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 59 deletions(-) diff --git a/src/encoded/browse.py b/src/encoded/browse.py index 07935ab96..bbda20b0a 100644 --- a/src/encoded/browse.py +++ b/src/encoded/browse.py @@ -1,4 +1,4 @@ -from pyramid.httpexceptions import HTTPBadRequest, HTTPFound +from pyramid.httpexceptions import HTTPFound from pyramid.security import Authenticated from pyramid.view import view_config import structlog @@ -67,63 +67,8 @@ def recent_files_summary_endpoint(context, request): text = request_arg_bool(request, "text") results = recent_files_summary(request, troubleshooting=text) if text: - import json - import os from pyramid.response import Response - import sys - from encoded.recent_files_summary import print_normalized_aggregation_results - with capture_output_to_html_string() as captured_output: - print_normalized_aggregation_results(results, uuids=True, uuid_details=True) - text = captured_output.getvalue() - text = ansi_to_html(text) + from encoded.recent_files_summary import get_normalized_aggregation_results_as_html_for_troublehshooting + text = get_normalized_aggregation_results_as_html_for_troublehshooting(results) return Response(f"
<pre>{text}</pre>
", content_type='text/html') return results - - -from contextlib import contextmanager -@contextmanager -def capture_output_to_html_string(): - from io import StringIO - from unittest.mock import patch as patch - print_original = print - captured_output = StringIO() - def captured_print(*args, **kwargs): - nonlocal captured_output - print_original(*args, **kwargs, file=captured_output) - with patch("builtins.print", captured_print): - yield captured_output - - -def ansi_to_html(text): - import re - ANSI_ESCAPE_RE = re.compile(r'\x1b\[(\d+)m') - ANSI_COLOR_MAP = { - '30': 'black', - '31': 'red', - '32': 'green', - '33': 'yellow', - '34': 'blue', - '35': 'magenta', - '36': 'cyan', - '37': 'white', - '90': 'bright_black', - '91': 'bright_red', - '92': 'bright_green', - '93': 'bright_yellow', - '94': 'bright_blue', - '95': 'bright_magenta', - '96': 'bright_cyan', - '97': 'bright_white', - } - def replace_ansi(match): - code = match.group(1) # Extract ANSI code - color = ANSI_COLOR_MAP.get(code) - if color: - return f'' - elif code == '0': # Reset code - return '' - return '' # Ignore unsupported codes - html_text = ANSI_ESCAPE_RE.sub(replace_ansi, text) - if html_text.count(' html_text.count(''): - html_text += '' - return f'
<pre>{html_text}</pre>
' diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 640a6e959..4b24b191a 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,4 +1,5 @@ from pyramid.request import Request as PyramidRequest +from contextlib import contextmanager from copy import deepcopy from typing import Callable, List, Optional, Tuple, Union @@ -532,7 +533,7 @@ def annotate_with_uuids(normalized_results: dict): nonlocal aggregation_fields_for_troubleshooting uuid_records = [] query = normalized_results.get("query") - if isinstance(debug := normalized_results.get("debug"), dict): + if isinstance(normalized_results.get("debug"), dict): normalized_results["debug"]["aggregation_fields_for_troubleshooting"] = ( aggregation_fields_for_troubleshooting) files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"] @@ -852,3 +853,67 @@ def print_hit_property_values(hit: dict, property_name: str, chars_null = "∅" print_results(normalized_results) + + +def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict): + with capture_output_to_html_string() as captured_output: + print_normalized_aggregation_results(normalized_results, uuids=True, uuid_details=True) + return captured_output.html + return + + +@contextmanager +def capture_output_to_html_string(): + from io import StringIO + from unittest.mock import patch as patch + print_original = print + captured_output = StringIO() + class CapturedOutput: # noqa + def __init__(self, captured_output: StringIO): + self._captured_output = captured_output + @property # noqa + def text(self): + return self._captured_output.getvalue() + @property # noqa + def html(self): + return ansi_to_html(self._captured_output.getvalue()) + def captured_print(*args, **kwargs): # noqa + nonlocal captured_output + print_original(*args, **kwargs, file=captured_output) + with patch("builtins.print", captured_print): + yield CapturedOutput(captured_output) + + +def ansi_to_html(text): + import re + ANSI_ESCAPE_RE = re.compile(r'\x1b\[(\d+)m') + ANSI_COLOR_MAP = { + '30': 'black', + '31': 'red', + '32': 'green', + '33': 'yellow', + '34': 'blue', + '35': 'magenta', + '36': 'cyan', + '37': 'white', + '90': 'bright_black', + '91': 'bright_red', + '92': 'bright_green', + '93': 'bright_yellow', + '94': 'bright_blue', + '95': 'bright_magenta', + '96': 'bright_cyan', + '97': 'bright_white', + } + def replace_ansi(match): # noqa + code = match.group(1) + color = ANSI_COLOR_MAP.get(code) + if color: + return f'<span style="color:{color};">' + elif code == '0': + return '</span>' + return '' + html_text = ANSI_ESCAPE_RE.sub(replace_ansi, text) + if html_text.count('<span') > html_text.count('</span>'): + html_text += '</span>' + return f'
<pre>{html_text}</pre>
' From 2f45e8cfc419408c352565f51484ab6ccfbb8683 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 10:56:28 -0500 Subject: [PATCH 59/78] refactoring /recent_files_summary endpoint --- src/encoded/recent_files_summary.py | 39 ++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index 4b24b191a..a72830f7c 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -1,6 +1,7 @@ -from pyramid.request import Request as PyramidRequest from contextlib import contextmanager from copy import deepcopy +from pyramid.request import Request as PyramidRequest +import re from typing import Callable, List, Optional, Tuple, Union from dcicutils.misc_utils import normalize_spaces @@ -885,8 +886,6 @@ def captured_print(*args, **kwargs): # noqa def ansi_to_html(text): - import re - ANSI_ESCAPE_RE = re.compile(r'\x1b\[(\d+)m') ANSI_COLOR_MAP = { '30': 'black', '31': 'red', '32': 'green', '33': 'yellow', '34': 'blue', '35': 'magenta', '36': 'cyan', '37': 'white', '90': 'bright_black', '91': 'bright_red', '92': 'bright_green', '93': 'bright_yellow', '94': 'bright_blue', '95': 'bright_magenta', '96': 'bright_cyan', '97': 'bright_white', } + ANSI_ESCAPE_RE = re.compile(r'\x1b\[([0-9;]*)m') + bold_active = False def replace_ansi(match): # noqa - code = match.group(1) - color = ANSI_COLOR_MAP.get(code) - if color: - return f'<span style="color:{color};">' - elif code == '0': - return '</span>' - return '' - html_text = ANSI_ESCAPE_RE.sub(replace_ansi, text) - if html_text.count('<span') > html_text.count('</span>'): - html_text += '</span>' - return f'
<pre>{html_text}</pre>
' + nonlocal bold_active + codes = match.group(1).split(';') # Split multiple codes (e.g., "1;31") + html_parts = [] + for code in codes: + if code == '1': # Bold + if not bold_active: # Activate bold + html_parts.append('<b>') + bold_active = True + elif code in ANSI_COLOR_MAP: # Colors + color = ANSI_COLOR_MAP[code] + html_parts.append(f'<span style="color:{color};">') + elif code == '0': # Reset + if bold_active: + html_parts.append('</b>') + bold_active = False + html_parts.append('</span>') # Close color + return ''.join(html_parts) + text_with_html = ANSI_ESCAPE_RE.sub(replace_ansi, text) + if bold_active: + text_with_html += '</b>' + return f'
<pre>{text_with_html}</pre>
' From 203228ca2e8175584a626819f3655c3ddb895f46 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 15:57:09 -0500 Subject: [PATCH 60/78] refactoring /recent_files_summary endpoint --- poetry.lock | 17 ++++++++++++++++- pyproject.toml | 1 + src/encoded/endpoint_utils.py | 22 ++++++++++++++++++++++ src/encoded/recent_files_summary.py | 2 +- 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 60da629ed..cad476ce2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4015,6 +4015,21 @@ setuptools = "*" [package.extras] testing = ["pytest", "pytest-cov"] +[[package]] +name = "termcolor" +version = "2.5.0" +description = "ANSI color formatting for output in terminal" +category = "main" +optional = false +python-versions = ">=3.9" +files = [ + {file = "termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8"}, + {file = "termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -4475,4 +4490,4 @@ test = ["zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9.1,<3.13" -content-hash = "85d3cfc258bd495fab8caf35d943f40fb9e3c7114fcd59f1661d380fe15a0c09" +content-hash = "72b303a0100150cc88c75fceb3b9ab1f2a5123686a6ef75bf8d2e4320cb0a6a9" diff --git a/pyproject.toml b/pyproject.toml index ad7134d24..6f6938ff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ structlog = ">=19.2.0,<20" subprocess-middleware = "^0.3.0" supervisor = "^4.2.4" # Useful for picking apart pyproject.toml +termcolor = "^2.4.0" toml = ">=0.10.1,<1" tqdm = "^4.59.0" transaction = "^3.0.0" diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoint_utils.py index 832868f46..ed7f9f640 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoint_utils.py @@ -2,6 +2,7 @@ from datetime import date, datetime from dateutil.relativedelta import relativedelta from pyramid.request import Request as PyramidRequest +from termcolor import colored from typing import Any, List, Optional, Tuple, Union from urllib.parse import parse_qs, urlencode from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string @@ -277,3 +278,24 @@ def get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: return sorted(values) if (sort is True) else values break return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback]) + + +def terminal_color(value: str, + color: Optional[str] = None, + dark: bool = False, + bold: bool = False, + underline: bool = False, + nocolor: bool = False) -> str: + # This is used only for troubleshooting by + if nocolor is True: + return value + attributes = [] + if dark is True: + attributes.append("dark") + if bold is True: + attributes.append("bold") + if underline is True: + attributes.append("underline") + if isinstance(color, str) and color: + return colored(value, color.lower(), attrs=attributes) + return colored(value, attrs=attributes) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py index a72830f7c..06ecd3069 100644 --- a/src/encoded/recent_files_summary.py +++ b/src/encoded/recent_files_summary.py @@ -599,7 +599,7 @@ def print_normalized_aggregation_results(normalized_results: dict, """ For deveopment/troubleshooting only ... 
""" - from hms_utils.terminal_utils import terminal_color + from encoded.endpoint_utils import terminal_color def get_aggregation_fields(normalized_results: dict) -> List[str]: # Returns all noted/important aggregation fields which ARE actually being used by the query; From 82daa17f46bdaa0e5515165e9029a5b29a6d3161 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 17:11:58 -0500 Subject: [PATCH 61/78] refactoring /recent_files_summary endpoint --- src/encoded/browse.py | 18 +- .../{ => endpoints}/elasticsearch_utils.py | 0 src/encoded/{ => endpoints}/endpoint_utils.py | 56 -- .../recent_files_summary.py | 490 +++++++++ .../recent_files_summary_fields.py | 16 + .../recent_files_summary_troubleshooting.py | 504 ++++++++++ src/encoded/recent_files_summary.py | 930 ------------------ src/encoded/tests/test_elasticsearch_utils.py | 14 +- src/encoded/tests/test_endpoint_utils.py | 5 +- 9 files changed, 1026 insertions(+), 1007 deletions(-) rename src/encoded/{ => endpoints}/elasticsearch_utils.py (100%) rename src/encoded/{ => endpoints}/endpoint_utils.py (81%) create mode 100644 src/encoded/endpoints/recent_files_summary/recent_files_summary.py create mode 100644 src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py create mode 100644 src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py delete mode 100644 src/encoded/recent_files_summary.py diff --git a/src/encoded/browse.py b/src/encoded/browse.py index bbda20b0a..98eb0f37e 100644 --- a/src/encoded/browse.py +++ b/src/encoded/browse.py @@ -6,7 +6,7 @@ from urllib.parse import urlencode from snovault.search.search import search from snovault.util import debug_log -from encoded.recent_files_summary import recent_files_summary +from encoded.endpoints.recent_files_summary.recent_files_summary import recent_files_summary_endpoint log = structlog.getLogger(__name__) @@ -14,7 +14,7 @@ def includeme(config): config.add_route('browse', '/browse{slash:/?}') - config.add_route("recent_files_summary_endpoint", "/recent_files_summary") + config.add_route("recent_files_summary", "/recent_files_summary") config.scan(__name__) @@ -60,15 +60,7 @@ def browse(context, request, search_type=DEFAULT_BROWSE_TYPE, return_generator=F return search(context, request, search_type, return_generator, forced_type="Browse") -@view_config(route_name="recent_files_summary_endpoint", request_method=["GET"], effective_principals=Authenticated) +@view_config(route_name="recent_files_summary", request_method=["GET"], effective_principals=Authenticated) @debug_log -def recent_files_summary_endpoint(context, request): - from encoded.endpoint_utils import request_arg_bool - text = request_arg_bool(request, "text") - results = recent_files_summary(request, troubleshooting=text) - if text: - from pyramid.response import Response - from encoded.recent_files_summary import get_normalized_aggregation_results_as_html_for_troublehshooting - text = get_normalized_aggregation_results_as_html_for_troublehshooting(results) - return Response(f"
<pre>{text}</pre>
", content_type='text/html') - return results +def recent_files_summary(context, request): + return recent_files_summary_endpoint(context, request) diff --git a/src/encoded/elasticsearch_utils.py b/src/encoded/endpoints/elasticsearch_utils.py similarity index 100% rename from src/encoded/elasticsearch_utils.py rename to src/encoded/endpoints/elasticsearch_utils.py diff --git a/src/encoded/endpoint_utils.py b/src/encoded/endpoints/endpoint_utils.py similarity index 81% rename from src/encoded/endpoint_utils.py rename to src/encoded/endpoints/endpoint_utils.py index ed7f9f640..b518e3ea3 100644 --- a/src/encoded/endpoint_utils.py +++ b/src/encoded/endpoints/endpoint_utils.py @@ -2,7 +2,6 @@ from datetime import date, datetime from dateutil.relativedelta import relativedelta from pyramid.request import Request as PyramidRequest -from termcolor import colored from typing import Any, List, Optional, Tuple, Union from urllib.parse import parse_qs, urlencode from dcicutils.datetime_utils import parse_datetime_string as dcicutils_parse_datetime_string @@ -244,58 +243,3 @@ def deconstruct_query_string(query_string: str) -> dict: query_string = query_string.replace("%21=", "=%21") return {key: value[0] if len(value) == 1 else value for key, value in parse_qs(query_string).items()} return {} - - -def get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: bool = False) -> List[Any]: - """ - TODO: Move this to dcicutils. Maybe much of the above too. - Returns the values of the given property name within the given dictionary as a list, where the - given property name can be a dot-separated list of property names, which indicate a path into - nested dictionaries within the given dictionary; and - where if any of the elements within - the path are lists then we iterate through each, collecting the values for each and including - each within the list of returned values. 
- """ - if isinstance(data, dict) and isinstance(name, str) and name: - if keys := name.split("."): - nkeys = len(keys) ; key_index_max = nkeys - 1 # noqa - for key_index in range(nkeys): - if (value := data.get(keys[key_index], None)) is not None: - if key_index == key_index_max: - return [value] if not isinstance(value, list) else value - elif isinstance(value, dict): - data = value - continue - elif isinstance(value, list) and value and ((sub_key_index := key_index + 1) < nkeys): - sub_key = ".".join(keys[sub_key_index:]) - values = [] - for element in value: - if isinstance(element_value := get_properties(element, sub_key), list): - for element_value_item in element_value: - if (element_value_item is not None) and (element_value_item not in values): - values.append(element_value_item) - elif (element_value is not None) and (element_value not in values): - values.append(element_value) - return sorted(values) if (sort is True) else values - break - return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback]) - - -def terminal_color(value: str, - color: Optional[str] = None, - dark: bool = False, - bold: bool = False, - underline: bool = False, - nocolor: bool = False) -> str: - # This is used only for troubleshooting by - if nocolor is True: - return value - attributes = [] - if dark is True: - attributes.append("dark") - if bold is True: - attributes.append("bold") - if underline is True: - attributes.append("underline") - if isinstance(color, str) and color: - return colored(value, color.lower(), attrs=attributes) - return colored(value, attrs=attributes) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py new file mode 100644 index 000000000..f90d577c6 --- /dev/null +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py @@ -0,0 +1,490 @@ +from copy import deepcopy +from pyramid.request import Request as PyramidRequest, Response as PyramidResponse +from typing import List, Optional +from dcicutils.misc_utils import normalize_spaces +from encoded.endpoints.elasticsearch_utils import ( + add_debugging_to_elasticsearch_aggregation_query, + create_elasticsearch_aggregation_query, + merge_elasticsearch_aggregation_results, + normalize_elasticsearch_aggregation_results, + prune_elasticsearch_aggregation_results, + sort_normalized_aggregation_results, + AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE) +from encoded.endpoints.endpoint_utils import ( + request_arg, request_args, request_arg_bool, request_arg_int, + create_query_string, deconstruct_query_string, + get_date_range_for_month, parse_date_range_related_arguments) +from encoded.endpoints.recent_files_summary.recent_files_summary_fields import ( + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR) +from encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting import ( + add_info_for_troubleshooting, + get_normalized_aggregation_results_as_html_for_troublehshooting) +from snovault.search.search import search as snovault_search +from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq + +QUERY_FILE_TYPES = ["OutputFile"] +QUERY_FILE_STATUSES = ["released"] +QUERY_FILE_CATEGORIES = ["!Quality Control"] +QUERY_RECENT_MONTHS = 3 +QUERY_INCLUDE_CURRENT_MONTH = True +BASE_SEARCH_QUERY = "/search/" 
+ + +def recent_files_summary_endpoint(context, request): + # This text=true support is purely for troubleshooting purposes; it dumps + # terminal-like formatted output for the results returned by the query. + text = request_arg_bool(request, "text") + results = recent_files_summary(request, troubleshooting=text) + if text: + results = get_normalized_aggregation_results_as_html_for_troublehshooting(results) + results = PyramidResponse(f"
<pre>{results}</pre>
", content_type='text/html') + return results + + +def recent_files_summary(request: PyramidRequest, troubleshooting: bool = True) -> dict: + """ + This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, + by default, info for files released withing the past three months grouped by release-date, + cell-line or donor, and file-description. The specific fields used for these groupings are: + + - release-date: file_status_tracking.released + - cell-line: file_sets.libraries.analytes.samples.sample_sources.cell_line.code + - donor: donors.display_title + - file-dsecription: release_tracker_description + + Note that release_tracker_description is a newer (2024-12) + calculated property - see PR-298 (branch: sn_file_release_tracker). + + By default the current (assuminging partial) month IS included, so we really return info for + the past FULL three months plus for whatever time has currently elapsed for the current month. + Use pass the include_current_month=false query argument to NOT include the current month. + + The number of months of data can be controlled using the nmonths query argument, e.g. nmonths=6. + + A specific date range can also be passed in e.g. using from_date=2024-08-01 and thru_date=2024-10-31. + + For testing purposes, a date field other than the default file_status_tracking.released can + also be specified using the date_property_name query argument. And file statuses other than + released can be queried for using one or more status query arguments, e.g. status=uploaded. + """ + + date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) + max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) + include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True)) + include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) + nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell", True)) # N.B. default True + nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) + nodonors = request_arg_bool(request, "nodonors", request_arg_bool(request, "nodonor")) + favor_donor = request_arg_bool(request, "favor_donor") + multi = request_arg_bool(request, "multi") + nosort = request_arg_bool(request, "nosort") + legacy = request_arg_bool(request, "legacy") + debug = request_arg_bool(request, "debug") + debug_query = request_arg_bool(request, "debug_query") + troubleshoot = request_arg_bool(request, "troubleshoot") + troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch") + raw = request_arg_bool(request, "raw") + willrfix = request_arg_bool(request, "willrfix") + + if troubleshooting is True: + debug = True + troubleshoot = True + troubleshoot_elasticsearch = True + + def get_aggregation_field_grouping_cell_or_donor() -> List[str]: + # This specializes the aggregation query to group first by the cell-line field, + # and then alternatively (if a cell-line field does not exist) by the donor field. + # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively + # look first for the donor field and then secondarily for the cell-line field. 
+ nonlocal nocells, nomixtures, nodonors, favor_donor + aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR) + if nocells: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_LINE) + if nomixtures: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_MIXTURE) + if nodonors: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR) + if favor_donor: + aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR) + aggregation_field_grouping_cell_or_donor.insert(0, AGGREGATION_FIELD_DONOR) + return aggregation_field_grouping_cell_or_donor + + def create_base_query_arguments(request: PyramidRequest) -> dict: + + global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES + + types = request_args(request, "type", QUERY_FILE_TYPES) + statuses = request_args(request, "status", QUERY_FILE_STATUSES) + categories = request_args(request, "category", QUERY_FILE_CATEGORIES) + + base_query_arguments = { + "type": types if types else None, + "status": statuses if statuses else None, + "data_category": categories if categories else None + } + + return {key: value for key, value in base_query_arguments.items() if value is not None} + + def create_query_arguments(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str: + + global BASE_SEARCH_QUERY, QUERY_RECENT_MONTHS, QUERY_INCLUDE_CURRENT_MONTH + nonlocal date_property_name + + recent_months = request_arg_int(request, "nmonths", request_arg_int(request, "months", QUERY_RECENT_MONTHS)) + from_date = request_arg(request, "from_date") + thru_date = request_arg(request, "thru_date") + include_current_month = request_arg_bool(request, "include_current_month", QUERY_INCLUDE_CURRENT_MONTH) + + from_date, thru_date = parse_date_range_related_arguments(from_date, thru_date, nmonths=recent_months, + include_current_month=include_current_month, + strings=True) + query_arguments = { + f"{date_property_name}.from": from_date if from_date else None, + f"{date_property_name}.to": thru_date if from_date else None + } + + if isinstance(base_query_arguments, dict): + query_arguments = {**base_query_arguments, **query_arguments} + return query_arguments + + def create_query(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str: + query_arguments = create_query_arguments(request, base_query_arguments) + query_string = create_query_string(query_arguments) + return f"{BASE_SEARCH_QUERY}?{query_string}" + + def create_aggregation_query(aggregation_fields: List[str]) -> dict: + + nonlocal date_property_name, max_buckets, include_missing, favor_donor, troubleshoot_elasticsearch + + aggregations = [] + if not isinstance(aggregation_fields, list): + aggregation_fields = [aggregation_fields] + for item in aggregation_fields: + if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): + aggregations.append(item) + if not aggregations: + return {} + + def create_field_aggregation(field: str) -> Optional[dict]: # noqa + nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, multi + if field == date_property_name: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", + "format": "yyyy-MM", + "missing": "1970-01", + "order": {"_key": "desc"} + } + } + elif field == AGGREGATION_FIELD_CELL_LINE: + # Note how we prefix the result with the aggregation field name; + # this is so later we can tell which grouping/field was matched; + # see 
fixup_names_values_for_normalized_results for this fixup. + script = "" + for aggregation_field_grouping_index in range(len(aggregation_field_grouping_cell_or_donor)): + aggregation_field = aggregation_field_grouping_cell_or_donor[aggregation_field_grouping_index] + if_or_else_if = "if" if aggregation_field_grouping_index == 0 else "else if" + # Note that if there are multiple values for the aggregation field just the "first" one will be chosen; + # where "first" means which was indexed first, which from an application POV is kind of arbitrary. + # If we want to make it more deterministic we could order the results (say) alphabetically like so: + # def value = doc['embedded.{aggregation_field}.raw'].stream().min((a, b) -> a.compareTo(b)).get(); + # return '{aggregation_field}:' + value; + # OR, if we actually want to aggregation on ALL values we could collect the results and return all like so: + # def values = []; + # for (value in doc['embedded.{aggregation_field}.raw']) { + # values.add('{aggregation_field}:' + value); + # } + # return values; + # But then we'd get double counting and so on. We are told in any case that these groups should be distinct. + if not multi: + script += f""" + {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ + return '{aggregation_field}:' + doc['embedded.{aggregation_field}.raw'].value; + }} + """ + else: + script += f""" + {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ + def values = []; + for (value in doc['embedded.{aggregation_field}.raw']) {{ + values.add('{aggregation_field}:' + value); + }} + return values; + }} + """ + script += f""" + else {{ + return 'unknown'; + }} + """ + return { + "terms": { + "script": { + "source": normalize_spaces(script), + "lang": "painless" + }, + "size": max_buckets + } + } + + def create_field_filter(field: str) -> Optional[dict]: # noqa + nonlocal aggregation_field_grouping_cell_or_donor + if field == AGGREGATION_FIELD_CELL_LINE: + filter = {"bool": {"should": [], "minimum_should_match": 1}} + for aggregation_field in aggregation_field_grouping_cell_or_donor: + filter["bool"]["should"].append({"exists": { "field": f"embedded.{aggregation_field}.raw"}}) + return filter + + aggregation_query = create_elasticsearch_aggregation_query( + aggregations, + max_buckets=max_buckets, + missing_value=AGGREGATION_NO_VALUE, + include_missing=include_missing, + create_field_aggregation=create_field_aggregation, + create_field_filter=create_field_filter) + + if troubleshoot_elasticsearch: + add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) + + return aggregation_query[date_property_name] + + def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: + + nonlocal date_property_name, max_buckets, include_missing + + aggregations = [] + if not isinstance(aggregation_fields, list): + aggregation_fields = [aggregation_fields] + for item in aggregation_fields: + if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): + aggregations.append(item) + if not aggregations: + return {} + + def create_field_aggregation(field: str) -> Optional[dict]: # noqa + nonlocal date_property_name + if field == date_property_name: + return { + "date_histogram": { + "field": f"embedded.{field}", + "calendar_interval": "month", + "format": "yyyy-MM", + "missing": "1970-01", + "order": {"_key": "desc"} + } + } + + aggregation_query = create_elasticsearch_aggregation_query( + aggregations, + max_buckets=max_buckets, + 
missing_value=AGGREGATION_NO_VALUE, + include_missing=include_missing, + create_field_aggregation=create_field_aggregation) + + if troubleshoot_elasticsearch: + add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) + + return aggregation_query[date_property_name] + + def execute_aggregation_query(request: PyramidRequest, query: str, aggregation_query: dict) -> str: + query += "&from=0&limit=0" # needed for aggregation query to not return the actual/individual item results. + request = snovault_make_search_subreq(request, path=query, method="GET") + results = snovault_search(None, request, custom_aggregations=aggregation_query) + return results + + def fixup_names_values_for_normalized_results(normalized_results: dict) -> None: + nonlocal aggregation_field_grouping_cell_or_donor + if isinstance(normalized_results, dict): + if isinstance(value := normalized_results.get("value"), str): + if ((separator_index := value.find(":")) > 0) and (value_prefix := value[0:separator_index]): + if value_prefix in aggregation_field_grouping_cell_or_donor: + if value := value[separator_index + 1:]: + normalized_results["name"] = value_prefix + normalized_results["value"] = value + if isinstance(items := normalized_results.get("items"), list): + for element in items: + fixup_names_values_for_normalized_results(element) + + def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None: + global BASE_SEARCH_QUERY + nonlocal date_property_name, willrfix + if isinstance(normalized_results, dict): + if name := normalized_results.get("name"): + if value := normalized_results.get("value"): + if name == date_property_name: + # Special case for date value which is just year/month (e.g. 2024-12); + # we want to turn this into a date range query for the month; actually + # this is not a special case, this is the NORMAL case we are dealing with. + # from_date, thru_date = parse_date_range_related_arguments(value, None, nmonths=0, strings=True) + from_date, thru_date = get_date_range_for_month(value, strings=True) + if from_date and thru_date: + base_query_arguments = {**base_query_arguments, + f"{name}.from": from_date, f"{name}.to": thru_date} + else: + base_query_arguments = {**base_query_arguments, name: value} + if willrfix: + if name == AGGREGATION_FIELD_CELL_LINE: + base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE + elif name == AGGREGATION_FIELD_DONOR: + base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE + base_query_arguments[AGGREGATION_FIELD_CELL_LINE] = AGGREGATION_NO_VALUE + normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) + if isinstance(items := normalized_results.get("items"), list): + for element in items: + add_queries_to_normalized_results(element, base_query_arguments) + + aggregation_field_grouping_cell_or_donor = get_aggregation_field_grouping_cell_or_donor() + # The base_query_arguments does not contain the from/thru dates as this is used; + # this is used to construct the query-string for the individually grouped items which + # will have the from/thru dates specifically representing their place within the group. 
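For a month-only bucket value like "2024-12", the get_date_range_for_month call above widens it to the whole month, so the per-item query picks up a from/to pair; a small sketch of the expected behavior:

    from_date, thru_date = get_date_range_for_month("2024-12", strings=True)
    # from_date == "2024-12-01" and thru_date == "2024-12-31", which ends up in the
    # item query as: file_status_tracking.released.from=2024-12-01
    #                &file_status_tracking.released.to=2024-12-31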
+ base_query_arguments = create_base_query_arguments(request) + query = create_query(request, base_query_arguments) + + if not legacy: + aggregate_by_cell_line_property_name = "aggregate_by_cell_line" + aggregate_by_cell_line = [ + date_property_name, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregation_query = { + aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line) + } + else: + aggregate_by_cell_line_property_name = "aggregate_by_cell_line" + aggregate_by_cell_line = [ + date_property_name, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregate_by_donor_property_name = "aggregate_by_donor" + aggregate_by_donor = [ + date_property_name, + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + aggregation_query = { + aggregate_by_cell_line_property_name: create_aggregation_query_legacy(aggregate_by_cell_line), + aggregate_by_donor_property_name: create_aggregation_query_legacy(aggregate_by_donor) + } + + if debug_query: + return { + "query": query, + "query_arguments": deconstruct_query_string(query), + "aggregation_query_fields": [ + AGGREGATION_FIELD_RELEASE_DATE, + *get_aggregation_field_grouping_cell_or_donor(), + AGGREGATION_FIELD_FILE_DESCRIPTOR + ], + "aggregation_query": aggregation_query + } + + raw_results = execute_aggregation_query(request, query, aggregation_query) + + if raw: + # For debugging/troubleshooting only: if raw=true then return the raw ElasticSearch results. + # And note that unless we remove the @id property we get redirected to the URL in this field, + # for example to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control + # &file_status_tracking.released.from=2024-09-30 + # &file_status_tracking.released.to=2024-12-31&from=0&limit=0 + if "@id" in raw_results: + del raw_results["@id"] + return raw_results + + if not (raw_results := raw_results.get("aggregations")): + return {} + + if debug: + raw_results = deepcopy(raw_results) # otherwise may be overwritten by below + + prune_elasticsearch_aggregation_results(raw_results) + + if not legacy: + aggregation_results = raw_results.get(aggregate_by_cell_line_property_name) + else: + aggregation_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), + raw_results.get(aggregate_by_donor_property_name)) + + # Note that the doc_count values returned by ElasticSearch DO actually seem to be for UNIQUE items, + # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3 + # has file_sets.libraries.analytes.samples.sample_sources.cell_line.code values for both HG00438 and HG005), + # then its doc_count will NOT be counted TWICE. This creates a situation where it might LOOK like the counts + # are WRONG in the MERGED result set (returned via merge_elasticsearch_aggregation_results), where the outer + # item count may be less than the sum of the individual counts within each sub-group. For example, the below result + # shows a top-level doc_count of 1, even though there are 2 documents, 1 in the HG00438 group and the other + # in the HG005 group; this would be because the same unique file has a cell_line.code of both HG00438 and HG005.
+ # { + # "meta": { "field_name": "file_status_tracking.released" }, + # "buckets": [ + # { + # "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 1, + # "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { + # "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, + # "buckets": [ + # { "key": "HG00438", "doc_count": 1, + # "release_tracker_description": { + # "meta": { "field_name": "release_tracker_description" }, + # "buckets": [ + # { "key": "WGS Illumina NovaSeq X bam", "doc_count": 1 }, + # ] + # } + # }, + # { "key": "HG005", "doc_count": 1, + # "release_tracker_description": { + # "meta": { "field_name": "release_tracker_description" }, + # "buckets": [ + # { "key": "Fiber-seq PacBio Revio bam", "doc_count": 1 } + # ] + # } + # } + # ] + # } + # } + # ] + # } + + if debug: + additional_properties = { + "debug": { + "query": query, + "query_arguments": deconstruct_query_string(query), + "aggregation_query_fields": [ + AGGREGATION_FIELD_RELEASE_DATE, + *get_aggregation_field_grouping_cell_or_donor(), + AGGREGATION_FIELD_FILE_DESCRIPTOR + ], + "aggregation_query": aggregation_query, + "raw_results": raw_results, + "aggregation_results": deepcopy(aggregation_results) + } + } + else: + additional_properties = None + + normalized_results = normalize_elasticsearch_aggregation_results(aggregation_results, + additional_properties=additional_properties, + remove_empty_items=not include_missing) + if not legacy: + fixup_names_values_for_normalized_results(normalized_results) + if include_queries: + add_queries_to_normalized_results(normalized_results, base_query_arguments) + normalized_results["query"] = query + + if not nosort: + # We can sort on the aggregations by level; outermost/left to innermost/right. + # In our case the outermost is the date aggregation so sort that by the key value, + # e.g. 2024-12, descending; and the rest of the inner levels by the default + # sorting which is by aggregation count descending and secondarily by the key value. + sort_normalized_aggregation_results(normalized_results, ["-key", "default"]) + + if troubleshoot: + add_info_for_troubleshooting(normalized_results, request) + + return normalized_results diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py new file mode 100644 index 000000000..c7a9e6a16 --- /dev/null +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py @@ -0,0 +1,16 @@ +# These are all the possible fields on which the /recent_files_summary endpoint can aggregate. +# Various flags modify the specifics, for experimentation, troubleshooting, and possible future changes. + +AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released" +# FYI FWIW: there is also file_sets.libraries.analytes.samples.sample_sources.display_title; +# and note that sometimes file_sets.libraries.analytes.samples.sample_sources.code does not exist.
+AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code" +AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" +AGGREGATION_FIELD_DONOR = "donors.display_title" +AGGREGATION_FIELD_FILE_DESCRIPTOR = "release_tracker_description" + +AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR = [ + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_DONOR +] diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py new file mode 100644 index 000000000..7fbc5d4ea --- /dev/null +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -0,0 +1,504 @@ +from contextlib import contextmanager +from copy import deepcopy +from pyramid.request import Request as PyramidRequest +import re +from termcolor import colored +from typing import Any, Callable, List, Optional, Tuple, Union +from encoded.endpoints.endpoint_utils import parse_datetime_string +from encoded.endpoints.recent_files_summary.recent_files_summary_fields import ( + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, + AGGREGATION_FIELD_CELL_LINE, + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_DONOR, + AGGREGATION_FIELD_FILE_DESCRIPTOR) + + +def add_info_for_troubleshooting(normalized_results: dict, request: PyramidRequest) -> None: + + def get_files(files, property_name, property_value, map_property_value = None): + found = [] + for file in files: + if properties := _get_properties(file, property_name): + if callable(map_property_value): + mapped_properties = [] + for value in properties: + mapped_properties.append(map_property_value(value)) + properties = mapped_properties + if property_value in properties: + found.append(file) + return found + + def map_date_property_value(value): + if date_value := parse_datetime_string(value): + return f"{date_value.year}-{date_value.month:02}" + return value + + def count_uuid(uuid_records: List[dict], uuid: str) -> int: + count = 0 + for uuid_record in uuid_records: + if uuid_record.get("uuid") == uuid: + count += 1 + return count + + def dedup_list(data: list) -> list: # noqa + return list(dict.fromkeys(data)) if isinstance(data, list) else [] + + aggregation_fields_for_troubleshooting = dedup_list([ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_CELL_MIXTURE, + AGGREGATION_FIELD_CELL_LINE, + # Store some extra properties for troublehooting (as this whole thing is). 
diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
new file mode 100644
index 000000000..7fbc5d4ea
--- /dev/null
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -0,0 +1,504 @@
+from contextlib import contextmanager
+from copy import deepcopy
+from pyramid.request import Request as PyramidRequest
+import re
+from termcolor import colored
+from typing import Any, Callable, List, Optional, Tuple, Union
+from encoded.endpoints.endpoint_utils import parse_datetime_string
+from encoded.endpoints.recent_files_summary.recent_files_summary_fields import (
+    AGGREGATION_FIELD_RELEASE_DATE,
+    AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR,
+    AGGREGATION_FIELD_CELL_LINE,
+    AGGREGATION_FIELD_CELL_MIXTURE,
+    AGGREGATION_FIELD_DONOR,
+    AGGREGATION_FIELD_FILE_DESCRIPTOR)
+
+
+def add_info_for_troubleshooting(normalized_results: dict, request: PyramidRequest) -> None:
+
+    def get_files(files, property_name, property_value, map_property_value = None):
+        found = []
+        for file in files:
+            if properties := _get_properties(file, property_name):
+                if callable(map_property_value):
+                    mapped_properties = []
+                    for value in properties:
+                        mapped_properties.append(map_property_value(value))
+                    properties = mapped_properties
+                if property_value in properties:
+                    found.append(file)
+        return found
+
+    def map_date_property_value(value):
+        if date_value := parse_datetime_string(value):
+            return f"{date_value.year}-{date_value.month:02}"
+        return value
+
+    def count_uuid(uuid_records: List[dict], uuid: str) -> int:
+        count = 0
+        for uuid_record in uuid_records:
+            if uuid_record.get("uuid") == uuid:
+                count += 1
+        return count
+
+    def dedup_list(data: list) -> list:  # noqa
+        return list(dict.fromkeys(data)) if isinstance(data, list) else []
+
+    aggregation_fields_for_troubleshooting = dedup_list([
+        AGGREGATION_FIELD_RELEASE_DATE,
+        AGGREGATION_FIELD_CELL_MIXTURE,
+        AGGREGATION_FIELD_CELL_LINE,
+        # Store some extra properties for troubleshooting (as this whole thing is).
+        "file_sets.libraries.analytes.samples.sample_sources.display_title",
+        AGGREGATION_FIELD_DONOR,
+        AGGREGATION_FIELD_FILE_DESCRIPTOR
+    ])
+
+    def annotate_with_uuids(normalized_results: dict):
+        nonlocal aggregation_fields_for_troubleshooting
+        uuid_records = []
+        query = normalized_results.get("query")
+        if isinstance(normalized_results.get("debug"), dict):
+            normalized_results["debug"]["aggregation_fields_for_troubleshooting"] = (
+                aggregation_fields_for_troubleshooting)
+        files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"]
+        for first_item in normalized_results["items"]:
+            first_property_name = first_item["name"]
+            first_property_value = first_item["value"]
+            for second_item in first_item["items"]:
+                second_property_name = second_item["name"]
+                second_property_value = second_item["value"]
+                for third_item in second_item["items"]:
+                    third_property_name = third_item["name"]
+                    third_property_value = third_item["value"]
+                    if debug_elasticsearch_hits := third_item.get("debug_elasticsearch_hits"):
+                        if not third_item.get("debug"):
+                            third_item["debug"] = {}
+                        third_item["debug"]["elasticsearch_hits"] = debug_elasticsearch_hits
+                        third_item["debug"]["elasticsearch_hits"].sort()
+                        del third_item["debug_elasticsearch_hits"]
+                    if first_files := get_files(files, first_property_name, first_property_value,
+                                                map_property_value=map_date_property_value):
+                        if second_files := get_files(first_files, second_property_name, second_property_value):
+                            if third_files := get_files(second_files, third_property_name, third_property_value):
+                                for file in third_files:
+                                    if isinstance(uuid := file.get("uuid"), str):
+                                        if not third_item.get("debug"):
+                                            third_item["debug"] = {}
+                                        if not third_item["debug"].get("portal_hits"):
+                                            third_item["debug"]["portal_hits"] = []
+                                        uuid_record = {"uuid": uuid}
+                                        for aggregation_field in aggregation_fields_for_troubleshooting:
+                                            aggregation_values = ", ".join(_get_properties(file, aggregation_field))
+                                            uuid_record[aggregation_field] = aggregation_values or None
+                                        if third_item["debug"].get("elasticsearch_hits"):
+                                            uuid_record["elasticsearch_counted"] = \
+                                                uuid in third_item["debug"]["elasticsearch_hits"]
+                                        third_item["debug"]["portal_hits"].append(uuid_record)
+                                        uuid_records.append(uuid_record)
+                    if third_item.get("debug", {}).get("portal_hits"):
+                        third_item["debug"]["portal_hits"].sort(key=lambda item: item.get("uuid"))
+
+        for uuid_record in uuid_records:
+            if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1:
+                uuid_record["duplicative"] = count
+
+    try:
+        annotate_with_uuids(normalized_results)
+    except Exception:
+        pass
+
+
+def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict):
+    with _capture_output_to_html_string() as captured_output:
+        print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True)
+        return captured_output.html
+
+
+def print_normalized_aggregation_results_for_troubleshooting(normalized_results: dict,
+                                                             title: Optional[str] = None,
+                                                             parent_grouping_name: Optional[str] = None,
+                                                             parent_grouping_value: Optional[str] = None,
+                                                             uuids: bool = False,
+                                                             uuid_details: bool = False,
+                                                             nobold: bool = False,
+                                                             checks: bool = False,
+                                                             query: bool = False,
+                                                             verbose: bool = False) -> None:
+
+    """
+    For development/troubleshooting only ...
+ """ + def get_aggregation_fields(normalized_results: dict) -> List[str]: + # Returns all noted/important aggregation fields which ARE actually being used by the query; + # we only are interested in ones that are in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, + # which is all of the possible sample-source/cell-line/donor aggregations. + if not isinstance(aggregation_fields := + normalized_results.get("debug", {}).get("aggregation_query_fields"), list): + aggregation_fields = [] + else: + aggregation_fields = deepcopy(aggregation_fields) + for aggregation_field in aggregation_fields: + # Remove the ones we are not interested in reporting on. + if aggregation_field not in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: + aggregation_fields.remove(aggregation_field) + return aggregation_fields + + def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: + aggregation_fields_to_print = get_aggregation_fields(normalized_results) + if isinstance(aggregation_fields_for_troubleshooting := + normalized_results.get("debug", {}).get("aggregation_fields_for_troubleshooting"), list): + for aggregation_field_for_troubleshooting in aggregation_fields_for_troubleshooting: + if aggregation_field_for_troubleshooting not in aggregation_fields_to_print: + aggregation_fields_to_print.append(aggregation_field_for_troubleshooting) + aggregation_fields_to_not_print = [ + AGGREGATION_FIELD_RELEASE_DATE, + AGGREGATION_FIELD_FILE_DESCRIPTOR + ] + for aggregation_field_to_not_print in aggregation_fields_to_not_print: + if aggregation_field_to_not_print in aggregation_fields_to_print: + aggregation_fields_to_print.remove(aggregation_field_to_not_print) + return aggregation_fields_to_print + + def get_aggregation_field_labels() -> dict: + # Shorter/nicer names for aggregation fields of interest to print. 
+ return { + AGGREGATION_FIELD_CELL_MIXTURE: "sample-sources", + AGGREGATION_FIELD_CELL_LINE: "cell-lines", + AGGREGATION_FIELD_DONOR: "donors", + "file_sets.libraries.analytes.samples.sample_sources.display_title": "sample-sources-title" + } + + def terminal_color(value: str, + color: Optional[str] = None, + dark: bool = False, + bold: bool = False, + underline: bool = False, + nocolor: bool = False) -> str: + # This is used only for troubleshooting by + if nocolor is True: + return value + attributes = [] + if dark is True: + attributes.append("dark") + if bold is True: + attributes.append("bold") + if underline is True: + attributes.append("underline") + if isinstance(color, str) and color: + return colored(value, color.lower(), attrs=attributes) + return colored(value, attrs=attributes) + + def print_results(data: dict, + parent_grouping_name: Optional[str] = None, + parent_grouping_value: Optional[str] = None, + indent: int = 0) -> None: + + nonlocal title, uuids, uuid_details, nobold, query, verbose + nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green, green_bold, gray, bold + nonlocal aggregation_fields_to_print + + def get_portal_hits(data: dict) -> List[dict]: + hits = [] + if isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): + for portal_hit in portal_hits: + if isinstance(portal_hit, dict) and isinstance(uuid := portal_hit.get("uuid"), str) and uuid: + hits.append(portal_hit) + return hits + + def format_hit_property_values(hit: dict, property_name: str, + color: Optional[Callable] = None) -> Tuple[Optional[str], List[Tuple[str, str]]]: + nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow + counted_elsewhere = [] + if hit.get("elasticsearch_counted") is False: + counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) + else: + counted_grouping_name, counted_grouping_value = (None, None) + if property_value := hit.get(property_name): + if property_name == parent_grouping_name: + property_values = [] + for property_value in property_value.split(","): + if (property_value := property_value.strip()) == parent_grouping_value: + property_value = color(property_value) if callable(color) else green_bold(property_value) + property_values.append(property_value) + else: + if (counted_grouping_name, counted_grouping_value) == (property_name, property_value): + property_values.append(green_bold(f"{property_value} {chars_larrow_hollow}") + + green(" COUNTED HERE")) + counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) + else: + property_values.append(property_value) + property_value = ", ".join(property_values) + elif hit.get("elasticsearch_counted") is False: + counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) + if (counted_grouping_name == property_name) and (counted_grouping_value == property_value): + property_value = green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE") + counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) + return property_value, counted_elsewhere + + def find_where_aggregated_and_counted( + uuid: str, + multiple: bool = False, + ignore: Optional[Union[List[Tuple[str, str]], + Tuple[str, str]]] = None) -> Union[Tuple[str, str], List[Tuple[str, str]]]: + + nonlocal normalized_results + + def find_where(data: dict, uuid: str, + parent_grouping_name: Optional[str] = None, + parent_grouping_value: Optional[str] = None) -> 
List[Tuple[str, str]]: + found_uuid_grouping_names_and_values = set() + if isinstance(data, dict): + grouping_name = data.get("name") + grouping_value = data.get("value") + if isinstance(items := data.get("items"), list): + for item in items: + if found := find_where(item, uuid, + parent_grouping_name=grouping_name, + parent_grouping_value=grouping_value): + found_uuid_grouping_names_and_values.update(found) + elif isinstance(hits := data.get("debug", {}).get("portal_hits"), list): + for hit in hits: + if hit.get("uuid") == uuid: + if hit.get("elasticsearch_counted") is True: + found_uuid_grouping_names_and_values.add((parent_grouping_name, parent_grouping_value)) + return found_uuid_grouping_names_and_values + + if found_uuid_grouping_names_and_values := list(find_where(normalized_results, uuid)): + if isinstance(ignore, tuple) and (len(ignore) == 2) and (ignore in found_uuid_grouping_names_and_values): + found_uuid_grouping_names_and_values.remove(ignore) + elif isinstance(ignore, list): + for ignore_item in ignore: + if isinstance(ignore_item, tuple) and (len(ignore_item) == 2) and (ignore_item in found_uuid_grouping_names_and_values): + found_uuid_grouping_names_and_values.remove(ignore_item) + if multiple is True: + return found_uuid_grouping_names_and_values + if len(found_uuid_grouping_names_and_values) > 1: + # Normally should only be at most one item with elasticsearch_counted set to True. + pass + return found_uuid_grouping_names_and_values[0] + return [(None, None)] if multiple is True else (None, None) + + def print_hit_property_values(hit: dict, property_name: str, + label: Optional[str] = None, + prefix: Optional[str] = None, + color: Optional[Callable] = None) -> List[Tuple[str, str]]: + nonlocal aggregation_fields, aggregation_field_labels, chars_dot_hollow, chars_null, verbose + if not label: + label = aggregation_field_labels.get(property_name) + if (verbose is True) or (not label): + label = property_name + property_values, counted_elsewhere = format_hit_property_values(hit, property_name, color=color) + if not property_values: + property_values = chars_null + if property_name not in aggregation_fields: + property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}" + property_description = gray(property_description) + else: + property_description = f"{prefix or ''}{chars_dot} {label}: {property_values}" + print(property_description) + return counted_elsewhere + + if not (isinstance(data, dict) and data): + return + if not (isinstance(indent, int) and (indent > 0)): + indent = 0 + spaces = (" " * indent) if indent > 0 else "" + grouping_name = data.get("name") + if isinstance(grouping_value := data.get("value"), str) and grouping_value: + grouping = bold(grouping_value) + if (verbose is True) and isinstance(grouping_name, str) and grouping_name: + grouping = f"{grouping_name} {chars_dot} {grouping}" + elif not (isinstance(grouping := title, str) and grouping): + grouping = "RESULTS" + grouping = f"{chars_diamond} {grouping}" + hits = get_portal_hits(data) if (uuids is True) else [] + if isinstance(count := data.get("count"), int): + note = "" + if len(hits) > count: + note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") + elif isinstance(items := data.get("items"), list): + subcount = 0 + for item in items: + if isinstance(subcount_item := item.get("count"), int): + subcount += subcount_item + if subcount != count: + note = red(f" {chars_xmark} ACTUAL COUNT: {subcount}") + elif checks is True: + note = f" {chars_check}" + 
elif checks: + note = f" {chars_check}" + print(f"{spaces}{grouping}: {count}{note}") + if (query is True) and (query_string := data.get("query")): + print(f"{spaces} {query_string}") + for hit in hits: + if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: + note = "" + if hit.get("elasticsearch_counted") is False: + print(red(f"{spaces} {chars_dot} {uuid} {chars_xmark} UNCOUNTED")) + color = red_bold + else: + print(f"{spaces} {chars_dot} {uuid} {chars_check}") + color = green_bold + if uuid_details is True: + prefix = f"{spaces} " + counted_elsewhere = [] + # Show property values for troubleshooting (as this whole thing is); + # see add_info_for_troubleshooting.annotate_with_uuids. + for aggregation_field in aggregation_fields_to_print: + hit_counted_elsewhere = \ + print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) + if hit_counted_elsewhere: + counted_elsewhere.extend(hit_counted_elsewhere) + # See if also grouped elsewhere for our FYI. + duplicative = hit.get("duplicative") + duplicates = duplicative - 1 if isinstance(duplicative, int) else 0 + counted_groupings = find_where_aggregated_and_counted( + hit.get("uuid"), multiple=True, + ignore=counted_elsewhere + [(parent_grouping_name, parent_grouping_value)]) + if counted_groupings: + message = f"{spaces} {green(chars_rarrow_hollow)} {green('ALSO COUNTED HERE')}:" + if verbose is True: + if duplicates > 0: + message += f" {duplicates}" + if duplicates != len(counted_groupings): + message += red_bold(f" {chars_xmark} vs {len(counted_groupings)}") + print(message) + for counted_grouping in counted_groupings: + print(f"{spaces} - {counted_grouping[0]} {green(counted_grouping[1])}") + else: + counted_grouping_values = [green(counted_grouping[1]) for counted_grouping in counted_groupings] + message = f"{message} {', '.join(counted_grouping_values)}" + if duplicates > 0: + if duplicates != len(counted_groupings): + message += red_bold(f" {chars_xmark} {duplicates} vs {len(counted_grouping_values)}") + print(message) + if isinstance(items := data.get("items"), list): + for element in items: + print_results(element, + parent_grouping_name=grouping_name, + parent_grouping_value=grouping_value, + indent=indent + 2) + + aggregation_fields = get_aggregation_fields(normalized_results) + aggregation_fields_to_print = get_aggregation_fields_to_print(normalized_results) + aggregation_field_labels = get_aggregation_field_labels() + + red = lambda text: terminal_color(text, "red") # noqa + red_bold = lambda text: terminal_color(text, "red", bold=True) # noqa + green = lambda text: terminal_color(text, "green") # noqa + green_bold = lambda text: terminal_color(text, "green", bold=True) # noqa + gray = lambda text: terminal_color(text, "grey") # noqa + bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text) + chars_check = "✓" + chars_xmark = "✗" + chars_dot = "•" + chars_dot_hollow = "◦" + chars_diamond = "❖" + chars_rarrow_hollow = "▷" + chars_larrow_hollow = "◁" + chars_null = "∅" + + print_results(normalized_results) + + +def _get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: bool = False) -> List[Any]: + """ + TODO: Move this to dcicutils. Maybe much of the above too. 
+    Returns the values of the given property name within the given dictionary as a list, where the
+    given property name can be a dot-separated list of property names, which indicate a path into
+    nested dictionaries within the given dictionary; and where, if any of the elements within
+    the path are lists, we iterate through each, collecting the values for each and including
+    each within the list of returned values.
+    """
+    if isinstance(data, dict) and isinstance(name, str) and name:
+        if keys := name.split("."):
+            nkeys = len(keys) ; key_index_max = nkeys - 1  # noqa
+            for key_index in range(nkeys):
+                if (value := data.get(keys[key_index], None)) is not None:
+                    if key_index == key_index_max:
+                        return [value] if not isinstance(value, list) else value
+                    elif isinstance(value, dict):
+                        data = value
+                        continue
+                    elif isinstance(value, list) and value and ((sub_key_index := key_index + 1) < nkeys):
+                        sub_key = ".".join(keys[sub_key_index:])
+                        values = []
+                        for element in value:
+                            if isinstance(element_value := _get_properties(element, sub_key), list):
+                                for element_value_item in element_value:
+                                    if (element_value_item is not None) and (element_value_item not in values):
+                                        values.append(element_value_item)
+                            elif (element_value is not None) and (element_value not in values):
+                                values.append(element_value)
+                        return sorted(values) if (sort is True) else values
+                break
+    return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback])
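+
+# Editorial illustration (not part of the original patch) of _get_properties:
+# it fans out across embedded lists and de-duplicates, so for a record like
+#     {"file_sets": [{"libraries": [{"analytes": [{"samples": [
+#         {"sample_sources": [{"code": "HG00438"}]},
+#         {"sample_sources": [{"code": "HG005"}]}]}]}]}]}
+# the call
+#     _get_properties(record, "file_sets.libraries.analytes.samples.sample_sources.code")
+# returns ["HG00438", "HG005"]; a path that resolves to nothing returns the
+# fallback (default []).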
+
+
+@contextmanager
+def _capture_output_to_html_string():
+
+    from io import StringIO
+    from unittest.mock import patch as patch
+
+    def ansi_to_html(text):
+        ANSI_COLOR_MAP = {
+            "30": "black",
+            "31": "red",
+            "32": "green",
+            "33": "yellow",
+            "34": "blue",
+            "35": "magenta",
+            "36": "cyan",
+            "37": "white",
+            "90": "bright_black",
+            "91": "bright_red",
+            "92": "bright_green",
+            "93": "bright_yellow",
+            "94": "bright_blue",
+            "95": "bright_magenta",
+            "96": "bright_cyan",
+            "97": "bright_white",
+        }
+        ANSI_ESCAPE_RE = re.compile(r"\x1b\[([0-9;]*)m")
+        bold_active = False
+        def replace_ansi(match):  # noqa
+            nonlocal bold_active
+            codes = match.group(1).split(";")  # Split multiple codes (e.g., "1;31")
+            html_parts = []
+            for code in codes:
+                if code == "1":  # Bold
+                    if not bold_active:  # Activate bold
+                        html_parts.append("<b>")
+                        bold_active = True
+                elif code in ANSI_COLOR_MAP:  # Colors
+                    color = ANSI_COLOR_MAP[code]
+                    html_parts.append(f"<span style='color:{color}'>")
+                elif code == "0":  # Reset
+                    if bold_active:
+                        html_parts.append("</b>")
+                        bold_active = False
+                    html_parts.append("</span>")  # Close color
+            return "".join(html_parts)
+        text_with_html = ANSI_ESCAPE_RE.sub(replace_ansi, text)
+        if bold_active:
+            text_with_html += "</b>"
+        return f"<pre>{text_with_html}</pre>"
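+    # Editorial illustration (not part of the original patch): given the tag
+    # handling above, a hand-written bold+green ANSI sequence such as
+    #     ansi_to_html("\x1b[1;32mCOUNTED HERE\x1b[0m")
+    # yields "<pre><b><span style='color:green'>COUNTED HERE</b></span></pre>";
+    # note the <b> is closed before the <span>, exactly as the reset branch emits.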
" + + print_original = print + captured_output = StringIO() + class CapturedOutput: # noqa + def __init__(self, captured_output: StringIO): + self._captured_output = captured_output + @property # noqa + def text(self): + return self._captured_output.getvalue() + @property # noqa + def html(self): + return ansi_to_html(self._captured_output.getvalue()) + def captured_print(*args, **kwargs): # noqa + nonlocal captured_output + print_original(*args, **kwargs, file=captured_output) + with patch("builtins.print", captured_print): + yield CapturedOutput(captured_output) diff --git a/src/encoded/recent_files_summary.py b/src/encoded/recent_files_summary.py deleted file mode 100644 index 06ecd3069..000000000 --- a/src/encoded/recent_files_summary.py +++ /dev/null @@ -1,930 +0,0 @@ -from contextlib import contextmanager -from copy import deepcopy -from pyramid.request import Request as PyramidRequest -import re -from typing import Callable, List, Optional, Tuple, Union -from dcicutils.misc_utils import normalize_spaces -from encoded.elasticsearch_utils import add_debugging_to_elasticsearch_aggregation_query -from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query -from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results -from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results -from encoded.elasticsearch_utils import prune_elasticsearch_aggregation_results -from encoded.elasticsearch_utils import sort_normalized_aggregation_results -from encoded.elasticsearch_utils import AGGREGATION_MAX_BUCKETS, AGGREGATION_NO_VALUE -from encoded.endpoint_utils import create_query_string, deconstruct_query_string -from encoded.endpoint_utils import get_date_range_for_month, parse_date_range_related_arguments -from encoded.endpoint_utils import get_properties, parse_datetime_string -from encoded.endpoint_utils import request_arg, request_args, request_arg_bool, request_arg_int -from snovault.search.search import search as snovault_search -from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq - -QUERY_FILE_TYPES = ["OutputFile"] -QUERY_FILE_STATUSES = ["released"] -QUERY_FILE_CATEGORIES = ["!Quality Control"] -QUERY_RECENT_MONTHS = 3 -QUERY_INCLUDE_CURRENT_MONTH = True - -AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released" -# FYI FWIW: There is also file_sets.libraries.analytes.samples.sample_sources.display_title; -# and that sometimes file_sets.libraries.analytes.samples.sample_sources.code does not exist. -AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code" -AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" -AGGREGATION_FIELD_DONOR = "donors.display_title" -AGGREGATION_FIELD_FILE_DESCRIPTOR = "release_tracker_description" - -AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR = [ - AGGREGATION_FIELD_CELL_MIXTURE, - AGGREGATION_FIELD_CELL_LINE, - AGGREGATION_FIELD_DONOR -] - -BASE_SEARCH_QUERY = "/search/" - -def recent_files_summary(request: PyramidRequest, troubleshooting: bool = True) -> dict: - """ - This supports the (new as of 2024-12) /recent_files_summary endpoint (for C4-1192) to return, - by default, info for files released withing the past three months grouped by release-date, - cell-line or donor, and file-description. 
The specific fields used for these groupings are: - - - release-date: file_status_tracking.released - - cell-line: file_sets.libraries.analytes.samples.sample_sources.cell_line.code - - donor: donors.display_title - - file-dsecription: release_tracker_description - - Note that release_tracker_description is a newer (2024-12) - calculated property - see PR-298 (branch: sn_file_release_tracker). - - By default the current (assuminging partial) month IS included, so we really return info for - the past FULL three months plus for whatever time has currently elapsed for the current month. - Use pass the include_current_month=false query argument to NOT include the current month. - - The number of months of data can be controlled using the nmonths query argument, e.g. nmonths=6. - - A specific date range can also be passed in e.g. using from_date=2024-08-01 and thru_date=2024-10-31. - - For testing purposes, a date field other than the default file_status_tracking.released can - also be specified using the date_property_name query argument. And file statuses other than - released can be queried for using one or more status query arguments, e.g. status=uploaded. - """ - - - global AGGREGATION_FIELD_RELEASE_DATE - - date_property_name = request_arg(request, "date_property_name", AGGREGATION_FIELD_RELEASE_DATE) - max_buckets = request_arg_bool(request, "max_buckets", AGGREGATION_MAX_BUCKETS) - include_queries = request_arg_bool(request, "include_queries", request_arg_bool(request, "include_query", True)) - include_missing = request_arg_bool(request, "include_missing", request_arg_bool(request, "novalues")) - nocells = request_arg_bool(request, "nocells", request_arg_bool(request, "nocell", True)) # N.B. default True - nomixtures = request_arg_bool(request, "nomixtures", request_arg_bool(request, "nomixture")) - nodonors = request_arg_bool(request, "nodonors", request_arg_bool(request, "nodonor")) - favor_donor = request_arg_bool(request, "favor_donor") - multi = request_arg_bool(request, "multi") - nosort = request_arg_bool(request, "nosort") - legacy = request_arg_bool(request, "legacy") - debug = request_arg_bool(request, "debug") - debug_query = request_arg_bool(request, "debug_query") - troubleshoot = request_arg_bool(request, "troubleshoot") - troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch") - raw = request_arg_bool(request, "raw") - willrfix = request_arg_bool(request, "willrfix") - - if troubleshooting is True: - debug = True - troubleshoot = True - troubleshoot_elasticsearch = True - - def get_aggregation_field_grouping_cell_or_donor() -> List[str]: - # This specializes the aggregation query to group first by the cell-line field, - # and then alternatively (if a cell-line field does not exist) by the donor field. - # For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively - # look first for the donor field and then secondarily for the cell-line field. 
- global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR - nonlocal nocells, nomixtures, nodonors, favor_donor - aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR) - if nocells: - aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_LINE) - if nomixtures: - aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_CELL_MIXTURE) - if nodonors: - aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR) - if favor_donor: - aggregation_field_grouping_cell_or_donor.remove(AGGREGATION_FIELD_DONOR) - aggregation_field_grouping_cell_or_donor.insert(0, AGGREGATION_FIELD_DONOR) - return aggregation_field_grouping_cell_or_donor - - def create_base_query_arguments(request: PyramidRequest) -> dict: - - global QUERY_FILE_CATEGORIES, QUERY_FILE_STATUSES, QUERY_FILE_TYPES - - types = request_args(request, "type", QUERY_FILE_TYPES) - statuses = request_args(request, "status", QUERY_FILE_STATUSES) - categories = request_args(request, "category", QUERY_FILE_CATEGORIES) - - base_query_arguments = { - "type": types if types else None, - "status": statuses if statuses else None, - "data_category": categories if categories else None - } - - return {key: value for key, value in base_query_arguments.items() if value is not None} - - def create_query_arguments(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str: - - global BASE_SEARCH_QUERY, QUERY_RECENT_MONTHS, QUERY_INCLUDE_CURRENT_MONTH - nonlocal date_property_name - - recent_months = request_arg_int(request, "nmonths", request_arg_int(request, "months", QUERY_RECENT_MONTHS)) - from_date = request_arg(request, "from_date") - thru_date = request_arg(request, "thru_date") - include_current_month = request_arg_bool(request, "include_current_month", QUERY_INCLUDE_CURRENT_MONTH) - - from_date, thru_date = parse_date_range_related_arguments(from_date, thru_date, nmonths=recent_months, - include_current_month=include_current_month, - strings=True) - query_arguments = { - f"{date_property_name}.from": from_date if from_date else None, - f"{date_property_name}.to": thru_date if from_date else None - } - - if isinstance(base_query_arguments, dict): - query_arguments = {**base_query_arguments, **query_arguments} - return query_arguments - - def create_query(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str: - query_arguments = create_query_arguments(request, base_query_arguments) - query_string = create_query_string(query_arguments) - return f"{BASE_SEARCH_QUERY}?{query_string}" - - def create_aggregation_query(aggregation_fields: List[str]) -> dict: - - nonlocal date_property_name, max_buckets, include_missing, favor_donor, troubleshoot_elasticsearch - - aggregations = [] - if not isinstance(aggregation_fields, list): - aggregation_fields = [aggregation_fields] - for item in aggregation_fields: - if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): - aggregations.append(item) - if not aggregations: - return {} - - def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, multi - if field == date_property_name: - return { - "date_histogram": { - "field": f"embedded.{field}", - "calendar_interval": "month", - "format": "yyyy-MM", - "missing": "1970-01", - "order": {"_key": "desc"} - } - } - elif field == AGGREGATION_FIELD_CELL_LINE: - # Note how we prefix the result with the aggregation field name; - # this is so later we can tell 
which grouping/field was matched; - # see fixup_names_values_for_normalized_results for this fixup. - script = "" - for aggregation_field_grouping_index in range(len(aggregation_field_grouping_cell_or_donor)): - aggregation_field = aggregation_field_grouping_cell_or_donor[aggregation_field_grouping_index] - if_or_else_if = "if" if aggregation_field_grouping_index == 0 else "else if" - # Note that if there are multiple values for the aggregation field just the "first" one will be chosen; - # where "first" means which was indexed first, which from an application POV is kind of arbitrary. - # If we want to make it more deterministic we could order the results (say) alphabetically like so: - # def value = doc['embedded.{aggregation_field}.raw'].stream().min((a, b) -> a.compareTo(b)).get(); - # return '{aggregation_field}:' + value; - # OR, if we actually want to aggregation on ALL values we could collect the results and return all like so: - # def values = []; - # for (value in doc['embedded.{aggregation_field}.raw']) { - # values.add('{aggregation_field}:' + value); - # } - # return values; - # But then we'd get double counting and so on. We are told in any case that these groups should be distinct. - if not multi: - script += f""" - {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ - return '{aggregation_field}:' + doc['embedded.{aggregation_field}.raw'].value; - }} - """ - else: - script += f""" - {if_or_else_if} (doc['embedded.{aggregation_field}.raw'].size() > 0) {{ - def values = []; - for (value in doc['embedded.{aggregation_field}.raw']) {{ - values.add('{aggregation_field}:' + value); - }} - return values; - }} - """ - script += f""" - else {{ - return 'unknown'; - }} - """ - return { - "terms": { - "script": { - "source": normalize_spaces(script), - "lang": "painless" - }, - "size": max_buckets - } - } - - def create_field_filter(field: str) -> Optional[dict]: # noqa - nonlocal aggregation_field_grouping_cell_or_donor - if field == AGGREGATION_FIELD_CELL_LINE: - filter = {"bool": {"should": [], "minimum_should_match": 1}} - for aggregation_field in aggregation_field_grouping_cell_or_donor: - filter["bool"]["should"].append({"exists": { "field": f"embedded.{aggregation_field}.raw"}}) - return filter - - aggregation_query = create_elasticsearch_aggregation_query( - aggregations, - max_buckets=max_buckets, - missing_value=AGGREGATION_NO_VALUE, - include_missing=include_missing, - create_field_aggregation=create_field_aggregation, - create_field_filter=create_field_filter) - - if troubleshoot_elasticsearch: - add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) - - return aggregation_query[date_property_name] - - def create_aggregation_query_legacy(aggregation_fields: List[str]) -> dict: - - nonlocal date_property_name, max_buckets, include_missing - - aggregations = [] - if not isinstance(aggregation_fields, list): - aggregation_fields = [aggregation_fields] - for item in aggregation_fields: - if isinstance(item, str) and (item := item.strip()) and (item not in aggregations): - aggregations.append(item) - if not aggregations: - return {} - - def create_field_aggregation(field: str) -> Optional[dict]: # noqa - nonlocal date_property_name - if field == date_property_name: - return { - "date_histogram": { - "field": f"embedded.{field}", - "calendar_interval": "month", - "format": "yyyy-MM", - "missing": "1970-01", - "order": {"_key": "desc"} - } - } - - aggregation_query = create_elasticsearch_aggregation_query( - aggregations, - 
max_buckets=max_buckets, - missing_value=AGGREGATION_NO_VALUE, - include_missing=include_missing, - create_field_aggregation=create_field_aggregation) - - if troubleshoot_elasticsearch: - add_debugging_to_elasticsearch_aggregation_query(aggregation_query[date_property_name]) - - return aggregation_query[date_property_name] - - def execute_aggregation_query(request: PyramidRequest, query: str, aggregation_query: dict) -> str: - query += "&from=0&limit=0" # needed for aggregation query to not return the actual/individual item results. - request = snovault_make_search_subreq(request, path=query, method="GET") - results = snovault_search(None, request, custom_aggregations=aggregation_query) - return results - - def fixup_names_values_for_normalized_results(normalized_results: dict) -> None: - nonlocal aggregation_field_grouping_cell_or_donor - if isinstance(normalized_results, dict): - if isinstance(value := normalized_results.get("value"), str): - if ((separator_index := value.find(":")) > 0) and (value_prefix := value[0:separator_index]): - if value_prefix in aggregation_field_grouping_cell_or_donor: - if value := value[separator_index + 1:]: - normalized_results["name"] = value_prefix - normalized_results["value"] = value - if isinstance(items := normalized_results.get("items"), list): - for element in items: - fixup_names_values_for_normalized_results(element) - - def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None: - global BASE_SEARCH_QUERY - nonlocal date_property_name, willrfix - if isinstance(normalized_results, dict): - if name := normalized_results.get("name"): - if value := normalized_results.get("value"): - if name == date_property_name: - # Special case for date value which is just year/month (e.g. 2024-12); - # we want to turn this into a date range query for the month; actually - # this is not a special case, this is the NORMAL case we are dealing with. - # from_date, thru_date = parse_date_range_related_arguments(value, None, nmonths=0, strings=True) - from_date, thru_date = get_date_range_for_month(value, strings=True) - if from_date and thru_date: - base_query_arguments = {**base_query_arguments, - f"{name}.from": from_date, f"{name}.to": thru_date} - else: - base_query_arguments = {**base_query_arguments, name: value} - if willrfix: - if name == AGGREGATION_FIELD_CELL_LINE: - base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE - elif name == AGGREGATION_FIELD_DONOR: - base_query_arguments[AGGREGATION_FIELD_CELL_MIXTURE] = AGGREGATION_NO_VALUE - base_query_arguments[AGGREGATION_FIELD_CELL_LINE] = AGGREGATION_NO_VALUE - normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY) - if isinstance(items := normalized_results.get("items"), list): - for element in items: - add_queries_to_normalized_results(element, base_query_arguments) - - aggregation_field_grouping_cell_or_donor = get_aggregation_field_grouping_cell_or_donor() - # The base_query_arguments does not contain the from/thru dates as this is used; - # this is used to construct the query-string for the individually grouped items which - # will have the from/thru dates specifically representing their place within the group. 
- base_query_arguments = create_base_query_arguments(request) - query = create_query(request, base_query_arguments) - - if not legacy: - aggregate_by_cell_line_property_name = "aggregate_by_cell_line" - aggregate_by_cell_line = [ - date_property_name, - AGGREGATION_FIELD_CELL_LINE, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - aggregation_query = { - aggregate_by_cell_line_property_name: create_aggregation_query(aggregate_by_cell_line) - } - else: - aggregate_by_cell_line_property_name = "aggregate_by_cell_line" - aggregate_by_cell_line = [ - date_property_name, - AGGREGATION_FIELD_CELL_LINE, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - aggregate_by_donor_property_name = "aggregate_by_donor" - aggregate_by_donor = [ - date_property_name, - AGGREGATION_FIELD_DONOR, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - aggregation_query = { - aggregate_by_cell_line_property_name: create_aggregation_query_legacy(aggregate_by_cell_line), - aggregate_by_donor_property_name: create_aggregation_query_legacy(aggregate_by_donor) - } - - if debug_query: - return { - "query": query, - "query_arguments": deconstruct_query_string(query), - "aggregation_query_fields": [ - AGGREGATION_FIELD_RELEASE_DATE, - *get_aggregation_field_grouping_cell_or_donor(), - AGGREGATION_FIELD_FILE_DESCRIPTOR - ], - "aggregation_query": aggregation_query - } - - raw_results = execute_aggregation_query(request, query, aggregation_query) - - if raw: - # For debugging/troubleshooting only if raw=true then return raw ElasticSearch results. - # And note that unless we remove teh @id property we get redirected to the URL in this field, - # for example to: /search/?type=OutputFile&status=released&data_category%21=Quality+Control - # &file_status_tracking.released.from=2024-09-30 - # &file_status_tracking.released.to=2024-12-31&from=0&limit=0' - if "@id" in raw_results: - del raw_results["@id"] - return raw_results - - if not (raw_results := raw_results.get("aggregations")): - return {} - - if debug: - raw_results = deepcopy(raw_results) # otherwise may be overwritten by below - - prune_elasticsearch_aggregation_results(raw_results) - - if not legacy: - aggregation_results = raw_results.get(aggregate_by_cell_line_property_name) - else: - aggregation_results = merge_elasticsearch_aggregation_results(raw_results.get(aggregate_by_cell_line_property_name), - raw_results.get(aggregate_by_donor_property_name)) - - # Note that the doc_count values returned by ElasticSearch DO actually seem to be for UNIQUE items, - # i.e. if an item appears in two different groups (e.g. if, say, f2584000-f810-44b6-8eb7-855298c58eb3 - # has file_sets.libraries.analytes.samples.sample_sources.cell_line.code values for both HG00438 and HG005), - # then its doc_count will NOT be counted TWICE. This creates a situation where it might LOOK like the counts - # are WRONG in the MERGED (via returned merge_elasticsearch_aggregation_results) result set, where the outer - # item count may be than the sum of the individual counts within each sub-group. For example, the below result - # shows a top-level doc_count of 1, even though there are 2 documents, 1 in the HG00438 group and the other - # in the HG005 it would be because the same unique file has a cell_line.code of both HG00438 and HG005. 
- # { - # "meta": { "field_name": "file_status_tracking.released" }, - # "buckets": [ - # { - # "key_as_string": "2024-12", "key": 1733011200000, "doc_count": 1, - # "file_sets.libraries.analytes.samples.sample_sources.cell_line.code": { - # "meta": { "field_name": "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" }, - # "buckets": [ - # { "key": "HG00438", "doc_count": 1, - # "release_tracker_description": { - # "meta": { "field_name": "release_tracker_description" }, - # "buckets": [ - # { "key": "WGS Illumina NovaSeq X bam", "doc_count": 1 }, - # ] - # } - # }, - # { "key": "HG005", "doc_count": 1, - # "release_tracker_description": { - # "meta": { "field_name": "release_tracker_description" }, - # "buckets": [ - # { "key": "Fiber-seq PacBio Revio bam", "doc_count": 1 } - # ] - # } - # } - # ] - # } - # } - # ] - # } - - if debug: - additional_properties = { - "debug": { - "query": query, - "query_arguments": deconstruct_query_string(query), - "aggregation_query_fields": [ - AGGREGATION_FIELD_RELEASE_DATE, - *get_aggregation_field_grouping_cell_or_donor(), - AGGREGATION_FIELD_FILE_DESCRIPTOR - ], - "aggregation_query": aggregation_query, - "raw_results": raw_results, - "aggregation_results": deepcopy(aggregation_results) - } - } - else: - additional_properties = None - - normalized_results = normalize_elasticsearch_aggregation_results(aggregation_results, - additional_properties=additional_properties, - remove_empty_items=not include_missing) - if not legacy: - fixup_names_values_for_normalized_results(normalized_results) - if include_queries: - add_queries_to_normalized_results(normalized_results, base_query_arguments) - normalized_results["query"] = query - - if not nosort: - # We can sort on the aggregations by level; outermost/left to innermost/right. - # In our case the outermost is the date aggregation so sort taht by the key value, - # e.g. 2014-12, descending; and the rest of the inner levels by the default - # sorting which is by aggregation count descending and secondarily by the key value. - sort_normalized_aggregation_results(normalized_results, ["-key", "default"]) - - if troubleshoot: - add_info_for_troubleshooting(normalized_results, request) - - return normalized_results - - -def add_info_for_troubleshooting(normalized_results: dict, request: PyramidRequest) -> None: - - def get_files(files, property_name, property_value, map_property_value = None): - found = [] - for file in files: - if properties := get_properties(file, property_name): - if callable(map_property_value): - mapped_properties = [] - for value in properties: - mapped_properties.append(map_property_value(value)) - properties = mapped_properties - if property_value in properties: - found.append(file) - return found - - def map_date_property_value(value): - if date_value := parse_datetime_string(value): - return f"{date_value.year}-{date_value.month:02}" - return value - - def count_uuid(uuid_records: List[dict], uuid: str) -> int: - count = 0 - for uuid_record in uuid_records: - if uuid_record.get("uuid") == uuid: - count += 1 - return count - - def dedup_list(data: list) -> list: # noqa - return list(dict.fromkeys(data)) if isinstance(data, list) else [] - - aggregation_fields_for_troubleshooting = dedup_list([ - AGGREGATION_FIELD_RELEASE_DATE, - AGGREGATION_FIELD_CELL_MIXTURE, - AGGREGATION_FIELD_CELL_LINE, - # Store some extra properties for troublehooting (as this whole thing is). 
-# "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.display_title", -# "file_sets.libraries.analytes.samples.sample_sources.components.cell_culture.cell_line.code", - "file_sets.libraries.analytes.samples.sample_sources.display_title", - AGGREGATION_FIELD_DONOR, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ]) - - def annotate_with_uuids(normalized_results: dict): - nonlocal aggregation_fields_for_troubleshooting - uuid_records = [] - query = normalized_results.get("query") - if isinstance(normalized_results.get("debug"), dict): - normalized_results["debug"]["aggregation_fields_for_troubleshooting"] = ( - aggregation_fields_for_troubleshooting) - files = request.embed(f"{query}&limit=1000", as_user="IMPORT")["@graph"] - for first_item in normalized_results["items"]: - first_property_name = first_item["name"] - first_property_value = first_item["value"] - for second_item in first_item["items"]: - second_property_name = second_item["name"] - second_property_value = second_item["value"] - for third_item in second_item["items"]: - third_property_name = third_item["name"] - third_property_value = third_item["value"] - if debug_elasticsearch_hits := third_item.get("debug_elasticsearch_hits"): - if not third_item.get("debug"): - third_item["debug"] = {} - third_item["debug"]["elasticsearch_hits"] = debug_elasticsearch_hits - third_item["debug"]["elasticsearch_hits"].sort() - del third_item["debug_elasticsearch_hits"] - if first_files := get_files(files, first_property_name, first_property_value, - map_property_value=map_date_property_value): - if second_files := get_files(first_files, second_property_name, second_property_value): - if third_files := get_files(second_files, third_property_name, third_property_value): - for file in third_files: - if isinstance(uuid := file.get("uuid"), str): - if not third_item.get("debug"): - third_item["debug"] = {} - if not third_item["debug"].get("portal_hits"): - third_item["debug"]["portal_hits"] = [] - uuid_record = {"uuid": uuid} - for aggregation_field in aggregation_fields_for_troubleshooting: - aggregation_values = ", ".join(get_properties(file, aggregation_field)) - uuid_record[aggregation_field] = aggregation_values or None - if third_item["debug"].get("elasticsearch_hits"): - uuid_record["elasticsearch_counted"] = \ - uuid in third_item["debug"]["elasticsearch_hits"] - third_item["debug"]["portal_hits"].append(uuid_record) - uuid_records.append(uuid_record) - if third_item.get("debug", {}).get("portal_hits"): - third_item["debug"]["portal_hits"].sort(key=lambda item: item.get("uuid")) - - for uuid_record in uuid_records: - if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1: - uuid_record["duplicative"] = count - - try: - annotate_with_uuids(normalized_results) - except Exception: - pass - - -def print_normalized_aggregation_results(normalized_results: dict, - title: Optional[str] = None, - parent_grouping_name: Optional[str] = None, - parent_grouping_value: Optional[str] = None, - uuids: bool = False, - uuid_details: bool = False, - nobold: bool = False, - checks: bool = False, - query: bool = False, - verbose: bool = False) -> None: - - """ - For deveopment/troubleshooting only ... 
- """ - from encoded.endpoint_utils import terminal_color - - def get_aggregation_fields(normalized_results: dict) -> List[str]: - # Returns all noted/important aggregation fields which ARE actually being used by the query; - # we only are interested in ones that are in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR, - # which is all of the possible sample-source/cell-line/donor aggregations. - global AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR - if not isinstance(aggregation_fields := - normalized_results.get("debug", {}).get("aggregation_query_fields"), list): - aggregation_fields = [] - else: - aggregation_fields = deepcopy(aggregation_fields) - for aggregation_field in aggregation_fields: - # Remove the ones we are not interested in reporting on. - if aggregation_field not in AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR: - aggregation_fields.remove(aggregation_field) - return aggregation_fields - - def get_aggregation_fields_to_print(normalized_results: dict) -> List[str]: - aggregation_fields_to_print = get_aggregation_fields(normalized_results) - if isinstance(aggregation_fields_for_troubleshooting := - normalized_results.get("debug", {}).get("aggregation_fields_for_troubleshooting"), list): - for aggregation_field_for_troubleshooting in aggregation_fields_for_troubleshooting: - if aggregation_field_for_troubleshooting not in aggregation_fields_to_print: - aggregation_fields_to_print.append(aggregation_field_for_troubleshooting) - aggregation_fields_to_not_print = [ - AGGREGATION_FIELD_RELEASE_DATE, - AGGREGATION_FIELD_FILE_DESCRIPTOR - ] - for aggregation_field_to_not_print in aggregation_fields_to_not_print: - if aggregation_field_to_not_print in aggregation_fields_to_print: - aggregation_fields_to_print.remove(aggregation_field_to_not_print) - return aggregation_fields_to_print - - def get_aggregation_field_labels() -> dict: - # Shorter/nicer names for aggregation fields of interest to print. 
- global AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_CELL_LINE, AGGREGATION_FIELD_DONOR - return { - AGGREGATION_FIELD_CELL_MIXTURE: "sample-sources", - AGGREGATION_FIELD_CELL_LINE: "cell-lines", - AGGREGATION_FIELD_DONOR: "donors", - "file_sets.libraries.analytes.samples.sample_sources.display_title": "sample-sources-title" - } - - def print_results(data: dict, - parent_grouping_name: Optional[str] = None, - parent_grouping_value: Optional[str] = None, - indent: int = 0) -> None: - - nonlocal title, uuids, uuid_details, nobold, query, verbose - nonlocal chars_check, chars_dot, chars_rarrow_hollow, chars_xmark, red, green, green_bold, gray, bold - nonlocal aggregation_fields_to_print - - def get_portal_hits(data: dict) -> List[dict]: - hits = [] - if isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): - for portal_hit in portal_hits: - if isinstance(portal_hit, dict) and isinstance(uuid := portal_hit.get("uuid"), str) and uuid: - hits.append(portal_hit) - return hits - - def format_hit_property_values(hit: dict, property_name: str, - color: Optional[Callable] = None) -> Tuple[Optional[str], List[Tuple[str, str]]]: - nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow - counted_elsewhere = [] - if hit.get("elasticsearch_counted") is False: - counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) - else: - counted_grouping_name, counted_grouping_value = (None, None) - if property_value := hit.get(property_name): - if property_name == parent_grouping_name: - property_values = [] - for property_value in property_value.split(","): - if (property_value := property_value.strip()) == parent_grouping_value: - property_value = color(property_value) if callable(color) else green_bold(property_value) - property_values.append(property_value) - else: - if (counted_grouping_name, counted_grouping_value) == (property_name, property_value): - property_values.append(green_bold(f"{property_value} {chars_larrow_hollow}") + - green(" COUNTED HERE")) - counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) - else: - property_values.append(property_value) - property_value = ", ".join(property_values) - elif hit.get("elasticsearch_counted") is False: - counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid")) - if (counted_grouping_name == property_name) and (counted_grouping_value == property_value): - property_value = green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE") - counted_elsewhere.append((counted_grouping_name, counted_grouping_value)) - return property_value, counted_elsewhere - - def find_where_aggregated_and_counted( - uuid: str, - multiple: bool = False, - ignore: Optional[Union[List[Tuple[str, str]], - Tuple[str, str]]] = None) -> Union[Tuple[str, str], List[Tuple[str, str]]]: - - nonlocal normalized_results - - def find_where(data: dict, uuid: str, - parent_grouping_name: Optional[str] = None, - parent_grouping_value: Optional[str] = None) -> List[Tuple[str, str]]: - found_uuid_grouping_names_and_values = set() - if isinstance(data, dict): - grouping_name = data.get("name") - grouping_value = data.get("value") - if isinstance(items := data.get("items"), list): - for item in items: - if found := find_where(item, uuid, - parent_grouping_name=grouping_name, - parent_grouping_value=grouping_value): - found_uuid_grouping_names_and_values.update(found) - elif isinstance(hits := data.get("debug", 
{}).get("portal_hits"), list): - for hit in hits: - if hit.get("uuid") == uuid: - if hit.get("elasticsearch_counted") is True: - found_uuid_grouping_names_and_values.add((parent_grouping_name, parent_grouping_value)) - return found_uuid_grouping_names_and_values - - if found_uuid_grouping_names_and_values := list(find_where(normalized_results, uuid)): - if isinstance(ignore, tuple) and (len(ignore) == 2) and (ignore in found_uuid_grouping_names_and_values): - found_uuid_grouping_names_and_values.remove(ignore) - elif isinstance(ignore, list): - for ignore_item in ignore: - if isinstance(ignore_item, tuple) and (len(ignore_item) == 2) and (ignore_item in found_uuid_grouping_names_and_values): - found_uuid_grouping_names_and_values.remove(ignore_item) - if multiple is True: - return found_uuid_grouping_names_and_values - if len(found_uuid_grouping_names_and_values) > 1: - # Normally should only be at most one item with elasticsearch_counted set to True. - pass - return found_uuid_grouping_names_and_values[0] - return [(None, None)] if multiple is True else (None, None) - - def print_hit_property_values(hit: dict, property_name: str, - label: Optional[str] = None, - prefix: Optional[str] = None, - color: Optional[Callable] = None) -> List[Tuple[str, str]]: - nonlocal aggregation_fields, aggregation_field_labels, chars_dot_hollow, chars_null, verbose - if not label: - label = aggregation_field_labels.get(property_name) - if (verbose is True) or (not label): - label = property_name - property_values, counted_elsewhere = format_hit_property_values(hit, property_name, color=color) - if not property_values: - property_values = chars_null - if property_name not in aggregation_fields: - property_description = f"{prefix or ''}{chars_dot_hollow} {label}: {property_values}" - property_description = gray(property_description) - else: - property_description = f"{prefix or ''}{chars_dot} {label}: {property_values}" - print(property_description) - return counted_elsewhere - - if not (isinstance(data, dict) and data): - return - if not (isinstance(indent, int) and (indent > 0)): - indent = 0 - spaces = (" " * indent) if indent > 0 else "" - grouping_name = data.get("name") - if isinstance(grouping_value := data.get("value"), str) and grouping_value: - grouping = bold(grouping_value) - if (verbose is True) and isinstance(grouping_name, str) and grouping_name: - grouping = f"{grouping_name} {chars_dot} {grouping}" - elif not (isinstance(grouping := title, str) and grouping): - grouping = "RESULTS" - grouping = f"{chars_diamond} {grouping}" - hits = get_portal_hits(data) if (uuids is True) else [] - if isinstance(count := data.get("count"), int): - note = "" - if len(hits) > count: - note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") - elif isinstance(items := data.get("items"), list): - subcount = 0 - for item in items: - if isinstance(subcount_item := item.get("count"), int): - subcount += subcount_item - if subcount != count: - note = red(f" {chars_xmark} ACTUAL COUNT: {subcount}") - elif checks is True: - note = f" {chars_check}" - elif checks: - note = f" {chars_check}" - print(f"{spaces}{grouping}: {count}{note}") - if (query is True) and (query_string := data.get("query")): - print(f"{spaces} {query_string}") - for hit in hits: - if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid: - note = "" - if hit.get("elasticsearch_counted") is False: - print(red(f"{spaces} {chars_dot} {uuid} {chars_xmark} UNCOUNTED")) - color = red_bold - else: - 
print(f"{spaces} {chars_dot} {uuid} {chars_check}") - color = green_bold - if uuid_details is True: - prefix = f"{spaces} " - counted_elsewhere = [] - # Show property values for troubleshooting (as this whole thing is); - # see add_info_for_troubleshooting.annotate_with_uuids. - for aggregation_field in aggregation_fields_to_print: - hit_counted_elsewhere = \ - print_hit_property_values(hit, aggregation_field, prefix=prefix, color=color) - if hit_counted_elsewhere: - counted_elsewhere.extend(hit_counted_elsewhere) - # See if also grouped elsewhere for our FYI. - duplicative = hit.get("duplicative") - duplicates = duplicative - 1 if isinstance(duplicative, int) else 0 - counted_groupings = find_where_aggregated_and_counted( - hit.get("uuid"), multiple=True, - ignore=counted_elsewhere + [(parent_grouping_name, parent_grouping_value)]) - if counted_groupings: - message = f"{spaces} {green(chars_rarrow_hollow)} {green('ALSO COUNTED HERE')}:" - if verbose is True: - if duplicates > 0: - message += f" {duplicates}" - if duplicates != len(counted_groupings): - message += red_bold(f" {chars_xmark} vs {len(counted_groupings)}") - print(message) - for counted_grouping in counted_groupings: - print(f"{spaces} - {counted_grouping[0]} {green(counted_grouping[1])}") - else: - counted_grouping_values = [green(counted_grouping[1]) for counted_grouping in counted_groupings] - message = f"{message} {', '.join(counted_grouping_values)}" - if duplicates > 0: - if duplicates != len(counted_groupings): - message += red_bold(f" {chars_xmark} {duplicates} vs {len(counted_grouping_values)}") - print(message) - if isinstance(items := data.get("items"), list): - for element in items: - print_results(element, - parent_grouping_name=grouping_name, - parent_grouping_value=grouping_value, - indent=indent + 2) - - aggregation_fields = get_aggregation_fields(normalized_results) - aggregation_fields_to_print = get_aggregation_fields_to_print(normalized_results) - aggregation_field_labels = get_aggregation_field_labels() - - red = lambda text: terminal_color(text, "red") # noqa - red_bold = lambda text: terminal_color(text, "red", bold=True) # noqa - green = lambda text: terminal_color(text, "green") # noqa - green_bold = lambda text: terminal_color(text, "green", bold=True) # noqa - gray = lambda text: terminal_color(text, "grey") # noqa - bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text) - chars_check = "✓" - chars_xmark = "✗" - chars_dot = "•" - chars_dot_hollow = "◦" - chars_diamond = "❖" - chars_rarrow_hollow = "▷" - chars_larrow_hollow = "◁" - chars_null = "∅" - - print_results(normalized_results) - - -def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict): - with capture_output_to_html_string() as captured_output: - print_normalized_aggregation_results(normalized_results, uuids=True, uuid_details=True) - return captured_output.html - return - - -@contextmanager -def capture_output_to_html_string(): - from io import StringIO - from unittest.mock import patch as patch - print_original = print - captured_output = StringIO() - class CapturedOutput: # noqa - def __init__(self, captured_output: StringIO): - self._captured_output = captured_output - @property # noqa - def text(self): - return self._captured_output.getvalue() - @property # noqa - def html(self): - return ansi_to_html(self._captured_output.getvalue()) - def captured_print(*args, **kwargs): # noqa - nonlocal captured_output - print_original(*args, **kwargs, 
-        print_original(*args, **kwargs, file=captured_output)
-    with patch("builtins.print", captured_print):
-        yield CapturedOutput(captured_output)
-
-
-def ansi_to_html(text):
-    ANSI_COLOR_MAP = {
-        '30': 'black',
-        '31': 'red',
-        '32': 'green',
-        '33': 'yellow',
-        '34': 'blue',
-        '35': 'magenta',
-        '36': 'cyan',
-        '37': 'white',
-        '90': 'bright_black',
-        '91': 'bright_red',
-        '92': 'bright_green',
-        '93': 'bright_yellow',
-        '94': 'bright_blue',
-        '95': 'bright_magenta',
-        '96': 'bright_cyan',
-        '97': 'bright_white',
-    }
-    ANSI_ESCAPE_RE = re.compile(r'\x1b\[([0-9;]*)m')
-    bold_active = False
-    def replace_ansi(match):  # noqa
-        nonlocal bold_active
-        codes = match.group(1).split(';')  # Split multiple codes (e.g., "1;31")
-        html_parts = []
-        for code in codes:
-            if code == '1':  # Bold
-                if not bold_active:  # Activate bold
-                    html_parts.append('<b>')
-                    bold_active = True
-            elif code in ANSI_COLOR_MAP:  # Colors
-                color = ANSI_COLOR_MAP[code]
-                html_parts.append(f'<span style="color:{color}">')
-            elif code == '0':  # Reset
-                if bold_active:
-                    html_parts.append('</b>')
-                    bold_active = False
-                html_parts.append('</span>')  # Close color
-        return ''.join(html_parts)
-    text_with_html = ANSI_ESCAPE_RE.sub(replace_ansi, text)
-    if bold_active:
-        text_with_html += '</b>'
-    return f'<pre>{text_with_html}</pre>'
diff --git a/src/encoded/tests/test_elasticsearch_utils.py b/src/encoded/tests/test_elasticsearch_utils.py
index 97d690500..979d13272 100644
--- a/src/encoded/tests/test_elasticsearch_utils.py
+++ b/src/encoded/tests/test_elasticsearch_utils.py
@@ -1,11 +1,13 @@
 import pytest
 from typing import Optional
-from encoded.elasticsearch_utils import create_elasticsearch_aggregation_query
-from encoded.elasticsearch_utils import merge_elasticsearch_aggregation_results
-from encoded.elasticsearch_utils import normalize_elasticsearch_aggregation_results
-from encoded.recent_files_summary import AGGREGATION_FIELD_RELEASE_DATE
-from encoded.recent_files_summary import AGGREGATION_FIELD_CELL_LINE
-from encoded.recent_files_summary import AGGREGATION_FIELD_FILE_DESCRIPTOR
+from encoded.endpoints.elasticsearch_utils import (
+    create_elasticsearch_aggregation_query,
+    merge_elasticsearch_aggregation_results,
+    normalize_elasticsearch_aggregation_results)
+from encoded.endpoints.recent_files_summary.recent_files_summary import (
+    AGGREGATION_FIELD_RELEASE_DATE,
+    AGGREGATION_FIELD_CELL_LINE,
+    AGGREGATION_FIELD_FILE_DESCRIPTOR)
 
 
 def test_create_elasticsearch_aggregation_query_a():
diff --git a/src/encoded/tests/test_endpoint_utils.py b/src/encoded/tests/test_endpoint_utils.py
index b877a8b62..8b30634d1 100644
--- a/src/encoded/tests/test_endpoint_utils.py
+++ b/src/encoded/tests/test_endpoint_utils.py
@@ -2,7 +2,7 @@
 import datetime
 from typing import Optional, Union
 from unittest.mock import patch as mock_patch
-from encoded.endpoint_utils import parse_date_range_related_arguments, parse_datetime_string
+from encoded.endpoints.endpoint_utils import parse_date_range_related_arguments, parse_datetime_string
 
 DEFAULT_MOCK_DATETIME_TODAY_VALUE = "2024-11-06 07:54:16"
 
@@ -74,5 +74,6 @@ class MockDateTime(datetime.datetime):  # noqa
     @classmethod
     def today(cls):
         nonlocal value ; return value  # noqa
-    with (mock_patch("encoded.endpoint_utils.datetime", MockDateTime), mock_patch("datetime.datetime", MockDateTime)):
+    with (mock_patch("encoded.endpoints.endpoint_utils.datetime", MockDateTime),
+          mock_patch("datetime.datetime", MockDateTime)):
         yield

From db2cc99259bab5f0dcc9c618a3684549c19e2b13 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 15 Dec 2024 17:43:57 -0500
Subject: [PATCH 62/78] refactoring /recent_files_summary endpoint

---
 .../endpoints/recent_files_summary/recent_files_summary.py | 3 ++-
 .../recent_files_summary_troubleshooting.py                | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
index f90d577c6..74903ef62 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
@@ -41,7 +41,8 @@ def recent_files_summary_endpoint(context, request):
     text = request_arg_bool(request, "text")
     results = recent_files_summary(request, troubleshooting=text)
     if text:
-        results = get_normalized_aggregation_results_as_html_for_troublehshooting(results)
+        text_debug = request_arg_bool(request, "text_debug")
+        results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, debug=text_debug)
{results}
", content_type='text/html') return results diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index 7fbc5d4ea..111e68f3e 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -109,9 +109,11 @@ def annotate_with_uuids(normalized_results: dict): pass -def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict): +def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, debug: bool = False): with _capture_output_to_html_string() as captured_output: print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True) + if debug is True: + return captured_output.text return captured_output.html From e8280d6d25fee5c1cd16d0bd709a74907d691b46 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 18:09:28 -0500 Subject: [PATCH 63/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index 111e68f3e..f1df59ff3 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -1,9 +1,12 @@ +import builtins from contextlib import contextmanager from copy import deepcopy -from pyramid.request import Request as PyramidRequest import re +from pyramid.request import Request as PyramidRequest +from io import StringIO from termcolor import colored from typing import Any, Callable, List, Optional, Tuple, Union +from unittest.mock import patch as patch from encoded.endpoints.endpoint_utils import parse_datetime_string from encoded.endpoints.recent_files_summary.recent_files_summary_fields import ( AGGREGATION_FIELD_RELEASE_DATE, @@ -13,7 +16,6 @@ AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_FILE_DESCRIPTOR) - def add_info_for_troubleshooting(normalized_results: dict, request: PyramidRequest) -> None: def get_files(files, property_name, property_value, map_property_value = None): @@ -441,9 +443,6 @@ def _get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort: @contextmanager def _capture_output_to_html_string(): - from io import StringIO - from unittest.mock import patch as patch - def ansi_to_html(text): ANSI_COLOR_MAP = { "30": "black", @@ -488,7 +487,6 @@ def replace_ansi(match): # noqa text_with_html += "" return f"
         return f"<pre>{text_with_html}</pre>"
" - print_original = print captured_output = StringIO() class CapturedOutput: # noqa def __init__(self, captured_output: StringIO): @@ -502,5 +500,9 @@ def html(self): def captured_print(*args, **kwargs): # noqa nonlocal captured_output print_original(*args, **kwargs, file=captured_output) - with patch("builtins.print", captured_print): - yield CapturedOutput(captured_output) + print_original = builtins.print + try: + with patch("builtins.print", captured_print): + yield CapturedOutput(captured_output) + finally: + print = print_original From 3a1f19f8aacd94a988b3290228d643f4f107b5df Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 18:14:23 -0500 Subject: [PATCH 64/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index f1df59ff3..d3c9d4db4 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -488,6 +488,7 @@ def replace_ansi(match): # noqa return f"
         return f"<pre>{text_with_html}</pre>"
" captured_output = StringIO() + print_original = builtins.print class CapturedOutput: # noqa def __init__(self, captured_output: StringIO): self._captured_output = captured_output @@ -498,11 +499,7 @@ def text(self): def html(self): return ansi_to_html(self._captured_output.getvalue()) def captured_print(*args, **kwargs): # noqa - nonlocal captured_output + nonlocal captured_output, print_original print_original(*args, **kwargs, file=captured_output) - print_original = builtins.print - try: - with patch("builtins.print", captured_print): - yield CapturedOutput(captured_output) - finally: - print = print_original + with patch("encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting.print", captured_print): + yield CapturedOutput(captured_output) From a383ceb02a0e1e6f30d0f708e4a3a9666abf726a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 19:08:43 -0500 Subject: [PATCH 65/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index d3c9d4db4..d796a1527 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -487,19 +487,25 @@ def replace_ansi(match): # noqa text_with_html += "" return f"
         return f"<pre>{text_with_html}</pre>"
" - captured_output = StringIO() - print_original = builtins.print + #captured_output = StringIO() + captured_output = "" + # print_original = builtins.print class CapturedOutput: # noqa def __init__(self, captured_output: StringIO): self._captured_output = captured_output @property # noqa def text(self): - return self._captured_output.getvalue() + return captured_output + # return self._captured_output.getvalue() @property # noqa def html(self): - return ansi_to_html(self._captured_output.getvalue()) + return ansi_to_html(captured_output) + # return ansi_to_html(self._captured_output.getvalue()) def captured_print(*args, **kwargs): # noqa - nonlocal captured_output, print_original - print_original(*args, **kwargs, file=captured_output) + # nonlocal captured_output, print_original + # print_original(*args, **kwargs, file=captured_output) + nonlocal captured_output + captured_output += str(args[0]) + captured_output += "\n" with patch("encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting.print", captured_print): yield CapturedOutput(captured_output) From 847aaa5f500ca2bd8659442984ad9c422de82301 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 19:10:12 -0500 Subject: [PATCH 66/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index d796a1527..17b9e763c 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -495,10 +495,12 @@ def __init__(self, captured_output: StringIO): self._captured_output = captured_output @property # noqa def text(self): + nonlocal captured_output return captured_output # return self._captured_output.getvalue() @property # noqa def html(self): + nonlocal captured_output return ansi_to_html(captured_output) # return ansi_to_html(self._captured_output.getvalue()) def captured_print(*args, **kwargs): # noqa From a092124fc3f74edf27a8dc80134952af165c0526 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 19:24:04 -0500 Subject: [PATCH 67/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 55 ++++++++++--------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index 17b9e763c..3c28ae648 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -173,26 +173,6 @@ def get_aggregation_field_labels() -> dict: "file_sets.libraries.analytes.samples.sample_sources.display_title": "sample-sources-title" } - def terminal_color(value: str, - color: Optional[str] = None, - dark: bool = False, - bold: bool = False, - underline: bool = False, - nocolor: bool = False) -> str: - # This is used only for troubleshooting by - if nocolor is True: - return value - attributes = [] - if dark is True: - attributes.append("dark") - if bold is True: - attributes.append("bold") - if underline is True: - attributes.append("underline") - if isinstance(color, str) and color: - return colored(value, 
-            return colored(value, color.lower(), attrs=attributes)
-        return colored(value, attrs=attributes)
-
     def print_results(data: dict,
                       parent_grouping_name: Optional[str] = None,
                       parent_grouping_value: Optional[str] = None,
@@ -388,12 +368,12 @@ def print_hit_property_values(hit: dict, property_name: str,
     aggregation_fields = get_aggregation_fields(normalized_results)
     aggregation_fields_to_print = get_aggregation_fields_to_print(normalized_results)
     aggregation_field_labels = get_aggregation_field_labels()
 
-    red = lambda text: terminal_color(text, "red")  # noqa
-    red_bold = lambda text: terminal_color(text, "red", bold=True)  # noqa
-    green = lambda text: terminal_color(text, "green")  # noqa
-    green_bold = lambda text: terminal_color(text, "green", bold=True)  # noqa
-    gray = lambda text: terminal_color(text, "grey")  # noqa
-    bold = (lambda text: terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text)
+    red = lambda text: _terminal_color(text, "red")  # noqa
+    red_bold = lambda text: _terminal_color(text, "red", bold=True)  # noqa
+    green = lambda text: _terminal_color(text, "green")  # noqa
+    green_bold = lambda text: _terminal_color(text, "green", bold=True)  # noqa
+    gray = lambda text: _terminal_color(text, "grey")  # noqa
+    bold = (lambda text: _terminal_color(text, bold=True)) if (nobold is not True) else (lambda text: text)
     chars_check = "✓"
     chars_xmark = "✗"
     chars_dot = "•"
@@ -440,6 +420,27 @@ def _get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort:
     return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback])
 
 
+def _terminal_color(value: str,
+                    color: Optional[str] = None,
+                    dark: bool = False,
+                    bold: bool = False,
+                    underline: bool = False,
+                    nocolor: bool = False) -> str:
+    # This is used only for troubleshooting by
+    if nocolor is True:
+        return value
+    attributes = []
+    if dark is True:
+        attributes.append("dark")
+    if bold is True:
+        attributes.append("bold")
+    if underline is True:
+        attributes.append("underline")
+    if isinstance(color, str) and color:
+        return colored(value, color.lower(), attrs=attributes)
+    return colored(value, attrs=attributes)
+
+
 @contextmanager
 def _capture_output_to_html_string():
@@ -508,7 +508,7 @@ def captured_print(*args, **kwargs):  # noqa
         nonlocal captured_output
-        captured_output += str(args[0])
+        captured_output += str(args[0]) + "[" + _terminal_color("DEBUG", "red") + "]"
         captured_output += "\n"
     with patch("encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting.print", captured_print):
         yield CapturedOutput(captured_output)

From 8e534f4a2bf4429d87680c65a72cddc808f0426d Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 15 Dec 2024 19:42:30 -0500
Subject: [PATCH 68/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 3c28ae648..2c73c40c2 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -508,7 +508,7 @@ def captured_print(*args, **kwargs):  # noqa
         # nonlocal captured_output, print_original
         # print_original(*args, **kwargs, file=captured_output)
         nonlocal captured_output
-        captured_output += str(args[0]) + "[" + _terminal_color("DEBUG", "red") + "]"
+        captured_output += str(args[0]) + "[" + colored("DEBUG", "red") + "]"
         captured_output += "\n"
     with patch("encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting.print", captured_print):
         yield CapturedOutput(captured_output)

From fb7ba9be4514ec5f87ff4a38b4fb1227889488b5 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 15 Dec 2024 19:59:49 -0500
Subject: [PATCH 69/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 2c73c40c2..1292a5142 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -4,6 +4,7 @@
 import re
 from pyramid.request import Request as PyramidRequest
 from io import StringIO
+import os
 from termcolor import colored
 from typing import Any, Callable, List, Optional, Tuple, Union
 from unittest.mock import patch as patch
@@ -112,6 +113,7 @@ def annotate_with_uuids(normalized_results: dict):
 
 
 def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, debug: bool = False):
+    os.environ["TERM"] = "xterm-256color"
     with _capture_output_to_html_string() as captured_output:
         print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True)
         if debug is True:
@@ -508,6 +510,7 @@ def captured_print(*args, **kwargs):  # noqa
         # nonlocal captured_output, print_original
         # print_original(*args, **kwargs, file=captured_output)
         nonlocal captured_output
+        os.environ["TERM"] = "xterm-256color"
         captured_output += str(args[0]) + "[" + colored("DEBUG", "red") + "]"
         captured_output += "\n"

From 57af5b1e01df7559fcfcc6185c37a34e4f348426 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 15 Dec 2024 20:20:57 -0500
Subject: [PATCH 70/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 1292a5142..32e729635 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -422,12 +422,23 @@ def _get_properties(data: dict, name: str, fallback: Optional[Any] = None, sort:
     return fallback if isinstance(fallback, list) else ([] if fallback is None else [fallback])
 
 
+def colored_html(value: str, color: Optional[str] = None, attrs: Optional[list] = None) -> str:
+    if isinstance(value, str):
+        if isinstance(color, str) and color:
+            value = f"<span style='color:{color}'>{value}</span>"
+        if isinstance(attrs, list):
+            if "bold" in attrs:
+                value = f"<b>{value}</b>"
+    return value
+
+
 def _terminal_color(value: str,
                     color: Optional[str] = None,
                     dark: bool = False,
                     bold: bool = False,
                     underline: bool = False,
                     nocolor: bool = False) -> str:
+    colored = colored_html
     # This is used only for troubleshooting by
     if nocolor is True:
         return value

From 4a212d1384a5c0d5d4dad341062b1adc00267949 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Sun, 15 Dec 2024 21:15:25 -0500
Subject: [PATCH 71/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary/recent_files_summary.py |   3 +-
 .../recent_files_summary_troubleshooting.py      | 101 +++++-------------
 2 files changed, 28 insertions(+), 76 deletions(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
index 74903ef62..f90d577c6 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py
@@ -41,8 +41,7 @@ def recent_files_summary_endpoint(context, request):
     text = request_arg_bool(request, "text")
     results = recent_files_summary(request, troubleshooting=text)
     if text:
-        text_debug = request_arg_bool(request, "text_debug")
-        results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, debug=text_debug)
+        results = get_normalized_aggregation_results_as_html_for_troublehshooting(results)
{results}
", content_type='text/html') return results diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index 32e729635..31df69c37 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -1,10 +1,6 @@ -import builtins from contextlib import contextmanager from copy import deepcopy -import re from pyramid.request import Request as PyramidRequest -from io import StringIO -import os from termcolor import colored from typing import Any, Callable, List, Optional, Tuple, Union from unittest.mock import patch as patch @@ -112,13 +108,10 @@ def annotate_with_uuids(normalized_results: dict): pass -def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, debug: bool = False): - os.environ["TERM"] = "xterm-256color" - with _capture_output_to_html_string() as captured_output: +def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict): + with _capture_output_to_html() as captured_output: print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True) - if debug is True: - return captured_output.text - return captured_output.html + return captured_output.text def print_normalized_aggregation_results_for_troubleshooting(normalized_results: dict, @@ -438,7 +431,6 @@ def _terminal_color(value: str, bold: bool = False, underline: bool = False, nocolor: bool = False) -> str: - colored = colored_html # This is used only for troubleshooting by if nocolor is True: return value @@ -455,74 +447,35 @@ def _terminal_color(value: str, @contextmanager -def _capture_output_to_html_string(): - - def ansi_to_html(text): - ANSI_COLOR_MAP = { - "30": "black", - "31": "red", - "32": "green", - "33": "yellow", - "34": "blue", - "35": "magenta", - "36": "cyan", - "37": "white", - "90": "bright_black", - "91": "bright_red", - "92": "bright_green", - "93": "bright_yellow", - "94": "bright_blue", - "95": "bright_magenta", - "96": "bright_cyan", - "97": "bright_white", - } - ANSI_ESCAPE_RE = re.compile(r"\x1b\[([0-9;]*)m") - bold_active = False - def replace_ansi(match): # noqa - nonlocal bold_active - codes = match.group(1).split(";") # Split multiple codes (e.g., "1;31") - html_parts = [] - for code in codes: - if code == "1": # Bold - if not bold_active: # Activate bold - html_parts.append("") - bold_active = True - elif code in ANSI_COLOR_MAP: # Colors - color = ANSI_COLOR_MAP[code] - html_parts.append(f"") - elif code == "0": # Reset - if bold_active: - html_parts.append("") - bold_active = False - html_parts.append("") # Close color - return "".join(html_parts) - text_with_html = ANSI_ESCAPE_RE.sub(replace_ansi, text) - if bold_active: - text_with_html += "" - return f"
-        return f"<pre>{text_with_html}</pre>"
" - - #captured_output = StringIO() +def _capture_output_to_html(): + + def html_color(value: str, + color: Optional[str] = None, + dark: bool = False, + bold: bool = False, + underline: bool = False, + nocolor: bool = False) -> str: + if (nocolor is not True) and isinstance(value, str): + if isinstance(color, str) and color: + if dark is True: + value = f"{value}" + else: + value = f"{value}" + if bold is True: + value = f"{value}" + if underline is True: + value = f"{value}" + return value + captured_output = "" - # print_original = builtins.print class CapturedOutput: # noqa - def __init__(self, captured_output: StringIO): - self._captured_output = captured_output @property # noqa def text(self): nonlocal captured_output return captured_output - # return self._captured_output.getvalue() - @property # noqa - def html(self): - nonlocal captured_output - return ansi_to_html(captured_output) - # return ansi_to_html(self._captured_output.getvalue()) def captured_print(*args, **kwargs): # noqa - # nonlocal captured_output, print_original - # print_original(*args, **kwargs, file=captured_output) nonlocal captured_output - os.environ["TERM"] = "xterm-256color" - captured_output += str(args[0]) + "[" + colored("DEBUG", "red") + "]" - captured_output += "\n" - with patch("encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting.print", captured_print): - yield CapturedOutput(captured_output) + captured_output += str(args[0]) + "\n" + this_module = "encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting" + with (patch(f"{this_module}.print", captured_print), patch(f"{this_module}._terminal_color", html_color)): + yield CapturedOutput() From bbddc5ce77c6a98abe223ea9a2842a1a0a38447e Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 21:24:01 -0500 Subject: [PATCH 72/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary/recent_files_summary.py | 3 ++- .../recent_files_summary_troubleshooting.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py index f90d577c6..74903ef62 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py @@ -41,7 +41,8 @@ def recent_files_summary_endpoint(context, request): text = request_arg_bool(request, "text") results = recent_files_summary(request, troubleshooting=text) if text: - results = get_normalized_aggregation_results_as_html_for_troublehshooting(results) + text_debug = request_arg_bool(request, "text_debug") + results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, debug=text_debug) results = PyramidResponse(f"
{results}
", content_type='text/html') return results diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index 31df69c37..f4796f0a0 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -108,8 +108,8 @@ def annotate_with_uuids(normalized_results: dict): pass -def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict): - with _capture_output_to_html() as captured_output: +def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, debug: bool = False): + with _capture_output_to_html(debug=debug) as captured_output: print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True) return captured_output.text @@ -447,7 +447,7 @@ def _terminal_color(value: str, @contextmanager -def _capture_output_to_html(): +def _capture_output_to_html(debug: bool = False): def html_color(value: str, color: Optional[str] = None, @@ -477,5 +477,9 @@ def captured_print(*args, **kwargs): # noqa nonlocal captured_output captured_output += str(args[0]) + "\n" this_module = "encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting" - with (patch(f"{this_module}.print", captured_print), patch(f"{this_module}._terminal_color", html_color)): - yield CapturedOutput() + if debug is True: + with patch(f"{this_module}.print", captured_print): + yield CapturedOutput() + else: + with (patch(f"{this_module}.print", captured_print), patch(f"{this_module}._terminal_color", html_color)): + yield CapturedOutput() From 142626bd6c4e00aaa701195f8cd1fbc34df07d83 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 15 Dec 2024 21:32:02 -0500 Subject: [PATCH 73/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary/recent_files_summary.py | 9 ++++++++- .../recent_files_summary_troubleshooting.py | 11 +++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py index 74903ef62..7b9d712ee 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py @@ -41,8 +41,15 @@ def recent_files_summary_endpoint(context, request): text = request_arg_bool(request, "text") results = recent_files_summary(request, troubleshooting=text) if text: + text_verbose = request_arg_bool(request, "text_verbose") + text_uuids = request_arg_bool(request, "text_uuids", True) + text_uuid_details = request_arg_bool(request, "text_uuid_details", True) text_debug = request_arg_bool(request, "text_debug") - results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, debug=text_debug) + results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, + uuids=text_uuids, + uuid_details=text_uuid_details, + verbose=text_verbose, + debug=text_debug) results = PyramidResponse(f"
{results}
", content_type='text/html') return results diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index f4796f0a0..dfe0d06f4 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -108,9 +108,16 @@ def annotate_with_uuids(normalized_results: dict): pass -def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, debug: bool = False): +def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, + uuids: bool = True, + uuid_details: bool = True, + verbose: bool = False, + debug: bool = False): with _capture_output_to_html(debug=debug) as captured_output: - print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=True, uuid_details=True) + print_normalized_aggregation_results_for_troubleshooting(normalized_results, + uuids=uuids, + uuid_details=uuid_details, + verbose=verbose) return captured_output.text From dde4db5fc2acc77c0d2bffa90ad019bca70e7537 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 16 Dec 2024 12:14:51 -0500 Subject: [PATCH 74/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary.py | 4 +- .../recent_files_summary_troubleshooting.py | 84 ++++++++++++++----- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py index 7b9d712ee..4c72af2ab 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary.py @@ -41,13 +41,15 @@ def recent_files_summary_endpoint(context, request): text = request_arg_bool(request, "text") results = recent_files_summary(request, troubleshooting=text) if text: - text_verbose = request_arg_bool(request, "text_verbose") text_uuids = request_arg_bool(request, "text_uuids", True) text_uuid_details = request_arg_bool(request, "text_uuid_details", True) + text_query = request_arg_bool(request, "text_query") + text_verbose = request_arg_bool(request, "text_verbose") text_debug = request_arg_bool(request, "text_debug") results = get_normalized_aggregation_results_as_html_for_troublehshooting(results, uuids=text_uuids, uuid_details=text_uuid_details, + query=text_query, verbose=text_verbose, debug=text_debug) results = PyramidResponse(f"
{results}
", content_type='text/html') diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index dfe0d06f4..3ed42724e 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -54,7 +54,12 @@ def dedup_list(data: list) -> list: # noqa ]) def annotate_with_uuids(normalized_results: dict): + + def get_unique_release_tracker_description_values(normalized_results: dict) -> List[str]: + return _get_properties(normalized_results, "items.items.items.value") + nonlocal aggregation_fields_for_troubleshooting + unique_release_tracker_description_values = get_unique_release_tracker_description_values(normalized_results) uuid_records = [] query = normalized_results.get("query") if isinstance(normalized_results.get("debug"), dict): @@ -67,7 +72,19 @@ def annotate_with_uuids(normalized_results: dict): for second_item in first_item["items"]: second_property_name = second_item["name"] second_property_value = second_item["value"] - for third_item in second_item["items"]: + second_item_items = second_item["items"] + # Put dummy elements in for AGGREGATION_FIELD_FILE_DESCRIPTOR items values which do not exist. + third_item_values = [third_item["value"] for third_item in second_item_items] + for unique_release_tracker_description_value in unique_release_tracker_description_values: + if unique_release_tracker_description_value not in third_item_values: + second_item["items"].append({ + "name": AGGREGATION_FIELD_FILE_DESCRIPTOR, + "value": unique_release_tracker_description_value, + "count": 0, + "debug_placeholder": True + }) + third_items_to_delete = [] + for third_item in second_item_items: third_property_name = third_item["name"] third_property_value = third_item["value"] if debug_elasticsearch_hits := third_item.get("debug_elasticsearch_hits"): @@ -97,6 +114,12 @@ def annotate_with_uuids(normalized_results: dict): uuid_records.append(uuid_record) if third_item.get("debug", {}).get("portal_hits"): third_item["debug"]["portal_hits"].sort(key=lambda item: item.get("uuid")) + if ((third_item.get("count") == 0) and (third_item.get("debug_placeholder") is True) and + (not third_item.get("debug", {}).get("elasticsearch_hits")) and (not third_item.get("debug", {}).get("portal_hits"))): + third_items_to_delete.append(third_item) + if third_items_to_delete: + for third_item in third_items_to_delete: + second_item_items.remove(third_item) for uuid_record in uuid_records: if (count := count_uuid(uuid_records, uuid_record["uuid"])) > 1: @@ -111,12 +134,14 @@ def annotate_with_uuids(normalized_results: dict): def get_normalized_aggregation_results_as_html_for_troublehshooting(normalized_results: dict, uuids: bool = True, uuid_details: bool = True, + query: bool = False, verbose: bool = False, debug: bool = False): with _capture_output_to_html(debug=debug) as captured_output: print_normalized_aggregation_results_for_troubleshooting(normalized_results, uuids=uuids, uuid_details=uuid_details, + query=query, verbose=verbose) return captured_output.text @@ -186,7 +211,7 @@ def print_results(data: dict, def get_portal_hits(data: dict) -> List[dict]: hits = [] - if isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): + if isinstance(data, dict) and isinstance(portal_hits := data.get("debug", {}).get("portal_hits"), list): for portal_hit in portal_hits: if 
                 if isinstance(portal_hit, dict) and isinstance(uuid := portal_hit.get("uuid"), str) and uuid:
                     hits.append(portal_hit)
         return hits
 
@@ -314,9 +339,21 @@ def print_hit_property_values(hit: dict, property_name: str,
             note = f" {chars_check}"
         elif checks:
             note = f" {chars_check}"
+        if not ((count == 0) and (len(hits) == 0) and (not note)):
+            if ((grouping_name in aggregation_fields) and
+                (len(hits) == 0) and isinstance(items := data.get("items"), list)):
+                # Count the actual hits for this noted aggregation field group.
+                items_nhits = 0
+                for item in items:
+                    items_nhits += len(get_portal_hits(item))
+                if items_nhits > count:
+                    note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}")
         print(f"{spaces}{grouping}: {count}{note}")
-    if (query is True) and (query_string := data.get("query")):
-        print(f"{spaces}  {query_string}")
+        if (query is True) and (query_string := data.get("query")):
+            if _terminal_color == _html_color:
+                print(f"{spaces}  {query_string}")
+            else:
+                print(f"{spaces}  {query_string}")
 
+def _html_color(value: str,
+                color: Optional[str] = None,
+                dark: bool = False,
+                bold: bool = False,
+                underline: bool = False,
+                nocolor: bool = False) -> str:
+    if (nocolor is not True) and isinstance(value, str):
+        if isinstance(color, str) and color:
+            if dark is True:
+                value = f"<span style='color:dark{color}'>{value}</span>"
+            else:
+                value = f"<span style='color:{color}'>{value}</span>"
+        if bold is True:
+            value = f"<b>{value}</b>"
+        if underline is True:
+            value = f"<u>{value}</u>"
+    return value
+
+
 @contextmanager
 def _capture_output_to_html(debug: bool = False):
 
-    def html_color(value: str,
-                   color: Optional[str] = None,
-                   dark: bool = False,
-                   bold: bool = False,
-                   underline: bool = False,
-                   nocolor: bool = False) -> str:
-        if (nocolor is not True) and isinstance(value, str):
-            if isinstance(color, str) and color:
-                if dark is True:
-                    value = f"<span style='color:dark{color}'>{value}</span>"
-                else:
-                    value = f"<span style='color:{color}'>{value}</span>"
-            if bold is True:
-                value = f"<b>{value}</b>"
-            if underline is True:
-                value = f"<u>{value}</u>"
-        return value
-
     captured_output = ""
     class CapturedOutput:  # noqa
         @property  # noqa
         def text(self):
             nonlocal captured_output
             return captured_output
     def captured_print(*args, **kwargs):  # noqa
         nonlocal captured_output
         captured_output += str(args[0]) + "\n"
     this_module = "encoded.endpoints.recent_files_summary.recent_files_summary_troubleshooting"
     if debug is True:
         with patch(f"{this_module}.print", captured_print):
             yield CapturedOutput()
     else:
-        with (patch(f"{this_module}.print", captured_print), patch(f"{this_module}._terminal_color", html_color)):
+        with (patch(f"{this_module}.print", captured_print), patch(f"{this_module}._terminal_color", _html_color)):
             yield CapturedOutput()

From 397aeb1fecc5641982651f3e871481f074ee7f7a Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 16 Dec 2024 12:47:43 -0500
Subject: [PATCH 75/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 39 +++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 3ed42724e..2ff86d486 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -114,8 +114,10 @@ def annotate_with_uuids(normalized_results: dict):
                         if third_item.get("debug", {}).get("portal_hits"):
item.get("uuid")) - if ((third_item.get("count") == 0) and (third_item.get("debug_placeholder") is True) and - (not third_item.get("debug", {}).get("elasticsearch_hits")) and (not third_item.get("debug", {}).get("portal_hits"))): + if ((third_item.get("count") == 0) and + (third_item.get("debug_placeholder") is True) and + (not third_item.get("debug", {}).get("elasticsearch_hits")) and + (not third_item.get("debug", {}).get("portal_hits"))): # noqa third_items_to_delete.append(third_item) if third_items_to_delete: for third_item in third_items_to_delete: @@ -217,6 +219,24 @@ def get_portal_hits(data: dict) -> List[dict]: hits.append(portal_hit) return hits + def count_unique_portal_hits_recursively(data: dict) -> int: + def get_portal_hits_recursively(data: dict) -> List[dict]: # noqa + hits = [] + if isinstance(data, dict): + for key in data: + if key == "portal_hits": + if isinstance(data[key], list): + hits.extend(data[key]) + else: + hits.extend(get_portal_hits_recursively(data[key])) + elif isinstance(data, list): + for element in data: + hits.extend(get_portal_hits_recursively(element)) + return hits + hits = get_portal_hits_recursively(data) + hits = [hit.get("uuid") for hit in hits] + return len(set(hits)) + def format_hit_property_values(hit: dict, property_name: str, color: Optional[Callable] = None) -> Tuple[Optional[str], List[Tuple[str, str]]]: nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow @@ -324,8 +344,8 @@ def print_hit_property_values(hit: dict, property_name: str, grouping = "RESULTS" grouping = f"{chars_diamond} {grouping}" hits = get_portal_hits(data) if (uuids is True) else [] + note = "" if isinstance(count := data.get("count"), int): - note = "" if len(hits) > count: note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") elif isinstance(items := data.get("items"), list): @@ -340,14 +360,10 @@ def print_hit_property_values(hit: dict, property_name: str, elif checks: note = f" {chars_check}" if not ((count == 0) and (len(hits) == 0) and (not note)): - if ((grouping_name in aggregation_fields) and - (len(hits) == 0) and isinstance(items := data.get("items"), list)): - # Count the actual hits for this noted aggregation field group. - items_nhits = 0 - for item in items: - items_nhits += len(get_portal_hits(item)) - if items_nhits > count: - note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}") + if (len(hits) == 0) and isinstance(items := data.get("items"), list): + # Count the actual hits for this non-terminal group. 
+                if (items_nhits := count_unique_portal_hits_recursively(items)) > count:
+                    note += red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}")
         print(f"{spaces}{grouping}: {count}{note}")
         if (query is True) and (query_string := data.get("query")):
@@ -372,7 +389,6 @@ def print_hit_property_values(hit: dict, property_name: str,
     for hit in hits:
         if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid:
-            note = ""
             if hit.get("elasticsearch_counted") is False:
                 print(red(f"{spaces}  {chars_dot} {uuid} {chars_xmark} UNCOUNTED"))
                 color = red_bold
             else:

From c27f1a5e55efad282773f05aa944e16e9bcd2f16 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 16 Dec 2024 16:12:45 -0500
Subject: [PATCH 76/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 9 +++++----
 src/encoded/metadata.py                     | 5 +++++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 2ff86d486..0a6dec01a 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -81,6 +81,7 @@ def get_unique_release_tracker_description_values(normalized_results: dict) -> List[str]:
                                 "name": AGGREGATION_FIELD_FILE_DESCRIPTOR,
                                 "value": unique_release_tracker_description_value,
                                 "count": 0,
+                                "elasticsearch_counted": False,
                                 "debug_placeholder": True
                             })
@@ -241,7 +242,7 @@ def format_hit_property_values(hit: dict, property_name: str,
                                    color: Optional[Callable] = None) -> Tuple[Optional[str], List[Tuple[str, str]]]:
         nonlocal parent_grouping_name, parent_grouping_value, green, green_bold, chars_larrow_hollow
         counted_elsewhere = []
-        if hit.get("elasticsearch_counted") is False:
+        if hit.get("elasticsearch_counted", False) is False:
             counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid"))
         else:
             counted_grouping_name, counted_grouping_value = (None, None)
@@ -260,7 +261,7 @@ def format_hit_property_values(hit: dict, property_name: str,
                 else:
                     property_values.append(property_value)
             property_value = ", ".join(property_values)
-        elif hit.get("elasticsearch_counted") is False:
+        elif hit.get("elasticsearch_counted", False) is False:
             counted_grouping_name, counted_grouping_value = find_where_aggregated_and_counted(hit.get("uuid"))
             if (counted_grouping_name == property_name) and (counted_grouping_value == property_value):
                 property_value = green_bold(f"{property_value} {chars_larrow_hollow}") + green(" COUNTED HERE")
@@ -292,7 +293,7 @@ def find_where(data: dict, uuid: str,
         elif isinstance(hits := data.get("debug", {}).get("portal_hits"), list):
             for hit in hits:
                 if hit.get("uuid") == uuid:
-                    if hit.get("elasticsearch_counted") is True:
+                    if hit.get("elasticsearch_counted", False) is True:
                         found_uuid_grouping_names_and_values.add((parent_grouping_name, parent_grouping_value))
         return found_uuid_grouping_names_and_values
 
@@ -373,7 +374,7 @@ def print_hit_property_values(hit: dict, property_name: str,
     for hit in hits:
         if isinstance(hit, dict) and isinstance(uuid := hit.get("uuid"), str) and uuid:
-            if hit.get("elasticsearch_counted") is False:
+            if hit.get("elasticsearch_counted", False) is False:
                 print(red(f"{spaces}  {chars_dot} {uuid} {chars_xmark} UNCOUNTED"))
                 color = red_bold
             else:
diff --git a/src/encoded/metadata.py b/src/encoded/metadata.py
index 6a4badf79..4eba39721 100644
--- a/src/encoded/metadata.py
+++ b/src/encoded/metadata.py
@@ -287,6 +287,11 @@ def peek_metadata(context, request):
     """ Helper for the UI that will retrieve faceting information about data retrieved from /metadata """
     # get arguments from helper
     args = handle_metadata_arguments(context, request)
+    if isinstance(args, Response):
+        # dmichaels/2024-12-16: Hackish fix for now; handle_metadata_arguments not returning MetadataArgs for ...
+        subreq = make_search_subreq(request, '{}?{}'.format('/search', urlencode(request.params, True)), inherit_user=True)
+        result = search(context, subreq)
+        return result['facets']
 
     # Generate search
     search_param = {}

From c5e275a57b3ac46a235458214f08842aa1e671b4 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 16 Dec 2024 16:23:52 -0500
Subject: [PATCH 77/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index 0a6dec01a..d73c3acd9 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -347,7 +347,7 @@ def print_hit_property_values(hit: dict, property_name: str,
     hits = get_portal_hits(data) if (uuids is True) else []
     note = ""
     if isinstance(count := data.get("count"), int):
-        if len(hits) > count:
+        if (len(hits) > count) and (uuids is True):
             note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}")
         elif isinstance(items := data.get("items"), list):
             subcount = 0
@@ -363,7 +363,7 @@ def print_hit_property_values(hit: dict, property_name: str,
         if not ((count == 0) and (len(hits) == 0) and (not note)):
             if (len(hits) == 0) and isinstance(items := data.get("items"), list):
                 # Count the actual hits for this non-terminal group.
-                if (items_nhits := count_unique_portal_hits_recursively(items)) > count:
+                if ((items_nhits := count_unique_portal_hits_recursively(items)) > count) and (uuids is True):
                     note += red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}")

From 56ec43525941fecf50b3ec1d18dc1f3a168c3b07 Mon Sep 17 00:00:00 2001
From: David Michaels
Date: Mon, 16 Dec 2024 19:15:39 -0500
Subject: [PATCH 78/78] refactoring /recent_files_summary endpoint

---
 .../recent_files_summary_troubleshooting.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
index d73c3acd9..7c71391fe 100644
--- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
+++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py
@@ -349,6 +349,8 @@ def print_hit_property_values(hit: dict, property_name: str,
         if (len(hits) > count) and (uuids is True):
             note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}")
+        if count == 0:
+            note = red(f' {chars_rarrow_hollow} UNCOUNTED') + note
         elif isinstance(items := data.get("items"), list):
- if (items_nhits := count_unique_portal_hits_recursively(items)) > count: + if ((items_nhits := count_unique_portal_hits_recursively(items)) > count) and (uuids is True): note += red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}") print(f"{spaces}{grouping}: {count}{note}") if (query is True) and (query_string := data.get("query")): From 56ec43525941fecf50b3ec1d18dc1f3a168c3b07 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 16 Dec 2024 19:15:39 -0500 Subject: [PATCH 78/78] refactoring /recent_files_summary endpoint --- .../recent_files_summary_troubleshooting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py index d73c3acd9..7c71391fe 100644 --- a/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py +++ b/src/encoded/endpoints/recent_files_summary/recent_files_summary_troubleshooting.py @@ -349,6 +349,8 @@ def print_hit_property_values(hit: dict, property_name: str, if isinstance(count := data.get("count"), int): if (len(hits) > count) and (uuids is True): note = red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {len(hits) - count}") + if count == 0: + note = red(f' {chars_rarrow_hollow} UNCOUNTED') + note elif isinstance(items := data.get("items"), list): subcount = 0 for item in items: @@ -365,6 +367,8 @@ def print_hit_property_values(hit: dict, property_name: str, # Count the actual hits for this non-terminal group. if ((items_nhits := count_unique_portal_hits_recursively(items)) > count) and (uuids is True): note += red(f" {chars_rarrow_hollow} MORE ACTUAL RESULTS: {items_nhits - count}") + if count == 0: + note = red(f' {chars_rarrow_hollow} UNCOUNTED') + note print(f"{spaces}{grouping}: {count}{note}") if (query is True) and (query_string := data.get("query")): if _terminal_color == _html_color: