From d871b4aae29ebad5be842df18e7bf93e85fec1fe Mon Sep 17 00:00:00 2001
From: Gianfranco Rossi
Date: Tue, 1 Oct 2024 11:09:28 -0500
Subject: [PATCH 001/143] feat(scrapers.update_from_text): new command

Helps solve: https://github.com/freelawproject/juriscraper/issues/858

- New command to re-run Site.extract_from_text over downloaded opinions
- Able to filter by Docket.court_id, OpinionCluster.date_filed and
  OpinionCluster.precedential_status
- Updates tasks.update_document_from_text to return change information
  for logging purposes
- Updates test_opinion_scraper to add a Site.extract_from_text method
---
 .../management/commands/update_from_text.py   | 159 ++++++++++++++++++
 cl/scrapers/tasks.py                          |  12 +-
 .../test_assets/test_opinion_scraper.py       |  21 +++
 cl/scrapers/tests.py                          | 110 +++++++++++-
 4 files changed, 297 insertions(+), 5 deletions(-)
 create mode 100644 cl/scrapers/management/commands/update_from_text.py

diff --git a/cl/scrapers/management/commands/update_from_text.py b/cl/scrapers/management/commands/update_from_text.py
new file mode 100644
index 0000000000..77fe5966af
--- /dev/null
+++ b/cl/scrapers/management/commands/update_from_text.py
@@ -0,0 +1,159 @@
+from datetime import datetime
+
+from django.db import transaction
+
+from cl.lib.command_utils import VerboseCommand, logger
+from cl.scrapers.tasks import update_document_from_text
+from cl.search.models import PRECEDENTIAL_STATUS, Opinion, OpinionCluster
+
+
+def update_from_text(
+    opinion: Opinion, juriscraper_module: str, stats: dict[str, int]
+):
+    """Calls `update_document_from_text` as used in the scraper flow
+    and calls the corresponding model's .save()
+
+    :param opinion: the Opinion on which to apply extract_from_text
+    :param juriscraper_module: the scraper module path
+    :param stats: dict to accumulate counts for reporting. Modified in place
+    :return: None
+    """
+    with transaction.atomic():
+        changes = update_document_from_text(opinion, juriscraper_module)
+        if not changes:
+            logger.info("Did not get any metadata for opinion %s", opinion.id)
+            return
+
+        logger.info("Processing opinion %s", opinion.id)
+
+        # Check if changes exist before saving, to prevent unnecessary
+        # DB queries
+        if changes.get("Docket"):
+            opinion.cluster.docket.save()
+            logger.debug(
+                "Docket %s updated with data %s",
+                opinion.cluster.docket.id,
+                changes["Docket"],
+            )
+            stats["Docket"] += 1
+
+        if changes.get("OpinionCluster"):
+            opinion.cluster.save()
+            logger.debug(
+                "OpinionCluster %s updated with data %s",
+                opinion.cluster.id,
+                changes["OpinionCluster"],
+            )
+            stats["OpinionCluster"] += 1
+
+        if changes.get("Opinion"):
+            opinion.save()
+            logger.debug("Opinion updated with data %s", changes["Opinion"])
+            stats["Opinion"] += 1
+
+        if changes.get("Citation"):
+            if changes["Citation"].get("citation_created"):
+                logger.info(
+                    "Citation created with data %s", changes["Citation"]
+                )
+                stats["Citation"] += 1
+            else:
+                logger.debug(
+                    "Citation not created. Data %s", changes["Citation"]
+                )
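For reference, the `changes` dict consumed above is the return value of
`update_document_from_text` (modified later in this patch): a dict keyed by
model name. A minimal sketch of its expected shape, using the Vermont
test-scraper values from the tests below as illustrative data only:

    # Illustrative only: keys mirror the model names handled above; values
    # are whatever Site.extract_from_text returned for each model.
    changes = {
        "Docket": {"docket_number": "2020-12"},
        "OpinionCluster": {"disposition": "Affirmed"},
        "Citation": {
            "volume": "2020",
            "reporter": "VT",
            "page": "11",
            "type": 8,
            "citation_created": True,
        },
    }

An empty dict means extract_from_text found nothing, and the helper returns
early without touching the database.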
+
+
+class Command(VerboseCommand):
+    help = """Updates objects by running Site.extract_from_text
+    over extracted content found on Opinion.plain_text or Opinion.html.
+
+    If `--opinion-ids` is used, all other filters will be ignored.
+    If it is not used, both date filters are required, to prevent
+    accidentally reprocessing a whole court's dataset.
+
+    Recommended use is to run over a sample of the target time period
+    and check that the updates to Docket, OpinionCluster, Opinion and
+    Citation are as expected
+    """
+    stats = {}  # assigned at the end of a command run, for testing
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--juriscraper-module",
+            help="""The Juriscraper file which contains the
+            `extract_from_text` method to be used. The `court_id`
+            will be deduced from this. Example:
+            juriscraper.opinions.united_states.federal_appellate.ca1
+            """,
+            required=True,
+        )
+        parser.add_argument(
+            "--opinion-ids",
+            nargs="+",
+            type=int,
+            help="""The Opinion ids to re-process.
+            May be more than one. If this argument is used,
+            other filters will be ignored""",
+        )
+        parser.add_argument(
+            "--date-filed-gte",
+            default="",
+            help=r"""A filter value in %Y/%m/%d format.
+            OpinionCluster.date_filed will have to be greater than or equal""",
+        )
+        parser.add_argument(
+            "--date-filed-lte",
+            default="",
+            help=r"""A filter value in %Y/%m/%d format.
+            OpinionCluster.date_filed will have to be less than or equal""",
+        )
+        parser.add_argument(
+            "--cluster-status",
+            default="",
+            choices=[value for value, name in PRECEDENTIAL_STATUS.NAMES],
+            help="""A value of OpinionCluster.precedential_status. To be
+            used for filtering the Opinions to be processed
+            """,
+        )
+
+    def handle(self, *args, **options):
+        super().handle(*args, **options)
+        juriscraper_module = options["juriscraper_module"]
+        # For aggregate reporting
+        stats = {"Docket": 0, "OpinionCluster": 0, "Opinion": 0, "Citation": 0}
+
+        if options["opinion_ids"]:
+            opinions = Opinion.objects.filter(id__in=options["opinion_ids"])
+            for op in opinions:
+                update_from_text(op, juriscraper_module, stats)
+
+            logger.info("Modified objects counts: %s", stats)
+            return
+
+        if not (options["date_filed_gte"] and options["date_filed_lte"]):
+            raise ValueError(
+                "Both `--date-filed-gte` and `--date-filed-lte` arguments should have values"
+            )
+
+        court_id = juriscraper_module.split(".")[-1].split("_")[0]
+        gte_date = datetime.strptime(options["date_filed_gte"], "%Y/%m/%d")
+        lte_date = datetime.strptime(options["date_filed_lte"], "%Y/%m/%d")
+        query = {
+            "docket__court_id": court_id,
+            "date_filed__gte": gte_date,
+            "date_filed__lte": lte_date,
+        }
+
+        if options["cluster_status"]:
+            query["precedential_status"] = options["cluster_status"]
+
+        qs = OpinionCluster.objects.filter(**query).prefetch_related(
+            "sub_opinions"
+        )
+        for cluster in qs:
+            opinions = cluster.sub_opinions.all()
+            for op in opinions:
+                update_from_text(op, juriscraper_module, stats)
+
+        logger.info("Modified objects counts: %s", stats)
+        self.stats = stats
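As a usage reference, the command runs like any other management command. A
minimal sketch of driving it from Python (the juriscraper module path and the
date window are placeholders; any court module exposing `extract_from_text`
works the same way):

    from django.core.management import call_command

    # Reprocess a sample window for one court; the court_id ("vt" here)
    # is deduced from the module path.
    call_command(
        "update_from_text",
        juriscraper_module="juriscraper.opinions.united_states.state.vt",
        date_filed_gte="2020/06/01",
        date_filed_lte="2021/06/01",
        cluster_status="Published",
    )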
diff --git a/cl/scrapers/tasks.py b/cl/scrapers/tasks.py
index c60971c572..15500e94bb 100644
--- a/cl/scrapers/tasks.py
+++ b/cl/scrapers/tasks.py
@@ -39,7 +39,7 @@

 def update_document_from_text(
     opinion: Opinion, juriscraper_module: str = ""
-) -> None:
+) -> dict:
     """Extract additional metadata from document text

     We use this code with BIA decisions. Previously Tax.
@@ -54,12 +54,13 @@ def update_document_from_text(

     :param opinion: Opinion object
     :param juriscraper_module: full module to get Site object
-    :return: None
+    :return: the extracted data dictionary
     """
     court = opinion.cluster.docket.court.pk
     site = get_scraper_object_by_name(court, juriscraper_module)
     if site is None:
-        return
+        logger.debug("No site found %s", juriscraper_module)
+        return {}

     metadata_dict = site.extract_from_text(opinion.plain_text or opinion.html)
     for model_name, data in metadata_dict.items():
@@ -70,7 +71,8 @@ def update_document_from_text(
             opinion.cluster.__dict__.update(data)
         elif model_name == "Citation":
             data["cluster_id"] = opinion.cluster_id
-            ModelClass.objects.get_or_create(**data)
+            _, citation_created = ModelClass.objects.get_or_create(**data)
+            metadata_dict["Citation"]["citation_created"] = citation_created
         elif model_name == "Opinion":
             opinion.__dict__.update(data)
         else:
@@ -78,6 +80,8 @@ def update_document_from_text(
             f"Object type of {model_name} not yet supported."
         )

+    return metadata_dict
+

 @app.task(
     bind=True,
diff --git a/cl/scrapers/test_assets/test_opinion_scraper.py b/cl/scrapers/test_assets/test_opinion_scraper.py
index 508be0dfec..18a28d71de 100644
--- a/cl/scrapers/test_assets/test_opinion_scraper.py
+++ b/cl/scrapers/test_assets/test_opinion_scraper.py
@@ -1,3 +1,4 @@
+import re
 from datetime import datetime
 from os.path import join
@@ -53,3 +54,23 @@ def _get_nature_of_suit(self):
     def _get_judges(self):
         path = "//judge/text()"
         return list(self.html.xpath(path))
+
+    def extract_from_text(self, scraped_text):
+        metadata = {}
+        docket_regex = r"Docket Number: (?P<docket>\d+-\d+)"
+        disposition_regex = r"Disposition: (?P<disposition>\w+)"
+        citation_regex = r"(?P<volume>20\d{2}) (?P<reporter>VT) (?P<page>\d+)"
+        if docket_match := re.search(docket_regex, scraped_text):
+            metadata["Docket"] = {
+                "docket_number": docket_match.group("docket")
+            }
+
+        if disposition_match := re.search(disposition_regex, scraped_text):
+            metadata["OpinionCluster"] = {
+                "disposition": disposition_match.group("disposition")
+            }
+
+        if citation_match := re.search(citation_regex, scraped_text):
+            metadata["Citation"] = {**citation_match.groupdict(), "type": 8}
+
+        return metadata
diff --git a/cl/scrapers/tests.py b/cl/scrapers/tests.py
index 375987426a..1d818d4f39 100644
--- a/cl/scrapers/tests.py
+++ b/cl/scrapers/tests.py
@@ -1,5 +1,5 @@
 import os
-from datetime import datetime, timedelta
+from datetime import date, datetime, timedelta
 from http import HTTPStatus
 from pathlib import Path
 from unittest import TestCase, mock
@@ -30,6 +30,7 @@
     cl_back_scrape_citations,
     cl_scrape_opinions,
     cl_scrape_oral_arguments,
+    update_from_text,
 )
 from cl.scrapers.models import UrlHash
 from cl.scrapers.tasks import extract_doc_content, process_audio_file
@@ -867,3 +868,110 @@ def test_federal_jurisdictions(self):
         self.assertEqual(
             docket, self.ca2_docket, "Should match using docket number core"
         )
+
+
+class UpdateFromTextCommandTest(TestCase):
+    """Test the input processing and DB querying for the command"""
+
+    def setUp(self):
+        self.vt = CourtFactory(id="vt")
+        self.sc = CourtFactory(id="sc")
+        self.docket_sc = DocketFactory(court=self.sc, docket_number="20")
+
+        # Different dates, status and courts to test command behaviour
+        self.opinion_2020 = OpinionFactory(
+            cluster=OpinionClusterFactory(
+                docket=DocketFactory(court=self.vt, docket_number="12"),
+                date_filed=date(2020, 6, 1),
+                precedential_status="Published",
+            ),
+            plain_text="""Docket Number: 2020-12
+            Disposition: Affirmed
+            2020 VT 11""",
+        )
+        self.opinion_2020_unpub =
OpinionFactory( + cluster=OpinionClusterFactory( + docket=DocketFactory(court=self.vt, docket_number="13"), + date_filed=date(2020, 7, 1), + precedential_status="Unpublished", + ), + plain_text="Docket Number: 2020-13\nDisposition: Affirmed", + ) + + self.opinion_sc = OpinionFactory( + cluster=OpinionClusterFactory( + docket=self.docket_sc, + date_filed=date(2021, 6, 1), + precedential_status="Published", + ), + plain_text="Some text with no matches", + id=101, + ) + + self.opinion_2022 = OpinionFactory( + cluster=OpinionClusterFactory( + docket=DocketFactory(court=self.vt, docket_number="13"), + date_filed=date(2022, 6, 1), + precedential_status="Unpublished", + ), + id=100, + plain_text="Docket Number: 2022-13\n2022 VT 11", + ) + + def test_inputs(self): + """Do all command inputs work properly?""" + + # will target a single opinion, for which extract_from_text + # extracts no metadata. No object should be updated + cmd = update_from_text.Command() + with mock.patch( + "cl.scrapers.tasks.get_scraper_object_by_name", + return_value=test_opinion_scraper.Site(), + ): + cmd.handle(juriscraper_module="somepath.sc", opinion_ids=[101]) + + self.assertFalse( + any(cmd.stats.values()), "No object should be modified" + ) + + # will target 1 opinion, there are 2 in the time period + # and 3 for the court + with mock.patch( + "cl.scrapers.tasks.get_scraper_object_by_name", + return_value=test_opinion_scraper.Site(), + ): + update_from_text.Command().handle( + juriscraper_module="somepath.vt", + opinion_ids=[], + date_filed_gte="2020/06/01", + date_filed_lte="2021/06/01", + cluster_status="Published", + ) + + # Test that objects were actually updated / created + self.assertEqual( + Citation.objects.filter(cluster=self.opinion_2020.cluster).count(), + 1, + "There should be a single citation for this cluster", + ) + self.opinion_2020.refresh_from_db() + self.opinion_2020.cluster.refresh_from_db() + self.opinion_2020.cluster.docket.refresh_from_db() + self.assertEqual( + self.opinion_2020.cluster.disposition, + "Affirmed", + "OpinionCluster.disposition was not updated", + ) + self.assertEqual( + self.opinion_2020.cluster.docket.docket_number, + "2020-12", + "Docket.docket_number was not updated", + ) + + # Check that other objects in the time period and court + # were not modified. 
Meaning, the filter worked + self.assertEqual( + self.opinion_2020_unpub.cluster.docket.docket_number, + "13", + "Unpublished docket should not be modified", + ) From 4cb004fe9fe82c34cd6061efc3908ff2bb5140b2 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 17 Oct 2024 15:30:11 -0400 Subject: [PATCH 002/143] feat(new_ui): NEW HTML and CSS and JS --- cl/assets/static-global/css/override.css | 706 +++++++++++++++++- cl/assets/static-global/js/base.js | 271 ++++++- .../includes/add_download_button.html | 46 ++ .../templates/includes/add_note_button.html | 2 +- .../templates/includes/opinion_tabs.html | 337 +++++++++ cl/opinion_page/templates/opinion.html | 2 +- cl/opinion_page/templates/opinions.html | 346 +++++++++ cl/search/models.py | 32 + 8 files changed, 1735 insertions(+), 7 deletions(-) create mode 100644 cl/opinion_page/templates/includes/add_download_button.html create mode 100644 cl/opinion_page/templates/includes/opinion_tabs.html create mode 100644 cl/opinion_page/templates/opinions.html diff --git a/cl/assets/static-global/css/override.css b/cl/assets/static-global/css/override.css index 7a27e9f08f..32c21672a1 100644 --- a/cl/assets/static-global/css/override.css +++ b/cl/assets/static-global/css/override.css @@ -155,7 +155,30 @@ header { /* Standard target color. */ *:target { - background-color: lightyellow; + -webkit-animation: target-fade 3s; + -moz-animation: target-fade 3s; + -o-animation: target-fade 3s; + animation: target-fade 3s; +} + +@-webkit-keyframes target-fade { + from { background-color: lightyellow; } + to { background-color: transparent; } +} + +@-moz-keyframes target-fade { + from { background-color: lightyellow; } + to { background-color: transparent; } +} + +@-o-keyframes target-fade { + from { background-color: lightyellow; } + to { background-color: transparent; } +} + +@keyframes target-fade { + from { background-color: lightyellow; } + to { background-color: transparent; } } .alt { @@ -1603,7 +1626,7 @@ textarea { /* Prevent images inside opinion from overflowing */ -#opinion-content img { +div.subopinion-content img { max-width: 100%; height: auto; } @@ -1723,3 +1746,682 @@ rect.series-segment { opacity 150ms 150ms ease-in; transform: translate3d(0, 0, 0); } + + + +/*Wrap all our changes around an opinion-body class we load up + in the opinion template*/ + +.opinion-body { + + #headmatter { + font-family: Merriweather, "Times New Roman", Times, serif; + font-size: 15px; + letter-spacing: 0.2px; + text-align: justify; + padding:0px; + margin: 0px; + background-color: white; + border: none; + + } + #headmatter > parties { + text-align: center; + font-style: initial; + font-size: 2em; + display: block; + } + #headmatter > div.footnotes > .footnote > p { + line-height: 1em; + } + + #headmatter > * { + text-indent: 2em; + } + + #headmatter docketnumber, + #headmatter court, + #headmatter parties, + #headmatter attorneys, + #headmatter syllabus, + #headmatter decisiondate { + display: block; + } + + #headmatter > div.footnotes { + border-top: None; + padding-top: 1em; + } + + .jump-links > a{ + position: relative; + margin: -8px 20px 0 0; + width: 140px; + line-height: 18px; + font-size: 14px; + cursor: pointer; + white-space: nowrap; + text-overflow: ellipsis; + opacity: 1; + } + + .hr-opinion { + border-top: 2px solid black; + } + + /*Clean up the Case Caption section to look large and clean*/ + .case-caption { + font-size: 3em; + font-weight: 500; + text-align: left; + line-height: 1.1em; + margin-top: 50px; + } + + + .case-court { + font-size: 25px; 
+    text-align: left;
+  }
+
+  /*Update sidebar jump links to look nice*/
+  .jump-links {
+    font-size: 12px;
+    padding-top: 5px;
+  }
+
+  li.jump-links {
+    height: 2.5em;
+    list-style-type: none;
+    padding-left: 0;
+    position: relative;
+  }
+
+  li.jump-links::before {
+    content: "";
+    border-left: 2px solid lightgrey;
+    height: 100%;
+    position: absolute;
+    left: 0;
+    top: 0;
+    padding-right: 8px;
+    display: inline-block;
+  }
+
+  li.jump-links.active {
+    color: #B53C2C;
+    font-weight: bold;
+  }
+
+  li.jump-links.active::before {
+    border-left: 2px solid #B53C2C;
+  }
+
+  /* Active link styles */
+  li.jump-links > a.active {
+    font-weight: 500;
+    color: black;
+  }
+
+  li.jump-links > a {
+    padding-left: 10px;
+    color: black;
+  }
+
+  div.footnote:first-of-type {
+    border-top: 1px solid black;
+    width: 100%;
+    display: block;
+  }
+
+  /*Columbia specific fix*/
+  /*Columbia/HTML Law box special footnotes data almost always starts with fn1*/
+  footnote_body sup#fn1 {
+    padding-top: 10px;
+    border-top: 1px solid black;
+    width: 100%;
+    display: block;
+  }
+
+  /*HTML law box page numbers*/
+  strong[data-ref] {
+    font-size: 0.8em;
+    font-style: italic;
+  }
+
+  strong[data-ref]::before {
+    content: attr(data-ref);
+    display: inline;
+    position: relative;
+    float: right;
+    left: -.5em;
+    font-size: 0.8em;
+    color: dimgray;
+    width: 0;
+  }
+
+  div.footnote {
+    padding-top: 10px;
+    display: block;
+    line-height: 1em;
+    font-size: 12px;
+  }
+
+  div.footnote > p {
+    display: inline;
+  }
+
+  div.footnote::before {
+    content: attr(label) " ";
+    font-weight: bold;
+    color: #000;
+    margin-right: 5px;
+    padding-top: 2em;
+  }
+
+  div.footnote > * {
+    padding-top: 10px;
+    font-size: 12px;
+  }
+
+  /*To help separate footnotes from the opinion document*/
+  footnote:first-of-type {
+    border-top: 1px solid black;
+    width: 100%;
+    display: block;
+  }
+
+  footnote {
+    padding-top: 10px;
+    display: block;
+    line-height: 1.5em;
+    padding-left: 40px;
+    font-size: 12px;
+  }
+
+  footnote > p {
+    display: inline;
+  }
+
+  footnote::before {
+    content: attr(label);
+    font-weight: bold;
+    color: #000;
+    margin-right: 26px;
+    padding-top: 2em;
+    margin-left: -35px;
+  }
+
+  /*Handle CSS in Columbia opinions*/
+  footnotemark {
+    font-weight: bold;
+    font-size: 0.8em;
+    vertical-align: super;
+    line-height: 0;
+    cursor: pointer;
+    color: blue;
+    text-decoration: underline;
+  }
+
+  #cited-by {
+    z-index: 1;
+  }
+
+  .jumpback {
+    color: blue;
+    cursor: pointer;
+    font-weight: bold;
+    margin-left: 5px;
+  }
+
+  footnote > * {
+    font-size: 12px;
+  }
+
+  author > page-number {
+    display: block;
+    font-size: 15px;
+  }
+
+  author {
+    display: inline;
+    margin: 0; /* Remove any default margin */
+    text-indent: 2em; /* Indents the first line by 2em */
+  }
+
+  /*Important for indenting harvard opinions correctly*/
+  opinion > p[id^="b"] {
+    text-indent: 2em;
+  }
+
+  opinion > [id^="p-"] {
+    padding-left: 2em;
+    text-indent: 2em;
+  }
+}
+
+[id^="A"] {
+  text-indent: 2em;
+  display: inline;
+}
+
+.opinion-body {
+  /* Hiding inactive tab panes is handled by Bootstrap's .tab-pane rules,
+     so only the active state needs styling here. */
+  .tab-pane.active {
+    display: block;
+  }
+
+  @media (min-width: 767px) {
+    #sidebar {
+      display: flex;
+      flex-direction: column;
+      height: 100vh;
+      justify-content: space-between; /* Push content apart */
+      padding: 20px;
+      padding-top: 3px;
+      overflow-y: auto;
+      position: -webkit-sticky; /* For Safari */
+      position: sticky;
+      top: 0; /* Stick to the top of the viewport */
+    }
+  }
+
+  @media (max-width: 767px) {
+    #sidebar {
+      height: auto;
+    }
+  }
+
+  .sidebar-bottom {
+    margin-top: auto;
+  }
+
+  .support-flp, .sponsored-by {
+    margin-bottom: 20px;
+    text-align: center;
+  }
+
+  #opinion > article > * > p {
+    text-indent: 2em;
+  }
+
+  .active > a {
+    border-bottom-color: #B53C2C;
+  }
+
+  #opinion p {
+    text-indent: 2em;
+  }
+
+  .nav-pills > li > a {
+    padding: 1px 15px;
+  }
+
+  blockquote > * {
+    text-indent: 0em;
+  }
+
+  sup {
+    font-size: .9em;
+  }
+
+  .main-document {
+    padding-bottom: 5em;
+  }
+
+  /*Case Caption CSS*/
+  #caption-square {
+    background-color: #F6F2EE;
+    margin-left: -15px;
+    margin-right: -15px;
+    margin-top: -20px;
+  }
+
+  #caption-square > ul > li {
+    background-color: #fcfaf9;
+    border-top-right-radius: 5px 5px; /* Rounds the corners */
+    border-top-left-radius: 5px 5px; /* Rounds the corners */
+    margin-left: 4px;
+  }
+
+  #caption-square > ul > li.active {
+    background-color: #ffffff;
+    border-bottom: 1px solid white;
+  }
+
+  #caption-square > ul > li.active > a {
+    border: 1px solid white;
+  }
+
+  /*Opinion Date Filed*/
+  .case-date-new {
+    border: 1px solid #B53C2C;
+    border-radius: 20px; /* Rounds the corners */
+    padding: 5px;
+    padding-left: 8px;
+    padding-right: 8px;
+    padding-top: 8px;
+    color: #B53C2C;
+  }
+
+  /*Buttons on Top of Page*/
+  .add-a-note {
+    margin-left: 5px;
+    border: 1px solid black;
+    border-radius: 10px;
+    padding-left: 8px;
+    padding-right: 8px;
+  }
+
+  .add-citation-alert {
+    border: 1px solid black;
+    border-radius: 10px;
+    padding-left: 8px;
+    padding-right: 8px;
+  }
+
+  cross_reference {
+    font-style: italic;
+  }
+
+  #opinion-caption {
+    margin-top: 20px;
+    font-family: Merriweather, "Times New Roman", Times, serif;
+    font-size: 15px;
+    letter-spacing: 0.2px;
+    line-height: 2.3em;
+    margin-bottom: 20px;
+    padding-left: 20px;
+    padding-top: 10px;
+    padding-right: 10px;
+  }
+
+  .case-details {
+    font-size: 16px;
+  }
+
+  .case-details li {
+    line-height: 1.5em;
+  }
+
+  span.citation.no-link {
+    font-style: italic;
+  }
+
+  .opinion-button-row {
+    padding-top: 40px;
+  }
+
+  #download-original {
+    color: black;
+    border-color: black;
+    background-color: white;
+    vertical-align: top;
+    float: right;
+    display: block;
+  }
+
+  #btn-group-download-original {
+    float: right;
+    margin-top: 0px;
+    margin-left: 10px;
+    padding-right: 10px;
+  }
+
+  #add-note-button {
+    color: black;
+    border-color: black;
+    background-color: white;
+    vertical-align: top;
+    float: right;
+  }
+
+  #get-citation-btn-group {
+    float: right;
+  }
+
+  #get-citation-btn-group > a {
+    color: black;
+    border-color: black;
+    background-color: white;
+    vertical-align: top;
+  }
+
+  p > span.star-pagination::after {
+    display: inline;
+    position: relative;
+    content: attr(label);
+    float: left;
+    left: -4.5em;
+    font-size: 1em;
+    color: dimgray;
+    width: 0;
+  }
+
+  div > span.star-pagination::after {
+    display: inline;
+    position: relative;
+    content: attr(label);
+    float: left;
+    left: -2.5em;
+    font-size: 1em;
+    color: dimgray;
+    width: 0;
+  }
+
+  div.subopinion-content > .harvard {
+    font-family: Merriweather, "Times New Roman", Times, serif;
+    font-size: 15px;
+    letter-spacing: 0.2px;
+    line-height: 2.3em;
+    text-align: justify;
+  }
+
+  #columbia-text {
+    font-family: Merriweather, "Times New Roman", Times, serif;
+    font-size: 15px;
+    letter-spacing: 0.2px;
+    line-height: 2.3em;
+    text-align: justify;
+  }
+
+  #columbia-text > div.subopinion-content > div > p > span.star-pagination {
+    color: #555555;
+  }
+
+  #columbia-text > div.subopinion-content > div > p > span.star-pagination::after {
+    display: inline;
+    position: relative;
+    content: attr(label);
+    float: left;
+    left: -4.5em;
+    font-size: 1em;
+    color: dimgray;
+    width: 0;
+  }
+
+  page-number::after {
+    display: inline;
+    position: relative;
+    content: attr(label);
+    float: right;
+    font-size: 1em;
+    color: dimgray;
+    width: 0;
+  }
+
+  page-number {
+    font-style: italic;
+    font-size: 0.8em;
+    margin-right: 4px;
+    margin-left: 2px;
+  }
+
+  a.page-label {
+    font-style: italic;
+    font-size: 0.8em;
+    margin-right: 4px;
+    margin-left: 2px;
+    color: #555555;
+  }
+
+  a.page-label::after {
+    display: inline;
+    position: relative;
+    content: attr(data-label);
+    float: right;
+    font-size: 1em;
+    color: dimgray;
+    width: 0;
+  }
+
+  footnote > blockquote > a.page-label::after {
+    right: -2.5em;
+  }
+
+  blockquote[id^="A"] > a.page-label::after {
+    right: -2.5em;
+  }
+
+  blockquote[id^="b"] > a.page-label::after {
+    right: -4.0em;
+  }
+
+  opinion > a.page-label::after {
+    right: -2.5em;
+  }
+
+  /* Adjust to move the entire blockquote to the right */
+  blockquote {
+    margin-left: 3em;
+  }
+
+  footnote > p > a.page-label::after {
+    display: none;
+  }
+
+  footnote > blockquote > a.page-label::after {
+    display: none;
+  }
+
+  /*Remove the header on the opinion page so it's flush*/
+  header {
+    margin-bottom: 0px;
+  }
+
+  .harvard > opinion > author {
+    line-height: inherit;
+    font-size: inherit;
+    display: inline-block;
+  }
+
+  .container > .content {
+    margin-bottom: 0em;
+  }
+
+  .meta-data-header {
+    font-size: 15px;
+  }
+
+  .case-details {
+    font-family: Merriweather, "Times New Roman", Times, serif;
+    letter-spacing: 0.2px;
+    line-height: 2.3em;
+  }
+
+  .opinion-section-title {
+    margin-top: 50px;
+    font-family: Merriweather, "Times New Roman", Times, serif;
+  }
+
+  /*Add style to align roman numerals */
+  .center-header {
+    text-align: center;
+    font-size: 2em;
+  }
+
+  /*If XS screen - remove the side page labels*/
+  @media (max-width: 768px) {
+    a.page-label::after {
+      display: none;
+    }
+    a.page-number::after {
+      display: none;
+    }
+  }
+}
+
+html {
+  scroll-behavior: smooth;
+}
diff --git a/cl/assets/static-global/js/base.js b/cl/assets/static-global/js/base.js
index 99355aa207..31713c0df5 100644
--- a/cl/assets/static-global/js/base.js
+++ b/cl/assets/static-global/js/base.js
@@ -307,11 +307,8 @@ $(document).ready(function () {
   if (modal_exist) {
     $('#open-modal-on-load').modal();
   }
-
 });
-
-
 // Debounce - rate limit a function
 // https://davidwalsh.name/javascript-debounce-function
 function debounce(func, wait, immediate) {
@@ -369,3 +366,271 @@ if (form && button) {
     button.disabled = true;
   });
 }
+
+//////////////////
+// SCOTUS STYLE //
+//////////////////
+
+document.querySelectorAll('p').forEach(function (element) {
+  // Center likely Roman-numeral headers; this improves SCOTUS opinions
+  if (element.textContent.trim().length < 5) {
+    element.classList.add('center-header');
+  }
+});
+
+////////////////
+// Pagination //
+////////////////
+
+$('.star-pagination').each(function (index, element) {
+  $(this).attr('label', this.textContent.trim().replace('*Page ', ''));
+});
+
+// Systematize page numbers
+$('page-number').each(function (index, element) {
+  // Get the label and citation index from the current element
+  const label = $(this).attr('label');
+  const citationIndex = $(this).attr('citation-index');
+
+  // Clean up the label (remove '*') and use it for the new href and id
+  const cleanLabel = label.replace('*', '').trim();
+
+  // Create the new anchor element
+  const $newAnchor = $('<a></a>')
+    .addClass('page-label')
+    .attr('data-citation-index', citationIndex)
+    .attr('data-label', cleanLabel)
+    .attr('href', '#' + cleanLabel)
+    .attr('id', cleanLabel)
+    .text('*' + cleanLabel);
+
+  // Replace the page-number element with the new anchor
+  $(this).replaceWith($newAnchor);
+});
+
+// Systematize page numbers
+$('span.star-pagination').each(function (index, element) {
+  // Get the label and citation index from the current element
+  const label = $(this).attr('label');
+  const citationIndex = $(this).attr('citation-index');
+
+  // Clean up the label (remove '*') and use it for the new href and id
+  const cleanLabel = label.replace('*', '').trim();
+
+  // Create the new anchor element
+  const $newAnchor = $('<a></a>')
+    .addClass('page-label')
+    .attr('data-citation-index', citationIndex)
+    .attr('data-label', cleanLabel)
+    .attr('href', '#' + cleanLabel)
+    .attr('id', cleanLabel)
+    .text('*' + cleanLabel);
+
+  // Replace the star-pagination span with the new anchor
+  $(this).replaceWith($newAnchor);
+});
+
+// Fix weird data-ref bug
+document.querySelectorAll('strong').forEach((el) => {
+  if (/\[\d+\]/.test(el.textContent)) {
+    // Check if the text matches the pattern [XXX]
+    const match = el.textContent.match(/\[\d+\]/)[0]; // Get the matched pattern
+    el.setAttribute('data-ref', match); // Set a data-ref attribute
+  }
+});
+
+///////////////
+// Footnotes //
+///////////////
+
+// We formatted the harvard footnotes oddly when they appeared inside the
+// pre-opinion content. This removes the excess anchor tags and allows us to
+// standardize footnotes across our contents.
+// Footnote cleanup in harvard: update and modify footnotes to enable linking.
+$('div.footnote > a').remove();
+const headfootnotemarks = $('a.footnote');
+const divfootnotes = $('div.footnote');
+
+if (headfootnotemarks.length === divfootnotes.length) {
+  headfootnotemarks.each(function (index) {
+    const footnoteMark = $(this);
+    const footnote = divfootnotes.eq(index);
+
+    const $newElement = $('<footnotemark></footnotemark>');
+    // Copy attributes from the old element
+    $.each(footnoteMark[0].attributes, function () {
+      if (this.specified) {
+        $newElement.attr(this.name, this.value);
+      }
+    });
+    $newElement.html(footnoteMark.html());
+    footnoteMark.replaceWith($newElement);
+
+    const $newFootnote = $('<footnote></footnote>');
+    $.each(footnote[0].attributes, function () {
+      if (this.specified) {
+        $newFootnote.attr(this.name, this.value);
+      }
+    });
+    $newFootnote.attr('label', footnote.attr('label'));
+    $newFootnote.html(footnote.html());
+    footnote.replaceWith($newFootnote);
+  });
+}
+
+// This fixes many of the harvard footnotes so that they can easily link back
+// and forth - we have a second set of harvard footnotes inside headnotes
+// that need to be parsed out now.
+
+const footnoteMarks = $('footnotemark');
+const footnotes = $('footnote').not('[orphan="true"]');
+
+if (footnoteMarks.length === footnotes.length) {
+  // we can make this work
+  footnoteMarks.each(function (index) {
+    const footnoteMark = $(this);
+    const $newElement = $('<a></a>');
+    // Copy attributes from the old element
+    $.each(footnoteMark[0].attributes, function () {
+      if (this.specified) {
+        $newElement.attr(this.name, this.value);
+      }
+    });
+    $newElement.html(footnoteMark.html());
+    const $supElement = $('<sup></sup>').append($newElement);
+    footnoteMark.replaceWith($supElement);
+    const footnote = footnotes.eq(index);
+    $newElement.attr('href', `#fn${index}`);
+    $newElement.attr('id', `fnref${index}`);
+    footnote.attr('id', `fn${index}`);
+
+    const $jumpback = $('<a class="jumpback">↵</a>');
+    $jumpback.attr('href', `#fnref${index}`);
+
+    footnote.append($jumpback);
+  });
+} else {
+  // If the number of footnotes and footnotemarks is inconsistent, use this
+  // method to scroll to the nearest one. We don't use it by default because
+  // many older opinions reuse *, ^ and other icons repeatedly on every page,
+  // so the label is not usable to identify the correct footnote.
+
+  footnotes.each(function (index) {
+    const $jumpback = $('<a class="jumpback">↵</a>');
+    $jumpback.attr('label', $(this).attr('label'));
+    $(this).append($jumpback);
+  });
+
+  // There is no silver bullet for footnotes
+  $('footnotemark').on('click', function () {
+    const markText = $(this).text().trim(); // Get the text of the clicked footnotemark
+    const currentScrollPosition = $(window).scrollTop(); // Get the current scroll position
+
+    // Find the first matching footnote below the current scroll position
+    const targetFootnote = $('footnote')
+      .filter(function () {
+        return $(this).attr('label') === markText && $(this).offset().top > currentScrollPosition;
+      })
+      .first();
+
+    // If a matching footnote is found, scroll to it
+    if (targetFootnote.length > 0) {
+      $('html, body').animate(
+        {
+          scrollTop: targetFootnote.offset().top,
+        },
+        500
+      ); // Adjust the animation duration as needed
+    } else {
+      console.warn('No matching footnote found below the current position for:', markText);
+    }
+  });
+
+  /////////////
+  // Sidebar //
+  /////////////
+
+  $('.jumpback').on('click', function () {
+    const footnoteLabel = $(this).attr('label').trim(); // Get the label attribute of the clicked footnote
+    const currentScrollPosition = $(window).scrollTop(); // Get the current scroll position
+
+    // Find the last matching footnotemark above the current scroll position
+    const targetFootnotemark = $('footnotemark')
+      .filter(function () {
+        return $(this).text().trim() === footnoteLabel && $(this).offset().top < currentScrollPosition;
+      })
+      .last();
+
+    // If a matching footnotemark is found, scroll to it
+    if (targetFootnotemark.length > 0) {
+      $('html, body').animate(
+        {
+          scrollTop: targetFootnotemark.offset().top,
+        },
+        500
+      ); // Adjust the animation duration as needed
+    } else {
+      console.warn('No matching footnotemark found above the current position for label:', footnoteLabel);
+    }
+  });
+}
+
+$(document).ready(function () {
+  function adjustSidebarHeight() {
+    if ($(window).width() > 767) {
+      // Only apply the height adjustment for screens wider than 767px
+      var scrollTop = $(window).scrollTop();
+      if (scrollTop <= 175) {
+        $('.opinion-sidebar').css('height', 'calc(100vh - ' + (175 -
scrollTop) + 'px)'); + // $('.main-document').css('height', 'calc(100vh + ' + (scrollTop) + 'px)'); + } else { + $('.opinion-sidebar').css('height', '100vh'); + } + } else { + $('.opinion-sidebar').css('height', 'auto'); // Reset height for mobile view + } + } + + // Adjust height on document ready and when window is scrolled or resized + adjustSidebarHeight(); + $(window).on('scroll resize', adjustSidebarHeight); +}); + +// Update sidebar to show where we are on the page +document.addEventListener('scroll', function () { + let sections = document.querySelectorAll('.jump-link'); + let links = document.querySelectorAll('.jump-links > a'); + let currentSection = ''; + + // Determine which section is currently in view + sections.forEach((section) => { + let sectionTop = section.offsetTop; + let sectionHeight = section.offsetHeight; + if (window.scrollY >= sectionTop - sectionHeight / 3) { + currentSection = section.getAttribute('id'); + } + }); + + // Remove the active class from all links and their parent elements + links.forEach((link) => { + link.classList.remove('active'); + if (link.parentElement) { + link.parentElement.classList.remove('active'); + } + }); + + // Add the active class to the link and its parent that corresponds to the current section + links.forEach((link) => { + if (link.getAttribute('href') === `#${currentSection}`) { + link.classList.add('active'); + if (link.parentElement) { + link.parentElement.classList.add('active'); + } + } + }); +}); diff --git a/cl/opinion_page/templates/includes/add_download_button.html b/cl/opinion_page/templates/includes/add_download_button.html new file mode 100644 index 0000000000..1d7a4d828e --- /dev/null +++ b/cl/opinion_page/templates/includes/add_download_button.html @@ -0,0 +1,46 @@ +
+ + +
diff --git a/cl/opinion_page/templates/includes/add_note_button.html b/cl/opinion_page/templates/includes/add_note_button.html index c5392897e8..fb5fdaac40 100644 --- a/cl/opinion_page/templates/includes/add_note_button.html +++ b/cl/opinion_page/templates/includes/add_note_button.html @@ -3,4 +3,4 @@ data-toggle="modal" data-target="#modal-save-note, #modal-logged-out" title="{% if form_instance_id %}Edit this note{% else %}Save this record as a note in your profile{% endif %}"> - {% if form_instance_id %}Edit Note{% else %}Add Note{% endif %} + diff --git a/cl/opinion_page/templates/includes/opinion_tabs.html b/cl/opinion_page/templates/includes/opinion_tabs.html new file mode 100644 index 0000000000..9a5334847d --- /dev/null +++ b/cl/opinion_page/templates/includes/opinion_tabs.html @@ -0,0 +1,337 @@ +{% load humanize %} +{% load text_filters %} + +{% if tab == "authorities" %} +{# Table of Authorities #} +
+ +
+
+ {% for authority in authorities_with_data %} +
+

+ + {{ authority.caption|safe|v_wrapper }} + +

+
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ +
+
+
+
+ {% endfor %} +
+
+{#{% elif tab == "details" %}#} +{# {% include "includes/tab_details.html" %}#} +{% elif tab == "summaries" %} + {# Summaries #} +
+ +
+
+
    + {% for group in parenthetical_groups %} + {% with representative=group.representative %} + {% with representative_cluster=representative.describing_opinion.cluster %} +
    +

    + + {{ representative_cluster|best_case_name|safe }} + +

    +
    +
    + + +
    +
    + + +
    +
    + + +
    +
    +
    +
  • + {{ representative.text|capfirst }} -- +
    + +
  • +
      + {% for summary in group.parentheticals.all %} + {% with describing_cluster=summary.describing_opinion.cluster %} + {% if summary != representative %} +
    • + {{ summary.text|capfirst }} +
      + {{ describing_cluster.date_filed }} + + + {{ describing_cluster|best_case_name|safe }} + + + {{ describing_cluster.docket.court }} +
    • + {% endif %} + {% endwith %} + {% endfor %} +
    + {% endwith %} + {% endwith %} + {% endfor %} +
+
+
+{% elif tab == "cited-by" %} + {# Cited By #} +
+ +
+ + {% if citing_cluster_count > 0 %} + {% for citing_cluster in citing_clusters %} + + {% endfor %} + {% else %} +

This case has not yet been cited in our system.

+ {% endif %} + +
+

+ View Citing Opinions +

+
+ +{% elif tab == "related-cases" %} + {# Related Cases #} + + +{% elif tab == "pdf" %} + {# PDF #} +
+
+ +
+
+
+ +
+
+
+

Oops! Your browser does not support embedded PDF viewing.

+
+ {% include "includes/rd_download_button.html" %} +
+
+
+
+
+
+{% else %} + + {# The section of the document I refer to as headmatter goes here #} +
+
+ {% with opinion_count=cluster.sub_opinions.all.count %} + {% if cluster.headnotes %} + +
+

{{ cluster.headnotes|safe }}

+ {% endif %} + + {% if cluster.headmatter %} + +
+
+ {{ cluster.headmatter|safe }} +
+ {% endif %} + + {% for sub_opinion in cluster.ordered_opinions %} + +
+ + {% if 'U' in cluster.source %} +
+ {% elif 'Z' in cluster.source %} +
+ {% elif 'L' in cluster.source %} +
+ {% elif 'R' in cluster.source %} +
+ {% else %} +
+ {% endif %} + +
+ {% if sub_opinion.xml_harvard and sub_opinion.html_with_citations %} +
{{ sub_opinion.html_with_citations|safe }}
+ {% elif sub_opinion.xml_harvard %} +
{{ sub_opinion.xml_harvard|safe }}
+ {% elif sub_opinion.html_with_citations %} + {% if cluster.source == "C" %} + {# It's a PDF with no HTML enrichment#} +
{{ sub_opinion.html_with_citations|safe|linebreaksbr }}
+ {% else %} +
{{ sub_opinion.html_with_citations|safe }}
+ {% endif %} + {% elif sub_opinion.html_columbia %} +
{{ sub_opinion.html_columbia|safe }}
+ {% elif sub_opinion.html_lawbox %} +
{{ sub_opinion.html_lawbox|safe }}
+ {% elif sub_opinion.html_anon_2020 %} +
{{ sub_opinion.html_anon_2020|safe }}
+ {% elif sub_opinion.html %} +
{{sub_opinion.html|safe}}
+ {% else %} +
{{sub_opinion.plain_text}}
+ {% endif %} +
+ + {% endfor %} + {% endwith %} +
+
+ +{% endif %} \ No newline at end of file diff --git a/cl/opinion_page/templates/opinion.html b/cl/opinion_page/templates/opinion.html index 16a33820fd..a0c4c797c7 100644 --- a/cl/opinion_page/templates/opinion.html +++ b/cl/opinion_page/templates/opinion.html @@ -100,7 +100,7 @@

Summaries ({{ summaries_count|intcomma }})

{% endfor %}

- View All Summaries diff --git a/cl/opinion_page/templates/opinions.html b/cl/opinion_page/templates/opinions.html new file mode 100644 index 0000000000..a32f1d0042 --- /dev/null +++ b/cl/opinion_page/templates/opinions.html @@ -0,0 +1,346 @@ +{% extends "base.html" %} +{% load extras %} +{% load humanize %} +{% load static %} +{% load text_filters %} + + +{% block canonical %}{% get_canonical_element %}{% endblock %} +{% block title %}{{ title }} – CourtListener.com{% endblock %} +{% block og_title %}{{ title }} – CourtListener.com{% endblock %} +{% block description %}{{ title }} — Brought to you by Free Law Project, a non-profit dedicated to creating high quality open legal information.{% endblock %} +{% block og_description %}{{ cluster|best_case_name }}{% if summaries_count > 0 %} — {{ top_parenthetical_groups.0.representative.text|capfirst }}{% else %} — Brought to you by Free Law Project, a non-profit dedicated to creating high quality open legal information.{% endif %} +{% endblock %} + +{% block head %} + +{% endblock %} + +{% block navbar-o %}active{% endblock %} + + +{% block sidebar %} + {% with sponsored_logo=STATIC_URL|add:'img/vlex-logo-150-75.png' %} + + + + {% endwith %} +{% endblock %} + +{% block body-classes %}opinion-body{% endblock %} + +{% block content %} + +
+
+ +
+ {{ cluster.date_filed }} + {% include "includes/add_note_button.html" with form_instance_id=note_form.instance.cluster_id %} + + {% if pdf_path %} + {% include "includes/add_download_button.html" %} + {% endif %} + + + + + +

{{ cluster.docket.court }}

+
+
+
    +
  • Citations: {{ cluster.citation_string|default:"None known" }}
  • + + {% if cluster.case_name_full != cluster.case_name and cluster.case_name_full != "" %} +
  • Full Case Name: + {{ cluster.case_name_full }} +
  • + {% endif %} + + {% if cluster.docket.court_id != "olc" %} +
  • Docket Number: {{ cluster.docket.docket_number|default:"Unknown" }}
  • + {% endif %} + + {% if cluster.get_precedential_status_display != "Precedential" %} +
  • Precedential Status: {{ cluster.get_precedential_status_display|default:"Unknown" }}
  •
+      {% endif %}
+
+      {% if cluster.docket.court_id == 'scotus' and cluster.scdb_id %}
+
  • Supreme Court DB ID: + + {{ cluster.scdb_id }} + +
  • + {% endif %} + + {% if cluster.panel.all.count > 0 %} +
  • Panel: + {% for p in cluster.panel.all %} + {{ p.name_full }}{% if not forloop.last %}, {% endif %} + {% endfor %} +
  • + {% endif %} + + {% if cluster.judges %} +
  • Judges: {{ cluster.judges }}
  • + {% endif %} + + {% if opinion.author %} +
  • Author: {{ opinion.author.name_full }}
  • + {% endif %} + + {% if opinion.joined_by.all.count > 0 %} +
  • Joined By: + {% for p in opinion.joined_by.all %} + {{ p.name_full }}{% if not forloop.last %}, {% endif %} + {% endfor %} +
  • + {% endif %} + + {% if cluster.nature_of_suit %} +
  • Nature of Suit: {{ cluster.nature_of_suit }}
  •
+      {% endif %}
+
+      {% if cluster.posture %}
+
  • Posture: {{ cluster.posture }}
  •
+      {% endif %}
+
+      {% if cluster.other_dates %}
+
  • Other Dates: {{ cluster.other_dates }}
  • + {% endif %} + + {% if cluster.disposition %} +
  • Disposition: {{ cluster.disposition }}
  • + {% endif %} +
+
+
+ + +
+ {% include "includes/opinion_tabs.html" %} + {% include "includes/notes_modal.html" %} + +
+{% endblock %} + + +{% block footer-scripts %} + + + {% if request.user.is_staff %} + + {% if DEBUG %} + + {% else %} + + {% endif %} + {% endif %} +{% endblock %} diff --git a/cl/search/models.py b/cl/search/models.py index a0c808f3d3..3bacd929ab 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2852,6 +2852,26 @@ def caption(self): caption += f" ({court} {year})" return caption + @property + def display_citation(self): + citation_list = [citation for citation in self.citations.all()] + citations = sorted(citation_list, key=sort_cites) + citation = "" + if not citations: + return "" + else: + if citations[0].type == Citation.NEUTRAL: + return citations[0] + elif ( + len(citations) >= 2 + and citations[0].type == Citation.WEST + and citations[1].type == Citation.LEXIS + ): + citation += f"{citations[0]}, {citations[1]}" + else: + citation += f"{citations[0]}" + return citation + @property def citation_string(self): """Make a citation string, joined by commas""" @@ -2991,6 +3011,18 @@ def __str__(self) -> str: def get_absolute_url(self) -> str: return reverse("view_case", args=[self.pk, self.slug]) + def ordered_opinions(self): + # Fetch all sub-opinions ordered by ordering_key + sub_opinions = self.sub_opinions.all().order_by("ordering_key") + + # Check if there is more than one sub-opinion + if sub_opinions.count() > 1: + # Return only sub-opinions with an ordering key + return sub_opinions.exclude(ordering_key__isnull=True) + + # If there's only one or no sub-opinions, return the main opinion + return sub_opinions + def save( self, update_fields=None, From 2ac21c6b3033d1ba97b3daa00d4c4c36bb514683 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 17 Oct 2024 15:31:12 -0400 Subject: [PATCH 003/143] feat(opinion.urls): Add/remove new endpoints Add multiple tab specific endpoints --- cl/opinion_page/urls.py | 48 ++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/cl/opinion_page/urls.py b/cl/opinion_page/urls.py index 5e7a9e1a54..be8c9214d8 100644 --- a/cl/opinion_page/urls.py +++ b/cl/opinion_page/urls.py @@ -12,14 +12,18 @@ download_docket_entries_csv, redirect_docket_recap, redirect_og_lookup, - view_authorities, view_docket, view_docket_feed, view_opinion, + view_opinion_authorities, + view_opinion_cited_by, + view_opinion_details, + view_opinion_pdf, + view_opinion_related_cases, + view_opinion_summaries, view_parties, view_recap_authorities, view_recap_document, - view_summaries, ) urlpatterns = [ @@ -31,16 +35,6 @@ name="court_publish_page", ), # Opinion pages - path( - "opinion///summaries/", - view_summaries, # type: ignore[arg-type] - name="view_summaries", - ), - path( - "opinion///authorities/", - view_authorities, # type: ignore[arg-type] - name="view_authorities", - ), path( "opinion///visualizations/", cluster_visualizations, # type: ignore[arg-type] @@ -52,6 +46,36 @@ name="docket_feed", ), path("opinion///", view_opinion, name="view_case"), # type: ignore[arg-type] + path( + "opinion///details/", + view_opinion_details, + name="view_case_details", + ), # with the tab + path( + "opinion///authorities/", + view_opinion_authorities, + name="view_case_authorities", + ), # with the tab + path( + "opinion///cited-by/", + view_opinion_cited_by, + name="view_case_cited_by", + ), # with the tab + path( + "opinion///summaries/", + view_opinion_summaries, + name="view_case_summaries", + ), # with the tab + path( + "opinion///related-cases/", + view_opinion_related_cases, + name="view_case_related_cases", + ), # 
with the tab + path( + "opinion///pdf/", + view_opinion_pdf, + name="view_case_pdf", + ), # with the tab path( "docket//download/", download_docket_entries_csv, # type: ignore[arg-type] From c260b60bd654a55e58c33b81d1c5044da2e6f4b2 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 17 Oct 2024 15:32:59 -0400 Subject: [PATCH 004/143] feat(opinion.views): Create new view methods Rewrite and waffle the new UI changes Added a number of methods to fetch and/or store related and cited by data quickly Implemented new view opinion with waffles --- cl/opinion_page/utils.py | 305 ++++++++++++++++++++++++++++++++++++- cl/opinion_page/views.py | 314 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 609 insertions(+), 10 deletions(-) diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py index 160453bb1f..b8d5e581dc 100644 --- a/cl/opinion_page/utils.py +++ b/cl/opinion_page/utils.py @@ -157,8 +157,19 @@ async def build_cites_clusters_query( cluster_cites_query = cluster_search.query(cites_query) search_query = ( cluster_cites_query.sort({"citeCount": {"order": "desc"}}) - .source(includes=["absolute_url", "caseName", "dateFiled"]) - .extra(size=5, track_total_hits=True) + .source( + includes=[ + "absolute_url", + "caseName", + "cluster_id", + "docketNumber", + "citation", + "status", + "dateFiled", + ] + ) + .extra(size=20, track_total_hits=True) + .collapse(field="cluster_id") ) return search_query @@ -192,8 +203,18 @@ async def build_related_clusters_query( cluster_related_query = cluster_search.query(main_query) search_query = ( cluster_related_query.sort({"_score": {"order": "desc"}}) - .source(includes=["absolute_url", "caseName", "cluster_id"]) - .extra(size=5) + .source( + includes=[ + "absolute_url", + "caseName", + "cluster_id", + "docketNumber", + "citations", + "status", + "dateFiled", + ] + ) + .extra(size=20) .collapse(field="cluster_id") ) return search_query @@ -211,6 +232,202 @@ class RelatedCitingResults: timeout: bool = False +@dataclass +class RelatedClusterResults: + related_clusters: list[OpinionClusterDocument] = field( + default_factory=list + ) + sub_opinion_pks: list[int] = field(default_factory=list) + url_search_params: dict[str, str] = field(default_factory=dict) + timeout: bool = False + has_related_cases: bool = False + + +async def es_get_related_clusters_with_cache( + cluster: OpinionCluster, + request: HttpRequest, +) -> RelatedClusterResults: + """Elastic Related Clusters Search or Cache + + :param cluster:The cluster to use + :param request:The user request + :return:Related Cluster Data + """ + cache = caches["db_cache"] + mlt_cache_key = f"clusters-mlt-es:{cluster.pk}" + # By default, all statuses are included. Retrieve the PRECEDENTIAL_STATUS + # attributes (since they're indexed in ES) instead of the NAMES values. 
search_params: CleanData = {}
+    url_search_params = {
+        f"stat_{v[0]}": "on" for v in PRECEDENTIAL_STATUS.NAMES
+    }
+    sub_opinion_pks = [
+        str(pk)
+        async for pk in cluster.sub_opinions.values_list("pk", flat=True)
+    ]
+    if settings.RELATED_FILTER_BY_STATUS:
+        # Filter results by status (e.g., Precedential)
+        # Update URL parameters accordingly
+        search_params[
+            f"stat_{PRECEDENTIAL_STATUS.get_status_value(settings.RELATED_FILTER_BY_STATUS)}"
+        ] = True
+        url_search_params = {
+            f"stat_{PRECEDENTIAL_STATUS.get_status_value(settings.RELATED_FILTER_BY_STATUS)}": "on"
+        }
+
+    related_cluster_result = RelatedClusterResults(
+        url_search_params=url_search_params
+    )
+
+    if is_bot(request) or not sub_opinion_pks:
+        return related_cluster_result
+
+    cached_related_clusters, timeout_related = (
+        await cache.aget(mlt_cache_key) or (None, False)
+        if settings.RELATED_USE_CACHE
+        else (None, False)
+    )
+
+    if cached_related_clusters is not None:
+        related_cluster_result.related_clusters = cached_related_clusters
+        related_cluster_result.timeout = timeout_related
+        related_cluster_result.has_related_cases = bool(
+            cached_related_clusters
+        )
+        return related_cluster_result
+
+    # No cached results: prepare and run the related clusters query.
+    cluster_search = OpinionClusterDocument.search()
+    related_query = await build_related_clusters_query(
+        cluster_search, sub_opinion_pks, search_params
+    )
+
+    related_query = related_query.params(
+        timeout=f"{settings.ELASTICSEARCH_FAST_QUERIES_TIMEOUT}s"
+    )
+    related_query = related_query.extra(
+        size=settings.RELATED_COUNT, track_total_hits=False
+    )
+    try:
+        response = related_query.execute()
+        timeout_related = False
+    except (ConnectionError, RequestError, ApiError) as e:
+        logger.warning("Error getting related clusters: %s", e)
+        if settings.DEBUG is True:
+            traceback.print_exc()
+        return related_cluster_result
+    except ConnectionTimeout as e:
+        logger.warning("ConnectionTimeout getting related clusters: %s", e)
+        response = None
+        timeout_related = True
+
+    related_cluster_result.related_clusters = (
+        response if response is not None else []
+    )
+    related_cluster_result.timeout = timeout_related
+    related_cluster_result.sub_opinion_pks = list(map(int, sub_opinion_pks))
+    related_cluster_result.has_related_cases = bool(response)
+
+    if not timeout_related:
+        # Cache the same two-tuple shape that is read back above.
+        await cache.aset(
+            mlt_cache_key,
+            (related_cluster_result.related_clusters, timeout_related),
+            settings.RELATED_CACHE_TIMEOUT,
+        )
+    return related_cluster_result
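The cache entry holds the same two-tuple the function reads back, so a warm
cache short-circuits the ES query entirely. A sketch of how a caller might
consume the helper (the context keys here are illustrative, not part of this
patch):

    from urllib.parse import urlencode

    # Inside an async view: a cache hit or a fast ES query either way.
    related = await es_get_related_clusters_with_cache(cluster, request)
    context = {
        "related_clusters": related.related_clusters,
        "related_search_params": "&" + urlencode(related.url_search_params),
        "queries_timeout": related.timeout,
    }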
+
+
+async def es_get_cited_clusters_with_cache(
+    cluster: OpinionCluster,
+    request: HttpRequest,
+):
+    """Elastic cited by cluster search or cache
+
+    :param cluster: The cluster to check
+    :param request: The user request
+    :return: A three-tuple: the citing clusters, their count, and whether
+        the query timed out
+    """
+    cache = caches["db_cache"]
+    cache_citing_key = f"clusters-cited-es:{cluster.pk}"
+
+    sub_opinion_pks = [
+        str(pk)
+        async for pk in cluster.sub_opinions.values_list("pk", flat=True)
+    ]
+    if is_bot(request) or not sub_opinion_pks:
+        return [], 0, False
+
+    cached_citing_results, cached_citing_clusters_count, timeout_cited = (
+        await cache.aget(cache_citing_key) or (None, 0, False)
+        if settings.RELATED_USE_CACHE
+        else (None, 0, False)
+    )
+
+    if cached_citing_results is not None:
+        return (
+            cached_citing_results,
+            cached_citing_clusters_count,
+            timeout_cited,
+        )
+
+    cluster_search = OpinionClusterDocument.search()
+    cited_query = await build_cites_clusters_query(
+        cluster_search, sub_opinion_pks
+    )
+    try:
+        # Execute the cited-by query
+        response = cited_query.execute()
+        timeout_cited = False
+    except (ConnectionError, RequestError, ApiError) as e:
+        logger.warning("Error getting cited clusters: %s", e)
+        if settings.DEBUG is True:
+            traceback.print_exc()
+        return [], 0, False
+    except ConnectionTimeout as e:
+        logger.warning("ConnectionTimeout getting cited clusters: %s", e)
+        response = None
+        timeout_cited = True
+
+    citing_clusters = list(response) if response is not None else []
+    citing_clusters_count = (
+        response.hits.total.value if response is not None else 0
+    )
+
+    if not timeout_cited:
+        await cache.aset(
+            cache_citing_key,
+            (
+                citing_clusters,
+                citing_clusters_count,
+                timeout_cited,
+            ),
+            settings.RELATED_CACHE_TIMEOUT,
+        )
+    return citing_clusters, citing_clusters_count, timeout_cited
+
+
 async def es_get_citing_and_related_clusters_with_cache(
     cluster: OpinionCluster,
     request: HttpRequest,
@@ -251,9 +468,11 @@ async def es_get_citing_and_related_clusters_with_cache(
     if is_bot(request) or not sub_opinion_pks:
         return RelatedCitingResults(url_search_params=url_search_params)

-    cached_citing_results, cached_citing_cluster_count, timeout_cited = (
-        await cache.aget(cache_citing_key) or (None, 0, False)
-    )
+    (
+        cached_citing_results,
+        cached_citing_cluster_count,
+        timeout_cited,
+    ) = await cache.aget(cache_citing_key) or (None, 0, False)
     cached_related_clusters, timeout_related = (
         await cache.aget(mlt_cache_key) or (None, False)
         if settings.RELATED_USE_CACHE
@@ -340,3 +559,75 @@ async def es_get_citing_and_related_clusters_with_cache(
     results.timeout = any([timeout_cited, timeout_related])
     results.sub_opinion_pks = list(map(int, sub_opinion_pks))
     return results
+
+
+async def es_cited_case_count(cluster_id, sub_opinion_pks: list[int]):
+    """Elastic quick cited by count query
+
+    :param cluster_id: The cluster id to search with
+    :param sub_opinion_pks: The sub-opinion ids of the cluster
+    :return: The number of cases citing this cluster
+    """
+    cache = caches["db_cache"]
+    cache_cited_by_key = f"cited-by-count-es:{cluster_id}"
+    cached_cited_by_count = await cache.aget(cache_cited_by_key) or None
+    if cached_cited_by_count is not None:
+        return cached_cited_by_count
+
+    cluster_search = OpinionClusterDocument.search()
+    cites_query = Q(
+        "bool",
+        filter=[
+            Q("match", cluster_child="opinion"),
+            Q("terms", **{"cites": sub_opinion_pks}),
+        ],
+    )
+    cluster_cites_query = cluster_search.query(cites_query)
+    cited_by_count = cluster_cites_query.count()
+
+    await cache.aset(
+        cache_cited_by_key,
+        cited_by_count,
+        settings.RELATED_CACHE_TIMEOUT,
+    )
+
+    return cited_by_count
+
+
+async def es_related_case_count(cluster_id, sub_opinion_pks: list[int]):
+    """Elastic quick related cases count
+
+    :param cluster_id: The cluster id of the object
+    :param sub_opinion_pks: The sub-opinion ids of the cluster
+    :return: The count of related cases in elastic
+    """
+    cache = caches["db_cache"]
+    cache_related_cases_key = f"related-cases-count-es:{cluster_id}"
+    cached_related_cases_count = (
+        await
cache.aget(cache_related_cases_key) or None + ) + if cached_related_cases_count is not None: + return cached_related_cases_count + + cluster_search = OpinionClusterDocument.search() + mlt_query = await build_more_like_this_query(sub_opinion_pks) + parent_filters = await sync_to_async(build_join_es_filters)( + {"type": SEARCH_TYPES.OPINION, "stat_published": True} + ) + default_parent_filter = [Q("match", cluster_child="opinion")] + parent_filters.extend(default_parent_filter) + main_query = Q( + "bool", + filter=default_parent_filter, + should=mlt_query, + minimum_should_match=1, + ) + cluster_related_query = cluster_search.query(main_query) + related_cases_count = cluster_related_query.count() + await cache.aset( + cache_related_cases_key, + related_cases_count, + settings.RELATED_CACHE_TIMEOUT, + ) + + return related_cases_count diff --git a/cl/opinion_page/views.py b/cl/opinion_page/views.py index c96cc3af85..e3f774945a 100644 --- a/cl/opinion_page/views.py +++ b/cl/opinion_page/views.py @@ -72,7 +72,11 @@ from cl.opinion_page.types import AuthoritiesContext from cl.opinion_page.utils import ( core_docket_data, + es_cited_case_count, + es_get_cited_clusters_with_cache, es_get_citing_and_related_clusters_with_cache, + es_get_related_clusters_with_cache, + es_related_case_count, generate_docket_entries_csv_data, get_case_title, ) @@ -352,7 +356,6 @@ async def fetch_docket_entries(docket): async def view_docket( request: HttpRequest, pk: int, slug: str ) -> HttpResponse: - sort_order_asc = True form = DocketEntryFilterForm(request.GET, request=request) docket, context = await core_docket_data(request, pk) @@ -770,7 +773,9 @@ async def view_recap_authorities( @never_cache -async def view_opinion(request: HttpRequest, pk: int, _: str) -> HttpResponse: +async def view_opinion_old( + request: HttpRequest, pk: int, _: str +) -> HttpResponse: """Using the cluster ID, return the cluster of opinions. 
We also test if the cluster ID has a user note, and send data
@@ -855,7 +860,7 @@ async def view_opinion(request: HttpRequest, pk: int, _: str) -> HttpResponse:
 sponsored = True

 view_authorities_url = reverse(
- "view_authorities", args=[cluster.pk, cluster.slug]
+ "view_case_authorities", args=[cluster.pk, cluster.slug]
 )
 authorities_context: AuthoritiesContext = AuthoritiesContext(
 citation_record=cluster,
@@ -896,6 +901,151 @@ async def view_opinion(request: HttpRequest, pk: int, _: str) -> HttpResponse:
 )


+async def setup_opinion_context(
+ cluster: OpinionCluster, request: HttpRequest, tab: str
+):
+ """Generate the basic page information we need to load the page
+
+ :param cluster: The opinion cluster
+ :param request: The HTTP request from the user
+ :param tab: The tab to load
+ :return: The shared context dict for the opinion tabs
+ """
+ title = ", ".join(
+ [
+ s
+ for s in [
+ trunc(best_case_name(cluster), 100, ellipsis="..."),
+ await cluster.acitation_string(),
+ ]
+ if s.strip()
+ ]
+ )
+ has_downloads = False
+ pdf_path = None
+ if cluster.filepath_pdf_harvard:
+ has_downloads = True
+ pdf_path = cluster.filepath_pdf_harvard
+ else:
+ async for sub_opinion in cluster.sub_opinions.all():
+ if str(sub_opinion.local_path).endswith(".pdf"):
+ has_downloads = True
+ pdf_path = sub_opinion.local_path.url
+ break
+ elif sub_opinion.download_url:
+ has_downloads = True
+ pdf_path = sub_opinion.local_path.url
+
+ get_string = make_get_string(request)
+
+ sub_opinion_pks = [
+ str(pk)
+ async for pk in cluster.sub_opinions.values_list("pk", flat=True)
+ ]
+
+ es_has_cited_opinions = await es_cited_case_count(
+ cluster.id, sub_opinion_pks
+ )
+ es_has_related_opinions = await es_related_case_count(
+ cluster.id, sub_opinion_pks
+ )
+
+ try:
+ note = await Note.objects.aget(
+ cluster_id=cluster.pk,
+ user=await request.auser(), # type: ignore[attr-defined]
+ )
+ except (ObjectDoesNotExist, TypeError):
+ # No note or anonymous user
+ note_form = NoteForm(
+ initial={
+ "cluster_id": cluster.pk,
+ "name": trunc(best_case_name(cluster), 100, ellipsis="..."),
+ }
+ )
+ else:
+ note_form = NoteForm(instance=note)
+
+ # Identify opinions updated/added in partnership with v|lex for 3 years
+ sponsored = False
+ if (
+ cluster.date_created.date() > datetime.datetime(2022, 6, 1).date()
+ and cluster.filepath_json_harvard
+ ):
+ sponsored = True
+
+ context = {
+ "tab": tab,
+ "title": title,
+ "caption": await cluster.acaption(),
+ "cluster": cluster,
+ "has_downloads": has_downloads,
+ "pdf_path": pdf_path,
+ "note_form": note_form,
+ "get_string": get_string,
+ "private": cluster.blocked,
+ "sponsored": sponsored,
+ "summaries_count": await cluster.parentheticals.acount(),
+ "authorities_count": await cluster.aauthority_count(),
+ "related_cases_count": es_has_related_opinions,
+ "cited_by_count": es_has_cited_opinions,
+ }
+
+ return context
+
+
+async def render_opinion_view(
+ request: HttpRequest, pk: int, tab: str, additional_context: dict = None
+) -> HttpResponse:
+ """Helper function to render opinion views with common context.
+
+ :param request: The HttpRequest object
+ :param pk: The primary key for the OpinionCluster
+ :param tab: The tab name to display
+ :param additional_context: Any additional context to be passed to the template
+ :return: HttpResponse
+ """
+ cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk)
+
+ ui_flag_for_o = await sync_to_async(waffle.flag_is_active)(
+ request, "ui_flag_for_o"
+ )
+ user_flag_active = await sync_to_async(waffle.flag_is_active)(
+ request.user, "ui_flag_for_o"
+ )
+ if not any([ui_flag_for_o, user_flag_active]):
+ return await view_opinion_old(request, pk, "str")
+
+ context = await setup_opinion_context(cluster, request, tab=tab)
+
+ if additional_context:
+ context.update(additional_context)
+
+ # Just redirect if people attempt to URL hack to pages without content
+ tab_count_mapping = {
+ "pdf": "has_downloads",
+ "authorities": "authorities_count",
+ "cited-by": "cited_by_count",
+ "related-cases": "related_cases_count",
+ "summaries": "summaries_count",
+ }
+
+ # Check if the current tab needs a redirect based on the mapping
+ if context["tab"] in tab_count_mapping:
+ count_key = tab_count_mapping[context["tab"]]
+ if not context[count_key]:
+ return HttpResponseRedirect(
+ reverse("view_case", args=[cluster.pk, cluster.slug])
+ )
+
+ return TemplateResponse(
+ request,
+ "opinions.html",
+ context,
+ )
+
+
 async def view_summaries(
 request: HttpRequest, pk: int, slug: str
 ) -> HttpResponse:
@@ -948,6 +1098,164 @@ async def view_authorities(
 )


+async def check_flag_exists(flag_name: str) -> bool:
+ return await sync_to_async(
+ waffle.get_waffle_flag_model().objects.filter(name=flag_name).exists
+ )()
+
+
+@never_cache
+async def view_opinion(request: HttpRequest, pk: int, _: str) -> HttpResponse:
+ """View for displaying opinions."""
+
+ flag_exists = await check_flag_exists("ui_flag_for_o")
+ if flag_exists:
+ ui_flag_for_o = await sync_to_async(waffle.flag_is_active)(
+ request, "ui_flag_for_o"
+ )
+ user_flag_active = await sync_to_async(waffle.flag_is_active)(
+ request.user, "ui_flag_for_o"
+ )
+ if ui_flag_for_o or user_flag_active:
+ return await render_opinion_view(request, pk, "opinions")
+ return await view_opinion_old(request, pk, "str")
+
+
+async def view_opinion_details(
+ request: HttpRequest, pk: int, _: str
+) -> HttpResponse:
+ """View for displaying opinion case details."""
+
+ return await render_opinion_view(request, pk, "details")
+
+
+async def view_opinion_pdf(
+ request: HttpRequest, pk: int, _: str
+) -> HttpResponse:
+ """View for displaying the opinion PDF."""
+ return await render_opinion_view(request, pk, "pdf")
+
+
+async def view_opinion_authorities(
+ request: HttpRequest, pk: int, _: str
+) -> HttpResponse:
+ """View for displaying opinion authorities."""
+ cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk)
+
+ authorities_context: AuthoritiesContext = AuthoritiesContext(
+ citation_record=cluster,
+ query_string=request.META["QUERY_STRING"],
+ total_authorities_count=await cluster.aauthority_count(),
+ view_all_url="view_authorities_url",
+ doc_type="opinion",
+ )
+ await authorities_context.post_init()
+
+ additional_context = {
+ "authorities_context": authorities_context,
+ "authorities_with_data": await cluster.aauthorities_with_data(),
+ }
+ ui_flag_for_o = await sync_to_async(waffle.flag_is_active)(
+ request, "ui_flag_for_o"
+ )
+ user_flag_active = await
sync_to_async(waffle.flag_is_active)( + request.user, "ui_flag_for_o" + ) + + if ui_flag_for_o or user_flag_active: + return await render_opinion_view( + request, pk, "authorities", additional_context + ) + else: + # Old page to load for people outside the flag + return await view_authorities( + request=request, pk=pk, slug="authorities" + ) + + +async def view_opinion_cited_by( + request: HttpRequest, pk: int, _: str +) -> HttpResponse: + """""" + cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + + ( + citing_clusters, + citing_cluster_count, + _, + ) = await es_get_cited_clusters_with_cache(cluster, request) + additional_context = { + "citing_clusters": citing_clusters, + "citing_cluster_count": citing_cluster_count, + } + return await render_opinion_view( + request, pk, "cited-by", additional_context + ) + + +async def view_opinion_summaries( + request: HttpRequest, pk: int, _: str +) -> HttpResponse: + """""" + cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + parenthetical_groups_qs = await get_or_create_parenthetical_groups(cluster) + parenthetical_groups = [ + parenthetical_group + async for parenthetical_group in parenthetical_groups_qs.prefetch_related( + Prefetch( + "parentheticals", + queryset=Parenthetical.objects.order_by("-score"), + ), + "parentheticals__describing_opinion__cluster__citations", + "parentheticals__describing_opinion__cluster__docket__court", + "representative__describing_opinion__cluster__citations", + "representative__describing_opinion__cluster__docket__court", + ) + ] + ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( + request, "ui_flag_for_o" + ) + user_flag_active = await sync_to_async(waffle.flag_is_active)( + request.user, "ui_flag_for_o" + ) + + if ui_flag_for_o or user_flag_active: + additional_context = { + "parenthetical_groups": parenthetical_groups, + "ui_flag_for_o": ui_flag_for_o, + "user_flag_active": user_flag_active, + } + return await render_opinion_view( + request, pk, "summaries", additional_context + ) + else: + # Old page to load for people outside the flag + return await view_summaries(request=request, pk=pk, slug="summaries") + + +async def view_opinion_related_cases( + request: HttpRequest, pk: int, _: str +) -> HttpResponse: + """""" + cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + related_cluster_object = await es_get_related_clusters_with_cache( + cluster, request + ) + additional_context = { + "related_algorithm": "mlt", + "related_clusters": related_cluster_object.related_clusters, + "sub_opinion_ids": related_cluster_object.sub_opinion_pks, + "related_search_params": f"&{urlencode(related_cluster_object.url_search_params)}", + "queries_timeout": related_cluster_object.timeout, + "has_related_cases": related_cluster_object.has_related_cases, + } + return await render_opinion_view( + request, pk, "related-cases", additional_context + ) + + async def cluster_visualizations( request: HttpRequest, pk: int, slug: str ) -> HttpResponse: From 08a4e8624cc012b0a7741bd1b227d472b1353ed6 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 17 Oct 2024 15:40:16 -0400 Subject: [PATCH 005/143] feat(tests): Update to tests Generally just override flags to avoid testing old view opinion page against the new ui changes. 
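For context, a minimal sketch (not taken from this patch) of how the waffle
override used throughout these tests behaves; the test class name, URL, and
assertions below are illustrative assumptions only:

    from django.test import TestCase
    from waffle.testutils import override_flag


    @override_flag("ui_flag_for_o", active=False)  # every test in the class sees the old page
    class OpinionPageFlagSketch(TestCase):
        def test_old_view_renders(self) -> None:
            # Hypothetical cluster URL; the real tests build it with reverse()
            response = self.client.get("/opinion/1/case-name/")
            self.assertEqual(response.status_code, 200)

        def test_new_view_renders(self) -> None:
            # The same helper also works as a context manager for one block
            with override_flag("ui_flag_for_o", active=True):
                response = self.client.get("/opinion/1/case-name/")
            self.assertEqual(response.status_code, 200)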
--- cl/favorites/tests.py | 3 +++ cl/opinion_page/tests.py | 3 +++ cl/search/tests/tests.py | 3 +++ cl/search/tests/tests_es_opinion.py | 2 ++ cl/tests/test_feeds.py | 3 +++ cl/tests/test_visualizations.py | 2 ++ 6 files changed, 16 insertions(+) diff --git a/cl/favorites/tests.py b/cl/favorites/tests.py index 61d549477b..bdde7f8393 100644 --- a/cl/favorites/tests.py +++ b/cl/favorites/tests.py @@ -11,6 +11,7 @@ from django.utils.timezone import now from selenium.webdriver.common.by import By from timeout_decorator import timeout_decorator +from waffle.testutils import override_flag from cl.favorites.factories import NoteFactory, PrayerFactory from cl.favorites.models import DocketTag, Note, Prayer, UserTag @@ -96,6 +97,7 @@ def setUp(self) -> None: super().setUp() @timeout_decorator.timeout(SELENIUM_TIMEOUT) + @override_flag("ui_flag_for_o", False) def test_anonymous_user_is_prompted_when_favoriting_an_opinion( self, ) -> None: @@ -156,6 +158,7 @@ def test_anonymous_user_is_prompted_when_favoriting_an_opinion( modal_title = self.browser.find_element(By.ID, "save-note-title") self.assertIn("Save Note", modal_title.text) + @override_flag("ui_flag_for_o", False) @timeout_decorator.timeout(SELENIUM_TIMEOUT) def test_logged_in_user_can_save_note(self) -> None: # Meta: assure no Faves even if part of fixtures diff --git a/cl/opinion_page/tests.py b/cl/opinion_page/tests.py index c77afc5ee9..59fc9038b6 100644 --- a/cl/opinion_page/tests.py +++ b/cl/opinion_page/tests.py @@ -19,6 +19,7 @@ from django.urls import reverse from django.utils.text import slugify from factory import RelatedFactory +from waffle.models import Flag from waffle.testutils import override_flag from cl.lib.models import THUMBNAIL_STATUSES @@ -111,6 +112,7 @@ async def test_simple_rd_page(self) -> None: self.assertEqual(response.status_code, HTTPStatus.OK) +@override_flag("ui_flag_for_o", False) class OpinionPageLoadTest( ESIndexTestCase, CourtTestCase, @@ -649,6 +651,7 @@ async def test_volume_pagination(self) -> None: self.assertEqual(volume_next, None) @override_flag("o-es-active", False) + @override_flag("ui_flag_for_o", False) def test_full_citation_redirect(self) -> None: """Do we get redirected to the correct URL when we pass in a full citation?""" diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index b8f85f719d..8fdd6fbd88 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -25,6 +25,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait from timeout_decorator import timeout_decorator +from waffle.testutils import override_flag from cl.audio.factories import AudioFactory from cl.lib.elasticsearch_utils import simplify_estimated_count @@ -1120,6 +1121,7 @@ def test_pagerank_calculation(self) -> None: ) +@override_flag("ui_flag_for_o", False) class OpinionSearchFunctionalTest(AudioTestCase, BaseSeleniumTest): """ Test some of the primary search functionality of CL: searching opinions. 
@@ -1260,6 +1262,7 @@ def test_search_and_facet_docket_numbers(self) -> None:
 for result in search_results.find_elements(By.TAG_NAME, "article"):
 self.assertIn("1337", result.text)

+ @override_flag("ui_flag_for_o", False)
 @timeout_decorator.timeout(SELENIUM_TIMEOUT)
 def test_opinion_search_result_detail_page(self) -> None:
 # Dora navigates to CL and does a simple wild card search
diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py
index 60c72aa8d9..6a493aa478 100644
--- a/cl/search/tests/tests_es_opinion.py
+++ b/cl/search/tests/tests_es_opinion.py
@@ -19,6 +19,7 @@
 from elasticsearch_dsl import Q
 from factory import RelatedFactory
 from lxml import etree, html
+from waffle.models import Flag
 from waffle.testutils import override_flag

 from cl.custom_filters.templatetags.text_filters import html_decode
@@ -2247,6 +2248,7 @@ def test_uses_exact_version_for_case_name_field(self) -> None:
 cluster_2.delete()


+@override_flag("ui_flag_for_o", False)
 class RelatedSearchTest(
 ESIndexTestCase, CourtTestCase, PeopleTestCase, SearchTestCase, TestCase
 ):
diff --git a/cl/tests/test_feeds.py b/cl/tests/test_feeds.py
index a9fb9c8c7c..90bac42ae5 100644
--- a/cl/tests/test_feeds.py
+++ b/cl/tests/test_feeds.py
@@ -10,6 +10,7 @@
 from django.urls import reverse
 from selenium.webdriver.common.by import By
 from timeout_decorator import timeout_decorator
+from waffle.testutils import override_flag

 from cl.search.models import Court
 from cl.tests.base import SELENIUM_TIMEOUT, BaseSeleniumTest
@@ -28,6 +29,7 @@ class FeedsFunctionalTest(BaseSeleniumTest):
 "functest_audio.json",
 ]

+ @override_flag("ui_flag_for_o", False)
 @timeout_decorator.timeout(SELENIUM_TIMEOUT)
 def test_can_get_to_feeds_from_homepage(self) -> None:
 """Can we get to the feeds/podcasts page from the homepage?"""
@@ -49,6 +51,7 @@ def test_can_get_to_feeds_from_homepage(self) -> None:
 self.assert_text_in_node("Podcasts", "body")

 @timeout_decorator.timeout(SELENIUM_TIMEOUT)
+ @override_flag("ui_flag_for_o", False)
 def test_feeds_page_shows_jurisdiction_links(self) -> None:
 """
 Does the feeds page show all the proper links for each jurisdiction?
diff --git a/cl/tests/test_visualizations.py b/cl/tests/test_visualizations.py index 0e5acb46f7..a0962ede8c 100644 --- a/cl/tests/test_visualizations.py +++ b/cl/tests/test_visualizations.py @@ -5,6 +5,7 @@ from django.contrib.auth.hashers import make_password from selenium.webdriver.common.by import By from timeout_decorator import timeout_decorator +from waffle.testutils import override_flag from cl.tests.base import SELENIUM_TIMEOUT, BaseSeleniumTest from cl.users.factories import UserProfileWithParentsFactory @@ -30,6 +31,7 @@ def tearDown(self) -> None: SCOTUSMap.objects.all().delete() JSONVersion.objects.all().delete() + @override_flag("ui_flag_for_o", False) @timeout_decorator.timeout(SELENIUM_TIMEOUT) def test_creating_new_visualization(self) -> None: """Test if a user can create a new Visualization""" From bc92162addf4c5b532cbef32897de94d753c2646 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 18 Oct 2024 11:06:02 -0400 Subject: [PATCH 006/143] fix(tests): Fix tests Remove decorator for selenium tests unaffected And modify css to only affect scrolling on opinion page --- cl/assets/static-global/css/override.css | 7 ++++--- cl/assets/static-global/js/base.js | 8 ++++++++ cl/tests/test_feeds.py | 5 ++--- cl/tests/test_visualizations.py | 1 - 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cl/assets/static-global/css/override.css b/cl/assets/static-global/css/override.css index 32c21672a1..822b799e09 100644 --- a/cl/assets/static-global/css/override.css +++ b/cl/assets/static-global/css/override.css @@ -2420,8 +2420,9 @@ div.footnote:first-of-type { display: none; } } -} -html { - scroll-behavior: smooth; } + +html.smooth-scroll { + scroll-behavior: smooth; +} \ No newline at end of file diff --git a/cl/assets/static-global/js/base.js b/cl/assets/static-global/js/base.js index 31713c0df5..149e42a7f8 100644 --- a/cl/assets/static-global/js/base.js +++ b/cl/assets/static-global/js/base.js @@ -367,6 +367,14 @@ if (form && button) { }); } + +////////////////////////////////// +// Smooth Scrolling on Opinions // +///////////////////////////////// +if (document.body.classList.contains('opinion-body')) { + document.documentElement.classList.add('smooth-scroll'); +} + ////////////////// // SCOTUS STYLE // ////////////////// diff --git a/cl/tests/test_feeds.py b/cl/tests/test_feeds.py index 90bac42ae5..7a67cd7e6d 100644 --- a/cl/tests/test_feeds.py +++ b/cl/tests/test_feeds.py @@ -29,7 +29,6 @@ class FeedsFunctionalTest(BaseSeleniumTest): "functest_audio.json", ] - @override_flag("ui_flag_for_o", False) @timeout_decorator.timeout(SELENIUM_TIMEOUT) def test_can_get_to_feeds_from_homepage(self) -> None: """Can we get to the feeds/podcasts page from the homepage?""" @@ -51,7 +50,6 @@ def test_can_get_to_feeds_from_homepage(self) -> None: self.assert_text_in_node("Podcasts", "body") @timeout_decorator.timeout(SELENIUM_TIMEOUT) - @override_flag("ui_flag_for_o", False) def test_feeds_page_shows_jurisdiction_links(self) -> None: """ Does the feeds page show all the proper links for each jurisdiction? 
@@ -67,7 +65,8 @@ def test_feeds_page_shows_jurisdiction_links(self) -> None:
 link.get_attribute("href"),
 f"{self.live_server_url}/feed/court/{court.pk}/",
 )
- link.click()
+ with self.wait_for_page_load(timeout=10):
+ link.click()
 print("clicked...", end=" ")
 self.assertIn(
 'feed xml:lang="en-us" xmlns="http://www.w3.org/2005/Atom"',
diff --git a/cl/tests/test_visualizations.py b/cl/tests/test_visualizations.py
index a0962ede8c..d6760944d4 100644
--- a/cl/tests/test_visualizations.py
+++ b/cl/tests/test_visualizations.py
@@ -31,7 +31,6 @@ def tearDown(self) -> None:
 SCOTUSMap.objects.all().delete()
 JSONVersion.objects.all().delete()

- @override_flag("ui_flag_for_o", False)
 @timeout_decorator.timeout(SELENIUM_TIMEOUT)
 def test_creating_new_visualization(self) -> None:
 """Test if a user can create a new Visualization"""

From be333b364225bf4524a6c51f25344a6100b8079b Mon Sep 17 00:00:00 2001
From: William Palin
Date: Fri, 18 Oct 2024 11:11:16 -0400
Subject: [PATCH 007/143] fix(opinion_page): Remove comments and fix lint

Remove print statement and fix return for bot or scraping detection
---
 cl/opinion_page/utils.py | 17 ++++++-----------
 cl/opinion_page/views.py | 6 +++---
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py
index b8d5e581dc..3bd0eb6144 100644
--- a/cl/opinion_page/utils.py
+++ b/cl/opinion_page/utils.py
@@ -3,7 +3,7 @@
 import traceback
 from dataclasses import dataclass, field
 from io import StringIO
-from typing import Dict, Tuple, Union
+from typing import Dict, List, Tuple, Union

 from asgiref.sync import sync_to_async
 from django.conf import settings
@@ -334,14 +334,9 @@ async def es_get_related_clusters_with_cache(
 )
 related_cluster_result.timeout = False
 related_cluster_result.sub_opinion_pks = list(map(int, sub_opinion_pks))
 related_cluster_result.has_related_cases = True if response else False

 if timeout_related == False:
- # print("SETTING", (
- # related_cluster_result.related_clusters,
- # timeout_related,
- # related_cluster_result.has_related_cases,
- # ))
 await cache.aset(
 mlt_cache_key,
- (results.related_clusters, timeout_related),
+ (related_cluster_result.related_clusters, timeout_related),
 settings.RELATED_CACHE_TIMEOUT,
 )

@@ -375,7 +370,7 @@ async def es_get_cited_clusters_with_cache(
 async for pk in cluster.sub_opinions.values_list("pk", flat=True)
 ]
 if is_bot(request) or not sub_opinion_pks:
- return related_cluster_result
+ return (None, False, False)

 cached_citing_results, cached_citing_clusters_count, timeout_cited = (
 await cache.aget(cache_citing_key) or (None, False, False)
@@ -402,7 +397,7 @@ async def es_get_cited_clusters_with_cache(
 logger.warning("Error getting cited and related clusters: %s", e)
 if settings.DEBUG is True:
 traceback.print_exc()
- return related_cluster_result
+ return (None, False, False)
 except ConnectionTimeout as e:
 logger.warning(
 "ConnectionTimeout getting cited and related clusters: %s", e
@@ -561,7 +556,7 @@ async def es_get_citing_and_related_clusters_with_cache(
 return results


-async def es_cited_case_count(cluster_id, sub_opinion_pks: [int]):
+async def es_cited_case_count(cluster_id: int, sub_opinion_pks: List[str]):
 """Elastic quick cited by count query

 :param cluster_id: The cluster id to search with
@@ -594,7 +589,7 @@ async def es_cited_case_count(cluster_id, sub_opinion_pks: [int]):
 return cited_by_count


-async def es_related_case_count(cluster_id, sub_opinion_pks: [int]):
+async def es_related_case_count(cluster_id, sub_opinion_pks: List[str]):
 """Elastic quick related cases count

 :param cluster_id: The cluster id of the object
 :param sub_opinion_pks: The sub opinion ids of the cluster
diff --git
a/cl/opinion_page/views.py b/cl/opinion_page/views.py index e3f774945a..fe7e93bc33 100644 --- a/cl/opinion_page/views.py +++ b/cl/opinion_page/views.py @@ -996,14 +996,14 @@ async def setup_opinion_context( async def render_opinion_view( - request: HttpRequest, pk: int, tab: str, additional_context: dict = None + request: HttpRequest, pk: int, tab: str, additional_context: dict = {} ) -> HttpResponse: """Helper function to render opinion views with common context. :param request: The HttpRequest object :param pk: The primary key for the OpinionCluster - :param tab: The tab name to display - :param additional_context: Any additional context to be passed to the template + :param tab: The selected tab + :param additional_context: Any additional context to be passed to the view :return: HttpResponse """ cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) From 5b0cf27610ebd8607cc08a0526f47e4722a56bf4 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 18 Oct 2024 14:04:48 -0400 Subject: [PATCH 008/143] feat(printing): Prettify Printing Hide unwanted content during printing --- cl/opinion_page/templates/includes/add_download_button.html | 2 +- cl/opinion_page/templates/opinions.html | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cl/opinion_page/templates/includes/add_download_button.html b/cl/opinion_page/templates/includes/add_download_button.html index 1d7a4d828e..a4844bc075 100644 --- a/cl/opinion_page/templates/includes/add_download_button.html +++ b/cl/opinion_page/templates/includes/add_download_button.html @@ -1,4 +1,4 @@ -
+
+
+ +
\ No newline at end of file diff --git a/cl/opinion_page/templates/opinions.html b/cl/opinion_page/templates/opinions.html index bf2cf23ebc..320dbb40d9 100644 --- a/cl/opinion_page/templates/opinions.html +++ b/cl/opinion_page/templates/opinions.html @@ -61,64 +61,58 @@

Admin

{% endif %} - {% if cluster.sub_opinions.all.first.extracted_by_ocr or "U" in cluster.source and tab == "opinions" %}
@@ -196,30 +190,31 @@

+
{{ cluster.date_filed }} +
+ + {% if pdf_path %} + {% include "includes/add_download_button.html" %} + {% endif %} {% include "includes/add_note_button.html" with form_instance_id=note_form.instance.cluster_id %} - - {% if pdf_path %} - {% include "includes/add_download_button.html" %} - {% endif %} - - - +
+

{{ cluster.docket.court }}

diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py index d199bb395c..9fc779b37f 100644 --- a/cl/opinion_page/utils.py +++ b/cl/opinion_page/utils.py @@ -330,7 +330,6 @@ async def es_get_related_clusters_with_cache( ) related_cluster_result.timeout = False related_cluster_result.sub_opinion_pks = list(map(int, sub_opinion_pks)) - # related_cluster_result.has_related_cases = True if response else False if timeout_related == False: await cache.aset( diff --git a/cl/search/models.py b/cl/search/models.py index b7c4d808b4..94275afdbe 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -16,6 +16,7 @@ from django.urls import NoReverseMatch, reverse from django.utils import timezone from django.utils.encoding import force_str +from django.utils.functional import cached_property from django.utils.text import slugify from eyecite import get_citations from eyecite.tokenizers import HyperscanTokenizer From abaa31a1a7680ac9ac3dc6a2818182f6e0c69fe7 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 22 Nov 2024 13:47:53 -0500 Subject: [PATCH 082/143] fix(search.models): Update aauthorities with data Add prefetch related objects along with authorities data query --- cl/search/models.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cl/search/models.py b/cl/search/models.py index 94275afdbe..1b0197bdd2 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2989,7 +2989,13 @@ async def aauthorities_with_data(self): The returned list is sorted by that citation count field. """ authorities_with_data = [] - async for authority in await self.aauthorities(): + authorities_base = await self.aauthorities() + authorities_qs = ( + authorities_base.prefetch_related("citations") + .select_related("docket__court") + .order_by("-citation_count", "-date_filed") + ) + async for authority in authorities_qs: authority.citation_depth = ( await get_citation_depth_between_clusters( citing_cluster_pk=self.pk, cited_cluster_pk=authority.pk From fe1ee4f5c5d4103ef58f70b5fe2a83d6a3780c01 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 22 Nov 2024 13:51:59 -0500 Subject: [PATCH 083/143] fix(search.models): Update acaption Remove extra cluster query Make docket and court async --- cl/search/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 1b0197bdd2..ab875f7661 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2799,9 +2799,8 @@ async def acaption(self): else: caption += f", {citations[0]}" - cluster = await OpinionCluster.objects.aget(pk=self.pk) - docket = await Docket.objects.aget(id=cluster.docket_id) - court = await Court.objects.aget(pk=docket.court_id) + docket = await sync_to_async(lambda: self.docket)() + court = await sync_to_async(lambda: docket.court)() if docket.court_id != "scotus": court = re.sub(" ", " ", court.citation_string) # Strftime fails before 1900. Do it this way instead. 
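The sync_to_async pattern in the acaption change above deserves a note. A
minimal sketch (assuming only a cluster object with docket and court foreign
keys, as in the models shown here) of why the lambda wrapping replaces the
per-object .aget() calls:

    from asgiref.sync import sync_to_async

    async def court_citation_string(cluster) -> str:
        # Touching a lazy foreign key directly in async code raises
        # SynchronousOnlyOperation, and re-fetching each object with
        # .aget() costs one extra query per hop. Wrapping the attribute
        # access runs the lookup in a worker thread and keeps Django's
        # per-instance FK cache warm for later accesses.
        docket = await sync_to_async(lambda: cluster.docket)()
        court = await sync_to_async(lambda: docket.court)()
        return court.citation_string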
From 70f562524fd6e8dc8441004b5601a0861eb7a33a Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 22 Nov 2024 15:00:58 -0400 Subject: [PATCH 084/143] feat(alerts): Refines logic to re-run aux queries --- cl/lib/elasticsearch_utils.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 4c561b0166..92acd4ca61 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3217,14 +3217,20 @@ def do_es_sweep_alert_query( if parent_query: parent_search = search_query.query(parent_query) # Ensure accurate tracking of total hit count for up to 10,001 query results - parent_search = parent_search.extra(from_=0, track_total_hits=10_001) + parent_search = parent_search.extra( + from_=0, + track_total_hits=settings.ELASTICSEARCH_MAX_RESULT_COUNT + 1, + ) parent_search = parent_search.source(includes=["docket_id"]) multi_search = multi_search.add(parent_search) if child_query: child_search = child_search_query.query(child_query) # Ensure accurate tracking of total hit count for up to 10,001 query results - child_search = child_search.extra(from_=0, track_total_hits=10_001) + child_search = child_search.extra( + from_=0, + track_total_hits=settings.ELASTICSEARCH_MAX_RESULT_COUNT + 1, + ) child_search = child_search.source(includes=["id"]) multi_search = multi_search.add(child_search) @@ -3240,7 +3246,9 @@ def do_es_sweep_alert_query( # Re-run parent query to fetch potentially missed docket IDs due to large # result sets. should_repeat_parent_query = ( - docket_results and docket_results.hits.total.value >= 10_000 + docket_results + and docket_results.hits.total.value + >= settings.ELASTICSEARCH_MAX_RESULT_COUNT ) if should_repeat_parent_query: docket_ids = [int(d.docket_id) for d in main_results] @@ -3259,18 +3267,20 @@ def do_es_sweep_alert_query( # from the main results and refines the child query filter with these IDs. # Finally, it re-executes the child search. 
should_repeat_child_query = ( - rd_results and rd_results.hits.total.value >= 10_000 + rd_results + and rd_results.hits.total.value + >= settings.ELASTICSEARCH_MAX_RESULT_COUNT ) if should_repeat_child_query: rd_ids = [ - int(rd.to_dict()["id"]) + int(rd["_source"]["id"]) for docket in main_results if hasattr(docket, "child_docs") for rd in docket.child_docs ] child_query.filter.append(Q("terms", id=rd_ids)) child_search = child_search_query.query(child_query) - child_search = child_search.source(includes=["docket_id"]) + child_search = child_search.source(includes=["id"]) rd_results = child_search.execute() return main_results, docket_results, rd_results From 3b60523ce90c06dc623ad49e4bc081e980c37beb Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 22 Nov 2024 14:35:50 -0500 Subject: [PATCH 085/143] fix(search.models): Optimize opinion views Optimize opinion view rendering by removing redundant cluster query --- cl/opinion_page/views.py | 84 +++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/cl/opinion_page/views.py b/cl/opinion_page/views.py index 6e266bf362..5b059bb0c5 100644 --- a/cl/opinion_page/views.py +++ b/cl/opinion_page/views.py @@ -10,7 +10,7 @@ from django.contrib import messages from django.core.exceptions import ObjectDoesNotExist, PermissionDenied from django.core.paginator import EmptyPage, PageNotAnInteger, Paginator -from django.db.models import IntegerField, Prefetch +from django.db.models import IntegerField, Prefetch, QuerySet from django.db.models.functions import Cast from django.http import HttpRequest, HttpResponseRedirect from django.http.response import ( @@ -994,8 +994,17 @@ async def setup_opinion_context( return context +async def get_opinions_base_queryset() -> QuerySet: + return OpinionCluster.objects.prefetch_related( + "sub_opinions__opinions_cited", "citations" + ).select_related("docket__court") + + async def render_opinion_view( - request: HttpRequest, pk: int, tab: str, additional_context: dict = {} + request: HttpRequest, + cluster: OpinionCluster, + tab: str, + additional_context: dict = {}, ) -> HttpResponse: """Helper function to render opinion views with common context. 
@@ -1005,15 +1014,15 @@ async def render_opinion_view( :param additional_context: Any additional context to be passed to the view :return: HttpResponse """ - queryset = OpinionCluster.objects.prefetch_related("sub_opinions") - cluster: OpinionCluster = await aget_object_or_404(queryset, pk=pk) - ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "ui_flag_for_o" ) if not ui_flag_for_o: return await view_opinion_old(request, pk, "str") + if not any([ui_flag_for_o]): + return await view_opinion_old(request, cluster.pk, "str") + context = await setup_opinion_context(cluster, request, tab=tab) if additional_context: @@ -1107,9 +1116,13 @@ async def view_opinion(request: HttpRequest, pk: int, _: str) -> HttpResponse: ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "ui_flag_for_o" ) - if ui_flag_for_o: - return await render_opinion_view(request, pk, "opinions") - return await view_opinion_old(request, pk, "str") + if not ui_flag_for_o: + return await view_opinion_old(request, pk, "str") + + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) + return await render_opinion_view(request, cluster, "opinions") async def view_opinion_pdf( @@ -1122,7 +1135,10 @@ async def view_opinion_pdf( :param _: url slug :return: Opinion PDF tab """ - return await render_opinion_view(request, pk, "pdf") + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) + return await render_opinion_view(request, cluster, "pdf") async def view_opinion_authorities( @@ -1135,22 +1151,25 @@ async def view_opinion_authorities( :param _: url slug :return: Table of Authorities tab """ - cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) - - additional_context = { - "authorities_with_data": await cluster.aauthorities_with_data(), - } - ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "ui_flag_for_o" ) - if ui_flag_for_o: - return await render_opinion_view( - request, pk, "authorities", additional_context + if not ui_flag_for_o: + # Old page to load for people outside the flag + return await view_authorities( + request=request, pk=pk, slug="authorities" ) - # Old page to load for people outside the flag - return await view_authorities(request=request, pk=pk, slug="authorities") + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) + + additional_context = { + "authorities_with_data": await cluster.aauthorities_with_data(), + } + return await render_opinion_view( + request, cluster, "authorities", additional_context + ) async def view_opinion_cited_by( @@ -1163,14 +1182,16 @@ async def view_opinion_cited_by( :param _: url slug :return: Cited By tab """ - cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) cited_query = await es_get_cited_clusters_with_cache(cluster, request) additional_context = { "citing_clusters": cited_query.citing_clusters, "citing_cluster_count": cited_query.citing_cluster_count, } return await render_opinion_view( - request, pk, "cited-by", additional_context + request, cluster, "cited-by", additional_context ) @@ -1184,7 +1205,16 @@ async def view_opinion_summaries( :param _: url slug :return: Summaries tab """ - cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( + request, 
"ui_flag_for_o" + ) + if not ui_flag_for_o: + # Old page to load for people outside the flag + return await view_summaries(request=request, pk=pk, slug="summaries") + + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) parenthetical_groups_qs = await get_or_create_parenthetical_groups(cluster) parenthetical_groups = [ parenthetical_group @@ -1210,7 +1240,7 @@ async def view_opinion_summaries( "ui_flag_for_o": ui_flag_for_o, } return await render_opinion_view( - request, pk, "summaries", additional_context + request, cluster, "summaries", additional_context ) @@ -1224,7 +1254,9 @@ async def view_opinion_related_cases( :param _: url slug :return: Related Cases tab """ - cluster: OpinionCluster = await aget_object_or_404(OpinionCluster, pk=pk) + cluster: OpinionCluster = await aget_object_or_404( + await get_opinions_base_queryset(), pk=pk + ) related_cluster_object = await es_get_related_clusters_with_cache( cluster, request ) @@ -1236,7 +1268,7 @@ async def view_opinion_related_cases( "queries_timeout": related_cluster_object.timeout, } return await render_opinion_view( - request, pk, "related-cases", additional_context + request, cluster, "related-cases", additional_context ) From ebfbcb9ba9543de22e81ed30bbf6859522a3d4a4 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 22 Nov 2024 16:15:41 -0500 Subject: [PATCH 086/143] fix(opinion-page.views): Remove extra code --- cl/opinion_page/views.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cl/opinion_page/views.py b/cl/opinion_page/views.py index 5b059bb0c5..df3e7fdbd1 100644 --- a/cl/opinion_page/views.py +++ b/cl/opinion_page/views.py @@ -1017,8 +1017,6 @@ async def render_opinion_view( ui_flag_for_o = await sync_to_async(waffle.flag_is_active)( request, "ui_flag_for_o" ) - if not ui_flag_for_o: - return await view_opinion_old(request, pk, "str") if not any([ui_flag_for_o]): return await view_opinion_old(request, cluster.pk, "str") From b580beacc9f928104a89b85f52ff3e09898584fa Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 22 Nov 2024 15:31:54 -0600 Subject: [PATCH 087/143] fix(elasticsearch): Improved set_results_child_docs helper method --- cl/lib/elasticsearch_utils.py | 40 +++++++++++++---------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index a3c2019095..92594a0eb1 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3289,33 +3289,23 @@ def set_results_child_docs( """ for result in results: - child_result_objects = [] - child_docs = None - - # Get child_docs based on result type - if isinstance(result, dict): - child_docs = result.get("child_docs") - elif hasattr(result, "child_docs"): - child_docs = result.child_docs - - # Process child documents if they exist - if child_docs: - for child_doc in child_docs: - if isinstance(result, dict): - child_result_objects.append(child_doc) - else: - child_result_objects.append( - defaultdict( - lambda: None, - child_doc["_source"].to_dict(), - ) - ) - - # Set processed child docs back to result - result["child_docs"] = child_result_objects + result_is_dict = isinstance(result, dict) + if result_is_dict: + # If the result is a dictionary, do nothing, or assign [] to + # child_docs if it is not present. + child_docs = result.get("child_docs", []) + result["child_docs"] = child_docs + else: + # Process child hits if the result is an ES AttrDict instance, + # so they can be properly serialized. 
+ child_docs = getattr(result, "child_docs", []) + result["child_docs"] = [ + defaultdict(lambda: None, doc["_source"].to_dict()) + for doc in child_docs + ] # Optionally merges highlights. Used for integrating percolator # highlights into the percolated document. - if merge_highlights and isinstance(result, dict): + if merge_highlights and result_is_dict: meta_hl = result.get("meta", {}).get("highlight", {}) merge_highlights_into_result(meta_hl, result) From c05c40a4061f5d360daec4f11abe078a247501ab Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 22 Nov 2024 16:14:26 -0600 Subject: [PATCH 088/143] feat(casenames): refactor code to parse and add citations --- .../commands/update_casenames_wl_dataset.py | 101 +++++++++++------- 1 file changed, 65 insertions(+), 36 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 49c51b1582..8f203ad8ba 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -5,11 +5,13 @@ import pandas as pd from django.core.management.base import BaseCommand, CommandError +from django.db import transaction from django.db.models import Q, QuerySet from eyecite import get_citations from eyecite.models import FullCaseCitation from eyecite.tokenizers import HyperscanTokenizer +from cl.citations.utils import map_reporter_db_cite_type from cl.corpus_importer.utils import add_citations_to_cluster from cl.search.models import Citation, OpinionCluster @@ -127,25 +129,34 @@ def parse_citations(citation_strings: list[str]) -> list[dict]: for cite_str in citation_strings: # Get citations from the string found_cites = get_citations(cite_str, tokenizer=HYPERSCAN_TOKENIZER) + if not found_cites: + continue + + citation = found_cites[0] # Ensure we have valid citations to process - for citation in found_cites: - if isinstance(citation, FullCaseCitation): - volume = citation.groups.get("volume") - - # Validate the volume - if volume and volume.isdigit(): - # Append the validated citation as a dictionary - validated_citations.append( - { - "volume": citation.groups["volume"], - "reporter": citation.corrected_reporter(), - "page": citation.groups["page"], - } - ) - else: - # If volume is invalid, skip this citation - continue + if isinstance(citation, FullCaseCitation): + volume = citation.groups.get("volume") + + # Validate the volume + if not volume or not volume.isdigit(): + continue + + if not citation.corrected_reporter(): + reporter_type = Citation.STATE + else: + cite_type_str = citation.all_editions[0].reporter.cite_type + reporter_type = map_reporter_db_cite_type(cite_type_str) + + # Append the validated citation as a dictionary + validated_citations.append( + { + "volume": citation.groups["volume"], + "reporter": citation.corrected_reporter(), + "page": citation.groups["page"], + "type": reporter_type, + } + ) return validated_citations @@ -305,28 +316,46 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: # Dry run, don't save anything continue - # Update case names - cluster_updated, docket_updated = update_matched_case_name( - matches[0].cluster, west_case_name - ) - - if cluster_updated: - total_clusters_updated = +1 + with transaction.atomic(): + matched_cluster = matches[0].cluster - if docket_updated: - total_dockets_updated = +1 + # Update case names + cluster_updated, docket_updated = update_matched_case_name( + 
matched_cluster, west_case_name
+ )
+
+ if cluster_updated:
+ total_clusters_updated += 1
+
+ if docket_updated:
+ total_dockets_updated += 1
+
+ # Add any of the citations if possible
+ for citation in valid_citations:
+ if Citation.objects.filter(
+ cluster_id=matched_cluster.id,
+ reporter=citation.get("reporter"),
+ ).exists():
+ # Avoid adding a citation if we already have a citation from the
+ # citation's reporter.
+ logger.info(
+ f"Can't add: {citation.get('volume')} {citation.get('reporter')} {citation.get('page')} to cluster id: {matched_cluster.id}. There is already "
+ f"a citation from that reporter."
+ )
+ continue
+ citation["cluster_id"] = matched_cluster.id
+ Citation.objects.get_or_create(**citation)
+
+ add_citations_to_cluster(
+ [
+ f"{cite.get('volume')} {cite.get('reporter')} {cite.get('page')}"
+ for cite in valid_citations
+ ],
+ matches[0].cluster_id,
+ )

- # Wait between each processed row to avoid sending to many indexing tasks
- time.sleep(delay)
+ # Wait between each processed row to avoid sending too many indexing tasks
+ time.sleep(delay)

 if not dry_run:
 logger.info(f"Clusters updated: {total_clusters_updated}")

From a6b05f14b60acdf69649100dac0861e432feea5d Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Fri, 22 Nov 2024 16:59:26 -0600
Subject: [PATCH 089/143] fix(elasticsearch): Solved merge conflicts

- Removed score from Opinion Search Alert webhooks
---
 .../management/commands/cl_send_alerts.py | 4 ++--
 cl/alerts/tests/tests.py | 18 ++++++++++++++++++
 cl/api/tasks.py | 4 ++--
 cl/api/webhooks.py | 4 ++--
 cl/lib/elasticsearch_utils.py | 12 ++++++++++--
 cl/search/api_serializers.py | 16 +++++++++++++++-
 cl/search/api_utils.py | 7 ++-----
 cl/tests/cases.py | 5 +++--
 8 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/cl/alerts/management/commands/cl_send_alerts.py b/cl/alerts/management/commands/cl_send_alerts.py
index 19eeb511fd..4792e78fd2 100644
--- a/cl/alerts/management/commands/cl_send_alerts.py
+++ b/cl/alerts/management/commands/cl_send_alerts.py
@@ -26,7 +26,7 @@
 from cl.lib.elasticsearch_utils import (
 do_es_api_query,
 limit_inner_hits,
- set_results_child_docs,
+ set_child_docs_and_score,
 set_results_highlights,
 )
 from cl.lib.scorched_utils import ExtraSolrInterface
@@ -161,7 +161,7 @@ def query_alerts_es(
 results = responses[0]
 limit_inner_hits({}, results, cd["type"])
 set_results_highlights(results, cd["type"])
- set_results_child_docs(results)
+ set_child_docs_and_score(results)
 if v1_webhook:
 v1_results = responses[1]
 return results, v1_results
diff --git a/cl/alerts/tests/tests.py b/cl/alerts/tests/tests.py
index b048ed9507..990658c6a3 100644
--- a/cl/alerts/tests/tests.py
+++ b/cl/alerts/tests/tests.py
@@ -963,6 +963,24 @@ def test_send_search_alert_webhooks(self):
 "opinions",
 )

+ # Assert HL content in V2 webhooks.
+ self._assert_webhook_hit_hl( + webhook_events, + self.search_alert.name, + "caseName", + "California vs Lorem", + child_field=False, + nested_field="opinions", + ) + self._assert_webhook_hit_hl( + webhook_events, + self.search_alert.name, + "snippet", + "Lorem dolor california sit amet, consectetur adipiscing elit.", + child_field=True, + nested_field="opinions", + ) + # Assert V1 Opinion Search Alerts Webhook self._count_webhook_hits_and_child_hits( list(webhook_events), diff --git a/cl/api/tasks.py b/cl/api/tasks.py index c236367351..39c5fe7533 100644 --- a/cl/api/tasks.py +++ b/cl/api/tasks.py @@ -11,7 +11,7 @@ from cl.api.webhooks import send_webhook_event from cl.celery_init import app from cl.corpus_importer.api_serializers import DocketEntrySerializer -from cl.lib.elasticsearch_utils import set_results_child_docs +from cl.lib.elasticsearch_utils import set_child_docs_and_score from cl.search.api_serializers import ( RECAPESWebhookResultSerializer, V3OAESResultSerializer, @@ -152,7 +152,7 @@ def send_search_alert_webhook_es( es_results, many=True ).data case SEARCH_TYPES.RECAP: - set_results_child_docs(results, merge_highlights=True) + set_child_docs_and_score(results, merge_highlights=True) serialized_results = RECAPESWebhookResultSerializer( results, many=True ).data diff --git a/cl/api/webhooks.py b/cl/api/webhooks.py index b46c106b86..15f1d3cabf 100644 --- a/cl/api/webhooks.py +++ b/cl/api/webhooks.py @@ -28,7 +28,7 @@ from cl.recap.api_serializers import PacerFetchQueueSerializer from cl.recap.models import PROCESSING_STATUS, PacerFetchQueue from cl.search.api_serializers import ( - OpinionClusterESResultSerializer, + OpinionClusterWebhookResultSerializer, SearchResultSerializer, V3OpinionESResultSerializer, ) @@ -205,7 +205,7 @@ def send_search_alert_webhook( many=True, ).data case WebhookVersions.v2: - serialized_results = OpinionClusterESResultSerializer( + serialized_results = OpinionClusterWebhookResultSerializer( results, many=True, ).data diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 7b7cd6fc7e..119f78a754 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3316,8 +3316,10 @@ def simplify_estimated_count(search_count: int) -> int: return search_count -def set_results_child_docs( - results: list[Hit] | list[dict[str, Any]], merge_highlights: bool = False +def set_child_docs_and_score( + results: list[Hit] | list[dict[str, Any]] | Response, + merge_highlights: bool = False, + merge_score: bool = False, ) -> None: """Process and attach child documents to the main search results. @@ -3325,6 +3327,8 @@ def set_results_child_docs( or a list of dicts. :param merge_highlights: A boolean indicating whether to merge highlight data into the results. + :param merge_score: A boolean indicating whether to merge + the BM25 score into the results. :return: None. Results are modified in place. """ @@ -3349,3 +3353,7 @@ def set_results_child_docs( if merge_highlights and result_is_dict: meta_hl = result.get("meta", {}).get("highlight", {}) merge_highlights_into_result(meta_hl, result) + + # Optionally merges the BM25 score for display in the API. 
+ if merge_score and isinstance(result, Response): + result["bm25_score"] = result.meta.score diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index f27053e95d..31752a79af 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -619,7 +619,7 @@ class Meta: ) -class OpinionClusterESResultSerializer(MainMetaMixin, DocumentSerializer): +class OpinionClusterBaseESResultSerializer(DocumentSerializer): """The serializer for OpinionCluster Search results.""" opinions = OpinionDocumentESResultSerializer( @@ -649,6 +649,20 @@ class Meta: ) +class OpinionClusterESResultSerializer( + OpinionClusterBaseESResultSerializer, MainMetaMixin +): + """The serializer for OpinionCluster Search results.""" + + +class OpinionClusterWebhookResultSerializer( + OpinionClusterBaseESResultSerializer +): + """The serializer class for OpinionCluster search Webhooks results.""" + + meta = BaseMetaDataSerializer(source="*", read_only=True) + + class PositionESResultSerializer(ChildMetaMixin, DocumentSerializer): """The serializer for Positions Search results.""" diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py index f7f9585148..f5c22e388a 100644 --- a/cl/search/api_utils.py +++ b/cl/search/api_utils.py @@ -19,7 +19,7 @@ do_es_api_query, limit_inner_hits, merge_unavailable_fields_on_parent_document, - set_results_child_docs, + set_child_docs_and_score, set_results_highlights, ) from cl.lib.scorched_utils import ExtraSolrInterface @@ -475,10 +475,7 @@ def process_results(self, results: Response) -> None: "v4", self.clean_data["highlight"], ) - set_results_child_docs(results) - for result in results: - # Include the ES main document score as bm25_score. - result["bm25_score"] = result.meta.score + set_child_docs_and_score(results, merge_score=True) if self.reverse: # If doing backward pagination, reverse the results of the current diff --git a/cl/tests/cases.py b/cl/tests/cases.py index 2b549f741a..0b95c311e1 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -663,6 +663,7 @@ def _assert_webhook_hit_hl( field_name, hl_expected, child_field, + nested_field="recap_documents", ): """Assert Hl in webhook fields.""" for webhook in webhooks: @@ -671,10 +672,10 @@ def _assert_webhook_hit_hl( if child_field: self.assertNotIn( "score", - hit["recap_documents"][0]["meta"], + hit[nested_field][0]["meta"], msg="score shouldn't be present on webhook nested documents", ) - child_field_content = hit["recap_documents"][0][field_name] + child_field_content = hit[nested_field][0][field_name] self.assertIn( hl_expected, child_field_content, From d12e30b648ca3d41777726181c940fc964fdc952 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 22 Nov 2024 17:20:44 -0600 Subject: [PATCH 090/143] fix(api): Fixed score merge and improved related test --- cl/lib/elasticsearch_utils.py | 2 +- cl/tests/cases.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 119f78a754..1b9366f713 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3355,5 +3355,5 @@ def set_child_docs_and_score( merge_highlights_into_result(meta_hl, result) # Optionally merges the BM25 score for display in the API. 
- if merge_score and isinstance(result, Response): + if merge_score and isinstance(result, AttrDict): result["bm25_score"] = result.meta.score diff --git a/cl/tests/cases.py b/cl/tests/cases.py index 0b95c311e1..8b23dea418 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -270,6 +270,11 @@ async def _compare_field( set(meta_expected_value.keys()), f"The keys in field '{meta_field}' do not match.", ) + for score_value in meta_value.values(): + self.assertIsNotNone( + score_value, f"The score value can't be None." + ) + else: self.assertEqual( meta_value, From 9bef8df75773e891b7994c710df33a7b2d4f9ef0 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 22 Nov 2024 17:53:33 -0600 Subject: [PATCH 091/143] feat(casenames): add new date format found in dataset --- .../management/commands/update_casenames_wl_dataset.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 8f203ad8ba..fe7eeeefe6 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -41,7 +41,14 @@ "for", } -DATE_FORMATS = ("%B %d, %Y", "%d-%b-%y", "%m/%d/%Y", "%m/%d/%y", "%b. %d, %Y") +DATE_FORMATS = ( + "%B %d, %Y", + "%d-%b-%y", + "%m/%d/%Y", + "%m/%d/%y", + "%b. %d, %Y", + "%Y-%m-%d", +) def tokenize_case_name(case_name: str) -> set[str]: @@ -105,6 +112,7 @@ def parse_date(date_str: str) -> date | None: 12/1/1960 26-Sep-00 Feb. 28, 2001 + 2007-01-24 :param date_str: date string :return: date object or none From 9c1b7cc3d61fecd295c128eb7d11df3ecbd4048b Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 22 Nov 2024 18:37:11 -0600 Subject: [PATCH 092/143] fix(api): Fixed people V4 API test to return scores --- cl/search/tests/tests_es_person.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py index 6c59b01cf6..eb82285286 100644 --- a/cl/search/tests/tests_es_person.py +++ b/cl/search/tests/tests_es_person.py @@ -616,6 +616,7 @@ async def test_results_api_fields(self) -> None: search_params = { "type": SEARCH_TYPES.PEOPLE, "q": f"id:{self.person_2.pk} AND nomination_process:(U.S. Senate)", + "order_by": "score desc", } # API r = await self._test_api_results_count(search_params, 1, "API fields") @@ -662,6 +663,7 @@ def test_results_api_empty_fields(self) -> None: search_params = { "type": SEARCH_TYPES.PEOPLE, "q": f"id:{person.pk}", + "order_by": "score desc", } # API r = async_to_sync(self._test_api_results_count)( @@ -869,6 +871,7 @@ async def test_results_api_highlighted_fields(self) -> None: "q": f"id:{self.person_2.pk} name:Sheindlin dob_city:Brookyln nomination_process:(U.S. Senate) political_affiliation:Democratic", "school": "New York Law School", "dob_state": "NY", + "order_by": "score desc", } # Judged Search type HL disabled. 
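A standalone sketch of the multi-format loop the DATE_FORMATS tuple feeds
(the real parse_date lives in update_casenames_wl_dataset.py; this copy and
its assertion are illustrative only):

    from datetime import date, datetime

    DATE_FORMATS = (
        "%B %d, %Y",
        "%d-%b-%y",
        "%m/%d/%Y",
        "%m/%d/%y",
        "%b. %d, %Y",
        "%Y-%m-%d",
    )

    def parse_date(date_str: str) -> date | None:
        for fmt in DATE_FORMATS:
            try:
                return datetime.strptime(date_str.strip(), fmt).date()
            except ValueError:
                continue  # not this format; try the next one
        return None

    # Exercises the %Y-%m-%d format added in PATCH 091
    assert parse_date("2007-01-24") == date(2007, 1, 24)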
From 73fa6e5d19c24dd7196695fc965ff882af89ad66 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 25 Nov 2024 10:22:15 -0600 Subject: [PATCH 093/143] fix(pacer_free_opinions): add ReadError to get_and_process_free_pdf decorator reduce to one second the sleep between courts cycle --- .../management/commands/scrape_pacer_free_opinions.py | 4 ++-- cl/corpus_importer/tasks.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py index 43611d240f..08b2de837d 100644 --- a/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py +++ b/cl/corpus_importer/management/commands/scrape_pacer_free_opinions.py @@ -331,10 +331,10 @@ def get_pdfs( throttle.update_min_items(min_items) logger.info( - f"Court cycle completed for: {row.court_id}. Current iteration: {cycle_checker.current_iteration}. Sleep 2 seconds " + f"Court cycle completed for: {row.court_id}. Current iteration: {cycle_checker.current_iteration}. Sleep 1 second " f"before starting the next cycle." ) - time.sleep(2) + time.sleep(1) logger.info(f"Processing row id: {row.id} from {row.court_id}") c = chain( process_free_opinion_result.si( diff --git a/cl/corpus_importer/tasks.py b/cl/corpus_importer/tasks.py index 8ed46333f7..bfa21e43b5 100644 --- a/cl/corpus_importer/tasks.py +++ b/cl/corpus_importer/tasks.py @@ -25,6 +25,7 @@ from httpx import ( HTTPStatusError, NetworkError, + ReadError, RemoteProtocolError, TimeoutException, ) @@ -598,6 +599,7 @@ def process_free_opinion_result( ConnectionError, ReadTimeout, RedisConnectionError, + ReadError, ), max_retries=15, interval_start=5, From 249d8fcb6627e727a154aec2181c3ba0ea306125 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 25 Nov 2024 10:22:59 -0600 Subject: [PATCH 094/143] fix(api): Prioritize DjangoModelPermissionsOrAnonReadOnly when checking V3APIPermission --- cl/api/api_permissions.py | 15 +++++++++++---- cl/api/tests.py | 23 +++++++++++++++++++++++ cl/disclosures/api_views.py | 1 + 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/cl/api/api_permissions.py b/cl/api/api_permissions.py index 6562da789f..b73fc5fcae 100644 --- a/cl/api/api_permissions.py +++ b/cl/api/api_permissions.py @@ -2,9 +2,9 @@ from django.conf import settings from django.contrib.auth.models import AnonymousUser, User -from django.http import HttpRequest from rest_framework import permissions from rest_framework.exceptions import PermissionDenied +from rest_framework.request import Request from rest_framework.views import APIView from cl.lib.redis_utils import get_redis_interface @@ -19,7 +19,10 @@ def has_object_permission(self, request, view, obj): return obj.user == request.user -class V3APIPermission(permissions.BasePermission): +class V3APIPermission( + permissions.DjangoModelPermissionsOrAnonReadOnly, + permissions.BasePermission, +): r = get_redis_interface("STATS") v3_blocked_message = ( @@ -52,7 +55,7 @@ def is_user_v3_blocked(self, user: User) -> bool: return is_blocked_user @staticmethod - def is_v3_api_request(request: HttpRequest) -> bool: + def is_v3_api_request(request: Request) -> bool: return getattr(request, "version", None) == "v3" @staticmethod @@ -62,7 +65,7 @@ def check_request() -> bool: return True return False - def has_permission(self, request: HttpRequest, view: APIView) -> bool: + def has_permission(self, request: Request, view: APIView) -> bool: """Check if the user has permission to access the V3 
API. :param request: The HTTPRequest object. @@ -70,6 +73,10 @@ def has_permission(self, request: HttpRequest, view: APIView) -> bool: :return: True if the user has permission to access V3, False if not. """ + # Prioritize DjangoModelPermissionsOrAnonReadOnly permissions + if not super().has_permission(request, view): + return False + if ( not self.is_v3_api_request(request) or not settings.BLOCK_NEW_V3_USERS # type: ignore diff --git a/cl/api/tests.py b/cl/api/tests.py index 63a8e14aa5..238c0d04a7 100644 --- a/cl/api/tests.py +++ b/cl/api/tests.py @@ -484,6 +484,8 @@ def setUpTestData(cls) -> None: cls.audio_path_v3 = reverse("audio-list", kwargs={"version": "v3"}) cls.audio_path_v4 = reverse("audio-list", kwargs={"version": "v4"}) + cls.debt_path_v4 = reverse("debt-list", kwargs={"version": "v4"}) + cls.debt_path_v3 = reverse("debt-list", kwargs={"version": "v3"}) def setUp(self) -> None: self.r = get_redis_interface("STATS") @@ -595,6 +597,27 @@ async def test_allow_v4_for_anonymous_users(self, mock_api_prefix) -> None: response = await self.async_client.get(self.audio_path_v4) self.assertEqual(response.status_code, HTTPStatus.OK) + async def test_confirm_v4_post_requests_are_not_allowed( + self, mock_api_prefix + ) -> None: + """Confirm V4 users are not allowed to POST requests.""" + response = await self.client_2.post(self.debt_path_v4, {}) + self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN) + + async def test_confirm_v3_post_requests_are_not_allowed( + self, mock_api_prefix + ) -> None: + """Confirm V3 users are not allowed to POST requests.""" + response = await self.client_2.post(self.debt_path_v3, {}) + self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN) + + async def test_confirm_anonymous_post_requests_are_not_allowed( + self, mock_api_prefix + ) -> None: + """Confirm anonymous users are not allowed to POST requests.""" + response = await self.async_client.post(self.debt_path_v4, {}) + self.assertEqual(response.status_code, HTTPStatus.UNAUTHORIZED) + class DRFOrderingTests(TestCase): """Does ordering work generally and specifically?""" diff --git a/cl/disclosures/api_views.py b/cl/disclosures/api_views.py index 1c1be6f3a4..98f03e67fc 100644 --- a/cl/disclosures/api_views.py +++ b/cl/disclosures/api_views.py @@ -1,4 +1,5 @@ from rest_framework import viewsets +from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly from cl.api.api_permissions import V3APIPermission from cl.api.utils import LoggingMixin From c218189b347d8ecaef57a2e32f85bdd005d42a1b Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 25 Nov 2024 10:47:16 -0600 Subject: [PATCH 095/143] fix(api): Added DjangoModelPermissionsOrAnonReadOnly to required api views --- cl/api/api_permissions.py | 9 +------- cl/audio/api_views.py | 6 ++++- cl/disclosures/api_views.py | 45 ++++++++++++++++++++++++++++-------- cl/people_db/api_views.py | 46 +++++++++++++++++++++++++++++-------- cl/recap/views.py | 10 ++++++-- cl/search/api_views.py | 31 ++++++++++++++++++++----- 6 files changed, 112 insertions(+), 35 deletions(-) diff --git a/cl/api/api_permissions.py b/cl/api/api_permissions.py index b73fc5fcae..c5af0d0696 100644 --- a/cl/api/api_permissions.py +++ b/cl/api/api_permissions.py @@ -19,10 +19,7 @@ def has_object_permission(self, request, view, obj): return obj.user == request.user -class V3APIPermission( - permissions.DjangoModelPermissionsOrAnonReadOnly, - permissions.BasePermission, -): +class V3APIPermission(permissions.BasePermission): r = get_redis_interface("STATS") 
v3_blocked_message = ( @@ -73,10 +70,6 @@ def has_permission(self, request: Request, view: APIView) -> bool: :return: True if the user has permission to access V3, False if not. """ - # Prioritize DjangoModelPermissionsOrAnonReadOnly permissions - if not super().has_permission(request, view): - return False - if ( not self.is_v3_api_request(request) or not settings.BLOCK_NEW_V3_USERS # type: ignore diff --git a/cl/audio/api_views.py b/cl/audio/api_views.py index a444db4a98..fa6d518ec9 100644 --- a/cl/audio/api_views.py +++ b/cl/audio/api_views.py @@ -1,4 +1,5 @@ from rest_framework import viewsets +from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly from cl.api.api_permissions import V3APIPermission from cl.api.utils import LoggingMixin @@ -10,7 +11,10 @@ class AudioViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = AudioSerializer filterset_class = AudioFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", diff --git a/cl/disclosures/api_views.py b/cl/disclosures/api_views.py index 98f03e67fc..64ce52bac4 100644 --- a/cl/disclosures/api_views.py +++ b/cl/disclosures/api_views.py @@ -41,7 +41,10 @@ class AgreementViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Agreement.objects.all().order_by("-id") serializer_class = AgreementSerializer - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") filterset_class = AgreementFilter # Default cursor ordering key @@ -57,7 +60,10 @@ class AgreementViewSet(LoggingMixin, viewsets.ModelViewSet): class DebtViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Debt.objects.all().order_by("-id") serializer_class = DebtSerializer - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") filterset_class = DebtFilter # Default cursor ordering key @@ -88,7 +94,10 @@ class FinancialDisclosureViewSet(LoggingMixin, viewsets.ModelViewSet): ) serializer_class = FinancialDisclosureSerializer filterset_class = FinancialDisclosureFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -104,7 +113,10 @@ class GiftViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Gift.objects.all().order_by("-id") serializer_class = GiftSerializer filterset_class = GiftFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -120,7 +132,10 @@ class InvestmentViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Investment.objects.all().order_by("-id") serializer_class = InvestmentSerializer filterset_class = InvestmentFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -136,7 +151,10 @@ class NonInvestmentIncomeViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = 
NonInvestmentIncome.objects.all().order_by("-id") serializer_class = NonInvestmentIncomeSerializer filterset_class = NonInvestmentIncomeFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -152,7 +170,10 @@ class PositionViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Position.objects.all().order_by("-id") serializer_class = PositionSerializer filterset_class = PositionFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -168,7 +189,10 @@ class ReimbursementViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Reimbursement.objects.all().order_by("-id") serializer_class = ReimbursementSerializer filterset_class = ReimbursementFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -184,7 +208,10 @@ class SpouseIncomeViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = SpouseIncome.objects.all().order_by("-id") serializer_class = SpouseIncomeSerializer filterset_class = SpouseIncomeFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" diff --git a/cl/people_db/api_views.py b/cl/people_db/api_views.py index c593c1789a..7675ef32b6 100644 --- a/cl/people_db/api_views.py +++ b/cl/people_db/api_views.py @@ -1,5 +1,6 @@ from django.db.models import Exists, OuterRef, Prefetch from rest_framework import viewsets +from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly from cl.api.api_permissions import V3APIPermission from cl.api.pagination import TinyAdjustablePagination @@ -90,7 +91,10 @@ class PersonDisclosureViewSet(viewsets.ModelViewSet): serializer_class = PersonDisclosureSerializer filterset_class = PersonDisclosureFilter pagination_class = TinyAdjustablePagination - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -122,7 +126,10 @@ class PersonViewSet(LoggingMixin, viewsets.ModelViewSet): ) serializer_class = PersonSerializer filterset_class = PersonFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -145,7 +152,10 @@ class PositionViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Position.objects.all().order_by("-id") serializer_class = PositionSerializer filterset_class = PositionFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -175,7 +185,10 @@ class RetentionEventViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = RetentionEvent.objects.all().order_by("-id") serializer_class = RetentionEventSerializer filterset_class = RetentionEventFilter - permission_classes = [V3APIPermission] + permission_classes = [ + 
DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified", "date_retention") # Default cursor ordering key ordering = "-id" @@ -191,7 +204,10 @@ class EducationViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Education.objects.all().order_by("-id") serializer_class = EducationSerializer filterset_class = EducationFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified") # Default cursor ordering key ordering = "-id" @@ -207,7 +223,10 @@ class SchoolViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = School.objects.all().order_by("-id") serializer_class = SchoolSerializer filterset_class = SchoolFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ("id", "date_created", "date_modified", "name") # Default cursor ordering key ordering = "-id" @@ -223,7 +242,10 @@ class PoliticalAffiliationViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = PoliticalAffiliation.objects.all().order_by("-id") serializer_class = PoliticalAffiliationSerializer filterset_class = PoliticalAffiliationFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -245,7 +267,10 @@ class SourceViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = Source.objects.all().order_by("-id") serializer_class = SourceSerializer filterset_class = SourceFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_modified", @@ -261,7 +286,10 @@ class ABARatingViewSet(LoggingMixin, viewsets.ModelViewSet): queryset = ABARating.objects.all().order_by("-id") serializer_class = ABARatingSerializer filterset_class = ABARatingFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", diff --git a/cl/recap/views.py b/cl/recap/views.py index 9bb70cb6cf..f2383b8868 100644 --- a/cl/recap/views.py +++ b/cl/recap/views.py @@ -3,7 +3,10 @@ from asgiref.sync import async_to_sync, sync_to_async from django.contrib.auth.models import User from rest_framework.exceptions import ValidationError -from rest_framework.permissions import IsAuthenticatedOrReadOnly +from rest_framework.permissions import ( + DjangoModelPermissionsOrAnonReadOnly, + IsAuthenticatedOrReadOnly, +) from rest_framework.viewsets import ModelViewSet from cl.api.api_permissions import V3APIPermission @@ -179,7 +182,10 @@ class FjcIntegratedDatabaseViewSet(LoggingMixin, ModelViewSet): queryset = FjcIntegratedDatabase.objects.all().order_by("-id") serializer_class = FjcIntegratedDatabaseSerializer filterset_class = FjcIntegratedDatabaseFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", diff --git a/cl/search/api_views.py b/cl/search/api_views.py index 2a2ca2eeeb..df798edd6f 100644 --- a/cl/search/api_views.py +++ b/cl/search/api_views.py @@ -4,6 +4,7 @@ from rest_framework import pagination, permissions, response, viewsets from rest_framework.exceptions import NotFound from rest_framework.pagination import 
PageNumberPagination +from rest_framework.permissions import DjangoModelPermissionsOrAnonReadOnly from cl.api.api_permissions import V3APIPermission from cl.api.pagination import ESCursorPagination @@ -65,7 +66,10 @@ class OriginatingCourtInformationViewSet(viewsets.ModelViewSet): serializer_class = OriginalCourtInformationSerializer - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] # Default cursor ordering key ordering = "-id" # Additional cursor ordering fields @@ -80,7 +84,10 @@ class OriginatingCourtInformationViewSet(viewsets.ModelViewSet): class DocketViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = DocketSerializer filterset_class = DocketFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -171,7 +178,10 @@ class RECAPDocumentViewSet( class CourtViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = CourtSerializer filterset_class = CourtFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_modified", @@ -191,7 +201,10 @@ class CourtViewSet(LoggingMixin, viewsets.ModelViewSet): class OpinionClusterViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = OpinionClusterSerializer filterset_class = OpinionClusterFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -216,7 +229,10 @@ class OpinionClusterViewSet(LoggingMixin, viewsets.ModelViewSet): class OpinionViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = OpinionSerializer filterset_class = OpinionFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] ordering_fields = ( "id", "date_created", @@ -240,7 +256,10 @@ class OpinionViewSet(LoggingMixin, viewsets.ModelViewSet): class OpinionsCitedViewSet(LoggingMixin, viewsets.ModelViewSet): serializer_class = OpinionsCitedSerializer filterset_class = OpinionsCitedFilter - permission_classes = [V3APIPermission] + permission_classes = [ + DjangoModelPermissionsOrAnonReadOnly, + V3APIPermission, + ] # Default cursor ordering key ordering = "-id" # Additional cursor ordering fields From 3337f5bfb273cc2ba3846310f828e3962eeae602 Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 25 Nov 2024 12:21:51 -0500 Subject: [PATCH 096/143] feat(scrapers.admin): create admin page for a scraper status page - Includes a migration file for the materialized view - MV will have to be refreshed manually or via a cronjob - MV considers only courts that have an active scraper, and that have no updates in a week --- cl/scrapers/admin.py | 59 ++++------------ .../0004_create_mv_latest_opinion.py | 69 +++++++++++++++++++ .../0004_create_mv_latest_opinion.sql | 49 +++++++++++++ ...004_create_mv_latest_opinion_customers.sql | 49 +++++++++++++ 4 files changed, 181 insertions(+), 45 deletions(-) create mode 100644 cl/scrapers/migrations/0004_create_mv_latest_opinion.py create mode 100644 cl/scrapers/migrations/0004_create_mv_latest_opinion.sql create mode 100644 cl/scrapers/migrations/0004_create_mv_latest_opinion_customers.sql diff --git a/cl/scrapers/admin.py b/cl/scrapers/admin.py index c7cc689a6e..56ec54df03 100644 --- 
a/cl/scrapers/admin.py
+++ b/cl/scrapers/admin.py
@@ -32,68 +32,37 @@ class PACERFreeDocumentRowAdmin(admin.ModelAdmin):
 admin.site.register(UrlHash)
 
 
-class MVLatestOpinions(models.Model):
+class MVLatestOpinion(models.Model):
     """
     Model linked to materialized view for monitoring scrapers
 
+    The SQL for creating the view is in its migration file.
+
+    Must use `REFRESH MATERIALIZED VIEW scrapers_mv_latest_opinion`
+    periodically
     """
 
-    query = """
-    CREATE MATERIALIZED VIEW
-    scrapers_mv_latest_opinion
-    AS
-    (
-    SELECT
-        court_id,
-        max(so.date_created) as latest_creation_date,
-        (now() - max(so.date_created))::text as time_since
-    FROM
-        (
-        SELECT id, court_id
-        FROM search_docket
-        WHERE court_id IN (
-            SELECT id
-            FROM search_court
-            /*
-            Only check courts with scrapers in use
-            */
-            WHERE
-                has_opinion_scraper
-                AND in_use
-        )
-        ) sd
-    INNER JOIN
-        (SELECT id, docket_id FROM search_opinioncluster) soc ON soc.docket_id = sd.id
-    INNER JOIN
-        search_opinion so ON so.cluster_id = soc.id
-    GROUP BY
-        sd.court_id
-    HAVING
-        /*
-        Only return results for courts with no updates in a week
-        */
-        now() - max(so.date_created) > interval '7 days'
-    ORDER BY
-        2 DESC
-    )
-    """
     # a django model must have a primary key
     court_id = models.TextField(primary_key=True)
-    latest_creation_date = models.DateField()
+    latest_creation_date = models.DateTimeField()
     time_since = models.TextField()
+    view_last_updated = models.DateTimeField()
 
     class Meta:
-        managed = False  # ignore this model in migrations
+        managed = False
         db_table = "scrapers_mv_latest_opinion"
 
 
-@admin.register(MVLatestOpinions)
-class MVLatestOpinionsAdmin(admin.ModelAdmin):
+@admin.register(MVLatestOpinion)
+class MVLatestOpinionAdmin(admin.ModelAdmin):
     """Admin page to look at the latest opinion for each court
 
     Use this to monitor silently failing scrapers
     """
 
-    list_display = ["court_id", "latest_creation_date", "time_since"]
+    list_display = [
+        "court_id",
+        "latest_creation_date",
+        "time_since",
+        "view_last_updated",
+    ]
diff --git a/cl/scrapers/migrations/0004_create_mv_latest_opinion.py b/cl/scrapers/migrations/0004_create_mv_latest_opinion.py
new file mode 100644
index 0000000000..4570c75d97
--- /dev/null
+++ b/cl/scrapers/migrations/0004_create_mv_latest_opinion.py
@@ -0,0 +1,69 @@
+# Generated by Django 5.1.2 on 2024-11-25 15:27
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("scrapers", "0003_delete_errorlog"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="MVLatestOpinion",
+            fields=[
+                (
+                    "court_id",
+                    models.TextField(primary_key=True, serialize=False),
+                ),
+                ("latest_creation_date", models.DateTimeField()),
+                ("time_since", models.TextField()),
+                ("view_last_updated", models.DateTimeField()),
+            ],
+            options={
+                "db_table": "scrapers_mv_latest_opinion",
+                "managed": False,
+            },
+        ),
+        migrations.RunSQL("""
+        CREATE MATERIALIZED VIEW IF NOT EXISTS
+        scrapers_mv_latest_opinion
+        AS
+        (
+        SELECT
+            court_id,
+            max(so.date_created) as latest_creation_date,
+            DATE_TRUNC('minutes', (now() - max(so.date_created)))::text as time_since,
+            now() as view_last_updated
+        FROM
+            (
+            SELECT id, court_id
+            FROM search_docket
+            WHERE court_id IN (
+                SELECT id
+                FROM search_court
+                /*
+                Only check courts with scrapers in use
+                */
+                WHERE
+                    has_opinion_scraper
+                    AND in_use
+            )
+            ) sd
+        INNER JOIN
+            (SELECT id, docket_id FROM search_opinioncluster) soc ON soc.docket_id = sd.id
+        INNER JOIN
+            search_opinion so ON so.cluster_id = soc.id
+        GROUP BY
+            sd.court_id
+        HAVING
+            /*
+            Only
return results for courts with no updates in a week + */ + now() - max(so.date_created) > interval '7 days' + ORDER BY + 2 DESC + ) + """) + ] diff --git a/cl/scrapers/migrations/0004_create_mv_latest_opinion.sql b/cl/scrapers/migrations/0004_create_mv_latest_opinion.sql new file mode 100644 index 0000000000..45c212298e --- /dev/null +++ b/cl/scrapers/migrations/0004_create_mv_latest_opinion.sql @@ -0,0 +1,49 @@ +BEGIN; +-- +-- Create model MVLatestOpinion +-- +-- (no-op) +-- +-- Raw SQL operation +-- + + CREATE MATERIALIZED VIEW IF NOT EXISTS + scrapers_mv_latest_opinion + AS + ( + SELECT + court_id, + max(so.date_created) as latest_creation_date, + DATE_TRUNC('minutes', (now() - max(so.date_created)))::text as time_since, + now() as view_last_updated + FROM + ( + SELECT id, court_id + FROM search_docket + WHERE court_id IN ( + SELECT id + FROM search_court + /* + Only check courts with scrapers in use + */ + WHERE + has_opinion_scraper + AND in_use + ) + ) sd + INNER JOIN + (SELECT id, docket_id FROM search_opinioncluster) soc ON soc.docket_id = sd.id + INNER JOIN + search_opinion so ON so.cluster_id = soc.id + GROUP BY + sd.court_id + HAVING + /* + Only return results for courts with no updates in a week + */ + now() - max(so.date_created) > interval '7 days' + ORDER BY + 2 DESC + ) + ; +COMMIT; diff --git a/cl/scrapers/migrations/0004_create_mv_latest_opinion_customers.sql b/cl/scrapers/migrations/0004_create_mv_latest_opinion_customers.sql new file mode 100644 index 0000000000..45c212298e --- /dev/null +++ b/cl/scrapers/migrations/0004_create_mv_latest_opinion_customers.sql @@ -0,0 +1,49 @@ +BEGIN; +-- +-- Create model MVLatestOpinion +-- +-- (no-op) +-- +-- Raw SQL operation +-- + + CREATE MATERIALIZED VIEW IF NOT EXISTS + scrapers_mv_latest_opinion + AS + ( + SELECT + court_id, + max(so.date_created) as latest_creation_date, + DATE_TRUNC('minutes', (now() - max(so.date_created)))::text as time_since, + now() as view_last_updated + FROM + ( + SELECT id, court_id + FROM search_docket + WHERE court_id IN ( + SELECT id + FROM search_court + /* + Only check courts with scrapers in use + */ + WHERE + has_opinion_scraper + AND in_use + ) + ) sd + INNER JOIN + (SELECT id, docket_id FROM search_opinioncluster) soc ON soc.docket_id = sd.id + INNER JOIN + search_opinion so ON so.cluster_id = soc.id + GROUP BY + sd.court_id + HAVING + /* + Only return results for courts with no updates in a week + */ + now() - max(so.date_created) > interval '7 days' + ORDER BY + 2 DESC + ) + ; +COMMIT; From 2136db8414dd17e927c873df27ba5d476f49ea3e Mon Sep 17 00:00:00 2001 From: Gianfranco Rossi Date: Mon, 25 Nov 2024 12:23:45 -0500 Subject: [PATCH 097/143] refactor(scrapers.update_from_text): "C" to SOURCES.COURT_WEBSITE --- cl/scrapers/management/commands/update_from_text.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cl/scrapers/management/commands/update_from_text.py b/cl/scrapers/management/commands/update_from_text.py index 399e49e1b1..ee093d9e01 100644 --- a/cl/scrapers/management/commands/update_from_text.py +++ b/cl/scrapers/management/commands/update_from_text.py @@ -5,7 +5,12 @@ from cl.lib.command_utils import ScraperCommand, logger from cl.scrapers.tasks import update_document_from_text -from cl.search.models import PRECEDENTIAL_STATUS, Opinion, OpinionCluster +from cl.search.models import ( + PRECEDENTIAL_STATUS, + SOURCES, + Opinion, + OpinionCluster, +) def rerun_extract_from_text( @@ -169,7 +174,7 @@ def handle(self, *args, **options): "docket__court_id": 
court_id, "date_filed__gte": options["date_filed_gte"], "date_filed__lte": options["date_filed_lte"], - "source__contains": "C", + "source__contains": SOURCES.COURT_WEBSITE, } if options["cluster_status"]: From 71dc92a6f8b62a608aa2b7cb41874b37546e28d5 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 25 Nov 2024 13:58:56 -0600 Subject: [PATCH 098/143] fix(elasticsearch): Applied suggestion in set_child_docs_and_score --- cl/lib/elasticsearch_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 1b9366f713..2c1f1053c9 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3337,8 +3337,7 @@ def set_child_docs_and_score( if result_is_dict: # If the result is a dictionary, do nothing, or assign [] to # child_docs if it is not present. - child_docs = result.get("child_docs", []) - result["child_docs"] = child_docs + result["child_docs"] = result.get("child_docs", []) else: # Process child hits if the result is an ES AttrDict instance, # so they can be properly serialized. From 1bbb82e16ac43d64de32bf5a16e09b78a06b5d68 Mon Sep 17 00:00:00 2001 From: Elisa Anguita Date: Mon, 25 Nov 2024 19:24:53 -0300 Subject: [PATCH 099/143] feat(webhook_logs): Display timezone in all datetimes --- cl/users/templates/includes/webhook-event-detail.html | 4 ++-- .../templates/includes/webhooks_htmx/webhook-logs-list.html | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cl/users/templates/includes/webhook-event-detail.html b/cl/users/templates/includes/webhook-event-detail.html index 20f631fb53..2510ac729c 100644 --- a/cl/users/templates/includes/webhook-event-detail.html +++ b/cl/users/templates/includes/webhook-event-detail.html @@ -13,11 +13,11 @@

 Webhook Event Details{% if webhook_event.debug %} (
 {% if webhook_event.webhook.enabled %} Enabled {% else %} Disabled {% endif %}
 {{ webhook_event.webhook.get_event_type_display }}
 {{ webhook_event.event_id }}
-{{ webhook_event.date_created }}
+{{ webhook_event.date_created|date:"M. j, Y, h:m a T" }}
 {% if webhook_event.status_code %}{{ webhook_event.status_code }} {{ webhook_event.get_status_code_display }} {% else %}-{% endif %}
 {{ webhook_event.get_event_status_display }}
 {{ webhook_event.retry_counter }}
-{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
+{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date|date:"M. j, Y, h:m a T" }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
diff --git a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
index dc022dff94..a43f9eeb85 100644
--- a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
+++ b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
@@ -21,11 +21,11 @@ {% endif %}
-{{ webhook.date_created }}
+{{ webhook.date_created|date:"M. j, Y, h:m a T" }}
 {% if not webhook.debug %}
 {% if webhook.next_retry_date %}
-{{ webhook.next_retry_date }}
+{{ webhook.next_retry_date|date:"M. j, Y, h:m a T" }}
 {% else %}
 -
 {% endif %}

From 39761ebc4f0e943b1ec1841f56590ce07dcac311 Mon Sep 17 00:00:00 2001
From: Elisa Anguita
Date: Mon, 25 Nov 2024 19:44:40 -0300
Subject: [PATCH 100/143] fix(webhook_logs): Fix format string character

---
 cl/users/templates/includes/webhook-event-detail.html              | 4 ++--
 .../templates/includes/webhooks_htmx/webhook-logs-list.html        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cl/users/templates/includes/webhook-event-detail.html b/cl/users/templates/includes/webhook-event-detail.html
index 2510ac729c..63c5ed7920 100644
--- a/cl/users/templates/includes/webhook-event-detail.html
+++ b/cl/users/templates/includes/webhook-event-detail.html
@@ -13,11 +13,11 @@

 Webhook Event Details{% if webhook_event.debug %} (
 {% if webhook_event.webhook.enabled %} Enabled {% else %} Disabled {% endif %}
 {{ webhook_event.webhook.get_event_type_display }}
 {{ webhook_event.event_id }}
-{{ webhook_event.date_created|date:"M. j, Y, h:m a T" }}
+{{ webhook_event.date_created|date:"M. j, Y, h:i a T" }}
 {% if webhook_event.status_code %}{{ webhook_event.status_code }} {{ webhook_event.get_status_code_display }} {% else %}-{% endif %}
 {{ webhook_event.get_event_status_display }}
 {{ webhook_event.retry_counter }}
-{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date|date:"M. j, Y, h:m a T" }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
+{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date|date:"M. j, Y, h:i a T" }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
diff --git a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
index a43f9eeb85..18b19f641a 100644
--- a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
+++ b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
@@ -21,11 +21,11 @@ {% endif %}
-{{ webhook.date_created|date:"M. j, Y, h:m a T" }}
+{{ webhook.date_created|date:"M. j, Y, h:i a T" }}
 {% if not webhook.debug %}
 {% if webhook.next_retry_date %}
-{{ webhook.next_retry_date|date:"M. j, Y, h:m a T" }}
+{{ webhook.next_retry_date|date:"M. j, Y, h:i a T" }}
 {% else %}
 -
 {% endif %}

From e8c71e779126dcd74a98fce90ab8e73b85a61267 Mon Sep 17 00:00:00 2001
From: Elisa Anguita
Date: Mon, 25 Nov 2024 20:58:00 -0300
Subject: [PATCH 101/143] fix(webhook_logs): Always display datetimes in UTC in
 webhook logs

---
 cl/custom_filters/templatetags/extras.py               | 17 ++++++++++++++++-
 .../includes/webhook-event-detail.html                  |  5 +++--
 .../webhooks_htmx/webhook-logs-list.html                |  5 +++--
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/cl/custom_filters/templatetags/extras.py b/cl/custom_filters/templatetags/extras.py
index 39d535b2df..6532ca2881 100644
--- a/cl/custom_filters/templatetags/extras.py
+++ b/cl/custom_filters/templatetags/extras.py
@@ -1,7 +1,7 @@
 import random
 import re
 import urllib.parse
-from datetime import datetime
+from datetime import datetime, timezone
 
 import waffle
 from django import template
@@ -337,6 +337,21 @@ def format_date(date_str: str) -> str:
     return date_str
 
 
+@register.filter
+def datetime_in_utc(date_obj) -> str:
+    """Formats a datetime object in UTC with timezone displayed.
+    For example: 'Nov. 25, 2024, 01:28 p.m. UTC'"""
+    if date_obj is None:
+        return ""
+    try:
+        return date_filter(
+            date_obj.astimezone(timezone.utc),
+            "M. j, Y, h:i a T",
+        )
+    except (ValueError, TypeError):
+        return date_obj
+
+
 @register.filter
 def build_docket_id_q_param(request_q: str, docket_id: str) -> str:
     """Build a query string that includes the docket ID and any existing query
diff --git a/cl/users/templates/includes/webhook-event-detail.html b/cl/users/templates/includes/webhook-event-detail.html
index 63c5ed7920..9f70262daa 100644
--- a/cl/users/templates/includes/webhook-event-detail.html
+++ b/cl/users/templates/includes/webhook-event-detail.html
@@ -1,4 +1,5 @@
 {% extends "profile/webhooks_base.html" %}
+{% load extras %}
 {% load static %}
 {% load waffle_tags %}
 {% load humanize %}
@@ -13,11 +14,11 @@

 Webhook Event Details{% if webhook_event.debug %} (
 {% if webhook_event.webhook.enabled %} Enabled {% else %} Disabled {% endif %}
 {{ webhook_event.webhook.get_event_type_display }}
 {{ webhook_event.event_id }}
-{{ webhook_event.date_created|date:"M. j, Y, h:i a T" }}
+{{ webhook_event.date_created|datetime_in_utc }}
 {% if webhook_event.status_code %}{{ webhook_event.status_code }} {{ webhook_event.get_status_code_display }} {% else %}-{% endif %}
 {{ webhook_event.get_event_status_display }}
 {{ webhook_event.retry_counter }}
-{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date|date:"M. j, Y, h:i a T" }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
+{% if not webhook_event.debug %}{% if webhook_event.next_retry_date %}{{ webhook_event.next_retry_date|datetime_in_utc }}{% else %}-{% endif %}{% else %}Test events will not be retried{% endif %}
diff --git a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
index 18b19f641a..a9f8596832 100644
--- a/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
+++ b/cl/users/templates/includes/webhooks_htmx/webhook-logs-list.html
@@ -1,3 +1,4 @@
+{% load extras %}
 {% load widget_tweaks %}
 {% if results %}
 {% for webhook in results %}
@@ -21,11 +22,11 @@ {% endif %}
-{{ webhook.date_created|date:"M. j, Y, h:i a T" }}
+{{ webhook.date_created|datetime_in_utc }}
 {% if not webhook.debug %}
 {% if webhook.next_retry_date %}
-{{ webhook.next_retry_date|date:"M. j, Y, h:i a T" }}
+{{ webhook.next_retry_date|datetime_in_utc }}
 {% else %}
 -
 {% endif %}

From b2c9ada3443a0ef37e4fac3bb59485dc7cd49c7b Mon Sep 17 00:00:00 2001
From: Kevin Ramirez
Date: Mon, 25 Nov 2024 19:59:15 -0600
Subject: [PATCH 102/143] feat(casenames): update parse_citations function

---
 .../management/commands/update_casenames_wl_dataset.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py
index fe7eeeefe6..8a0e85a815 100644
--- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py
+++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py
@@ -150,11 +150,8 @@ def parse_citations(citation_strings: list[str]) -> list[dict]:
         if not volume or not volume.isdigit():
             continue
 
-        if not citation.corrected_reporter():
-            reporter_type = Citation.STATE
-        else:
-            cite_type_str = citation.all_editions[0].reporter.cite_type
-            reporter_type = map_reporter_db_cite_type(cite_type_str)
+        cite_type_str = citation.all_editions[0].reporter.cite_type
+        reporter_type = map_reporter_db_cite_type(cite_type_str)
 
         # Append the validated citation as a dictionary
         validated_citations.append(

From 274112171af31e2d56f7c6b7f9607b8cb770b0d7 Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Tue, 26 Nov 2024 11:11:19 -0600
Subject: [PATCH 103/143] fix(elasticsearch): Fixed ES MLT query

Fixes: #4305
---
 cl/lib/elasticsearch_utils.py       | 34 +++++++++++++++++++++--------
 cl/opinion_page/utils.py            |  8 +++----
 cl/search/constants.py              |  8 +++----
 cl/search/tests/tests_es_opinion.py |  1 +
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index 2c1f1053c9..96cb01653f 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -176,22 +176,38 @@ async def build_more_like_this_query(related_ids: list[str]) -> Query:
     exclusions for specific opinion clusters.
     """
 
-    document_list = [{"_id": f"o_{id}"} for id in related_ids]
+    opinion_cluster_pairs = [
+        opinion_pair
+        for opinion_id in related_ids
+        if (
+            opinion_pair := await Opinion.objects.filter(pk=opinion_id)
+            .values("pk", "cluster_id")
+            .afirst()
+        )
+    ]
+    unique_clusters = {pair["cluster_id"] for pair in opinion_cluster_pairs}
+
+    document_list = [
+        {
+            "_id": f'o_{opinion_pair["pk"]}',
+            "routing": opinion_pair["cluster_id"],
+        }
+        for opinion_pair in opinion_cluster_pairs
+    ]
     more_like_this_fields = SEARCH_MLT_OPINION_QUERY_FIELDS.copy()
     mlt_query = Q(
         "more_like_this",
         fields=more_like_this_fields,
         like=document_list,
-        min_term_freq=1,
-        max_query_terms=12,
+        min_term_freq=settings.RELATED_MLT_MINTF,
+        max_query_terms=settings.RELATED_MLT_MAXQT,
+        min_word_length=settings.RELATED_MLT_MINWL,
+        max_word_length=settings.RELATED_MLT_MAXWL,
+        max_doc_freq=settings.RELATED_MLT_MAXDF,
+        analyzer="search_analyzer_exact",
     )
     # Exclude opinion clusters to which the related IDs to query belong.
- cluster_ids_to_exclude = ( - OpinionCluster.objects.filter(sub_opinions__pk__in=related_ids) - .distinct("pk") - .values_list("pk", flat=True) - ) - cluster_ids_list = [pk async for pk in cluster_ids_to_exclude.aiterator()] + cluster_ids_list = list(unique_clusters) exclude_cluster_ids = [Q("terms", cluster_id=cluster_ids_list)] bool_query = Q("bool", must=[mlt_query], must_not=exclude_cluster_ids) return bool_query diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py index 160453bb1f..b135d3b020 100644 --- a/cl/opinion_page/utils.py +++ b/cl/opinion_page/utils.py @@ -166,13 +166,11 @@ async def build_cites_clusters_query( async def build_related_clusters_query( cluster_search: Search, sub_opinion_pks: list[str], - search_params: dict[str, str], ) -> Search: """Build the ES related clusters query based on sub-opinion IDs. :param cluster_search: The Elasticsearch DSL Search object :param sub_opinion_pks: A list of IDs representing sub-opinions to be queried. - :param search_params: A dict of parameters used to form the query. :return: The ES DSL Search object representing the query to find the related clusters. """ @@ -267,11 +265,13 @@ async def es_get_citing_and_related_clusters_with_cache( related_index = citing_index = None if cached_related_clusters is None: related_query = await build_related_clusters_query( - cluster_search, sub_opinion_pks, search_params + cluster_search, sub_opinion_pks ) related_query = related_query.extra( - size=settings.RELATED_COUNT, track_total_hits=False + size=settings.RELATED_COUNT, + track_total_hits=False, ) + print("Related query opinion: ", related_query.to_dict()) multi_search = multi_search.add(related_query) related_index = response_index response_index += 1 diff --git a/cl/search/constants.py b/cl/search/constants.py index 333dfbca6c..f7e76cb8fb 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -110,10 +110,10 @@ "syllabus", ] SEARCH_MLT_OPINION_QUERY_FIELDS = [ - "procedural_history", - "posture", - "syllabus", - "text", + "procedural_history.exact", + "posture.exact", + "syllabus.exact", + "text.exact", ] # ES fields that are used for highlighting diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index c7d9c2568d..4996f7d985 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -2253,6 +2253,7 @@ def test_uses_exact_version_for_case_name_field(self) -> None: cluster_2.delete() +@override_settings(RELATED_MLT_MINTF=1) class RelatedSearchTest( ESIndexTestCase, CourtTestCase, PeopleTestCase, SearchTestCase, TestCase ): From 877cf130e2bc7c49dc2cd446cf8d98d794e20b5e Mon Sep 17 00:00:00 2001 From: Elisa Anguita Date: Tue, 26 Nov 2024 14:27:44 -0300 Subject: [PATCH 104/143] test(webhooks): Adjust test so it's now less sensitive to trailing whitespaces Adding a {% load %} tag introduced an extra newline that was picked up by the test. Instead of counting newline chars we now only focus on whether the response contains meaningful content or not. 
--- cl/users/tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/users/tests.py b/cl/users/tests.py index 89170a445c..f5e26aac09 100644 --- a/cl/users/tests.py +++ b/cl/users/tests.py @@ -3467,7 +3467,7 @@ async def test_list_webhook_events(self) -> None: response = await self.client.get(webhook_event_path_list) self.assertEqual(response.status_code, HTTPStatus.OK) # There shouldn't be results for user_1 - self.assertEqual(response.content, b"\n\n") + self.assertEqual(response.content.strip(), b"") sa_webhook = await sync_to_async(WebhookFactory)( user=self.user_1, @@ -3485,7 +3485,7 @@ async def test_list_webhook_events(self) -> None: response = await self.client.get(webhook_event_path_list) self.assertEqual(response.status_code, HTTPStatus.OK) # There should be results for user_1 - self.assertNotEqual(response.content, b"\n\n") + self.assertNotEqual(response.content.strip(), b"") async def test_get_available_webhook_versions(self) -> None: """Can we get users available versions for a webhook event type?""" From d8b72b08aea225cb7f1e9cb1ee1f1f114349a1f5 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 26 Nov 2024 11:34:20 -0600 Subject: [PATCH 105/143] fix(elasticsearch): Added a fallback to the MLT query in case the IDs are not found in the DB --- cl/lib/elasticsearch_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 96cb01653f..f0a88ce0ea 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -189,11 +189,15 @@ async def build_more_like_this_query(related_ids: list[str]) -> Query: document_list = [ { - "_id": f'o_{opinion_pair["pk"]}', - "routing": opinion_pair["cluster_id"], + "_id": f'o_{pair["pk"]}', + "routing": pair["cluster_id"], + # Important to match documents in the production cluster } - for opinion_pair in opinion_cluster_pairs - ] + for pair in opinion_cluster_pairs + ] or [ + {"_id": f"o_{pk}"} for pk in related_ids + ] # Fall back in case IDs are not found in DB. 
+ more_like_this_fields = SEARCH_MLT_OPINION_QUERY_FIELDS.copy() mlt_query = Q( "more_like_this", From dd1c21453f4a05d31d5373e49fb0fb06496b8e0c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 26 Nov 2024 11:37:53 -0600 Subject: [PATCH 106/143] feat(casenames): fix code to combine initials in case names --- .../commands/update_casenames_wl_dataset.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 8a0e85a815..6c1d4def01 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -231,9 +231,15 @@ def combine_initials(case_name: str) -> str: :return: the cleaned case caption """ - pattern = r"((?:[A-Z]\.?\s?){2,})(\s|$)" - - return re.sub(pattern, lambda m: m.group(0).replace(".", ""), case_name) + initials_pattern = re.compile(r"(\b[A-Z]\.?\s?){2,}(\s|$)") + + matches = initials_pattern.finditer(case_name) + if matches: + for match in matches: + initials = match.group() + compressed_initials = re.sub(r"(?!\s$)[\s\.]", "", initials) + case_name = case_name.replace(initials, compressed_initials) + return case_name def process_csv(filepath: str, delay: float, dry_run: bool) -> None: From 8247d391d00dc873e5b19fa53bc802adec147341 Mon Sep 17 00:00:00 2001 From: William Palin Date: Tue, 26 Nov 2024 12:40:23 -0500 Subject: [PATCH 107/143] feat(opinions): Update css A few minor tweaks to some obvious css issues when looking around. Also - when we reingest the harvard data it is going to modify footnotes and page numbers in some. I want to add some css to make sure we are prepared for them. 
--- cl/assets/static-global/css/opinions.css | 28 +++++++++++++++++++++++- cl/assets/static-global/css/override.css | 1 - cl/assets/static-global/js/opinions.js | 9 ++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/cl/assets/static-global/css/opinions.css b/cl/assets/static-global/css/opinions.css index b120143c0b..ff1b0200d3 100644 --- a/cl/assets/static-global/css/opinions.css +++ b/cl/assets/static-global/css/opinions.css @@ -578,6 +578,32 @@ div.footnote:first-of-type { margin-left: 2px; } + page-label { + font-style: italic; + font-size: 0.8em; + margin-right: 4px; + margin-left: 2px; + } + + page-label { + cursor: pointer; + } + + page-label:hover { + color: darkblue; + text-decoration: underline; /* Example hover styling */ + } + + page-label::after { + display: inline; + position: relative; + content: attr(data-label); + float: right; + font-size: 1em; + color: dimgray; + width: 0; + } + a.page-label { font-style: italic; font-size: 0.8em; @@ -623,6 +649,7 @@ div.footnote:first-of-type { /* Adjust to move the entire blockquote to the right */ blockquote { margin-left: 3em; + display: block; } div.counsel > a.page-label::after { @@ -687,7 +714,6 @@ div.footnote:first-of-type { display: block; text-indent: 1em; } - } html { diff --git a/cl/assets/static-global/css/override.css b/cl/assets/static-global/css/override.css index 021b6e6996..b0b0979f9e 100644 --- a/cl/assets/static-global/css/override.css +++ b/cl/assets/static-global/css/override.css @@ -1031,7 +1031,6 @@ closely the content in the book*/ #headmatter > .footnotes > .footnote > a { color: #000099; - position: absolute; font-size: 1em; } diff --git a/cl/assets/static-global/js/opinions.js b/cl/assets/static-global/js/opinions.js index 65d35e2248..e6665237bf 100644 --- a/cl/assets/static-global/js/opinions.js +++ b/cl/assets/static-global/js/opinions.js @@ -278,4 +278,13 @@ document.addEventListener('scroll', function () { if (activeLink.parentElement) { activeLink.parentElement.classList.add('active'); } +}); + +document.querySelectorAll("page-label").forEach(label => { + label.addEventListener("click", function() { + const href = this.getAttribute("href"); + if (href) { + window.location.href = href; + } + }); }); \ No newline at end of file From 9adcd298c410ef56ae7ced70b4b77893049a34fa Mon Sep 17 00:00:00 2001 From: Elisa Anguita Date: Tue, 26 Nov 2024 16:07:28 -0300 Subject: [PATCH 108/143] feat(api): Enable filtering courts by parent court id --- cl/search/filters.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cl/search/filters.py b/cl/search/filters.py index d7f11e472c..6f1d6f6603 100644 --- a/cl/search/filters.py +++ b/cl/search/filters.py @@ -28,6 +28,10 @@ class CourtFilter(NoEmptyFilterSet): "cl.search.filters.DocketFilter", queryset=Docket.objects.all() ) jurisdiction = filters.MultipleChoiceFilter(choices=Court.JURISDICTIONS) + parent_court = filters.CharFilter( + field_name="parent_court__id", + lookup_expr="exact", + ) class Meta: model = Court From 00885f3e6e84243d99b35830e346cbc866a0a6d4 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Tue, 26 Nov 2024 13:17:51 -0600 Subject: [PATCH 109/143] fix(elasticsearch): Removed stray print --- cl/opinion_page/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py index b135d3b020..a1c9d0eeeb 100644 --- a/cl/opinion_page/utils.py +++ b/cl/opinion_page/utils.py @@ -271,7 +271,6 @@ async def es_get_citing_and_related_clusters_with_cache( size=settings.RELATED_COUNT, 
             track_total_hits=False,
         )
-        print("Related query opinion: ", related_query.to_dict())
         multi_search = multi_search.add(related_query)
         related_index = response_index
         response_index += 1

From adf676efbb37e85409686a5d20d48bdd Mon Sep 17 00:00:00 2001
From: Alberto Islas
Date: Tue, 26 Nov 2024 16:58:58 -0600
Subject: [PATCH 110/143] fix(elasticsearch): Avoid wrapping numbers in quotes
 in boost queries

- Removed Solr failing tests.

Fixes: #4737
---
 cl/lib/utils.py          |  2 +-
 cl/search/tests/tests.py | 73 ++++++++--------------------------------
 2 files changed, 15 insertions(+), 60 deletions(-)

diff --git a/cl/lib/utils.py b/cl/lib/utils.py
index 223056420f..592f8876d0 100644
--- a/cl/lib/utils.py
+++ b/cl/lib/utils.py
@@ -248,7 +248,7 @@ def cleanup_main_query(query_string: str) -> str:
     """
     inside_a_phrase = False
     cleaned_items = []
-    for item in re.split(r'([^a-zA-Z0-9_\-~":]+)', query_string):
+    for item in re.split(r'([^a-zA-Z0-9_\-^~":]+)', query_string):
         if not item:
             continue
 
diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py
index fe59be528d..3dac875f01 100644
--- a/cl/search/tests/tests.py
+++ b/cl/search/tests/tests.py
@@ -1076,6 +1076,20 @@ def test_round_estimated_search_counts(self) -> None:
             with self.subTest(test=test, msg="Test estimated search counts."):
                 self.assertEqual(simplify_estimated_count(test[0]), test[1])
 
+    def test_avoid_wrapping_boosted_numbers_in_quotes(self) -> None:
+        """Confirm that numbers in boost queries are not wrapped in quotes,
+        which would make the query fail.
+        """
+        search_params = {
+            "type": SEARCH_TYPES.ORAL_ARGUMENT,
+            "q": "Jose^3",
+        }
+        r = self.client.get(
+            reverse("show_results"),
+            search_params,
+        )
+        self.assertNotIn("encountered an error", r.content.decode())
+
 
 class SearchAPIV4CommonTest(ESIndexTestCase, TestCase):
     """Common tests for the Search API V4 endpoints."""
@@ -1643,35 +1657,6 @@ def test_search_query_saving(self) -> None:
             "Repeated query not marked as having hit cache",
         )
 
-    # Force Solr use
-    @override_flag("oa-es-active", False)
-    @override_flag("r-es-active", False)
-    @override_flag("p-es-active", False)
-    @override_flag("o-es-active", False)
-    def test_search_query_saving_solr(self) -> None:
-        """Are queries saved when using solr search (do_search)"""
-        for query in self.searches:
-            url = f"{reverse('show_results')}?{query}"
-            self.client.get(url)
-            last_query = SearchQuery.objects.last()
-            expected_query = self.normalize_query(query, replace_space=True)
-            stored_query = self.normalize_query(last_query.get_params)
-            self.assertEqual(
-                expected_query,
-                stored_query,
-                f"Query was not saved properly.
Expected {expected_query}, got {stored_query}", - ) - self.assertEqual( - last_query.engine, - SearchQuery.SOLR, - f"Saved wrong `engine` value, expected {SearchQuery.SOLR}", - ) - self.assertEqual( - last_query.source, - SearchQuery.WEBSITE, - self.source_error_message, - ) - def test_failed_es_search_queries(self) -> None: """Do we flag failed ElasticSearch queries properly?""" query = "type=r&q=contains/sproximity token" @@ -1772,36 +1757,6 @@ def test_failed_es_search_v3_api_queries(self) -> None: f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}", ) - @override_flag("oa-es-active", False) - @override_flag("oa-es-activate", False) - @override_flag("r-es-search-api-active", False) - @override_flag("p-es-active", False) - @override_flag("o-es-search-api-active", False) - def test_search_solr_api_v3_query_saving(self) -> None: - """Do we save queries on all V3 Search Solr endpoints""" - for query in self.base_searches: - url = f"{reverse("search-list", kwargs={"version": "v3"})}?{query}" - self.client.get(url) - # Compare parsed query strings; - last_query = SearchQuery.objects.last() - expected_query = self.normalize_query(query, replace_space=True) - stored_query = self.normalize_query(last_query.get_params) - self.assertEqual( - expected_query, - stored_query, - f"Query was not saved properly. Expected {expected_query}, got {stored_query}", - ) - self.assertEqual( - last_query.engine, - SearchQuery.SOLR, - f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}", - ) - self.assertEqual( - last_query.source, - SearchQuery.API, - self.source_error_message, - ) - class CaptionTest(TestCase): """Can we make good looking captions?""" From 5d0938681837ca5b1855dd040062f70ef01f2dde Mon Sep 17 00:00:00 2001 From: Elisa Anguita Date: Tue, 26 Nov 2024 20:59:29 -0300 Subject: [PATCH 111/143] test(api): Add tests for court filtering by parent_court --- cl/api/tests.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/cl/api/tests.py b/cl/api/tests.py index 238c0d04a7..275695b50f 100644 --- a/cl/api/tests.py +++ b/cl/api/tests.py @@ -86,7 +86,7 @@ TagViewSet, ) from cl.search.factories import CourtFactory, DocketFactory -from cl.search.models import SOURCES, Docket, Opinion +from cl.search.models import SOURCES, Court, Docket, Opinion from cl.stats.models import Event from cl.tests.cases import SimpleTestCase, TestCase, TransactionTestCase from cl.tests.utils import MockResponse, make_client @@ -673,6 +673,56 @@ async def assertCountInResults(self, expected_count): ) +class DRFCourtApiFilterTests(TestCase, FilteringCountTestCase): + @classmethod + def setUpTestData(cls): + Court.objects.all().delete() + + cls.parent_court = CourtFactory(id="parent1", full_name="Parent Court") + + cls.child_court1 = CourtFactory( + id="child1", + parent_court=cls.parent_court, + full_name="Child Court 1", + ) + cls.child_court2 = CourtFactory( + id="child2", + parent_court=cls.parent_court, + full_name="Child Court 2", + ) + + cls.orphan_court = CourtFactory(id="orphan", full_name="Orphan Court") + + @async_to_sync + async def setUp(self): + self.path = reverse("court-list", kwargs={"version": "v4"}) + self.q: Dict[str, Any] = {} + + async def test_parent_court_filter(self): + """Can we filter courts by parent_court id?""" + self.q["parent_court"] = "parent1" + await self.assertCountInResults(2) # Should return child1 and child2 + + # Verify the returned court IDs + response = await self.async_client.get(self.path, self.q) + 
court_ids = [court["id"] for court in response.data["results"]] + self.assertEqual(set(court_ids), {"child1", "child2"}) + + # Filter for courts with parent_court id='orphan' (none should match) + self.q["parent_court"] = "orphan" + await self.assertCountInResults(0) + + async def test_no_parent_court_filter(self): + """Do we get all courts when using no filters?""" + self.q = {} + await self.assertCountInResults(4) # Should return all four courts + + async def test_invalid_parent_court_filter(self): + """Do we handle invalid parent_court values correctly?""" + self.q["parent_court"] = "nonexistent" + await self.assertCountInResults(0) + + class DRFJudgeApiFilterTests( SimpleUserDataMixin, TestCase, FilteringCountTestCase ): From 9093b7f707088bdc794bd8fc56fd34b056f4cff2 Mon Sep 17 00:00:00 2001 From: Elisa Anguita Date: Tue, 26 Nov 2024 22:02:36 -0300 Subject: [PATCH 112/143] test(api): Add more tests for court filtering using other fields --- cl/api/tests.py | 157 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 149 insertions(+), 8 deletions(-) diff --git a/cl/api/tests.py b/cl/api/tests.py index 275695b50f..94f068b819 100644 --- a/cl/api/tests.py +++ b/cl/api/tests.py @@ -1,5 +1,5 @@ import json -from datetime import date, timedelta +from datetime import date, datetime, timedelta, timezone from http import HTTPStatus from typing import Any, Dict from unittest import mock @@ -666,11 +666,18 @@ async def assertCountInResults(self, expected_count): f"the JSON: \n{r.json()}", ) got = len(r.data["results"]) + try: + path = r.request.get("path") + query_string = r.request.get("query_string") + url = f"{path}?{query_string}" + except AttributeError: + url = self.path self.assertEqual( got, expected_count, - msg=f"Expected {expected_count}, but got {got}.\n\nr.data was: {r.data}", + msg=f"Expected {expected_count}, but got {got} in {url}\n\nr.data was: {r.data}", ) + return r class DRFCourtApiFilterTests(TestCase, FilteringCountTestCase): @@ -678,21 +685,67 @@ class DRFCourtApiFilterTests(TestCase, FilteringCountTestCase): def setUpTestData(cls): Court.objects.all().delete() - cls.parent_court = CourtFactory(id="parent1", full_name="Parent Court") + cls.parent_court = CourtFactory( + id="parent1", + full_name="Parent Court", + short_name="PC", + citation_string="PC", + in_use=True, + has_opinion_scraper=True, + has_oral_argument_scraper=False, + position=1, + start_date=date(2000, 1, 1), + end_date=None, + jurisdiction=Court.FEDERAL_APPELLATE, + date_modified=datetime(2021, 1, 1, tzinfo=timezone.utc), + ) cls.child_court1 = CourtFactory( id="child1", parent_court=cls.parent_court, full_name="Child Court 1", + short_name="CC1", + citation_string="CC1", + in_use=False, + has_opinion_scraper=False, + has_oral_argument_scraper=True, + position=2, + start_date=date(2010, 6, 15), + end_date=date(2020, 12, 31), + jurisdiction=Court.STATE_SUPREME, + date_modified=datetime(2022, 6, 15, tzinfo=timezone.utc), ) cls.child_court2 = CourtFactory( id="child2", parent_court=cls.parent_court, full_name="Child Court 2", + short_name="CC2", + citation_string="CC2", + in_use=True, + has_opinion_scraper=False, + has_oral_argument_scraper=False, + position=3, + start_date=date(2015, 5, 20), + end_date=None, + jurisdiction=Court.STATE_TRIAL, + date_modified=datetime(2023, 3, 10, tzinfo=timezone.utc), + ) + + cls.orphan_court = CourtFactory( + id="orphan", + full_name="Orphan Court", + short_name="OC", + citation_string="OC", + in_use=True, + has_opinion_scraper=False, + 
has_oral_argument_scraper=False, + position=4, + start_date=date(2012, 8, 25), + end_date=None, + jurisdiction=Court.FEDERAL_DISTRICT, + date_modified=datetime(2023, 5, 5, tzinfo=timezone.utc), ) - cls.orphan_court = CourtFactory(id="orphan", full_name="Orphan Court") - @async_to_sync async def setUp(self): self.path = reverse("court-list", kwargs={"version": "v4"}) @@ -701,15 +754,15 @@ async def setUp(self): async def test_parent_court_filter(self): """Can we filter courts by parent_court id?""" self.q["parent_court"] = "parent1" - await self.assertCountInResults(2) # Should return child1 and child2 + # Should return child1 and child2: + response = await self.assertCountInResults(2) # Verify the returned court IDs - response = await self.async_client.get(self.path, self.q) court_ids = [court["id"] for court in response.data["results"]] self.assertEqual(set(court_ids), {"child1", "child2"}) # Filter for courts with parent_court id='orphan' (none should match) - self.q["parent_court"] = "orphan" + self.q = {"parent_court": "orphan"} await self.assertCountInResults(0) async def test_no_parent_court_filter(self): @@ -722,6 +775,94 @@ async def test_invalid_parent_court_filter(self): self.q["parent_court"] = "nonexistent" await self.assertCountInResults(0) + async def test_id_filter(self): + """Can we filter courts by id?""" + self.q["id"] = "child1" + response = await self.assertCountInResults(1) + self.assertEqual(response.data["results"][0]["id"], "child1") + + async def test_in_use_filter(self): + """Can we filter courts by in_use field?""" + self.q = {"in_use": "true"} + await self.assertCountInResults(3) # parent1, child2, orphan + self.q = {"in_use": "false"} + await self.assertCountInResults(1) # child1 + + async def test_has_opinion_scraper_filter(self): + """Can we filter courts by has_opinion_scraper field?""" + self.q = {"has_opinion_scraper": "true"} + await self.assertCountInResults(1) # parent1 + self.q = {"has_opinion_scraper": "false"} + await self.assertCountInResults(3) # child1, child2, orphan + + async def test_has_oral_argument_scraper_filter(self): + """Can we filter courts by has_oral_argument_scraper field?""" + self.q = {"has_oral_argument_scraper": "true"} + await self.assertCountInResults(1) # child1 + self.q = {"has_oral_argument_scraper": "false"} + await self.assertCountInResults(3) # parent1, child2, orphan + + async def test_position_filter(self): + """Can we filter courts by position with integer lookups?""" + self.q = {"position__gt": "2"} + await self.assertCountInResults(2) # child2 (3), orphan (4) + self.q = {"position__lte": "2"} + await self.assertCountInResults(2) # parent1 (1), child1 (2) + + async def test_start_date_filter(self): + """Can we filter courts by start_date with date lookups?""" + self.q = {"start_date__year": "2015"} + await self.assertCountInResults(1) # child2 (2015-05-20) + self.q = {"start_date__gte": "2010-01-01"} + await self.assertCountInResults(3) # child1, child2, orphan + + async def test_end_date_filter(self): + """Can we filter courts by end_date with date lookups?""" + self.q = {"end_date__day": "31"} + await self.assertCountInResults(1) # parent1, child2, orphan + self.q = {"end_date__year": "2024"} + await self.assertCountInResults(0) + + async def test_short_name_filter(self): + """Can we filter courts by short_name with text lookups?""" + self.q = {"short_name__iexact": "Cc1"} + await self.assertCountInResults(1) # child1 + self.q = {"short_name__icontains": "cc"} + await self.assertCountInResults(2) # child1, child2 + 
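+    # Aside, illustrative only: over HTTP the lookups above correspond to
+    # requests such as GET /api/rest/v4/courts/?short_name__icontains=cc,
+    # which should list child1 and child2 in "results".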
+ async def test_full_name_filter(self): + """Can we filter courts by full_name with text lookups?""" + self.q = {"full_name__istartswith": "Child"} + await self.assertCountInResults(2) # child1, child2 + self.q = {"full_name__iendswith": "Court"} + await self.assertCountInResults(2) # parent1, orphan + + async def test_citation_string_filter(self): + """Can we filter courts by citation_string with text lookups?""" + self.q = {"citation_string": "OC"} + await self.assertCountInResults(1) # orphan + self.q = {"citation_string__icontains": "2"} + await self.assertCountInResults(1) # child2 + + async def test_jurisdiction_filter(self): + """Can we filter courts by jurisdiction?""" + self.q = { + "jurisdiction": [ + Court.FEDERAL_APPELLATE, + Court.FEDERAL_DISTRICT, + ] + } + await self.assertCountInResults(2) # parent1 and orphan + + async def test_combined_filters(self): + """Can we filter courts with multiple filters applied?""" + self.q = { + "in_use": "true", + "has_opinion_scraper": "false", + "position__gt": "2", + } + await self.assertCountInResults(2) # child2 and orphan + class DRFJudgeApiFilterTests( SimpleUserDataMixin, TestCase, FilteringCountTestCase From fc3a2c727d42f702aa7fc3df860475bc6b6bb0e9 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 27 Nov 2024 10:40:20 -0600 Subject: [PATCH 113/143] fix(elasticsearch): Enabled child highlighting for the related: query --- cl/lib/elasticsearch_utils.py | 2 +- cl/search/tests/tests_es_opinion.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index f0a88ce0ea..f7dbb19708 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -1260,7 +1260,7 @@ def build_es_base_query( {"opinion": []}, [], mlt_query, - child_highlighting=False, + child_highlighting=True, api_version=api_version, ) ) diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index 4996f7d985..5266c76ff8 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -2358,6 +2358,9 @@ def test_more_like_this_opinion(self) -> None: < r.content.decode().index("/opinion/%i/" % expected_second_pk), msg="'Howard v. Honda' should come AFTER 'case name cluster 3'.", ) + # Confirm that results contain a snippet + self.assertIn("plain", r.content.decode()) + # Confirm "related to" cluster legend is within the results' header. h2_element = html.fromstring(r.content.decode()).xpath( '//h2[@id="result-count"]' From ad031f478155fc4a97e2ff3945f69a1588815c5f Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Wed, 27 Nov 2024 11:24:50 -0600 Subject: [PATCH 114/143] fix(elasticsearch): Fixed undefined error variable in do_es_search - Fixed do_collapse_count_query return value on errors --- cl/lib/elasticsearch_utils.py | 4 ++-- cl/search/tests/tests.py | 14 ++++++++++++++ cl/search/tests/tests_es_opinion.py | 17 +++++++++++++++++ cl/search/views.py | 3 +-- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 2c1f1053c9..a494d32a2b 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -3084,7 +3084,7 @@ def build_cardinality_count(count_query: Search, unique_field: str) -> Search: def do_collapse_count_query( search_type: str, main_query: Search, query: Query -) -> int | None: +) -> int: """Execute an Elasticsearch count query for queries that uses collapse. 
Uses a query with aggregation to determine the number of unique opinions based on the 'cluster_id' or 'docket_id' according to the search_type. @@ -3109,7 +3109,7 @@ def do_collapse_count_query( f"Error on count query request: {search_query.to_dict()}" ) logger.warning(f"Error was: {e}") - total_results = None + total_results = 0 return total_results diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 3dac875f01..57aa8b27d2 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -2,6 +2,7 @@ import io import os from datetime import date +from http import HTTPStatus from pathlib import Path from unittest import mock from urllib.parse import parse_qs @@ -1090,6 +1091,19 @@ def test_avoid_wrapping_boosted_numbers_in_quotes(self) -> None: ) self.assertNotIn("encountered an error", r.content.decode()) + def test_raise_forbidden_error_on_depth_pagination(self) -> None: + """Confirm that a 403 Forbidden error is raised on depth pagination.""" + search_params = { + "type": SEARCH_TYPES.OPINION, + "q": "Lorem", + "page": 101, + } + r = self.client.get( + reverse("show_results"), + search_params, + ) + self.assertEqual(r.status_code, HTTPStatus.FORBIDDEN) + class SearchAPIV4CommonTest(ESIndexTestCase, TestCase): """Common tests for the Search API V4 endpoints.""" diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index c7d9c2568d..b276cfb508 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -547,6 +547,23 @@ def test_o_results_api_pagination(self) -> None: for created_opinion in created_opinions: created_opinion.delete() + async def test_bad_syntax_error(self) -> None: + """Can we properly raise the ElasticServerError exception?""" + + # Bad syntax due to the / char in the query. + params = { + "type": SEARCH_TYPES.OPINION, + "q": "This query contains bad/syntax query", + } + r = await self.async_client.get( + reverse("search-list", kwargs={"version": "v3"}), params + ) + self.assertEqual(r.status_code, HTTPStatus.INTERNAL_SERVER_ERROR) + self.assertEqual( + r.data["detail"], + "Internal Server Error. 
Please try again later or review your query.", + ) + class OpinionV4APISearchTest( OpinionSearchAPICommonTests, diff --git a/cl/search/views.py b/cl/search/views.py index 10f3f4b7f9..e545b4a9c7 100644 --- a/cl/search/views.py +++ b/cl/search/views.py @@ -729,6 +729,7 @@ def do_es_search( query_citation = None facet_fields = [] missing_citations_str = [] + error = True search_form = SearchForm(get_params, is_es_form=True, courts=courts) match get_params.get("type", SEARCH_TYPES.OPINION): @@ -827,8 +828,6 @@ def do_es_search( cd if not error else {"type": cd["type"]}, search_form, ) - else: - error = True courts, court_count_human, court_count = merge_form_with_courts( courts, search_form From 06db105f24de5cc6d8316d3159f86aad7cfd549c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 27 Nov 2024 14:28:52 -0600 Subject: [PATCH 115/143] feat(casenames): update tokenize_case_name() rename functions parameters update logger messages update docstrings --- .../commands/update_casenames_wl_dataset.py | 156 +++++++++++------- 1 file changed, 97 insertions(+), 59 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 6c1d4def01..b936bdafaa 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -10,9 +10,9 @@ from eyecite import get_citations from eyecite.models import FullCaseCitation from eyecite.tokenizers import HyperscanTokenizer +from juriscraper.lib.string_utils import harmonize from cl.citations.utils import map_reporter_db_cite_type -from cl.corpus_importer.utils import add_citations_to_cluster from cl.search.models import Citation, OpinionCluster logger = logging.getLogger(__name__) @@ -59,29 +59,29 @@ def tokenize_case_name(case_name: str) -> set[str]: :param case_name: case name to tokenize :return: list of words """ - return ( - set( - [ - word.lower() - for word in WORD_PATTERN.findall(case_name) - if len(word) > 1 - ] - ) - - FALSE_POSITIVES - ) + words = [] + for word in WORD_PATTERN.findall(case_name): + if len(word) > 1: + # Only keep words with more than one character + words.append(word) + # Return only valid words + return set(words) - FALSE_POSITIVES -def check_case_names_match(csv_case_name: str, cl_case_name: str) -> bool: + +def check_case_names_match(west_case_name: str, cl_case_name: str) -> bool: """Compare two case name and decide whether they are the same or not - :param csv_case_name: case name from csv + Tokenize each string, capturing both words and abbreviations with periods and + convert all words to lowercase for case-insensitive matching and check if there is + an overlap between case names + + :param west_case_name: case name from csv :param cl_case_name: case name from cluster :return: True if they match else False """ - # Tokenize each string, capturing both words and abbreviations with periods and - # convert all words to lowercase for case-insensitive matching and check if there - # is an overlap between case names - overlap = tokenize_case_name(csv_case_name) & tokenize_case_name( + + overlap = tokenize_case_name(west_case_name) & tokenize_case_name( cl_case_name ) @@ -90,14 +90,14 @@ def check_case_names_match(csv_case_name: str, cl_case_name: str) -> bool: return False # Check for "v." in title - if "v." not in csv_case_name.lower(): + if "v." not in west_case_name.lower(): # in the matter of Smith # if no V. 
- likely an "in re" case and only match on at least 1 name return True # otherwise check if a match occurs on both sides of the V - v_index = csv_case_name.lower().index("v.") - hit_indices = [csv_case_name.lower().find(hit) for hit in overlap] + v_index = west_case_name.lower().index("v.") + hit_indices = [west_case_name.lower().find(hit) for hit in overlap] return min(hit_indices) < v_index < max(hit_indices) @@ -122,7 +122,7 @@ def parse_date(date_str: str) -> date | None: return datetime.strptime(date_str, fmt).date() except (ValueError, TypeError): continue - logger.warning(f"Invalid date format: {date_str}") + logger.warning("Invalid date format: %s", date_str) return None @@ -136,6 +136,8 @@ def parse_citations(citation_strings: list[str]) -> list[dict]: for cite_str in citation_strings: # Get citations from the string + + # We find all the citations that could match a cluster to update the case name found_cites = get_citations(cite_str, tokenizer=HYPERSCAN_TOKENIZER) if not found_cites: continue @@ -194,12 +196,12 @@ def query_possible_matches( def update_matched_case_name( - matched_cluster: OpinionCluster, csv_case_name: str + matched_cluster: OpinionCluster, west_case_name: str ) -> tuple[bool, bool]: """Update case name of matched cluster and related docket if empty any of them :param matched_cluster: OpinionCluster object - :param csv_case_name: case name from csv row + :param west_case_name: case name from csv row :return: tuple with boolean values if cluster and related docket case name updated """ cluster_case_name_updated = False @@ -207,17 +209,17 @@ def update_matched_case_name( if not matched_cluster.case_name: # Save case name in cluster when we don't have it - matched_cluster.case_name = csv_case_name + matched_cluster.case_name = harmonize(west_case_name) matched_cluster.save() - logger.info(f"Case name updated for cluster id: {matched_cluster.id}") + logger.info("Case name updated for cluster id: %s", matched_cluster.id) cluster_case_name_updated = True if not matched_cluster.docket.case_name: # Save case name in docket when we don't have it - matched_cluster.docket.case_name = csv_case_name + matched_cluster.docket.case_name = harmonize(west_case_name) matched_cluster.docket.save() logger.info( - f"Case name updated for docket id: {matched_cluster.docket.id}" + "Case name updated for docket id: %s", matched_cluster.docket.id ) docket_case_name_updated = True @@ -227,6 +229,9 @@ def update_matched_case_name( def combine_initials(case_name: str) -> str: """Combine initials in case captions + This function identifies initials (e.g., "J. D. E.") in a case name and combines + them into a compressed format without spaces or periods (e.g., "JDE"). 
+ :param case_name: the case caption :return: the cleaned case caption """ @@ -252,8 +257,9 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: total_clusters_updated = 0 total_dockets_updated = 0 + total_citations_added = 0 - logger.info(f"Processing {filepath}") + logger.info("Processing %s", filepath) df = pd.read_csv(filepath).dropna() for row in df.itertuples(): ( @@ -269,20 +275,21 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: clean_docket_num = docket.strip('="').strip('"') if not clean_docket_num: - logger.info(f"Row index: {index} - No docket number found.") + logger.info("Row index: %s - No docket number found.", index) continue date_filed = parse_date(date_str) if not date_filed: logger.info( - f"Row index: {index} - No valid date found: {date_str}" + "Row index: %s - No valid date found: %s", index, date_str ) continue - valid_citations = parse_citations([cite1, cite2]) + west_citations: list[str] = [cite1, cite2] + valid_citations = parse_citations(west_citations) if not valid_citations: - logger.info(f"Row index: {index} - Missing two valid citations.") + logger.info("Row index: %s - Missing valid citations.", index) continue # Query for possible matches using data from row @@ -293,7 +300,7 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: ) if not possible_matches: - logger.info(f"Row index: {index} - No matches found.") + logger.info("Row index: %s - No possible matches found.", index) continue matches = [] @@ -311,16 +318,31 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: west_case_name, cl_case_name ) if case_name_match: - matches.append(match) + matches.append(match.cluster) - if len(matches) != 1: + if len(matches) == 0: + # No match found within possible matches, go to next row + logger.info( + "Row index: %s - No match found within possible matches.", + index, + ) + continue + elif len(matches) > 1: + # More than one match, log and go to next row + matches_found = ", ".join([str(cluster.id) for cluster in matches]) logger.warning( - f"Row index: {index} - Failed, Matches found: {len(matches)} - Matches: {[cluster.id for cluster in matches]}" + "Row index: %s - Multiple matches found: %s", + index, + matches_found, ) continue + # Single match found logger.info( - f"Row index: {index} - Match found: {matches[0].cluster_id} - Csv case name: {west_case_name}" + "Row index: %s - Match found: %s - West case name: %s", + index, + matches[0].id, + west_case_name, ) if dry_run: @@ -328,7 +350,7 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: continue with transaction.atomic(): - matched_cluster = matches[0].cluster + matched_cluster = matches[0] # Update case names cluster_updated, docket_updated = update_matched_case_name( @@ -342,35 +364,51 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: total_dockets_updated = +1 # Add any of the citations if possible + citation_to_add = None + for citation in valid_citations: - if Citation.objects.filter( + + new_cite_str = f"{citation.get('volume')} {citation.get('reporter')} {citation.get('page')}" + + cites = Citation.objects.filter( cluster_id=matched_cluster.id, reporter=citation.get("reporter"), - ).exists(): - # Avoid adding a citation if we already have a citation from the - # citation's reporter. - logger.info( - f"Can't add: {citation.get('volume')} {citation.get('reporter')} {citation.get('page')} to cluster id: {matched_cluster.id}. There is already " - f"a citation from that reporter." 
+ ) + + if cites.exists(): + if cites[0].__str__() == new_cite_str: + # We already have that citation + continue + # Same reporter, different citation, revert changes + logger.warning( + "Row index: %s - Revert changes for cluster id: %s", + index, + matched_cluster.id, ) - continue - citation["cluster_id"] = matched_cluster.id - Citation.objects.get_or_create(**citation) - - add_citations_to_cluster( - [ - f"{cite.get('volume')} {cite.get('reporter')} {cite.get('page')}" - for cite in valid_citations - ], - matches[0].cluster_id, - ) + transaction.set_rollback(True) + citation_to_add = None + break + + # We used one from the row to find the match, we only need to add the other citation + citation_to_add = citation + + if citation_to_add: + # Add the cluster id and create the new citation + citation_to_add["cluster_id"] = matched_cluster.id + new_citation = Citation.objects.create(**citation_to_add) + logger.info( + "New citation added: %s to cluster id: %s", + new_citation, + matched_cluster.id, + ) + total_citations_added += 1 # Wait between each processed row to avoid sending to many indexing tasks time.sleep(delay) - if not dry_run: - logger.info(f"Clusters updated: {total_clusters_updated}") - logger.info(f"Dockets updated: {total_dockets_updated}") + logger.info("Clusters updated: %s", total_clusters_updated) + logger.info("Dockets updated: %s", total_dockets_updated) + logger.info("Citations added: %s", total_citations_added) class Command(BaseCommand): From d8c994e8613a16c4c662c36dbdce7cf1c3299e3d Mon Sep 17 00:00:00 2001 From: grossir <14970769+grossir@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:48:16 +0000 Subject: [PATCH 116/143] Update freelawproject dependencies --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 68f09e1cfb..9e83e42f9c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2320,13 +2320,13 @@ setuptools = "*" [[package]] name = "juriscraper" -version = "2.6.43" +version = "2.6.44" description = "An API to scrape American court websites for metadata." 
optional = false python-versions = "*" files = [ - {file = "juriscraper-2.6.43-py27-none-any.whl", hash = "sha256:c2765e5f0a6563fe4842bf72b13aec2b6feb873dc2350523ff6b5102bdf1f757"}, - {file = "juriscraper-2.6.43.tar.gz", hash = "sha256:99029ab83cbe99673e4598c8e9b30df9e3d21ef98bd78baef9907ab53ad96e10"}, + {file = "juriscraper-2.6.44-py27-none-any.whl", hash = "sha256:29278f6429c25b171d3aebd341d795f7aa611669a8ff26d694943776499cadac"}, + {file = "juriscraper-2.6.44.tar.gz", hash = "sha256:cded9d566ffafb97cf6af8a1d5933aa0db12be2e1c0e0f412b0bd3d4f9896a8b"}, ] [package.dependencies] From a22a3c8699f77639b494d13153dfca9e2f56d8bb Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 27 Nov 2024 17:22:28 -0600 Subject: [PATCH 117/143] feat(casenames): update tokenize_case_name() update process to add new citation add test for check_case_names_match() update query_possible_matches() --- .../commands/update_casenames_wl_dataset.py | 57 ++++++++----------- cl/corpus_importer/tests.py | 49 ++++++++++++++++ 2 files changed, 74 insertions(+), 32 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index b936bdafaa..11d56d24c6 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -63,7 +63,7 @@ def tokenize_case_name(case_name: str) -> set[str]: for word in WORD_PATTERN.findall(case_name): if len(word) > 1: # Only keep words with more than one character - words.append(word) + words.append(word.lower()) # Return only valid words return set(words) - FALSE_POSITIVES @@ -173,6 +173,9 @@ def query_possible_matches( ) -> QuerySet[Citation]: """Find matches for row data + It will remove duplicates, it could happen if we already have both citations, if we + have multiple matches, these must be unique + :param valid_citations: list of FullCaseCitation objects :param docket_number: cleaned docket number from row :param date_filed: formatted filed date from row @@ -188,9 +191,11 @@ def query_possible_matches( cluster__date_filed=date_filed, ) citation_queries |= citation_query - possible_matches = Citation.objects.filter( - citation_queries - ).select_related("cluster") + possible_matches = ( + Citation.objects.filter(citation_queries) + .select_related("cluster") + .distinct("cluster__id") + ) return possible_matches @@ -364,44 +369,32 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: total_dockets_updated = +1 # Add any of the citations if possible - citation_to_add = None - for citation in valid_citations: - new_cite_str = f"{citation.get('volume')} {citation.get('reporter')} {citation.get('page')}" - - cites = Citation.objects.filter( - cluster_id=matched_cluster.id, + citation["cluster_id"] = matched_cluster.id + if Citation.objects.filter(**citation).exists(): + # We already have the citation + continue + elif Citation.objects.filter( + cluster_id=citation["cluster_id"], reporter=citation.get("reporter"), - ) - - if cites.exists(): - if cites[0].__str__() == new_cite_str: - # We already have that citation - continue - # Same reporter, different citation, revert changes + ).exists(): + # # Same reporter, different citation, revert changes logger.warning( "Row index: %s - Revert changes for cluster id: %s", index, matched_cluster.id, ) transaction.set_rollback(True) - citation_to_add = None break - - # We used one from the row to find the match, we only need to add the 
other citation - citation_to_add = citation - - if citation_to_add: - # Add the cluster id and create the new citation - citation_to_add["cluster_id"] = matched_cluster.id - new_citation = Citation.objects.create(**citation_to_add) - logger.info( - "New citation added: %s to cluster id: %s", - new_citation, - matched_cluster.id, - ) - total_citations_added += 1 + else: + new_citation = Citation.objects.create(**citation) + logger.info( + "New citation added: %s to cluster id: %s", + new_citation, + matched_cluster.id, + ) + total_citations_added += 1 # Wait between each processed row to avoid sending to many indexing tasks time.sleep(delay) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 7a76435ded..1e7d9de3da 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -62,6 +62,9 @@ log_added_items_to_redis, merge_rss_data, ) +from cl.corpus_importer.management.commands.update_casenames_wl_dataset import ( + check_case_names_match, +) from cl.corpus_importer.signals import ( handle_update_latest_case_id_and_schedule_iquery_sweep, update_latest_case_id_and_schedule_iquery_sweep, @@ -4078,3 +4081,49 @@ def test_probe_iquery_pages_daemon_court_got_stuck( f"iquery:court_empty_probe_attempts:{self.court_cacd.pk}" ) self.assertEqual(int(court_empty_attempts), 0) + + +class CaseNamesTest(SimpleTestCase): + def test_check_case_names_match(self) -> None: + """Can we check if the case names match?""" + case_names_tests = ( + ( + "US v. Guerrero-Martinez", # 736793 + "United States v. Hector Guerrero-Martinez, AKA Hector Guerrero AKA Hector Martinez-Guerrero", + False, + ), + ( + "In re CP", # 2140442 + "In Re CP", + True, + ), + ( + "Dennis v. City of Easton", # 730246 + "Richard Dennis, Penelope Dennis, Loretta M. Dennis v. City of Easton, Edward J. Ferraro, Robet S. Stein, Doris Asteak, Paul Schleuter, Howard B. White, Easton Board of Health", + True, + ), + ( + "Parmelee v. Bruggeman", # 736598 + "Allan Parmelee v. Milford Bruggeman Janine Bruggeman Friend of the Court for the State of Michigan Nancy Rose, Employee of the State of Michigan for the Friend of the Court Glenda Friday, Employee of the State of Michigan for the Friend of the Court Karen Dunn, Employee of the State of Michigan for the Friend of the Court Thomas Kreckman, Employee of the State of Michigan for the Friend of the Court State of Michigan", + True, + ), + ( + "Automobile Assur. Financial Corp. v. Syrett Corp.", # 735935 + "Automobile Assurance Financial Corporation, a Utah Corporation Venuti and Associates, Inc., a Utah Corporation Venuti Partners, Ltd., a Utah Limited Partnership Frank P. Venuti, an Individual, Parker M. Nielson v. Syrett Corporation, a Delaware Corporation, Formerly a Utah Corporation, John R. Riley, an Individual, Third-Party-Defendant", + True, + ), + ( + "Christopher Ambroze, M.D., PC v. Aetna Health Plans of New York, Inc.", # 735476 + "Christopher Ambroze, M.D., P.C., Rockville Anesthesia Group, Llp, Harvey Finkelstein, Plainview Anesthesiologists, P.C., Joseph A. Singer, Atlantic Anesthesia Associates, P.C. v. Aetna Health Plans of New York, Inc., Aetna Health Management, Inc., Aetna Life and Casualty Company, C. Frederick Berger, and Gregg Stolzberg", + True, + ), + ( + "O'Neal v. Merkel", # 730350 + "Terence Kenneth O'Neal v. T.E. Merkel Nurse Cashwell Nurse Allen Nurse Davis Mr. Conn, and Franklin E. Freeman, Jr. Gary Dixon Doctor Lowy Doctor Shaw Doctor Castalloe Harry Allsbrook Mr. 
Cherry", + True, + ), + ) + for wl_casename, cl_casename, overlap in case_names_tests: + self.assertEqual( + check_case_names_match(wl_casename, cl_casename), overlap + ) From 110b85ce93582905cf2b164680bb985f32727c23 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 28 Nov 2024 06:59:01 -0600 Subject: [PATCH 118/143] feat(casenames): update tokenize_case_name() update test for tokenize_case_name() --- .../commands/update_casenames_wl_dataset.py | 5 +++-- cl/corpus_importer/tests.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 11d56d24c6..acce5f19e2 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -54,13 +54,14 @@ def tokenize_case_name(case_name: str) -> set[str]: """Tokenizes case name and removes single-character words except for letters with periods. - Also removes false positive words + It uses harmonize() from juriscraper to make case names cleaner + Also removes false positive words, e.g. (U.S -> United States) :param case_name: case name to tokenize :return: list of words """ words = [] - for word in WORD_PATTERN.findall(case_name): + for word in WORD_PATTERN.findall(harmonize(case_name)): if len(word) > 1: # Only keep words with more than one character words.append(word.lower()) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 1e7d9de3da..00d9535a87 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -4087,10 +4087,15 @@ class CaseNamesTest(SimpleTestCase): def test_check_case_names_match(self) -> None: """Can we check if the case names match?""" case_names_tests = ( + ( + "U.S. v. Smith", + "United States v. Smith", + True, + ), ( "US v. Guerrero-Martinez", # 736793 "United States v. Hector Guerrero-Martinez, AKA Hector Guerrero AKA Hector Martinez-Guerrero", - False, + True, ), ( "In re CP", # 2140442 @@ -4125,5 +4130,7 @@ def test_check_case_names_match(self) -> None: ) for wl_casename, cl_casename, overlap in case_names_tests: self.assertEqual( - check_case_names_match(wl_casename, cl_casename), overlap + check_case_names_match(wl_casename, cl_casename), + overlap, + msg="Case names don't match", ) From 62bdf183658933aba4c70b8c3e8c4e5fe5a8d2e7 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 28 Nov 2024 11:39:58 -0600 Subject: [PATCH 119/143] fix(elasticsearch): Improved comment in build_more_like_this_query --- cl/lib/elasticsearch_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index f7dbb19708..3d0b2c7fa2 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -196,7 +196,10 @@ async def build_more_like_this_query(related_ids: list[str]) -> Query: for pair in opinion_cluster_pairs ] or [ {"_id": f"o_{pk}"} for pk in related_ids - ] # Fall back in case IDs are not found in DB. + ] # Fallback in case IDs are not found in the database. + # The user might have provided non-existent Opinion IDs. + # This ensures that the query does not raise an error and instead returns + # no results. 
more_like_this_fields = SEARCH_MLT_OPINION_QUERY_FIELDS.copy() mlt_query = Q( From dd85888d3d8969e009dd44e38a86aa54884d815c Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 29 Nov 2024 09:59:10 -0500 Subject: [PATCH 120/143] feat(update_casenames): Tweak name comparison feature --- .../commands/update_casenames_wl_dataset.py | 34 +++++++------------ 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index acce5f19e2..3e1776a335 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -54,14 +54,11 @@ def tokenize_case_name(case_name: str) -> set[str]: """Tokenizes case name and removes single-character words except for letters with periods. - It uses harmonize() from juriscraper to make case names cleaner - Also removes false positive words, e.g. (U.S -> United States) - :param case_name: case name to tokenize :return: list of words """ words = [] - for word in WORD_PATTERN.findall(harmonize(case_name)): + for word in WORD_PATTERN.findall(case_name): if len(word) > 1: # Only keep words with more than one character words.append(word.lower()) @@ -82,21 +79,23 @@ def check_case_names_match(west_case_name: str, cl_case_name: str) -> bool: :return: True if they match else False """ - overlap = tokenize_case_name(west_case_name) & tokenize_case_name( - cl_case_name - ) + west_set = tokenize_case_name(west_case_name) + cl_set = tokenize_case_name(cl_case_name) + overlap = west_set & cl_set if not overlap: # if no hits no match on name - move along return False # Check for "v." in title - if "v." not in west_case_name.lower(): + if "v." not in west_case_name.lower() or ( + len(cl_set) == 1 or len(west_set) == 1 + ): # in the matter of Smith # if no V. 
- likely an "in re" case and only match on at least 1 name return True - # otherwise check if a match occurs on both sides of the V + # otherwise check if a match occurs on both sides of the `v.` v_index = west_case_name.lower().index("v.") hit_indices = [west_case_name.lower().find(hit) for hit in overlap] @@ -215,14 +214,14 @@ def update_matched_case_name( if not matched_cluster.case_name: # Save case name in cluster when we don't have it - matched_cluster.case_name = harmonize(west_case_name) + matched_cluster.case_name = west_case_name matched_cluster.save() logger.info("Case name updated for cluster id: %s", matched_cluster.id) cluster_case_name_updated = True if not matched_cluster.docket.case_name: # Save case name in docket when we don't have it - matched_cluster.docket.case_name = harmonize(west_case_name) + matched_cluster.docket.case_name = west_case_name matched_cluster.docket.save() logger.info( "Case name updated for docket id: %s", matched_cluster.docket.id @@ -268,17 +267,8 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: logger.info("Processing %s", filepath) df = pd.read_csv(filepath).dropna() for row in df.itertuples(): - ( - index, - west_case_name, - court, - date_str, - cite1, - cite2, - docket, - volume, - ) = row - + index, case_name, court, date_str, cite1, cite2, docket, _ = row + west_case_name = harmonize(case_name) clean_docket_num = docket.strip('="').strip('"') if not clean_docket_num: logger.info("Row index: %s - No docket number found.", index) From a0920a202fbc2884b6f30cdfe2ac2376f8111635 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Nov 2024 10:11:05 -0600 Subject: [PATCH 121/143] feat(casenames): update tokenize_case_name() remove combine initials --- .../commands/update_casenames_wl_dataset.py | 29 ++----------------- cl/corpus_importer/tests.py | 4 +-- 2 files changed, 4 insertions(+), 29 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index 3e1776a335..c18f815043 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -79,8 +79,8 @@ def check_case_names_match(west_case_name: str, cl_case_name: str) -> bool: :return: True if they match else False """ - west_set = tokenize_case_name(west_case_name) - cl_set = tokenize_case_name(cl_case_name) + west_set = tokenize_case_name(west_case_name.lower()) + cl_set = tokenize_case_name(cl_case_name.lower()) overlap = west_set & cl_set if not overlap: @@ -98,7 +98,6 @@ def check_case_names_match(west_case_name: str, cl_case_name: str) -> bool: # otherwise check if a match occurs on both sides of the `v.` v_index = west_case_name.lower().index("v.") hit_indices = [west_case_name.lower().find(hit) for hit in overlap] - return min(hit_indices) < v_index < max(hit_indices) @@ -231,27 +230,6 @@ def update_matched_case_name( return cluster_case_name_updated, docket_case_name_updated -def combine_initials(case_name: str) -> str: - """Combine initials in case captions - - This function identifies initials (e.g., "J. D. E.") in a case name and combines - them into a compressed format without spaces or periods (e.g., "JDE"). 
- - :param case_name: the case caption - :return: the cleaned case caption - """ - - initials_pattern = re.compile(r"(\b[A-Z]\.?\s?){2,}(\s|$)") - - matches = initials_pattern.finditer(case_name) - if matches: - for match in matches: - initials = match.group() - compressed_initials = re.sub(r"(?!\s$)[\s\.]", "", initials) - case_name = case_name.replace(initials, compressed_initials) - return case_name - - def process_csv(filepath: str, delay: float, dry_run: bool) -> None: """Process rows from csv file @@ -307,9 +285,6 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: else match.cluster.case_name ) - west_case_name = combine_initials(west_case_name) - cl_case_name = combine_initials(cl_case_name) - case_name_match = check_case_names_match( west_case_name, cl_case_name ) diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 00d9535a87..37cc03d24d 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -4093,7 +4093,7 @@ def test_check_case_names_match(self) -> None: True, ), ( - "US v. Guerrero-Martinez", # 736793 + "United States v. Guerrero-Martinez", # 736793 "United States v. Hector Guerrero-Martinez, AKA Hector Guerrero AKA Hector Martinez-Guerrero", True, ), @@ -4132,5 +4132,5 @@ def test_check_case_names_match(self) -> None: self.assertEqual( check_case_names_match(wl_casename, cl_casename), overlap, - msg="Case names don't match", + msg=f"Case names don't match: {wl_casename} - {cl_casename}", ) From 01c1fa4e98b9f6dd1042e25b74153e421bdcfe33 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Nov 2024 11:30:45 -0600 Subject: [PATCH 122/143] feat(update_casenames): add start_row and limit param to command --- .../commands/update_casenames_wl_dataset.py | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py index c18f815043..c98d619b93 100644 --- a/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py +++ b/cl/corpus_importer/management/commands/update_casenames_wl_dataset.py @@ -230,12 +230,20 @@ def update_matched_case_name( return cluster_case_name_updated, docket_case_name_updated -def process_csv(filepath: str, delay: float, dry_run: bool) -> None: +def process_csv( + filepath: str, + delay: float, + dry_run: bool, + limit: int | None, + start_row: int, +) -> None: """Process rows from csv file :param filepath: path to csv file :param delay: delay between saves in seconds :param dry_run: flag to simulate update process + :param limit: limit number of rows to process + :param start_row: start row """ total_clusters_updated = 0 @@ -243,7 +251,19 @@ def process_csv(filepath: str, delay: float, dry_run: bool) -> None: total_citations_added = 0 logger.info("Processing %s", filepath) - df = pd.read_csv(filepath).dropna() + + # Generate rows to skip, excluding the header row + skip_rows = list(range(1, start_row)) if start_row else None + + df = pd.read_csv(filepath, skiprows=skip_rows, nrows=limit).dropna() + + # Reset the index to start from 0 (needed if we pass skip_rows param) + df.reset_index(drop=True, inplace=True) + + if start_row: + # Update rows index to reflect the original csv row numbers + df.index = range(start_row, start_row + len(df)) + for row in df.itertuples(): index, case_name, court, date_str, cite1, cite2, docket, _ = row west_case_name = harmonize(case_name) @@ -391,15 +411,30 @@ def add_arguments(self, 
parser): action="store_true", help="Simulate the update process without making changes", ) + parser.add_argument( + "--start-row", + default=0, + type=int, + help="Start row (inclusive).", + ) + parser.add_argument( + "--limit", + default=None, + type=int, + help="Limit number of rows to process.", + required=False, + ) def handle(self, *args, **options): filepath = options["filepath"] delay = options["delay"] dry_run = options["dry_run"] + limit = options["limit"] + start_row = options["start_row"] if not filepath: raise CommandError( "Filepath is required. Use --filepath to specify the CSV file location." ) - process_csv(filepath, delay, dry_run) + process_csv(filepath, delay, dry_run, limit, start_row) From c15f20d8131a933a66cd2b94b313d1094e70e325 Mon Sep 17 00:00:00 2001 From: mlissner Date: Fri, 29 Nov 2024 10:39:56 -0800 Subject: [PATCH 123/143] feat(ci): Add iquery rollout task --- .github/workflows/docker-build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index ff00cf891a..37fbcf30d2 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -131,6 +131,11 @@ jobs: - name: Watch cl-es-sweep-indexer rollout status run: kubectl rollout status -n ${{ env.EKS_NAMESPACE }} deployment/cl-es-sweep-indexer + - name: Rollout cl-iquery-probe + run: kubectl set image -n ${{ env.EKS_NAMESPACE }} deployment/cl-iquery-probe cl-iquery-probe=freelawproject/courtlistener:${{ steps.vars.outputs.sha_short }}-prod + - name: Watch cl-iquery-probe rollout status + run: kubectl rollout status -n ${{ env.EKS_NAMESPACE }} deployment/cl-iquery-probe + # Watch "cronjobs" in k9s - name: Update cronjobs From 84cdae99fd3ef375547e859a87a00f3fe8dd0ea8 Mon Sep 17 00:00:00 2001 From: mlissner Date: Fri, 29 Nov 2024 10:41:21 -0800 Subject: [PATCH 124/143] feat(settings): Throttle down users --- cl/settings/third_party/rest_framework.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cl/settings/third_party/rest_framework.py b/cl/settings/third_party/rest_framework.py index 9e4365b4ce..2c0f04163e 100644 --- a/cl/settings/third_party/rest_framework.py +++ b/cl/settings/third_party/rest_framework.py @@ -29,8 +29,12 @@ "OVERRIDE_THROTTLE_RATES": { # Throttling down. # Unresponsive + "projecttesting": "1/hour", "SAGW": "1/hour", # Bounced + "riwiko8259": "1/hour", + "xicaro7027": "1/hour", + "nayibij851": "1/hour", "testname2024": "1/hour", "cadebe2258": "1/hour", # Disposable email From 8c2cd99ad2dc9d816c9f5348a6ef2006af9d400a Mon Sep 17 00:00:00 2001 From: mlissner Date: Fri, 29 Nov 2024 10:42:03 -0800 Subject: [PATCH 125/143] feat(html): Typos --- cl/api/templates/migration-guide.html | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cl/api/templates/migration-guide.html b/cl/api/templates/migration-guide.html index 8b9d421529..4d1dbf7822 100644 --- a/cl/api/templates/migration-guide.html +++ b/cl/api/templates/migration-guide.html @@ -31,13 +31,13 @@
V4 API Migration Guide

- After several years of planning and development we have released v4 of our APIs. + After several years of planning and development, we have released v4 of our APIs.

This upgrade responds to feedback we have received over the years and should be much better for our users — faster, more featureful, more scalable, and more accurate.

- Unfortunately, we couldn't make these new APIs completely backwards compatible so this guide explains what's new. + Unfortunately, we couldn't make these new APIs completely backwards compatible, so this guide explains what's new.

Support

@@ -71,10 +71,10 @@

Timeline for Changes

What If I Do Nothing?

- You might be fine. Most of the database and search APIs are only changing slightly and v3 will be supported for some period of time. + You might be fine. Most of the database and search APIs are only changing slightly, and v3 will be supported for some period of time. But you should read this guide to see if any changes are needed to your application.

-

The remainder of this guide is in three section:

+

The remainder of this guide is in three sections:

  • New features you can expect
  • How to migrate database APIs
  • @@ -90,7 +90,7 @@

    Cursor-based pagination

    ElasticSearch

    v4 of the Search API is powered by ElasticSearch instead of Solr. This is a huge upgrade to our API and search engine.

    -

    Some of the improvements include:

    +

    Some improvements include:

    • In v4, all PACER cases are now searchable. In v3 you only got results if a case had a docket entry.
    • @@ -107,12 +107,12 @@

      ElasticSearch

    • Camelcase words like "McDonalds" are more searchable.
    • Highlighting is more consistent and can be disabled for better performance.
    -
  • Emojis and unicode characters are now searchable.
  • +
  • Emojis and Unicode characters are now searchable.
  • Docket number and other fielded searches are more robust.
  • Timezone handling is more consistent.
  • We've added a number of new searchable fields.
-

For more details please see our blog.

+

For more details, please see our blog.

Breaking Changes to v3 of the Search API

We cannot continue running Solr forever, but we can do our best to support v3 of the API. To do this, on November 25, 2024, v3 of the Search API will be upgraded to use ElasticSearch. We expect this to support most uses, but it will cause some breaking changes, as outlined in this section. From 16bd4a76dcd8211ac3d874aa6d2ab057b3b4c9ad Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 29 Nov 2024 15:48:04 -0600 Subject: [PATCH 126/143] fix(sweep_indexer): Added poll interval setting and wait between chunks --- cl/search/management/commands/sweep_indexer.py | 13 +++++++++++-- cl/settings/third_party/elasticsearch.py | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cl/search/management/commands/sweep_indexer.py b/cl/search/management/commands/sweep_indexer.py index 4cc7b0bc4f..c3607ca24c 100644 --- a/cl/search/management/commands/sweep_indexer.py +++ b/cl/search/management/commands/sweep_indexer.py @@ -359,7 +359,7 @@ def process_queryset( processed_count = 0 accumulated_chunk = 0 throttle = CeleryThrottle( - poll_interval=10, + poll_interval=settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL, min_items=self.chunk_size, queue_name=self.queue, ) @@ -405,8 +405,17 @@ def process_queryset( ).set(queue=self.queue).apply_async() accumulated_chunk += len(chunk) + if not testing_mode: + # Wait for 1/ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL + # before processing the next chunk. + # e.g: With a poll interval of 10 and a chunk size of 10, + # it will wait for 0.1 seconds for every 10 documents processed, + # maintaining an index rate of 100 documents per second. + time.sleep( + 1 / settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL + ) self.stdout.write( - "\rProcessed {}/{}, ({:.0%}), last {} PK indexed: {},".format( + "\rProcessed {}/{}, ({:.0%}), last {} ID indexed: {},".format( processed_count, count, processed_count * 1.0 / count, diff --git a/cl/settings/third_party/elasticsearch.py b/cl/settings/third_party/elasticsearch.py index c62e575d2e..69b0b72087 100644 --- a/cl/settings/third_party/elasticsearch.py +++ b/cl/settings/third_party/elasticsearch.py @@ -264,6 +264,9 @@ ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE = env( "ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE", default=60 ) +ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL = env( + "ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=10 +) ELASTICSEARCH_SWEEP_INDEXER_MODELS = env( "ELASTICSEARCH_SWEEP_INDEXER_MODELS", default=[ From 9f331aff55a5ce7d7804de586246355241638bd1 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 29 Nov 2024 15:54:40 -0600 Subject: [PATCH 127/143] fix(sweep_indexer): Ignore types for setting --- cl/search/management/commands/sweep_indexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/search/management/commands/sweep_indexer.py b/cl/search/management/commands/sweep_indexer.py index c3607ca24c..9c2c77d47e 100644 --- a/cl/search/management/commands/sweep_indexer.py +++ b/cl/search/management/commands/sweep_indexer.py @@ -359,7 +359,7 @@ def process_queryset( processed_count = 0 accumulated_chunk = 0 throttle = CeleryThrottle( - poll_interval=settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL, + poll_interval=settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL, # type: ignore min_items=self.chunk_size, queue_name=self.queue, ) @@ -412,7 +412,7 @@ def process_queryset( # it will wait for 0.1 seconds for every 10 documents processed, # maintaining an index rate of 100 documents per second. 
time.sleep( - 1 / settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL + 1 / settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL # type: ignore ) self.stdout.write( "\rProcessed {}/{}, ({:.0%}), last {} ID indexed: {},".format( From 34f1ac37e5623173c8de672509719bbb7644a7b7 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 29 Nov 2024 16:11:22 -0600 Subject: [PATCH 128/143] fix(sweep): Indexer reduced default poll interval --- cl/settings/third_party/elasticsearch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cl/settings/third_party/elasticsearch.py b/cl/settings/third_party/elasticsearch.py index 69b0b72087..879baaa58f 100644 --- a/cl/settings/third_party/elasticsearch.py +++ b/cl/settings/third_party/elasticsearch.py @@ -265,7 +265,7 @@ "ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE", default=60 ) ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL = env( - "ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=10 + "ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=5 ) ELASTICSEARCH_SWEEP_INDEXER_MODELS = env( "ELASTICSEARCH_SWEEP_INDEXER_MODELS", From ceb049e350f82e0086c68755bffb3bd3c1de4c0e Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 29 Nov 2024 17:09:29 -0600 Subject: [PATCH 129/143] fix(sweep_indexer): Added a separate setting for waiting between chunks - Added a logger for scheduling make_docket_by_iquery_sweep --- cl/corpus_importer/signals.py | 4 ++++ cl/search/management/commands/sweep_indexer.py | 4 ++-- cl/settings/third_party/elasticsearch.py | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cl/corpus_importer/signals.py b/cl/corpus_importer/signals.py index d2443b62f3..08254d7d85 100644 --- a/cl/corpus_importer/signals.py +++ b/cl/corpus_importer/signals.py @@ -76,6 +76,10 @@ def update_latest_case_id_and_schedule_iquery_sweep(docket: Docket) -> None: countdown=task_scheduled_countdown, queue=settings.CELERY_IQUERY_QUEUE, ) + logger.info( + f"Enqueued iquery docket case ID: {iquery_pacer_case_id_current} " + f"for court {court_id} with countdown {task_scheduled_countdown}" + ) # Update the iquery_pacer_case_id_current in Redis r.hset( diff --git a/cl/search/management/commands/sweep_indexer.py b/cl/search/management/commands/sweep_indexer.py index 9c2c77d47e..fe2bb96e79 100644 --- a/cl/search/management/commands/sweep_indexer.py +++ b/cl/search/management/commands/sweep_indexer.py @@ -406,13 +406,13 @@ def process_queryset( accumulated_chunk += len(chunk) if not testing_mode: - # Wait for 1/ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL + # Wait for 1/ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS # before processing the next chunk. # e.g: With a poll interval of 10 and a chunk size of 10, # it will wait for 0.1 seconds for every 10 documents processed, # maintaining an index rate of 100 documents per second. 
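                # (Illustrative arithmetic for the renamed setting below: its
                # default of 3 sleeps ~0.33s per chunk, so 10-document chunks
                # index at roughly 30 documents per second.)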
time.sleep( - 1 / settings.ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL # type: ignore + 1 / settings.ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS # type: ignore ) self.stdout.write( "\rProcessed {}/{}, ({:.0%}), last {} ID indexed: {},".format( diff --git a/cl/settings/third_party/elasticsearch.py b/cl/settings/third_party/elasticsearch.py index 879baaa58f..7a1ec6b779 100644 --- a/cl/settings/third_party/elasticsearch.py +++ b/cl/settings/third_party/elasticsearch.py @@ -265,7 +265,10 @@ "ELASTICSEARCH_SWEEP_INDEXER_HEADS_RATE", default=60 ) ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL = env( - "ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=5 + "ELASTICSEARCH_SWEEP_INDEXER_POLL_INTERVAL", default=10 +) +ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS = env( + "ELASTICSEARCH_SWEEP_INDEXER_WAIT_BETWEEN_CHUNKS", default=3 ) ELASTICSEARCH_SWEEP_INDEXER_MODELS = env( "ELASTICSEARCH_SWEEP_INDEXER_MODELS", From 3a62c294c39383f6f1054a4819c2b92a6ff0c0c3 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 29 Nov 2024 19:37:04 -0400 Subject: [PATCH 130/143] refactor(assets): Format opinions.js for consistency --- cl/assets/static-global/js/opinions.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cl/assets/static-global/js/opinions.js b/cl/assets/static-global/js/opinions.js index e6665237bf..3a3f5c2b11 100644 --- a/cl/assets/static-global/js/opinions.js +++ b/cl/assets/static-global/js/opinions.js @@ -9,9 +9,9 @@ $('.star-pagination').each(function (index, element) { if ($(this).attr('pagescheme')) { // For ANON 2020 this has two sets of numbers but only one can be // verified with other databses so only showing one - var number = $(this).attr('number') - if (number.indexOf("P") > -1) { - $(this).attr('label', ""); + var number = $(this).attr('number'); + if (number.indexOf('P') > -1) { + $(this).attr('label', ''); } else { $(this).attr('label', number); } @@ -287,4 +287,4 @@ document.querySelectorAll("page-label").forEach(label => { window.location.href = href; } }); -}); \ No newline at end of file +}); From 5d60c2e4664745294c04952698138c44cee41007 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 29 Nov 2024 20:45:12 -0400 Subject: [PATCH 131/143] refactor(opinions): Remove unused search_params argument This commit removes the search_params argument from the build_related_clusters_query call in the es_get_related_clusters_with_cache method. 
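A minimal before/after sketch of the call, matching the diff below:

    # before
    related_query = await build_related_clusters_query(
        cluster_search, sub_opinion_pks, search_params
    )
    # after
    related_query = await build_related_clusters_query(
        cluster_search, sub_opinion_pks
    )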
--- cl/opinion_page/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cl/opinion_page/utils.py b/cl/opinion_page/utils.py index 18d5fa4c30..33e0682211 100644 --- a/cl/opinion_page/utils.py +++ b/cl/opinion_page/utils.py @@ -298,7 +298,7 @@ async def es_get_related_clusters_with_cache( return related_cluster_result related_query = await build_related_clusters_query( - cluster_search, sub_opinion_pks, search_params + cluster_search, sub_opinion_pks ) related_query = related_query.params( From 948b7d33120669d7d7de5cd0d74687849ed1db44 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Fri, 29 Nov 2024 20:36:04 -0600 Subject: [PATCH 132/143] fix(corpus_importer): Changed setting to disable probe_iquery_pages_daemon --- .../management/commands/probe_iquery_pages_daemon.py | 2 +- cl/corpus_importer/tests.py | 2 +- cl/settings/project/corpus_importer.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cl/corpus_importer/management/commands/probe_iquery_pages_daemon.py b/cl/corpus_importer/management/commands/probe_iquery_pages_daemon.py index 759700673e..8a99322eb2 100644 --- a/cl/corpus_importer/management/commands/probe_iquery_pages_daemon.py +++ b/cl/corpus_importer/management/commands/probe_iquery_pages_daemon.py @@ -81,7 +81,7 @@ def handle(self, *args, **options): iterations_completed = 0 r = get_redis_interface("CACHE") testing = True if testing_iterations else False - while True and settings.IQUERY_PROBE_DAEMON_ENABLED: + while True and settings.IQUERY_CASE_PROBE_DAEMON_ENABLED: for court_id in court_ids: if r.exists(f"iquery:court_wait:{court_id}"): continue diff --git a/cl/corpus_importer/tests.py b/cl/corpus_importer/tests.py index 37cc03d24d..5b3d858897 100644 --- a/cl/corpus_importer/tests.py +++ b/cl/corpus_importer/tests.py @@ -3346,7 +3346,7 @@ def test_merger(self): @patch("cl.corpus_importer.tasks.get_or_cache_pacer_cookies") @override_settings( - IQUERY_PROBE_DAEMON_ENABLED=True, + IQUERY_CASE_PROBE_DAEMON_ENABLED=True, IQUERY_SWEEP_UPLOADS_SIGNAL_ENABLED=True, EGRESS_PROXY_HOSTS=["http://proxy_1:9090", "http://proxy_2:9090"], ) diff --git a/cl/settings/project/corpus_importer.py b/cl/settings/project/corpus_importer.py index dc81d21978..f2f375845f 100644 --- a/cl/settings/project/corpus_importer.py +++ b/cl/settings/project/corpus_importer.py @@ -1,8 +1,8 @@ import environ env = environ.FileAwareEnv() -IQUERY_PROBE_DAEMON_ENABLED = env.int( - "IQUERY_PROBE_DAEMON_ENABLED", default=False +IQUERY_CASE_PROBE_DAEMON_ENABLED = env.bool( + "IQUERY_CASE_PROBE_DAEMON_ENABLED", default=False ) IQUERY_PROBE_ITERATIONS = env.int("IQUERY_PROBE_ITERATIONS", default=9) IQUERY_PROBE_WAIT = env.int("IQUERY_PROBE_WAIT", default=300) From d5b40e73ec89b1e24ff23c50cf96060ca5b167f0 Mon Sep 17 00:00:00 2001 From: William Palin Date: Sat, 30 Nov 2024 09:25:39 -0500 Subject: [PATCH 133/143] choice(add_download): Remove commented code --- cl/opinion_page/templates/includes/add_download_button.html | 1 - 1 file changed, 1 deletion(-) diff --git a/cl/opinion_page/templates/includes/add_download_button.html b/cl/opinion_page/templates/includes/add_download_button.html index b6d05c41be..bcd7a508ea 100644 --- a/cl/opinion_page/templates/includes/add_download_button.html +++ b/cl/opinion_page/templates/includes/add_download_button.html @@ -1,4 +1,3 @@ -{#

-#}
   {{ cluster.docket.court }}
   • Citations: {{ cluster.citation_string|default:"None known" }}
-  {% if cluster.case_name_full != cluster.case_name and cluster.case_name_full != "" %}
+  {% if cluster.case_name_full != cluster.case_name and cluster.case_name_full != "" and cluster.case_name != "" %}
   • Full Case Name: {{ cluster.case_name_full }}
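The extra `cluster.case_name != ""` guard keeps the "Full Case Name" row
from rendering when a cluster has a full name but no display name,
presumably because the page heading already falls back to the full name
in that case. The condition the template now implements, restated as a
Python sketch (illustrative only):

    # Show the "Full Case Name" row only when all three checks hold:
    show_full_name = (
        cluster.case_name_full != cluster.case_name
        and cluster.case_name_full != ""
        and cluster.case_name != ""
    )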
From 1af9aab1405f8cf287282e270a3f7b0d318963ca Mon Sep 17 00:00:00 2001
From: William Palin
Date: Tue, 3 Dec 2024 09:40:40 -0500
Subject: [PATCH 140/143] fix(opinions.html): Use best case name

---
 cl/opinion_page/templates/opinions.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cl/opinion_page/templates/opinions.html b/cl/opinion_page/templates/opinions.html
index cc84454184..d627dbcd74 100644
--- a/cl/opinion_page/templates/opinions.html
+++ b/cl/opinion_page/templates/opinions.html
@@ -216,13 +216,13 @@

   {{ cluster.docket.court }}
   • Citations: {{ cluster.citation_string|default:"None known" }}
-  {% if cluster.case_name_full != cluster.case_name and cluster.case_name_full != "" and cluster.case_name != "" %}
+  {% if cluster.case_name_full != cluster|best_case_name %}
   • Full Case Name: {{ cluster.case_name_full }}
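The best_case_name filter used in the new condition is one of
CourtListener's template filters. A minimal sketch of the fallback
behavior the comparison relies on (a hypothetical simplification; the
real filter may differ in its fallback order and escaping):

    from django import template

    register = template.Library()

    @register.filter
    def best_case_name(obj):
        # Prefer the display name and fall back to the full name when
        # the display name is empty. Under that fallback, an empty
        # case_name makes the template compare case_name_full to itself,
        # which hides the redundant "Full Case Name" row.
        return obj.case_name or obj.case_name_full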
From 71fc4b8da32e3ed5c40ab2a35e31606fe0527f7c Mon Sep 17 00:00:00 2001
From: William Palin
Date: Tue, 3 Dec 2024 09:55:45 -0500
Subject: [PATCH 141/143] fix(opinions.html): Enable Harvard SCAN tab

---
 cl/opinion_page/templates/opinions.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cl/opinion_page/templates/opinions.html b/cl/opinion_page/templates/opinions.html
index 320dbb40d9..37343d034a 100644
--- a/cl/opinion_page/templates/opinions.html
+++ b/cl/opinion_page/templates/opinions.html
@@ -333,10 +333,10 @@

     {{ cluster.docket.court }}
   {% endif %}
 {% endif %}
-{% if has_downloads and "pdf" in pdf_path %}
+{% if has_downloads and "pdf" in pdf_path or cluster.filepath_pdf_harvard %}
-{% if cluster.headmatter %}
-{% else %}
-  {% if cluster.correction %}
-  {% endif %}
-  {% if cluster.attorneys %}
-  {% endif %}
-  {% if cluster.headnotes %}
-  {% endif %}
-  {% if cluster.syllabus %}
-  {% endif %}
-  {% if cluster.summary %}
-  {% endif %}
-  {% if cluster.history %}
-  {% endif %}
-  {% if cluster.disposition %}
-  {% endif %}
-{% endif %}
-{% for sub_opinion in cluster.ordered_opinions %}
-{% endfor %}
-{% if cluster.sub_opinions.all.first.extracted_by_ocr or "U" in cluster.source and tab == "opinions" %}
-  The text of this document was obtained by analyzing a scanned document
-  and may have typos.
-{% endif %}
-{% if tab == "authorities" %}
-  This page displays all the citations that have been extracted and
-  linked in our system. Please note, it does not serve as a
-  comprehensive list of all citations within the document.
-{% endif %}
+{% if cluster.sub_opinions.all.first.extracted_by_ocr or "U" in cluster.source and tab == "opinions" %}
+  The text of this document was obtained by analyzing a scanned document
+  and may have typos.
+{% endif %}
-{% if tab == "summaries" %}
-  Summaries or parenthetical groupings are used to provide concise
-  explanations or clarifications about a case’s procedural posture,
-  legal principles, or facts that are immediately relevant to the
-  citation, typically enclosed in parentheses following a case citation.
-{% endif %}
+{% if tab == "authorities" %}
+  This page displays all the citations that have been extracted and
+  linked in our system. Please note, it does not serve as a
+  comprehensive list of all citations within the document.
+{% endif %}
+{% if tab == "related-cases" %}
+  The Related Cases query is used to find legal cases related to a given
+  case by analyzing textual similarities. It identifies and retrieves
+  cases with similar content, allowing for the generation of a summary
+  of related cases, including their names, links, and filing dates, to
+  help users explore precedents or comparable rulings.
+{% endif %}
+{% if tab == "summaries" %}
+  Summaries or parenthetical groupings are used to provide concise
+  explanations or clarifications about a case’s procedural posture,
+  legal principles, or facts that are immediately relevant to the
+  citation, typically enclosed in parentheses following a case citation.
+{% endif %}
-{# Sponsored by #}
-{% if sponsored %}
-{% else %}
-  {% include "includes/donate_sidebar.html" with referrer="o-donate-now" %}
-{% endif %}
-{% endwith %}
+{# Sponsored by #}
+{% if sponsored %}
+{% else %}
+  {% include "includes/donate_sidebar.html" with referrer="o-donate-now" %}
+{% endif %}
 {% endblock %}
 {% block body-classes %}opinion-body{% endblock %}
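One subtlety in the Harvard SCAN condition above: in Django template
{% if %} expressions, as in Python, "and" binds tighter than "or", so
the test reads as (has_downloads and "pdf" in pdf_path) or
cluster.filepath_pdf_harvard, and the tab is enabled whenever a Harvard
PDF path exists, regardless of the local download flags. Django's
{% if %} tag does not accept parentheses, so an equivalent, more
explicit form uses branching (sketch only):

    {% if cluster.filepath_pdf_harvard %}
      {# A Harvard-scanned PDF exists: always enable the tab. #}
    {% elif has_downloads and "pdf" in pdf_path %}
      {# Otherwise require a locally downloaded PDF. #}
    {% endif %}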