# Scratch script that rewrites raw title patterns into `Concat(...)` expressions.
# Source notebook: jupyter_notebooks/clean_titles.ipynb. Each `# %%` marker starts
# what was one notebook cell.

# %%
# Raw title patterns. Three kinds of entry appear:
#   * plain text, used as-is
#   * text containing the "{title}" placeholder
#   * text containing XPath selectors (anything starting with "//")
dirty_titles = [
    'GCN //*[@id="gcn-news-and-events"]/a',
    "GCN {title}",
    "GCN {title}",
    "GCN {title}",
    "GCN advanced gravitational wave detector network {title}",
    'GCN //*[@id="super-kamioka-neutrino-detection-experiment-super-kamiokande"]/a',
    "New GCN Circular",
    "GCN Circular Catalog Page page# where page# is the digits at the end of url",
    "GCN Circular 0",
    "GCN Circular 1",
    "GCN Circular -1",
    "GCN Circular 10",
    "NASA's Earth Observing System Project Science Office Mission List",
    "NASA Earth Science Data Links",
    "Our Changing Planet: The View from Space - The Dynamic Atmosphere - Introduction",
    "Our Changing Planet: The View from Space - Evidence of Our Tenure - Introduction",
    "Our Changing Planet: The View from Space - Forward",
    "Our Changing Planet: The View from Space - The Frozen Caps - Introduction",
    "Our Changing Planet: The View from Space - Preface",
    "Our Changing Planet: The View from Space - The Restless Ocean - Introduction",
    "Our Changing Planet: The View from Space - The Vital Land - Introduction",
    "EOSPSO Resource Links",
    "NASA Social Media",
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    "GISS MOLSCAT",
    "GISS GCM ModelE",
    "JIGSAW(GEO): Mesh Generation for Unstructured GCMs",
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    "Goddard Institute for Space Studies (GISS) Annual Publications Year #yearumber where #yearumber is the digits in url",
    "Goddard Institute for Space Studies (GISS) in press or accepted Publications",
    "Goddard Institute for Space Studies (GISS) Early On-line Publications",
    "First Results from ACPC Case Studies on Aerosol Effects on Shallow and Deep Clouds",
    "DISCUSSION OF: A STATISTICAL ANALYSIS OF MULTIPLE TEMPERATURE PROXIES: ARE RECONSTRUCTIONS OF SURFACE TEMPERATURES OVER THE LAST 1000 YEARS RELIABLE?",
    "Potential Climatic Impacts of Increasing Atmospheric CO2 with Emphasis on Water Availability and Hydrology in the United States",
    "NASA Climate Change Research Initiative (CCRI)",
    '//*[@id="cpad"]/h3',
    "Graduate {title}",
    "Graduate {title}",
    "New York City Research Initiative",
    "ModelE {title}",
    "ModelE AR5 Simulations: Past Climate Change and Future Climate Predictions",
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    '//*[@id="cpad"]/h2 {title}',
    "Dictionary of TERMS used in Solar Radiation",
    "Model ERRORs in calculating Solar Radiation at top of Atmosphere",
    "Configurations for CMIP3 Simulations, Updates, and Issues",
    "Configurations for CMIP5 Simulations, Updates, and Issues",
    "Configurations for CMIP6 Simulations",
    "Earth's Energy Imbalance {title}",
    "Earth's Energy Imbalance {title}",
    "Earth's Energy Imbalance {title}",
    "Earth's Energy Imbalance {title}",
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    "Computation of Seasonal and (Meteorological) Annual Means",
    'Fraction of "Warm" Stations',
    "Updates to Analysis: GHCN-M V3 vs. V2",
    "August 2007 Updates to Analysis and Effects",
    "Updates to Analysis (2003-2011)",
    "Documentation and description of surface solar irradiance data sets produced for SeaWiFS",
    "Atmospheric Chemistry and Climate Model Intercomparison Project (ACCMIP)",
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3',
    '//*[@id="cpad"]/h2',
    '//*[@id="cpad"]/h3',
    "{title} News & Features",
    "{title} News & Features",
    "{title} News & Features",
    "{title} News & Features",
    "{title} News & Features",
    "Satellites and Weather Models Study West African Storms",
    "Trial of the Century — Act II",
    "Sea Level Rise, After the Ice Melted and Today",
    "Particulate Consequences of Particular Future Activities",
    "Wetlands' Outsize Influence on Climate",
    "La physique de la modélisation du climat",
    "Understanding Carbon Monoxide as Pollutant and as Agent of Climate Change",
    "NASA's Earth Observing System Project Science Office Mission List",
    "IGWN Public Alerts User Guide - {title}",
    "NASA Power - {title}",
    "Earth Science Missions",
    "COS-B Calibration Guide",
    "COS-B Calibration Guide - {title}",
    "SAS-2 Calibration Guide",
    "SAX Calibration Files in the HEASARC CALDB",
    "SAS-2 Calibration Files in the HEASARC CALDB",
    "SAS-2 Calibration Files - {title}",
    "C3R2 Survey Data - KOA Contributed Datasets",
    "Comet ISON - KOA Contributed Datasets",
    "Deep Impact - KOA Contributed Datasets",
    "NIRSPEC Search for Gaseous Plumes from Europa - KOA Contributed Datasets",
    "GRB051111 - KOA Contributed Datasets",
    "KCWI Commissioning Data - KOA Contributed Datasets",
    "KODIAQ - KOA Contributed Datasets",
    "Keck Sample of Quiescent Galaxies - KOA Contributed Datasets",
    "Exoplanet Archive Application Programming Interface (API) User Guide",
    "Python Interface to HEASoft",
    "FITS Data Format",
    "GeneLab Sample Processing - Standard Operating Procedure PDFs",
    "GeneLab Sample Processing",
    "GeneLab Training",
    "Effects of Aeration on Soybean Plant Growth in Lunar and Martian Regolith",
    "Geospatial Services Data Pathfinder | Earthdata",
    "USGS ISIS: Planetary Image Processing Software",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Cassini Saturn-Orbiter",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-Voyagers",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-Sojourner",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-SIRTF",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-Mars Pathfinder",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-Huygens",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Spacecraft Information-Galileo",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Solar Temperature",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Saturn Orbit Timeline",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Saturn Orbit Insertion",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Gravity Assist Primer",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Participants' Guide",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Huygens",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Gravity",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Glossary",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Characteristics of Electromagnetic Energy",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Editorial Page",
    "Basics of Space Flight - Solar System Exploration: NASA Science - Diane Fisher",
    "Basics of Space Flight - Solar System Exploration: NASA Science - David Doody",
    "WebGL - Moon",
    "Cell Phone Photography - Moon: NASA Science",
    "What Happens During a Solar Eclipse? - Moon: NASA Science",
    "The Moon Through a Camera Lens - Moon: NASA Science",
    "Photography Through a Telescope - Moon: NASA Science",
    "Planetary Plasma Interactions Software",
    "Pluto and Arrokoth Data Archive",
    "Juno Mission",
    "Juno JIRAM Instrument",
    "Juno Orbits",
    "JunoCam",
    "Mars Thermal Emission Spectrometer",
    "Mars Lander Data",
    "LADEE Science Goals",
    "LADEE NAIF Support",
    "Lunar Atmospheres Data Archive",
    "Lunar Dust Experiment (LDEX)",
    "LADEE Mission",
    "Mercury Data Archive - Mercury Flyby or Orbiting Spacecraft",
    "Fortran Programs and Data for Modeling CIA Opacities",
    "All Geosciences Node Data Holdings",
    "Explicit Planetary Isentropic Coordinate (EPIC) Model",
    "SBN Tools, Utilities, and Interfaces",
    "Small Bodies Image Browser",
    "Images from the Universe Gallery",
    "Titan Data Archive - Titan Flyby or Orbiting Spacecraft",
    "Saturn Data Archive",
    "PDS3 Standards Reference",
    "Outer Planet Icy Satellites Archive Page",
    "PDS4 Training",
    "PDS Granular Data System LOLA RDR Query Tool",
    "PDS Granular Data System Diviner RDR Query Tool",
    "PDS4 JParser",
    "PDS Analyst's Notebook",
    "Metadata Injector for PDS Labels",
    "PDS LOLA RDR Query",
    "Venus Orbital Data Explorer",
    "Lunar Orbital Data Explorer",
    "PDS4 Local Data Dictionary Tool (LDDTool)",
    "Jupiter Data Archive",
    "Mercury Orbital Data Explorer",
    "Planetary Image Locator Tool (PILOT)",
    "Planetary Image Locator Tool (PILOT) - FAQ",
    "DIVINER RDR Query",
    "HIRESPRV: A Comparison With Data Products From the California Planet Search",
    "PDS Software, Tools, Tutorials & Viewers",
    "The Exoplanet Opacity Database - MAESTRO",
    "The Exoplanet Opacity Database - The Team",
    "The Exoplanet Opacity Database - What are Opacities?",
    "The Exoplanet Opacity Database - Opacities",
    "JHelioviewer - Homepage",
    "JHelioviewer - About",
    "JHelioviewer - Demo",
    "JHelioviewer - Documentation",
    "JHelioviewer - Download",
    "PDS/PPI Models and Simulations Data Holdings",
]

# %%
# Swap the "{title}" placeholder for the "doc.title" token and trim edge whitespace.
cleaned_titles = [title.replace("{title}", "doc.title").strip() for title in dirty_titles]

# %%
import re


def add_doc_title(input_string):
    """Rewrite a pattern containing ``doc.title`` as a ``Concat(...)`` expression.

    The pattern is split on every ``doc.title`` token. The text between tokens
    becomes the other arguments, in their original order:

    * ordinary text is emitted verbatim inside double quotes, so the author's
      spacing around the placeholder is kept;
    * a segment that starts with ``xpath:`` is emitted bare (stripped, unquoted),
      matching the form used by the curated ``replacements`` table;
    * empty segments are dropped, so a placeholder at the very start or end of
      the pattern does not produce an empty literal argument.

    Args:
        input_string: Pattern text, e.g. ``"GCN doc.title"``.

    Returns:
        ``input_string`` unchanged when it contains no ``doc.title`` token.
        The single argument on its own when only one remains (a lone
        ``doc.title``). Otherwise ``Concat(arg1, arg2, ...)`` with arguments
        separated by ``", "``.

    NOTE(review): double quotes inside an ordinary text segment are not escaped;
    confirm the escaping rules of the consuming expression syntax before feeding
    such patterns through this helper.
    """
    segments = input_string.strip().split('doc.title')
    if len(segments) < 2:
        # No placeholder present: nothing to concatenate.
        return input_string

    arguments = []
    for position, segment in enumerate(segments):
        if position:
            # Exactly one placeholder sits between consecutive segments.
            arguments.append('doc.title')
        if not segment:
            continue
        selector = segment.strip()
        if selector.startswith('xpath:'):
            arguments.append(selector)
        else:
            arguments.append('"' + segment + '"')

    if len(arguments) == 1:
        return arguments[0]
    return "Concat(" + ", ".join(arguments) + ")"


# Unused regex-based alternatives left commented out by the author:
#   or re.match('.*{title}*', title)
#   if re.match('.*//.*', title)

# Prefix every "//" with "xpath:" and turn "{title}" into "doc.title", then build
# the Concat(...) form for each pattern that contains a placeholder.
replaced = [title.replace('//', 'xpath://').replace("{title}", "doc.title") for title in dirty_titles]
concats = [add_doc_title(item) for item in replaced]

# %%
# Show only the entries that were turned into Concat(...) expressions.
[i for i in concats if 'Concat' in i]

# %%
# Hand-curated mapping from raw title pattern to its final expression.
# Each pattern is listed once; titles without an entry are used unchanged.
# Literal prefixes keep the trailing space that separated them from the
# placeholder or selector in the raw pattern.
# NOTE(review): entries that join an xpath selector with doc.title or with another
# selector have no separator argument, although the raw pattern had a space
# between the two parts. Confirm whether a " " argument is wanted there.
replacements = {
    "GCN {title}": 'Concat("GCN ", doc.title)',
    "GCN advanced gravitational wave detector network {title}": 'Concat("GCN advanced gravitational wave detector network ", doc.title)',
    "Graduate {title}": 'Concat("Graduate ", doc.title)',
    "ModelE {title}": 'Concat("ModelE ", doc.title)',
    '//*[@id="cpad"]/h2 {title}': 'Concat(xpath://*[@id="cpad"]/h2, doc.title)',
    "Earth's Energy Imbalance {title}": 'Concat("Earth\'s Energy Imbalance ", doc.title)',
    "{title} News & Features": 'Concat(doc.title, " News & Features")',
    "IGWN Public Alerts User Guide - {title}": 'Concat("IGWN Public Alerts User Guide - ", doc.title)',
    "NASA Power - {title}": 'Concat("NASA Power - ", doc.title)',
    "COS-B Calibration Guide - {title}": 'Concat("COS-B Calibration Guide - ", doc.title)',
    "SAS-2 Calibration Files - {title}": 'Concat("SAS-2 Calibration Files - ", doc.title)',
    '//*[@id="cpad"]/h2 //*[@id="cpad"]/h3': 'Concat(xpath://*[@id="cpad"]/h2, xpath://*[@id="cpad"]/h3)',
    '//*[@id="cpad"]/h3': 'xpath://*[@id="cpad"]/h3',
    '//*[@id="cpad"]/h2': 'xpath://*[@id="cpad"]/h2',
    'GCN //*[@id="gcn-news-and-events"]/a': 'Concat("GCN ", xpath://*[@id="gcn-news-and-events"]/a)',
    'GCN //*[@id="super-kamioka-neutrino-detection-experiment-super-kamiokande"]/a': 'Concat("GCN ", xpath://*[@id="super-kamioka-neutrino-detection-experiment-super-kamiokande"]/a)',
}

# %%
# Final titles: the curated expression where one exists, otherwise the raw title.
[replacements.get(title, title) for title in dirty_titles]