forked from opensciencegrid/topology
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* master: Add Michael Antonelli as contact to brown-cms resources (FD #76261) Added project UCDenver_Roberts Add downtime for DENVER_INTERNET2_OSDF_CACHE due to toPelican Add downtime for MGHPCC_NRP_OSDF_ORIGIN due to hw issues Update BNL-ATLAS_downtime.yaml Run FOS check on OSG repo only Requirements Fixup on FOS Script Removing ID - not needed for new services Fixing ID error Update BNL-ATLAS_downtime.yaml Adding WebDAV.tape service for BNL-ATLAS Add InstitutionID and FieldOfScienceID to the project OrderedDict so the ordering of the XML elements matches the schema adding osdftest for monitoring Update projects/UCSD_WatsonParris.yaml with FieldOfScienceID Update AGLT2_downtime.yaml Add CHTC-KAPEL-TEST EP Add downtime for CARDIFF_UK_OSDF_CACHE due to overloaded Add downtime for CARDIFF_UK_OSDF_CACHE due to 'power maintenance ' Field of science ids (opensciencegrid#3809) Update NET2.yaml Add Joseph Reichert as contact to rutgers-cms resources (FD #76281) adding /CN=osdftest.t2.ucsd.edu to create a perf OSDF test Update UCSD_WatsonParris.yaml Create UCSD_WatsonParris.yaml
- Loading branch information
Showing
1,147 changed files
with
1,717 additions
and
6 deletions.
There are no files selected for viewing
188 changes: 188 additions & 0 deletions
188
.github/scripts/check_project_fos_precision/field_of_science.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
from functools import lru_cache | ||
from typing import Union | ||
import string | ||
|
||
import pandas as pd | ||
|
||
|
||
@lru_cache()
def get_cip_df():
    """Load the SED-CIP 2022 spreadsheet and tag each row with its field ids.

    The sheet is read from ``data/SED-CIP-2022.xlsx`` (relative to the current
    working directory).  Because the function takes no arguments and is wrapped
    in ``lru_cache``, the file is only read on the first call; later calls get
    the same DataFrame object back.

    Returns:
        The DataFrame with three extra columns, ``BroadFieldId``,
        ``MajorFieldId`` and ``DetailedFieldId``, each derived from the
        ``SED-CIP code`` column via ``get_id`` at granularity 0, 1 and 2.
    """
    cip_df = pd.read_excel("data/SED-CIP-2022.xlsx")

    # The row at position 2 carries the real column titles; it and the rows
    # above it are not data, so they are sliced away afterwards.
    cip_df.columns = cip_df.iloc[2]
    cip_df = cip_df.iloc[3:]

    # Column order matters for readers of the frame: broad, major, detailed.
    id_columns = ("BroadFieldId", "MajorFieldId", "DetailedFieldId")
    for granularity, column in enumerate(id_columns):
        cip_df[column] = cip_df['SED-CIP code'].apply(
            lambda code, level=granularity: get_id(code, level)
        )

    return cip_df
|
||
|
||
def get_matching_rows(cip_df, broad_id, major_id, detailed_id):
    """Return the rows of ``cip_df`` matching the id at the finest level possible.

    The lookup is attempted with all three ids, then with broad + major only,
    then with the broad id alone; the first non-empty selection wins.

    Args:
        cip_df: DataFrame holding ``BroadFieldId``, ``MajorFieldId`` and
            ``DetailedFieldId`` columns.
        broad_id: Value to match against ``BroadFieldId``.
        major_id: Value to match against ``MajorFieldId``.
        detailed_id: Value to match against ``DetailedFieldId``.

    Returns:
        The non-empty subset of ``cip_df`` found at the finest matching level.

    Raises:
        ValueError: If not even the broad id matches any row.
    """
    # Each mask narrows the previous one, so they go from coarse to fine.
    broad_mask = cip_df["BroadFieldId"] == broad_id
    major_mask = broad_mask & (cip_df['MajorFieldId'] == major_id)
    detailed_mask = major_mask & (cip_df["DetailedFieldId"] == detailed_id)

    # Try the finest grain first and fall back to coarser ones.
    for mask in (detailed_mask, major_mask, broad_mask):
        rows = cip_df[mask]
        if len(rows) > 0:
            return rows

    raise ValueError(f"No matching rows for {broad_id}.{major_id}{detailed_id}")
|
||
|
||
def _most_common_label(cip_df, scope_mask, column, candidates):
    """Return the candidate in ``column`` that labels the most rows inside ``scope_mask``.

    Returns None when no candidate labels at least one row.  On a tie the
    candidate encountered first while iterating ``candidates`` is kept.
    """
    best_option = None
    max_rows = 0
    for candidate in candidates:
        row_count = len(cip_df[scope_mask & (cip_df[column] == candidate)])
        if row_count > max_rows:
            max_rows = row_count
            best_option = candidate
    return best_option


def map_id_to_fields_of_science(id: str):
    """Map a SED-CIP code to its broad, major and detailed field-of-science names.

    If ``id`` equals a value in the ``SED-CIP code`` column, the names on the
    first such row are returned as-is.  Otherwise the code is split with
    ``get_id`` and the rows found by ``get_matching_rows`` supply candidate
    names; for each level whose id part is present, the candidate that labels
    the most rows sharing that id prefix is chosen.  The candidates and the
    choice are printed for each level.

    Args:
        id: The SED-CIP code to look up.

    Returns:
        A three-element list ``[broad, major, detailed]``.  Levels that could
        not be resolved are None; if no rows match at all, ``id`` is printed
        and all three entries are None.
    """
    cip_df = get_cip_df()

    # If we have a direct match, return it
    direct_match = cip_df[cip_df["SED-CIP code"] == id]
    if len(direct_match) > 0:
        return [
            direct_match["New broad field"].values[0],
            direct_match["New major field"].values[0],
            direct_match["New detailed field"].values[0],
        ]

    broad_id = get_id(id, 0)
    major_id = get_id(id, 1)
    detailed_id = get_id(id, 2)

    try:
        matching_rows = get_matching_rows(cip_df, broad_id, major_id, detailed_id)
    except ValueError:
        print(id)
        return [None, None, None]

    # The fields we hope to populate
    broad_field_of_science = None
    major_field_of_science = None
    detailed_field_of_science = None

    # Rows sharing the id prefix at each level; every mask narrows the last.
    broad_mask = cip_df["BroadFieldId"] == broad_id
    major_mask = broad_mask & (cip_df["MajorFieldId"] == major_id)
    detailed_mask = major_mask & (cip_df["DetailedFieldId"] == detailed_id)

    if broad_id is not None:
        possible_broad_fields = set(matching_rows["New broad field"])
        broad_field_of_science = _most_common_label(
            cip_df, broad_mask, "New broad field", possible_broad_fields
        )
        print(f"Broad Field: {broad_id}.{major_id}{detailed_id} has possible values {possible_broad_fields} we picked {broad_field_of_science}")

    if major_id is not None:
        possible_major_fields = set(matching_rows["New major field"])
        major_field_of_science = _most_common_label(
            cip_df, major_mask, "New major field", possible_major_fields
        )
        print(f"Major Field: {broad_id}.{major_id}{detailed_id} has rows {possible_major_fields} we picked {major_field_of_science}")

    if detailed_id is not None:
        possible_detailed_fields = set(matching_rows["New detailed field"])
        detailed_field_of_science = _most_common_label(
            cip_df, detailed_mask, "New detailed field", possible_detailed_fields
        )
        print(f"Detailed Field: {broad_id}.{major_id}{detailed_id} has rows {possible_detailed_fields} we picked {detailed_field_of_science}")

    return [broad_field_of_science, major_field_of_science, detailed_field_of_science]
|
||
|
||
def get_id(id: Union[float, str], granularity: int):
    """Extract one two-digit group from a SED-CIP style code such as ``26.1501``.

    The code may arrive as a string or as a float.  Floats lose a leading zero
    of the integer part (``01.2023`` becomes ``1.2023``) and a trailing zero
    of the fraction (``10.2320`` becomes ``10.232``); both are restored before
    the digits are split.

    Args:
        id: The code to split.
        granularity: 0 for the first two digits, 1 for the next two digits,
            2 for everything from the fifth digit on.

    Returns:
        The requested digit group as a string, or None when ``id`` is
        missing (``pd.isna``), the code has too few digits for the requested
        level, or ``granularity`` is not 0, 1 or 2.
    """
    if pd.isna(id):
        return None

    text = str(id)
    digits = [x for x in text if x in string.digits]

    # A single-digit integer part means the leading zero was lost (01.2023).
    if len(text.split(".")[0]) == 1:
        digits = ['0', *digits]

    # An odd digit count means a trailing zero was lost (10.2320).  After this
    # step the count is always even, so no further padding is needed.
    if len(digits) % 2 == 1:
        digits = [*digits, '0']

    if granularity == 0:
        return "".join(digits[:2])

    if granularity == 1:
        return "".join(digits[2:4]) if len(digits) >= 4 else None

    if granularity == 2:
        return "".join(digits[4:]) if len(digits) >= 6 else None

    return None
|
||
|
||
def tests():
    """Run the module's self-checks.

    Exercises ``get_id`` on float inputs that lose leading or trailing zeros
    and ``map_id_to_fields_of_science`` on one known code.

    Raises:
        ValueError: On the first check whose result differs from the
            expectation; the message names the failing call.
    """
    # (input, granularity, expected).  1.23 is what a code written as 01.23
    # becomes once it has been read as a float.
    get_id_cases = [
        (1.0, 0, "01"),
        (1.0, 1, "00"),
        (10.2320, 2, "20"),
        (10.2320, 1, "23"),
        (10.2320, 0, "10"),
        (1.23, 2, None),
        (1.23, 0, "01"),
    ]
    for value, granularity, expected in get_id_cases:
        actual = get_id(value, granularity)
        if actual != expected:
            raise ValueError(
                f"Test failed: get_id({value!r}, {granularity}) returned {actual!r}, expected {expected!r}"
            )

    expected_fields = ['Biological and biomedical sciences', 'Neurobiology and neurosciences', None]
    actual_fields = map_id_to_fields_of_science("26.15")
    if actual_fields != expected_fields:
        raise ValueError(
            f"Test failed: map_id_to_fields_of_science('26.15') returned {actual_fields!r}, expected {expected_fields!r}"
        )


if __name__ == "__main__":
    tests()
    print("All tests passed")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import sys | ||
import datetime | ||
|
||
import yaml | ||
import requests | ||
|
||
from field_of_science import get_id | ||
|
||
|
||
def get_active_projects(start_date: datetime.datetime, timeout: float = 60):
    """Return the project names GRACC reports payload records for since ``start_date``.

    Sends a search request to the ``gracc.osg.summary`` endpoint that filters
    on ``ResourceType == "Payload"`` and an ``EndTime`` between ``start_date``
    and now, and aggregates the hits by ``ProjectName``.

    Args:
        start_date: Lower bound of the ``EndTime`` window.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive endpoint cannot hang the caller indefinitely.

    Returns:
        A list with the ``key`` of every bucket in the ``projects``
        aggregation.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    # The range filter takes epoch milliseconds.
    window_end_ms = int(datetime.datetime.now().timestamp() * 1000)
    window_start_ms = int(start_date.timestamp() * 1000)

    response = requests.get(
        "https://gracc.opensciencegrid.org/q/gracc.osg.summary/_search",
        json={
            "size": 0,
            "query": {
                "bool": {
                    "filter": [
                        {
                            "term": {
                                "ResourceType": "Payload"
                            }
                        },
                        {
                            "range": {
                                "EndTime": {
                                    "lte": window_end_ms,
                                    "gte": window_start_ms
                                }
                            }
                        }
                    ]
                },
            },
            "aggs": {
                "projects": {
                    "terms": {
                        "field": "ProjectName",
                        "size": 99999999
                    },
                    "aggs": {
                        "projectJobsRan": {
                            "sum": {
                                "field": "Njobs"
                            }
                        }
                    }
                }
            }
        },
        timeout=timeout,
    )

    # Fail with the HTTP error itself rather than a KeyError on a body that
    # has no 'aggregations' section.
    response.raise_for_status()

    data = response.json()

    return [bucket['key'] for bucket in data['aggregations']['projects']['buckets']]
|
||
|
||
|
||
def has_detailed_precision(id: str):
    """Tell whether a field-of-science id goes beyond its first two digits.

    Returns True when ``get_id`` can extract the granularity-1 group (the
    second pair of digits) from ``id``, and False otherwise.

    NOTE(review): despite the name, this checks granularity 1 rather than
    granularity 2 -- confirm that this is the intended level.
    """
    second_group = get_id(id, granularity=1)
    return second_group is not None
|
||
|
||
def main():
    """Fail if a recently active project lacks a sufficiently precise FieldOfScienceID.

    Fetches the projects active during the last 365 days, loads each one's
    YAML file from the repository's ``projects`` directory and collects those
    whose ``FieldOfScienceID`` is missing or rejected by
    ``has_detailed_precision``.  Active projects without a YAML file are
    skipped.

    Raises:
        Exception: If at least one project was collected; the per-project
            messages are written to stderr first.
    """
    from pathlib import Path

    # Anchor the lookup on this script's location instead of the current
    # working directory; otherwise running the script from anywhere else
    # (e.g. the repository root) makes every open() fail and the check
    # silently pass.
    projects_dir = Path(__file__).resolve().parent.joinpath("..", "..", "..", "projects")

    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    active_project_names = get_active_projects(one_year_ago)

    print(active_project_names)

    exceptions = []
    for project_name in active_project_names:
        try:
            with open(projects_dir / f"{project_name}.yaml") as project_file:
                # NOTE(review): yaml.Loader can construct arbitrary Python
                # objects; consider yaml.safe_load if these files can come
                # from untrusted contributions.
                project_data = yaml.load(project_file, Loader=yaml.Loader)
        except FileNotFoundError:
            # No project file with this name exists, so there is nothing to check.
            continue

        # An empty YAML document loads as None; treat it as having no fields.
        project_data = project_data or {}

        if "FieldOfScienceID" not in project_data or not has_detailed_precision(project_data["FieldOfScienceID"]):
            exceptions.append(f"Project {project_name} is running in the OSPool without detailed precision.")

    if exceptions:
        print("\n".join(exceptions), file=sys.stderr)
        raise Exception("Projects without detailed precision need to be updated.")


if __name__ == "__main__":
    main()
12 changes: 12 additions & 0 deletions
12
.github/scripts/check_project_fos_precision/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
certifi==2024.2.2 | ||
charset-normalizer==3.3.2 | ||
idna==3.7 | ||
numpy==1.26.4 | ||
pandas==2.2.2 | ||
python-dateutil==2.9.0.post0 | ||
pytz==2024.1 | ||
PyYAML==6.0.1 | ||
requests==2.31.0 | ||
six==1.16.0 | ||
tzdata==2024.1 | ||
urllib3==2.2.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
name: Check Project FOS Precision | ||
on: | ||
pull_request: | ||
branches: | ||
- main | ||
schedule: | ||
- cron: '0 0 * * *' | ||
|
||
jobs: | ||
check: | ||
name: Check | ||
runs-on: ubuntu-latest | ||
if: startsWith(github.repository, 'opensciencegrid/') | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- name: Set up Python | ||
uses: actions/setup-python@v4 | ||
with: | ||
python-version: 3.9.15 | ||
cache: 'pip' # caching pip dependencies | ||
- run: pip install -r ./.github/scripts/check_project_fos_precision/requirements.txt | ||
- run: python ./.github/scripts/check_project_fos_precision/main.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.