Skip to content

Commit

Permalink
Merge branch 'master' into itb
Browse files Browse the repository at this point in the history
* master:
  Add Michael Antonelli as contact to brown-cms resources (FD #76261)
  Added project UCDenver_Roberts
  Add downtime for DENVER_INTERNET2_OSDF_CACHE due to toPelican
  Add downtime for MGHPCC_NRP_OSDF_ORIGIN due to hw issues
  Update BNL-ATLAS_downtime.yaml
  Run FOS check on OSG repo only
  Requirements Fixup on FOS Script
  Removing ID - not needed for new services
  Fixing ID error
  Update BNL-ATLAS_downtime.yaml
  Adding WebDAV.tape service for  BNL-ATLAS
  Add InstitutionID and FieldOfScienceID to the project OrderedDict so the ordering of the XML elements matches the schema
  adding osdftest for monitoring
  Update projects/UCSD_WatsonParris.yaml with FieldOfScienceID
  Update AGLT2_downtime.yaml
  Add CHTC-KAPEL-TEST EP
  Add downtime for CARDIFF_UK_OSDF_CACHE due to overloaded
  Add downtime for CARDIFF_UK_OSDF_CACHE due to 'power maintenance '
  Field of science ids (opensciencegrid#3809)
  Update NET2.yaml
  Add Joseph Reichert as contact to rutgers-cms resources (FD #76281)
  adding  /CN=osdftest.t2.ucsd.edu to create a perf OSDF test
  Update UCSD_WatsonParris.yaml
  Create UCSD_WatsonParris.yaml
  • Loading branch information
matyasselmeci committed Apr 30, 2024
2 parents 0e9625c + 5700ee5 commit 7410e58
Show file tree
Hide file tree
Showing 1,147 changed files with 1,717 additions and 6 deletions.
188 changes: 188 additions & 0 deletions .github/scripts/check_project_fos_precision/field_of_science.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from functools import lru_cache
from typing import Union
import string

import pandas as pd


@lru_cache()
def get_cip_df():
    """Load the SED-CIP 2022 spreadsheet and add per-level ID columns.

    The workbook is read from ``data/SED-CIP-2022.xlsx``, resolved against
    the current working directory. The row at position 2 of the raw sheet
    carries the real column titles, so it is promoted to the header and only
    the rows after it are kept.

    Returns:
        A DataFrame holding the spreadsheet columns plus ``BroadFieldId``,
        ``MajorFieldId`` and ``DetailedFieldId``, each derived from the
        ``SED-CIP code`` column with ``get_id``. The result is cached, so
        every caller receives the same object and should not mutate it.
    """
    cip_df = pd.read_excel("data/SED-CIP-2022.xlsx")

    # Promote the title row to the header, then keep only the data rows.
    cip_df.columns = cip_df.iloc[2]
    # Copy the slice so the column assignments below write to an independent
    # frame instead of a slice of the raw sheet (avoids SettingWithCopyWarning).
    cip_df = cip_df.iloc[3:].copy()

    codes = cip_df["SED-CIP code"]
    level_columns = ("BroadFieldId", "MajorFieldId", "DetailedFieldId")
    for granularity, column in enumerate(level_columns):
        # Bind the level as a default argument so each lambda keeps its own value.
        cip_df[column] = codes.apply(lambda code, level=granularity: get_id(code, level))

    return cip_df


def get_matching_rows(cip_df, broad_id, major_id, detailed_id):
    """Return the rows of ``cip_df`` that match the IDs at the finest grain available.

    The lookup tries broad + major + detailed first, then broad + major, then
    broad alone, and returns the first non-empty selection.

    Raises:
        ValueError: if not even the broad ID matches any row.
    """
    same_broad = cip_df["BroadFieldId"] == broad_id
    same_major = cip_df["MajorFieldId"] == major_id
    same_detailed = cip_df["DetailedFieldId"] == detailed_id

    # Ordered from the finest grain to the coarsest.
    candidate_masks = (
        same_broad & same_major & same_detailed,
        same_broad & same_major,
        same_broad,
    )

    for mask in candidate_masks:
        selected = cip_df[mask]
        if not selected.empty:
            return selected

    raise ValueError(f"No matching rows for {broad_id}.{major_id}{detailed_id}")


def _pick_most_common_field(cip_df, id_mask, matching_rows, column):
    """Return the ``column`` value from ``matching_rows`` that labels the most ``cip_df`` rows selected by ``id_mask``.

    Returns None when no candidate labels any selected row.
    """
    best_option = None
    max_rows = 0
    # unique() keeps first-appearance order, so a tie always resolves to the
    # earliest candidate. Iterating a set here would make ties depend on the
    # per-process string hash seed.
    for candidate in matching_rows[column].unique():
        row_count = int((id_mask & (cip_df[column] == candidate)).sum())
        if row_count > max_rows:
            max_rows = row_count
            best_option = candidate
    return best_option


def map_id_to_fields_of_science(id: str):
    """Map a SED-CIP code to its broad, major and detailed field names.

    If the code appears verbatim in the ``SED-CIP code`` column, the names of
    the first such row are returned. Otherwise the code is split into its
    broad/major/detailed IDs, the closest rows are fetched with
    ``get_matching_rows``, and for every level present in the code the name
    that labels the most rows sharing that ID prefix is chosen. Each choice is
    printed together with the candidates that were considered.

    Args:
        id: the SED-CIP code, e.g. ``"26.15"``.

    Returns:
        ``[broad, major, detailed]``. An entry is None when the code does not
        reach that level, or when nothing in the table matches (in which case
        the code is printed and all three entries are None).
    """
    cip_df = get_cip_df()

    # If we have a direct match, return it
    direct_match = cip_df[cip_df["SED-CIP code"] == id]
    if len(direct_match) > 0:
        return [
            direct_match[column].values[0]
            for column in ("New broad field", "New major field", "New detailed field")
        ]

    broad_id = get_id(id, 0)
    major_id = get_id(id, 1)
    detailed_id = get_id(id, 2)

    try:
        matching_rows = get_matching_rows(cip_df, broad_id, major_id, detailed_id)
    except ValueError:
        # Not even the broad ID is known: report the code and give up.
        print(id)
        return [None, None, None]

    # Define the fields we hope to populate
    broad_field_of_science = None
    major_field_of_science = None
    detailed_field_of_science = None

    # Each mask narrows the previous one by one more level of the code.
    same_broad = cip_df["BroadFieldId"] == broad_id
    same_major = same_broad & (cip_df["MajorFieldId"] == major_id)
    same_detailed = same_major & (cip_df["DetailedFieldId"] == detailed_id)

    if broad_id is not None:
        possible_broad_fields = set(matching_rows["New broad field"])
        broad_field_of_science = _pick_most_common_field(
            cip_df, same_broad, matching_rows, "New broad field"
        )
        print(f"Broad Field: {broad_id}.{major_id}{detailed_id} has possible values {possible_broad_fields} we picked {broad_field_of_science}")

    if major_id is not None:
        possible_major_fields = set(matching_rows["New major field"])
        major_field_of_science = _pick_most_common_field(
            cip_df, same_major, matching_rows, "New major field"
        )
        print(f"Major Field: {broad_id}.{major_id}{detailed_id} has rows {possible_major_fields} we picked {major_field_of_science}")

    if detailed_id is not None:
        possible_detailed_fields = set(matching_rows["New detailed field"])
        detailed_field_of_science = _pick_most_common_field(
            cip_df, same_detailed, matching_rows, "New detailed field"
        )
        print(f"Detailed Field: {broad_id}.{major_id}{detailed_id} has rows {possible_detailed_fields} we picked {detailed_field_of_science}")

    return [broad_field_of_science, major_field_of_science, detailed_field_of_science]


def get_id(id: Union[float, str], granularity: int):
    """Extract one level of a SED-CIP code as a string of digits.

    The code may arrive as a string (``"26.1103"``) or as a float that lost
    its padding when it was parsed (``1.0`` for ``01.00``, ``10.232`` for
    ``10.2320``); the missing zeros are restored before slicing.

    Args:
        id: the SED-CIP code, or a missing value (None / NaN).
        granularity: 0 for the broad level (first two digits), 1 for the
            major level (digits three and four), 2 for the detailed level
            (everything from the fifth digit on).

    Returns:
        The digits for the requested level, or None when ``id`` is missing,
        the code does not reach that level, or ``granularity`` is not 0-2.
    """
    # Check if None
    if pd.isna(id):
        return None

    text = str(id)

    # Keeping only the digits drops the decimal point
    digits = [char for char in text if char in string.digits]

    # A one-character integer part means the leading zero was lost (01.2023)
    if len(text.split(".")[0]) == 1:
        digits = ['0', *digits]

    # An odd digit count means a trailing zero was lost (10.2320). After this
    # the count is always even, so no further padding is needed.
    if len(digits) % 2 == 1:
        digits = [*digits, '0']

    if granularity == 0:
        return "".join(digits[:2])

    if granularity == 1:
        if len(digits) < 4:
            return None
        return "".join(digits[2:4])

    if granularity == 2:
        if len(digits) < 6:
            return None
        return "".join(digits[4:])

    return None


def tests():
    """Run the module's self-checks, raising ValueError on the first failure.

    Covers ``get_id`` on float inputs that lost their zero padding, and one
    ``map_id_to_fields_of_science`` lookup.
    """
    # ((id, granularity), expected result), checked in this order.
    get_id_cases = (
        ((1.0, 0), "01"),
        ((1.0, 1), "00"),
        ((10.2320, 2), "20"),
        ((10.2320, 1), "23"),
        ((10.2320, 0), "10"),
        ((1.23, 2), None),
        ((1.23, 0), "01"),
    )
    for arguments, expected in get_id_cases:
        if get_id(*arguments) != expected:
            raise ValueError("Test failed")

    expected_fields = ['Biological and biomedical sciences', 'Neurobiology and neurosciences', None]
    if map_id_to_fields_of_science("26.15") != expected_fields:
        raise ValueError("Test failed")

# Run the self-checks when this module is executed directly as a script.
if __name__ == "__main__":
    tests()
    print("All tests passed")
88 changes: 88 additions & 0 deletions .github/scripts/check_project_fos_precision/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import sys
import datetime

import yaml
import requests

from field_of_science import get_id


def get_active_projects(start_date: datetime.datetime):
    """Return the names of projects with payload records since ``start_date``.

    Sends a search to the ``gracc.osg.summary`` index for records whose
    ``ResourceType`` is ``Payload`` and whose ``EndTime`` lies between
    ``start_date`` and now, aggregated by ``ProjectName``.

    Args:
        start_date: start of the time window.

    Returns:
        The key of every ``ProjectName`` aggregation bucket, as a list.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    response = requests.get(
        "https://gracc.opensciencegrid.org/q/gracc.osg.summary/_search",
        json={
            "size": 0,
            "query": {
                "bool": {
                    "filter": [
                        {
                            "term": {
                                "ResourceType": "Payload"
                            }
                        },
                        {
                            "range": {
                                "EndTime": {
                                    # The range bounds are sent as epoch milliseconds.
                                    "lte": int(datetime.datetime.now().timestamp() * 1000),
                                    "gte": int(start_date.timestamp() * 1000)
                                }
                            }
                        }
                    ]
                },
            },
            "aggs": {
                "projects": {
                    "terms": {
                        "field": "ProjectName",
                        "size": 99999999
                    },
                    "aggs": {
                        "projectJobsRan": {
                            "sum": {
                                "field": "Njobs"
                            }
                        }
                    }
                }
            }
        },
        # Without a timeout a stalled connection would hang the job indefinitely.
        timeout=120,
    )
    # Fail with the HTTP status rather than a confusing KeyError/JSON error below.
    response.raise_for_status()

    data = response.json()

    return [bucket['key'] for bucket in data['aggregations']['projects']['buckets']]



def has_detailed_precision(id: str):
    """Report whether a Field of Science ID is precise enough.

    Returns True when ``get_id`` can extract granularity level 1 from ``id``,
    i.e. the ID carries more than just its first two-digit level.
    """
    second_level = get_id(id, granularity=1)
    return second_level is not None


def main():
    """Fail if any project active in the last year lacks a precise FieldOfScienceID.

    Fetches the names of projects active during the past 365 days, loads each
    project's YAML file from the repository's ``projects`` directory, and
    collects every project whose ``FieldOfScienceID`` is missing or does not
    pass ``has_detailed_precision``. Active projects without a YAML file are
    skipped.

    Raises:
        Exception: if at least one project is flagged; the per-project
            messages are written to stderr first.
    """
    from pathlib import Path

    # Anchor on this file's location (<repo>/.github/scripts/<dir>/main.py) so
    # the lookup works from any working directory. The original path was
    # relative to the CWD; the workflow runs from the repository root, where
    # '../../../projects' points outside the checkout, so every file was
    # "missing" and silently skipped.
    projects_dir = Path(__file__).resolve().parents[3] / "projects"

    one_year_ago = datetime.datetime.now() - datetime.timedelta(days=365)
    active_project_names = get_active_projects(one_year_ago)

    print(active_project_names)

    exceptions = []
    for project_name in active_project_names:
        try:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects.
            # This also runs on pull requests, so consider yaml.safe_load.
            with open(projects_dir / f"{project_name}.yaml") as project_file:
                project_data = yaml.load(project_file, Loader=yaml.Loader)
        except FileNotFoundError:
            # Deliberate best effort: an active project with no YAML file is ignored.
            continue

        if "FieldOfScienceID" not in project_data or not has_detailed_precision(project_data["FieldOfScienceID"]):
            exceptions.append(f"Project {project_name} is running in the OSPool without detailed precision.")

    if exceptions:
        # file= is required; a positional sys.stderr would be printed as text to stdout.
        print("\n".join(exceptions), file=sys.stderr)
        raise Exception("Projects without detailed precision need to be updated.")


# Run the precision check when this file is executed directly as a script.
if __name__ == "__main__":
    main()
12 changes: 12 additions & 0 deletions .github/scripts/check_project_fos_precision/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
certifi==2024.2.2
charset-normalizer==3.3.2
idna==3.7
numpy==1.26.4
pandas==2.2.2
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
requests==2.31.0
six==1.16.0
tzdata==2024.1
urllib3==2.2.1
22 changes: 22 additions & 0 deletions .github/workflows/check_project_fos_precision.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Checks that every recently active project declares a sufficiently precise
# FieldOfScienceID. Runs on pull requests and once a day at 00:00 UTC.
name: Check Project FOS Precision
on:
  pull_request:
    branches:
      - main
      # The integration branch merged by this commit is 'master'; with only
      # 'main' listed, pull requests against 'master' never ran this check.
      - master
  schedule:
    - cron: '0 0 * * *'

jobs:
  check:
    name: Check
    runs-on: ubuntu-latest
    # Skip the job on forks outside the opensciencegrid organisation.
    if: startsWith(github.repository, 'opensciencegrid/')
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.9.15
          cache: 'pip' # caching pip dependencies
      - run: pip install -r ./.github/scripts/check_project_fos_precision/requirements.txt
      - run: python ./.github/scripts/check_project_fos_precision/main.py
1 change: 1 addition & 0 deletions projects/ACE_NIAID.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/451cgt72wj62'
FieldOfScienceID: '26.1103'
1 change: 1 addition & 0 deletions projects/AMFORA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/o14joi278jrs'
FieldOfScienceID: '11'
1 change: 1 addition & 0 deletions projects/AMNH.astro.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/em2w05s9c1uc'
FieldOfScienceID: '40.02'
1 change: 1 addition & 0 deletions projects/AMNH.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/em2w05s9c1uc'
FieldOfScienceID: '54.0101'
1 change: 1 addition & 0 deletions projects/AMNH_Burbrink.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/em2w05s9c1uc'
FieldOfScienceID: '26'
1 change: 1 addition & 0 deletions projects/AMNH_MacLow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/em2w05s9c1uc'
FieldOfScienceID: '40.02'
1 change: 1 addition & 0 deletions projects/AMNH_Smith.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/em2w05s9c1uc'
FieldOfScienceID: '26'
1 change: 1 addition & 0 deletions projects/AMS.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Sponsor:
VirtualOrganization:
Name: OSG
InstitutionID: 'https://osg-htc.org/iid/jtlq7k0qkxtn'
FieldOfScienceID: '40.0804'
1 change: 1 addition & 0 deletions projects/ASPU.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/3chofmlz7p5r'
FieldOfScienceID: '26.1103'
1 change: 1 addition & 0 deletions projects/ASU-CFD.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '27'
1 change: 1 addition & 0 deletions projects/ASU_CoMSESNet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '11.0804'
1 change: 1 addition & 0 deletions projects/ASU_EvolutionMedicineIT.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '26'
1 change: 1 addition & 0 deletions projects/ASU_Jacobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '40.02'
1 change: 1 addition & 0 deletions projects/ASU_Ozkan.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '40.08'
1 change: 1 addition & 0 deletions projects/ASU_Pfeifer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '26.1399'
1 change: 1 addition & 0 deletions projects/ASU_RCStaff.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '11.9999'
1 change: 1 addition & 0 deletions projects/ASU_Singharoy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Sponsor:
CampusGrid:
Name: OSG Connect
InstitutionID: 'https://osg-htc.org/iid/er1rnzey26m9'
FieldOfScienceID: '26'
Loading

0 comments on commit 7410e58

Please sign in to comment.