Skip to content

Commit

Permalink
bibcode/ads: normalize unicode
Browse files Browse the repository at this point in the history
* Closes #85.
  • Loading branch information
slint committed Oct 29, 2024
1 parent 812f640 commit 26318e4
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 0 deletions.
3 changes: 3 additions & 0 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

"""ID normalizer helper functions."""

import unicodedata

import isbnlib

from .proxies import custom_schemes_registry
Expand All @@ -34,6 +36,7 @@ def normalize_handle(val):

def normalize_ads(val):
    """Normalize an ADS bibliographic code.

    The value is first run through Unicode NFKD normalization, which
    expands compatibility characters such as the single-character
    ellipsis into their plain equivalents (three dots). The result is
    matched against ``ads_regexp`` and the second capture group of that
    match is returned.

    NOTE(review): when the value does not match, the ``.group`` call is
    made on the failed match result and raises; callers are presumably
    expected to validate with ``is_ads`` first -- confirm.
    """
    decomposed = unicodedata.normalize("NFKD", val)
    return ads_regexp.match(decomposed).group(2)

Expand Down
2 changes: 2 additions & 0 deletions idutils/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Utility file containing ID validators."""


import unicodedata
from urllib.parse import urlparse

from .utils import *
Expand Down Expand Up @@ -187,6 +188,7 @@ def is_urn(val):

def is_ads(val):
    """Test if argument is an ADS bibliographic code.

    The argument is NFKD-normalized before matching, so compatibility
    characters (for example a single-character ellipsis) are compared in
    their expanded form. The return value is whatever
    ``ads_regexp.match`` yields for the normalized string, not a strict
    boolean.
    """
    return ads_regexp.match(unicodedata.normalize("NFKD", val))


Expand Down
6 changes: 6 additions & 0 deletions tests/test_idutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@
"2017zndo....495787v",
"http://ui.adsabs.harvard.edu/#abs/2017zndo....495787v",
),
(
"1992ApJ…400L…1W",
["ads"],
"1992ApJ...400L...1W",
"http://ui.adsabs.harvard.edu/#abs/1992ApJ...400L...1W",
),
(
"0000000218250097",
["orcid", "isni"],
Expand Down

0 comments on commit 26318e4

Please sign in to comment.