Skip to content

Commit

Permalink
EXT-7: Handle LangDetectException gracefully, return correct HTTP status codes (#8)
Browse files Browse the repository at this point in the history

* EXT-7: Handle LangDetectException gracefully

* EXT-7: Return correct status code in HTTP exception handler

* Linting

* EXT-7: Return status 500 on unexpected internal error

* EXT-7: Re-raise original exception
  • Loading branch information
pierrebeauguitte authored Sep 4, 2023
1 parent b678365 commit 69339e5
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
3 changes: 2 additions & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,6 @@ def display_error_message_in_template(request: Request, exc: StarletteHTTPExcept
'error': str(exc.detail)
},
"root_path": Utils.get_environment_prefix()
}
},
status_code=exc.status_code
)
8 changes: 6 additions & 2 deletions metadata_extract/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dateutil.parser import parse
from dateparser.search import search_dates
import langdetect
from langdetect.lang_detect_exception import LangDetectException
from . import text, author_name
from .candidate import Candidate, Origin
from .infopage import InfoPage
Expand Down Expand Up @@ -218,8 +219,11 @@ def get_year_from_info(self) -> None:

def get_language(self) -> None:
"""Detects language of concatenated text, and adds it as a candidate."""
lang = langdetect.detect(' '.join(self.doc.pages.values()))
self.metadata.add_candidate('language', Candidate(lang, Origin.LANGUAGE_MODEL))
try:
lang = langdetect.detect(' '.join(self.doc.pages.values()))
self.metadata.add_candidate('language', Candidate(lang, Origin.LANGUAGE_MODEL))
except LangDetectException:
return

def read_info_page(self) -> None:
"""Finds the infopage and searches for candidate values for title, publisher and authors."""
Expand Down
7 changes: 6 additions & 1 deletion metadata_extract/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ def rank_years(self) -> Optional[CandidateType]:
reverse=True)
return sorted_years[0].to_dict()

def choose_language(self) -> Optional[CandidateType]:
if 'language' not in self.candidates:
return None
return self.candidates['language'][0].to_dict()

def choose_isxn(self, identifier: str) -> Optional[CandidateType]:
if identifier not in self.candidates:
return None
Expand Down Expand Up @@ -143,7 +148,7 @@ def choose_authors(self) -> list[CandidateType]:

def choose_best(self) -> None:
self.results['year'] = self.rank_years()
self.results['language'] = self.candidates['language'][0].to_dict()
self.results['language'] = self.choose_language()
self.results['title'] = self.choose_title()
self.results['publisher'] = self.choose_publishers()
self.results['publicationType'] = self.choose_doc_type()
Expand Down
5 changes: 3 additions & 2 deletions src/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def process_and_remove(
try:
results = self.meteor.run(filepath)
return results
except Exception:
except Exception as exc:
print(traceback.format_exc())
return {'error': f'Error while processing file {filename if filename else ""}'}
raise HTTPException(detail=f'Error while processing file {filename}',
status_code=500) from exc
finally:
if delete_immediately:
os.remove(filepath)
Expand Down

0 comments on commit 69339e5

Please sign in to comment.