diff --git a/main.py b/main.py
index 2e5c880..d78ee97 100644
--- a/main.py
+++ b/main.py
@@ -79,5 +79,6 @@ def display_error_message_in_template(request: Request, exc: StarletteHTTPExcept
                 'error': str(exc.detail)
             },
             "root_path": Utils.get_environment_prefix()
-        }
+        },
+        status_code=exc.status_code
     )
diff --git a/metadata_extract/finder.py b/metadata_extract/finder.py
index 0cd4b08..c75f7aa 100644
--- a/metadata_extract/finder.py
+++ b/metadata_extract/finder.py
@@ -8,6 +8,7 @@ from dateutil.parser import parse
 from dateparser.search import search_dates
 import langdetect
+from langdetect.lang_detect_exception import LangDetectException
 from . import text, author_name
 from .candidate import Candidate, Origin
 from .infopage import InfoPage
@@ -218,8 +219,12 @@ def get_year_from_info(self) -> None:
 
     def get_language(self) -> None:
         """Detects language of concatenated text, and adds it as a candidate."""
-        lang = langdetect.detect(' '.join(self.doc.pages.values()))
+        try:
+            lang = langdetect.detect(' '.join(self.doc.pages.values()))
+        except LangDetectException:
+            # Detection failed: leave 'language' without a candidate.
+            return
         self.metadata.add_candidate('language', Candidate(lang, Origin.LANGUAGE_MODEL))
 
     def read_info_page(self) -> None:
         """Finds the infopage and searches for candidate values for title, publisher and authors."""
diff --git a/metadata_extract/metadata.py b/metadata_extract/metadata.py
index 7671074..ed5cd1d 100644
--- a/metadata_extract/metadata.py
+++ b/metadata_extract/metadata.py
@@ -78,6 +78,12 @@ def rank_years(self) -> Optional[CandidateType]:
                               reverse=True)
         return sorted_years[0].to_dict()
 
+    def choose_language(self) -> Optional[CandidateType]:
+        """Returns the first language candidate as a dict, or None if 'language' has no entry."""
+        if 'language' not in self.candidates:
+            return None
+        return self.candidates['language'][0].to_dict()
+
     def choose_isxn(self, identifier: str) -> Optional[CandidateType]:
         if identifier not in self.candidates:
             return None
@@ -143,7 +149,7 @@ def choose_authors(self) -> list[CandidateType]:
 
     def choose_best(self) -> None:
         self.results['year'] = self.rank_years()
-        self.results['language'] = self.candidates['language'][0].to_dict()
+        self.results['language'] = self.choose_language()
         self.results['title'] = self.choose_title()
         self.results['publisher'] = self.choose_publishers()
         self.results['publicationType'] = self.choose_doc_type()
diff --git a/src/util.py b/src/util.py
index 464d63a..8326056 100644
--- a/src/util.py
+++ b/src/util.py
@@ -93,9 +93,10 @@ def process_and_remove(
         try:
             results = self.meteor.run(filepath)
             return results
-        except Exception:
+        except Exception as exc:
             print(traceback.format_exc())
-            return {'error': f'Error while processing file {filename if filename else ""}'}
+            raise HTTPException(detail=f'Error while processing file {filename or ""}',
+                                status_code=500) from exc
         finally:
             if delete_immediately:
                 os.remove(filepath)