Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NER system recognizes job titles #41

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
shared/installed/spacy-model/jobs/** filter=lfs diff=lfs merge=lfs -text
shared/installed/spacy-model/jobs/* filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions mentor_classifier/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/Users/erice/Desktop/mentor-classifier/shared/installed/spacy-model/jobs/** filter=lfs diff=lfs merge=lfs -text
12 changes: 9 additions & 3 deletions mentor_classifier/mentor_classifier/ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import List, Dict


from mentor_classifier.spacy_model import find_or_load_spacy
from mentor_classifier.spacy_model import find_or_load_spacy, find_or_load_custom
from mentor_classifier.types import AnswerInfo
from mentor_classifier.utils import get_shared_root

Expand All @@ -36,10 +36,12 @@ def __init__(self, answers: List[AnswerInfo], shared_root: str = ""):
self.people: List[str] = []
self.places: List[str] = []
self.acronyms: List[str] = []
self.jobs: List[str] = []
self.load(answers, shared_root or get_shared_root())

def load(self, answers: List[AnswerInfo], shared_root: str):
nlp = find_or_load_spacy(path.join(shared_root, "spacy-model"))
jobs = find_or_load_custom(path.join(shared_root, "spacy-model", "jobs"))
for answer in answers:
answer_doc = nlp(answer.answer_text)
if answer_doc.ents:
Expand All @@ -50,14 +52,17 @@ def load(self, answers: List[AnswerInfo], shared_root: str):
self.acronyms.append(ent.text)
if ent.label_ == "GPE":
self.places.append(ent.text)
else:
logging.warning("No named entities found.")
# Run the custom job-title model over the same answer text.
jobs_doc = jobs(answer.answer_text)
# Store the entity's text, not the entity object itself: self.jobs is
# declared List[str] and the people/places/acronyms lists store ent.text.
# Iterating an empty ents sequence is a no-op, so no emptiness guard is needed.
self.jobs.extend(ent.text for ent in jobs_doc.ents)

def to_dict(self) -> Dict[str, List[str]]:
    """Return the collected entities as a mapping of category name to list.

    The returned lists are the instance's own lists (not copies), keyed by
    "acronyms", "people", "places" and "jobs".
    """
    return {
        "acronyms": self.acronyms,
        "people": self.people,
        "places": self.places,
        "jobs": self.jobs,
    }

Expand Down Expand Up @@ -85,4 +90,5 @@ def generate_questions(self) -> List[FollowupQuestion]:
self.add_followups("person", self.people, questions)
self.add_followups("place", self.places, questions)
self.add_followups("acronym", self.acronyms, questions)
self.add_followups("job", self.jobs, questions)
return questions
11 changes: 9 additions & 2 deletions mentor_classifier/mentor_classifier/spacy_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@ def find_or_load_spacy(file_path: str) -> Language:
SPACY_MODELS[abs_path] = load(
path.join(
file_path,
"en_core_web_sm-3.0.0",
"en_core_web_sm-3.1.0",
"en_core_web_sm",
"en_core_web_sm-3.0.0",
"en_core_web_sm-3.1.0",
)
)
return SPACY_MODELS[abs_path]


def find_or_load_custom(file_path: str) -> Language:
    """Return the custom model stored under ``file_path``, loading it once.

    The model is read from ``<file_path>/output/model-best`` and cached in
    SPACY_MODELS under the absolute form of ``file_path``; later calls with
    the same location return the cached object.
    """
    cache_key = path.abspath(file_path)
    if cache_key in SPACY_MODELS:
        return SPACY_MODELS[cache_key]
    model_dir = path.join(file_path, "output", "model-best")
    SPACY_MODELS[cache_key] = load(model_dir)
    return SPACY_MODELS[cache_key]
2 changes: 1 addition & 1 deletion mentor_classifier/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ redis==3.5.3
requests==2.25.1
scikit-learn==0.24.2
sentence-transformers==2.0.0
spacy==3.0.6
spacy==3.1.0



6 changes: 6 additions & 0 deletions mentor_classifier/tests/test_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from mentor_classifier.ner import NamedEntities


@pytest.mark.only
@responses.activate
@pytest.mark.parametrize(
"question, answer, expected_followup",
Expand All @@ -21,6 +22,11 @@
"Can you tell me more about Clint Anderson?",
),
("Where do you work?", "I work at USC", "What is USC?"),
(
"What is your job",
"The Network Security Engineer provides support of the information systems security controls.",
"What does a(n) Network Security Engineer do?",
),
],
)
def test_questions(
Expand Down
2 changes: 1 addition & 1 deletion shared/spacy_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def spacy_download(to_path="installed", replace_existing=True) -> str:
return spacy_path
spacy_tar = os.path.join(to_path, "spacy.tar.gz")
download(
"https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz",
"https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0.tar.gz",
spacy_tar,
)
tar = tarfile.open(spacy_tar)
Expand Down