From 660920240204b5f545ae997bd0ec14c3987ecc99 Mon Sep 17 00:00:00 2001 From: alexx2 Date: Thu, 3 Oct 2024 17:09:49 -0300 Subject: [PATCH] Adicionando o meu pacote de analise de textos em pt-br e eng --- README.md | 20 +++++++++++-------- .../__init__.py | 0 .../analisador_texto_pt_br}/__init__.py | 0 .../analisador_texto_pt_br/pt_br.py | 15 ++++++++++++++ .../analyze_text_eng}/__init__.py | 0 .../analyze_text_eng/eng.py | 14 +++++++++++++ package_name/module1_name/file1_name.py | 0 package_name/module1_name/file2_name.py | 0 package_name/module2_name/file1_name.py | 0 package_name/module2_name/file2_name.py | 0 requirements.txt | 2 ++ setup.py | 10 +++++----- 12 files changed, 48 insertions(+), 13 deletions(-) rename {package_name => analisador_texto_pt_br_e_eng}/__init__.py (100%) rename {package_name/module1_name => analisador_texto_pt_br_e_eng/analisador_texto_pt_br}/__init__.py (100%) create mode 100644 analisador_texto_pt_br_e_eng/analisador_texto_pt_br/pt_br.py rename {package_name/module2_name => analisador_texto_pt_br_e_eng/analyze_text_eng}/__init__.py (100%) create mode 100644 analisador_texto_pt_br_e_eng/analyze_text_eng/eng.py delete mode 100644 package_name/module1_name/file1_name.py delete mode 100644 package_name/module1_name/file2_name.py delete mode 100644 package_name/module2_name/file1_name.py delete mode 100644 package_name/module2_name/file2_name.py diff --git a/README.md b/README.md index f984923..babf1aa 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,31 @@ -# package_name +# analisador_texto_pt_br_e_eng Description. -The package package_name is used to: - - - - +The package analisador_texto_pt_br_e_eng is used to: + - Analisa os textos em português ou em inglês, identificando palavras mais comuns, removendo stopwords e realizando outras análises de texto. + - Usa a biblioteca spaCy para processamento de linguagem natural em ambas as línguas. 
## Installation Use the package manager [pip](https://pip.pypa.io/en/stable/) to install package_name ```bash -pip install package_name +pip install analisador_texto_pt_br_e_eng ``` ## Usage ```python -from package_name.module1_name import file1_name -file1_name.my_function() +from analisador_texto_pt_br_e_eng.analisador_texto_pt_br import pt_br +pt_br.analisar_texto("Seu texto em português aqui.") ``` +```python +from analisador_texto_pt_br_e_eng.analyze_text_eng import eng +eng.analyze_text("Your English text here.") +``` ## Author -My_name +Alexsandro ## License [MIT](https://choosealicense.com/licenses/mit/) \ No newline at end of file diff --git a/package_name/__init__.py b/analisador_texto_pt_br_e_eng/__init__.py similarity index 100% rename from package_name/__init__.py rename to analisador_texto_pt_br_e_eng/__init__.py diff --git a/package_name/module1_name/__init__.py b/analisador_texto_pt_br_e_eng/analisador_texto_pt_br/__init__.py similarity index 100% rename from package_name/module1_name/__init__.py rename to analisador_texto_pt_br_e_eng/analisador_texto_pt_br/__init__.py diff --git a/analisador_texto_pt_br_e_eng/analisador_texto_pt_br/pt_br.py b/analisador_texto_pt_br_e_eng/analisador_texto_pt_br/pt_br.py new file mode 100644 index 0000000..d53206f --- /dev/null +++ b/analisador_texto_pt_br_e_eng/analisador_texto_pt_br/pt_br.py @@ -0,0 +1,15 @@ +import spacy +from collections import Counter + + +nlp = spacy.load("pt_core_news_lg") + +def analisar_texto(texto): + doc = nlp(texto.lower()) + tokens_filtrados = [token.text for token in doc if not token.is_stop and not token.is_punct] + contagem_palavras = Counter(tokens_filtrados) + return { + 'total_palavras': len([token for token in doc if not token.is_punct]), + 'palavras_filtradas': len(tokens_filtrados), + 'palavras_mais_comuns': contagem_palavras.most_common(5) + } diff --git a/package_name/module2_name/__init__.py b/analisador_texto_pt_br_e_eng/analyze_text_eng/__init__.py similarity index 100% 
rename from package_name/module2_name/__init__.py rename to analisador_texto_pt_br_e_eng/analyze_text_eng/__init__.py diff --git a/analisador_texto_pt_br_e_eng/analyze_text_eng/eng.py b/analisador_texto_pt_br_e_eng/analyze_text_eng/eng.py new file mode 100644 index 0000000..b75b83e --- /dev/null +++ b/analisador_texto_pt_br_e_eng/analyze_text_eng/eng.py @@ -0,0 +1,14 @@ +import spacy +from collections import Counter + +nlp = spacy.load("en_core_web_lg") + +def analyze_text(text): + doc = nlp(text.lower()) + filtered_tokens = [token.text for token in doc if not token.is_stop and not token.is_punct] + word_count = Counter(filtered_tokens) + return { + 'total_words': len([token for token in doc if not token.is_punct]), + 'filtered_words': len(filtered_tokens), + 'most_common_words': word_count.most_common(5) + } diff --git a/package_name/module1_name/file1_name.py b/package_name/module1_name/file1_name.py deleted file mode 100644 index e69de29..0000000 diff --git a/package_name/module1_name/file2_name.py b/package_name/module1_name/file2_name.py deleted file mode 100644 index e69de29..0000000 diff --git a/package_name/module2_name/file1_name.py b/package_name/module2_name/file1_name.py deleted file mode 100644 index e69de29..0000000 diff --git a/package_name/module2_name/file2_name.py b/package_name/module2_name/file2_name.py deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index e69de29..2c96f26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,2 @@ +spacy >= 3.0 +setuptools >= 42 \ No newline at end of file diff --git a/setup.py b/setup.py index 4b4dc95..9b4f9d5 100644 --- a/setup.py +++ b/setup.py @@ -7,14 +7,14 @@ requirements = f.read().splitlines() setup( - name="package_name", + name="Analisador_Texto_pt_br_e_ingles", version="0.0.1", - author="my_name", - author_email="my_email", - description="My short description", + author="Alexsandro", + author_email="alecsbezerra@gmail.com", + 
description="Analisador de texto em português e inglês usando spaCy.", long_description=page_description, long_description_content_type="text/markdown", - url="my_github_repository_project_link" + url="https://github.com/alexxs2/package-template", packages=find_packages(), install_requires=requirements, python_requires='>=3.8',