Skip to content

Commit

Permalink
V2.0.0 (#209)
Browse files Browse the repository at this point in the history
* improves: add wait_key

* improves: add wait_key

* chore: add scraping package

* refactor: rename CompanyDetails to Share

* refactor: rename CompanyDetails to Share

* refactor: add financial info

* fix: fixed parser datetime

* fix: fixed parser datetime
  • Loading branch information
jlsneto authored Aug 15, 2024
1 parent 9705d90 commit 2732948
Show file tree
Hide file tree
Showing 11 changed files with 608 additions and 9 deletions.
3 changes: 2 additions & 1 deletion cereja/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@
from .mathtools import *
from . import experimental
from ._requests import request
from . import scraping

# NOTE(review): two consecutive assignments — this looks like the removed/added
# pair of a diff rendered without its -/+ markers; only the second value is
# still bound when __version__ is computed. Confirm against the real file.
VERSION = "1.9.9.final.0"
VERSION = "2.0.0.final.0"
# Package version string derived from VERSION by get_version_pep440_compliant.
__version__ = get_version_pep440_compliant(VERSION)


Expand Down
129 changes: 128 additions & 1 deletion cereja/concurrently/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
import queue
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from ..utils import decorators

from .. import Progress, console

# NOTE(review): two consecutive assignments — this looks like the removed/added
# pair of a diff rendered without its -/+ markers; the second list (which also
# exports Processor) is the one that takes effect. Confirm against the real file.
__all__ = ["MultiProcess"]
__all__ = ["MultiProcess", "Processor"]


class _IMultiProcess(abc.ABC):
Expand Down Expand Up @@ -219,3 +220,129 @@ def __exit__(self, exc_type, exc_val, exc_tb):
)
self._q.join()
self._with_context = False


class Processor:
    """Run a callable over a collection of items on a thread pool.

    ``process`` submits one task per item, leaving at least
    ``interval_seconds`` between two submissions. A companion "result" thread
    waits for the submitted futures and passes each task's return value to
    ``on_result``. Items whose call raised are stored and can be read back
    with ``get_failure_data``.

    Args:
        num_workers: Size of the thread pool; 10 when ``None``.
        max_in_progress: Number of not-yet-collected submissions at which
            ``process`` pauses for 10 seconds before submitting more.
        interval_seconds: Minimum delay between two submissions; 0 when
            ``None``.
        use_progress: Whether to show a ``Progress`` display.
        on_result: Optional callable invoked with every task's return value
            (``None`` for items whose call raised).
    """

    # How long the result thread sleeps when there is nothing to collect yet;
    # without it the collector loop would spin at full CPU.
    _IDLE_POLL_SECONDS = 0.01

    def __init__(self, num_workers=None, max_in_progress=100, interval_seconds=None, use_progress=True,
                 on_result=None):
        self._num_workers = num_workers if num_workers is not None else 10
        self._on_result = on_result
        self._total_success = 0
        self._max_in_progress = max_in_progress
        self._interval_seconds = 0 if interval_seconds is None else interval_seconds
        self._process_result_service = None
        self._future_to_data = set()
        self._failure_data = []
        self._stopped = False
        self._executor = None
        self._started_at = 0
        # Guards the success counter, which is incremented from worker threads.
        self._counter_lock = threading.Lock()
        self._progress = Progress(name="Processor",
                                  max_value=100,
                                  states=("value", "percent", "time"),
                                  custom_state_func=self.get_status,
                                  custom_state_name="Tx") if use_progress else None

    @property
    def in_progress_count(self):
        """Number of submitted futures whose result has not been collected yet."""
        return len(self._future_to_data)

    @property
    def total_processed(self):
        """Number of items finished so far (failures plus successes)."""
        return len(self._failure_data) + self._total_success

    @property
    def interval_seconds(self):
        """Minimum delay, in seconds, between two submissions."""
        return self._interval_seconds

    @property
    def total_active_threads(self):
        """Number of live threads in the whole interpreter (``threading.active_count``)."""
        return threading.active_count()

    def _create_process_result_service(self):
        """Create (without starting) the thread that collects task results.

        If a previous collector thread is still running it is joined first, so
        the caller must have requested a stop before calling this.
        """
        previous = self._process_result_service
        # is_alive() guard: joining a thread that was never started raises.
        if previous is not None and previous.is_alive():
            previous.join()
        self._process_result_service = threading.Thread(target=self._process_result, daemon=False)
        return self._process_result_service

    def get_failure_data(self):
        """Return the list of items whose processing raised an exception."""
        return self._failure_data

    def _process_result(self):
        """Collector loop: hand finished results to ``on_result`` and update progress.

        Runs until a stop was requested *and* every submitted future has been
        collected.
        """
        while not self.stopped or self.in_progress_count > 0:
            # Work on a snapshot: the submitting thread keeps adding futures
            # to the set while we iterate.
            pending = list(self._future_to_data)
            if not pending:
                time.sleep(self._IDLE_POLL_SECONDS)
                continue
            for future in as_completed(pending):
                result = future.result()
                self._future_to_data.discard(future)

                if self._on_result is not None:
                    self._on_result(result)
                if self._progress is not None:
                    self._progress.show_progress(self.total_processed)

    def _process(self, func, item, *args, **kwargs):
        """Call ``func(item, *args, **kwargs)`` inside a worker thread.

        Returns the call's result; when the call raises, the error is printed,
        ``item`` is recorded as a failure and ``None`` is returned.
        """
        try:
            result = func(item, *args, **kwargs)
        except Exception as exc:
            print(
                f"Falha ao processa dado, mas será armazenado para conferência.\n"
                f"Error: {exc}")
            self._failure_data.append(item)
            return None
        with self._counter_lock:
            self._total_success += 1
        return result

    def get_status(self):
        """Return a one-line status string (rate, in-flight, success and failure counts)."""
        elapsed = time.time() - self._started_at
        # Guard against a zero elapsed time on coarse clocks.
        rate = round(self.total_processed / elapsed, 2) if elapsed > 0 else 0.0
        return f"{rate} cpf/s " \
               f"- processing: {self.in_progress_count} " \
               f"- success: {self._total_success} " \
               f"- fail: {len(self._failure_data)} "

    def process(self, func, data, *args, **kwargs):
        """Submit every item of ``data`` to the pool, throttling the submission rate.

        Args:
            func: Callable invoked as ``func(item, *args, **kwargs)``.
            data: Sized iterable of items (``len(data)`` is used when the
                progress display is enabled).
            *args: Extra positional arguments forwarded to ``func``.
            **kwargs: Extra keyword arguments forwarded to ``func``.

        Blocks until all items have been processed and their results
        collected.
        """
        self._stopped = False
        # Start the collector thread, reusing one that is already running
        # (e.g. started by restart_process); joining a running collector here
        # would block forever because it only exits once stopped.
        service = self._process_result_service
        if service is None or not service.is_alive():
            self._create_process_result_service().start()
        self._started_at = time.time()

        if self._progress is not None:
            self._progress.update_max_value(len(data))
            self._progress.start()

        try:
            with ThreadPoolExecutor(max_workers=self._num_workers,
                                    thread_name_prefix="CPF_PROCESS_WORKER") as self._executor:
                for item in data:
                    start_time = time.time()

                    future = self._executor.submit(self._process, func, item, *args, **kwargs)
                    self._future_to_data.add(future)

                    elapsed_time = time.time() - start_time
                    # If submitting took less than the configured interval,
                    # wait out the difference before sending the next item.
                    if elapsed_time < self.interval_seconds:
                        time.sleep(self.interval_seconds - elapsed_time)
                    if self.in_progress_count >= self._max_in_progress:
                        print(f"O Total de dados sendo processado {self.in_progress_count} é maior que o predefinido {self._max_in_progress}")
                        time.sleep(10)
        finally:
            # Always stop the (non-daemon) collector thread, even when
            # submission fails, so the interpreter is not left hanging.
            self.stop_process()

    @property
    def stopped(self):
        """Whether a stop has been requested."""
        return self._stopped

    def stop_process(self):
        """Request a stop, wait for the collector thread and stop the progress display."""
        self._stopped = True
        service = self._process_result_service
        if service is not None and service.is_alive():
            service.join()
        # The progress display is optional (use_progress=False).
        if self._progress is not None:
            self._progress.stop()

    def restart_process(self):
        """Stop the current run, reset the counters and start a fresh collector thread."""
        self.stop_process()  # wait for the previous run to finish
        self._stopped = False
        self._started_at = time.time()
        self._failure_data = []
        self._total_success = 0
        self._create_process_result_service().start()
3 changes: 3 additions & 0 deletions cereja/date/_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class DateTime(datetime):
r'\d{2}-\d{2}-\d{4}': '%d-%m-%Y', # Format (DD-MM-YYYY)
r'\d{1,2}/\d{1,2}/\d{2,4}': '%m/%d/%Y', # Format (MM/DD/YYYY)
r'\d{1,2}-\d{1,2}-\d{2,4}': '%m-%d-%Y', # Format (MM-DD-YYYY)
r'\d{8}': '%Y%m%d', # Format (YYYYMMDD)
r'\d{2}\d{2}\d{4}': '%d%m%Y', # Format (DDMMYYYY)
r'\d{4}\d{2}\d{2}': '%Y%m%d', # Format (YYYYMMDD) without separators
# Other formats can be added here
}

Expand Down
Empty file added cereja/geo/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions cereja/geo/countries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Region name -> names of the federative units listed under that region.
REGIONS = {
    'Norte': [
        'Amazonas', 'Roraima', 'Amapá', 'Pará', 'Tocantins', 'Rondônia', 'Acre',
    ],
    'Nordeste': [
        'Maranhão', 'Piauí', 'Ceará', 'Rio Grande do Norte', 'Pernambuco', 'Paraíba', 'Sergipe',
        'Alagoas', 'Bahia',
    ],
    'Centro-Oeste': [
        'Mato Grosso', 'Mato Grosso do Sul', 'Goiás', 'Distrito Federal',
    ],
    'Sudeste': [
        'São Paulo', 'Rio de Janeiro', 'Espírito Santo', 'Minas Gerais',
    ],
    'Sul': [
        'Paraná', 'Rio Grande do Sul', 'Santa Catarina',
    ],
}

# Federative unit name -> two-letter abbreviation.
STATES = {
    'Rondônia': 'RO',
    'Acre': 'AC',
    'Amazonas': 'AM',
    'Roraima': 'RR',
    'Pará': 'PA',
    'Amapá': 'AP',
    'Tocantins': 'TO',
    'Maranhão': 'MA',
    'Piauí': 'PI',
    'Ceará': 'CE',
    'Rio Grande do Norte': 'RN',
    'Paraíba': 'PB',
    'Pernambuco': 'PE',
    'Alagoas': 'AL',
    'Sergipe': 'SE',
    'Bahia': 'BA',
    'Minas Gerais': 'MG',
    'Espírito Santo': 'ES',
    'Rio de Janeiro': 'RJ',
    'São Paulo': 'SP',
    'Paraná': 'PR',
    'Santa Catarina': 'SC',
    'Rio Grande do Sul': 'RS',
    'Mato Grosso do Sul': 'MS',
    'Mato Grosso': 'MT',
    'Goiás': 'GO',
    'Distrito Federal': 'DF',
}
188 changes: 188 additions & 0 deletions cereja/geolinear/shapes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import math

from .point import Point
from .. import shape_is_ok


class Circle:
    """A circle defined by a center point and a radius."""

    def __init__(self, center: Point, radius: float):
        self.center = center
        self.radius = radius

    @property
    def area(self) -> float:
        """Calculate the area of the circle."""
        return math.pi * self.radius ** 2

    @property
    def circumference(self) -> float:
        """Calculate the circumference of the circle."""
        return 2 * math.pi * self.radius

    def contains(self, point: Point) -> bool:
        """Check if a given point is inside the circle (the boundary counts as inside)."""
        return self.center.distance_to(point) <= self.radius

    @property
    def diameter(self) -> float:
        """Calculate the diameter of the circle.

        Computed directly from the radius rather than as
        ``circumference / pi``, which multiplies and then divides by pi and
        can pick up floating-point rounding error on the way.
        """
        return 2.0 * self.radius


class Triangle:
    """A triangle defined by its three vertices."""

    def __init__(self, p1: Point, p2: Point, p3: Point):
        self.p1 = p1
        self.p2 = p2
        self.p3 = p3

    @staticmethod
    def side_length(p1: Point, p2: Point) -> float:
        """Calculate length of a side between two points."""
        return p1.distance_to(p2)

    def _side_lengths(self):
        """Return the lengths of sides p1-p2, p2-p3 and p1-p3, in that order."""
        return (
            self.side_length(self.p1, self.p2),
            self.side_length(self.p2, self.p3),
            self.side_length(self.p1, self.p3),
        )

    @property
    def perimeter(self) -> float:
        """Calculate the perimeter of the triangle."""
        a, b, c = self._side_lengths()
        return a + b + c

    @property
    def area(self) -> float:
        """Calculate the area of the triangle using Heron's formula.

        Returns 0.0 for degenerate (collinear) triangles.
        """
        a, b, c = self._side_lengths()
        s = (a + b + c) / 2
        # For (nearly) collinear vertices rounding can make the product a tiny
        # negative number, which would make math.sqrt raise ValueError; clamp
        # it at zero.
        return math.sqrt(max(0.0, s * (s - a) * (s - b) * (s - c)))


class Rectangle:
    """Rectangle described by two points; its sides are the absolute x/y differences."""

    def __init__(self, point1: Point, point2: Point):
        self.point1, self.point2 = point1, point2

    @property
    def width(self) -> float:
        """Horizontal extent: absolute difference of the two x coordinates."""
        delta_x = self.point1.x - self.point2.x
        return abs(delta_x)

    @property
    def height(self) -> float:
        """Vertical extent: absolute difference of the two y coordinates."""
        delta_y = self.point1.y - self.point2.y
        return abs(delta_y)

    @property
    def area(self) -> float:
        """Surface of the rectangle (width times height)."""
        horizontal, vertical = self.width, self.height
        return horizontal * vertical

    @property
    def perimeter(self) -> float:
        """Total length of the four sides."""
        half_perimeter = self.width + self.height
        return 2 * half_perimeter


class Dimension:
    """Width/height pair with its aspect ratio and integer center.

    Args:
        w: Width.
        h: Height; must be non-zero because the ratio is computed as ``w / h``.
    """

    def __init__(self, w, h):
        self._width = w
        self._height = h
        self._ratio = w / h
        self._center = w // 2, h // 2

    @property
    def ratio(self):
        """Aspect ratio, ``width / height``."""
        return self._ratio

    @property
    def width(self):
        """Width given at construction."""
        return self._width

    @property
    def height(self):
        """Height given at construction."""
        return self._height

    def rect(self, vec, size, keep_ratio=False):
        """Build the four corners of a rectangle anchored at ``vec``.

        Args:
            vec: ``(x, y)`` anchor corner.
            size: Side length; used for both width and height unless
                ``keep_ratio`` is set.
            keep_ratio: When true the height becomes ``size // ratio``.

        Returns:
            ``[(x, y), (x, y2), (x2, y), (x2, y2)]`` with ``x2 = x + width``
            and ``y2 = y + height``.
        """
        x, y = vec
        w = size
        h = size
        if keep_ratio:
            h = h // self.ratio
        x2 = x + w
        y2 = y + h

        return [(x, y), (x, y2), (x2, y), (x2, y2)]

    @property
    def center(self):
        """``(width // 2, height // 2)``."""
        return self._center

    @property
    def center_x(self):
        """X coordinate of the center."""
        return self.center[0]

    @property
    def center_y(self):
        """Y coordinate of the center."""
        return self.center[1]

    @staticmethod
    def fix_rect_pts(pts):
        """Reorder rectangle corner points starting from the smallest one.

        The points are sorted; the first sorted point supplies ``(x1, y1)``
        and the last supplies ``(x2, y2)``.

        Returns:
            ``[(x1, y1), (x2, y1), (x2, y2), (x1, y2)]``.

        Raises:
            ValueError: If ``shape_is_ok(pts)`` is false. (This used to be an
                ``assert``, which is skipped entirely under ``python -O``.)
        """
        if not shape_is_ok(pts):
            raise ValueError(f"Expected a list with 4 points (x, y), received {pts}")
        pts = sorted(pts)
        x1 = pts[0][0]
        x2 = pts[-1][0]

        y1 = pts[0][1]
        y2 = pts[-1][1]
        return [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]

    @classmethod
    def from_rect_points(cls, pts):
        """Create a Dimension from the corner points of a rectangle.

        Width and height are taken from the points as ordered by
        ``fix_rect_pts``.
        """
        points = cls.fix_rect_pts(pts)
        w = points[1][0] - points[0][0]
        h = points[-1][1] - points[0][1]
        return cls(w, h)

    def circle_edges(self, ra):
        """Placeholder: not implemented yet, always returns ``None``."""
        pass

    def __repr__(self):
        # The second field is the height, so it is labelled "h" (was "y").
        return f"Dim(w={self._width}, h={self._height})"


# Scratch calculation, only executed when the module is run as a script: maps a
# sheet and a QR code from a base picture's pixel space onto a window.
# Nothing is printed or returned; the values only exist as module globals.
if __name__ == "__main__":
    BASE_PICTURE_SHEET_DIM = [720, 1280]  # WxH
    W_SHEET = 653
    H_SHEET = 923

    # QR code
    W_QRCODE = 102
    H_QRCODE = 102
    QRCODE_CENTER_X_DIST_TO_WINDOW_CENTER = 241
    QRCODE_CENTER_Y_DIST_TO_WINDOW_CENTER = 396

    # Cell phone
    WINDOW_SIZE = [750, 1334]
    WINDOW_CENTER = WINDOW_SIZE[0] // 2, WINDOW_SIZE[1] // 2

    # Get the proportion (base picture size divided by window size, per axis)
    PX = BASE_PICTURE_SHEET_DIM[0] / WINDOW_SIZE[0]
    PY = BASE_PICTURE_SHEET_DIM[1] / WINDOW_SIZE[1]

    # Compute the sheet size relative to the screen
    W_SHEET_WINDOW, H_SHEET_WINDOW = W_SHEET * PX, H_SHEET * PY

    # Compute the sheet's starting point on the screen
    WINDOW_SHEET_X1, WINDOW_SHEET_Y1 = (
        WINDOW_CENTER[0] - (W_SHEET_WINDOW // 2), WINDOW_CENTER[1] - H_SHEET_WINDOW // 2)

    # Compute the sheet's end point on the screen
    WINDOW_SHEET_X2, WINDOW_SHEET_Y2 = WINDOW_SHEET_X1 + W_SHEET_WINDOW, WINDOW_SHEET_Y1 + H_SHEET_WINDOW

    # Set the QR code location (offset from the window center, scaled by PX/PY)
    WINDOW_QRCODE_CENTER = WINDOW_CENTER[0] + (QRCODE_CENTER_X_DIST_TO_WINDOW_CENTER * PX), WINDOW_CENTER[1] - (
            QRCODE_CENTER_Y_DIST_TO_WINDOW_CENTER * PY)
    W_QRCODE_WINDOW = W_QRCODE * PX
    H_QRCODE_WINDOW = H_QRCODE * PY
    WINDOW_QRCODE_X1, WINDOW_QRCODE_Y1 = WINDOW_QRCODE_CENTER[0] - (W_QRCODE_WINDOW // 2), WINDOW_QRCODE_CENTER[1] - (
            H_QRCODE_WINDOW // 2)
    WINDOW_QRCODE_X2, WINDOW_QRCODE_Y2 = WINDOW_QRCODE_X1 + W_QRCODE_WINDOW, WINDOW_QRCODE_Y1 + H_QRCODE_WINDOW
1 change: 1 addition & 0 deletions cereja/scraping/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import b3
Loading

0 comments on commit 2732948

Please sign in to comment.