Skip to content

Commit

Permalink
Merge pull request #11292 from MPMG-DCC-UFMG/dev
Browse files — browse the repository at this point in the history
Atualiza Master com modificações da Dev
  • Loading branch information
rennancl authored May 23, 2023
2 parents 36162a9 + 2d9042b commit cac2014
Show file tree
Hide file tree
Showing 19 changed files with 197 additions and 72 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/continuous-integration-pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ jobs:
python web_install.py
- name: Test with pytest
run: |
coverage run -m pytest
coverage xml
pip install coverage
pip install pytest
pip install pyee==9
python -m coverage run -m pytest
python -m coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: true
verbose: true
Expand Down
24 changes: 17 additions & 7 deletions crawler_manager/log_writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""This file contains the kafka consumer for the spider logs."""

import os
import ujson
from pathlib import Path

from django.apps import apps
from kafka import KafkaConsumer
Expand Down Expand Up @@ -38,6 +40,8 @@ def log_consumer(params=DEFAULT_CONSUMER_PARAMS):
message = ujson.loads(message.value.decode('utf-8'))

log = {}
log['cid'] = message['crawler_id']
log['dtp'] = message['data_path']
log['iid'] = message['instance_id']
log['raw'] = ujson.dumps(message)
log['name'] = message['name']
Expand All @@ -55,12 +59,18 @@ def log_writer(log):
This method writes log in database
"""
Log = apps.get_model('main', 'Log')
# Log = apps.get_model('main', 'Log')

new_log = Log(raw_log=log['raw'],
log_level=log['lvl'],
instance_id=log['iid'],
log_message=log['msg'],
logger_name=log['name'])
# new_log = Log(raw_log=log['raw'],
# log_level=log['lvl'],
# instance_id=log['iid'],
# log_message=log['msg'],
# logger_name=log['name'])

new_log.save()
# new_log.save()

system_path = os.path.join(settings.OUTPUT_FOLDER, log["dtp"])
filename = f'{system_path}/{log["iid"]}/log/{log["iid"]}.{log["lvl"]}'
message = f'[{log["name"]}] {log["msg"]}\n'
with open(filename, 'a') as f:
f.write(message)
4 changes: 3 additions & 1 deletion crawler_manager/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@
'STOPPED_SPIDER_NOTIFICATION_ADDRESS', 'http://web:8000/detail/stop_crawl/{crawler_id}')

TASK_TOPIC = os.getenv('TASK_TOPIC', KAFKA_TOPIC_PREFIX + 'task_topic')
TASK_DATA_CONSUMER_GROUP = os.getenv('TASK_DATA_CONSUMER_DATA', KAFKA_TOPIC_PREFIX + '.task_data_group')
TASK_DATA_CONSUMER_GROUP = os.getenv('TASK_DATA_CONSUMER_DATA', KAFKA_TOPIC_PREFIX + '.task_data_group')

# Base directory onto which a crawler's data path is joined when log_writer
# builds the per-instance log file path. Read from the OUTPUT_FOLDER
# environment variable, falling back to '/data' when it is not set.
OUTPUT_FOLDER = os.getenv('OUTPUT_FOLDER', '/data')
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ services:
dockerfile: ./docker/django-gunicorn/Dockerfile
command: ./django_run.sh
volumes:
- static_volume:/home/django/C01/staticfiles
- static_volume:/usr/src/app/staticfiles
- data:/data
- migrations:/home/django/C01/main/migrations
- migrations:/usr/src/app/main/migrations
expose:
- 8000
env_file:
Expand Down
19 changes: 4 additions & 15 deletions docker/django-gunicorn/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,12 @@ RUN apt-get update && apt-get install -y default-jre postgresql-client gcc musl-

# Don't create bytecode files
ENV PYTHONDONTWRITEBYTECODE 1

# Don't buffer output
ENV PYTHONUNBUFFERED 1

# Create the django user
ENV HOME=/home/django
RUN useradd --create-home --home-dir $HOME django
RUN chown -R django:django $HOME

# Create the appropriate directories
ENV APP_HOME=/home/django/C01
ENV APP_HOME=/usr/src/app
RUN mkdir $APP_HOME
RUN mkdir $APP_HOME/staticfiles
WORKDIR $APP_HOME
Expand All @@ -40,21 +36,14 @@ COPY crawler_manager crawler_manager

RUN mkdir logs


RUN mkdir /data
RUN chown django:django /data

# Install gunicorn for integration with Nginx
RUN pip install gunicorn

# Copy the gunicorn configuration file
COPY docker/config/gunicorn.conf.py ./gunicorn.conf.py

COPY docker/scripts/django_run.sh ./django_run.sh
RUN chmod +x django_run.sh

RUN chown -R django:django $APP_HOME
USER django
RUN chmod +x ./django_run.sh

# Collect static files
RUN python3 manage.py collectstatic --no-input --clear
RUN python3 manage.py collectstatic --no-input --clear
17 changes: 16 additions & 1 deletion main/forms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from cv2 import FarnebackOpticalFlow
from django import forms
from .models import CrawlRequest, ParameterHandler, ResponseHandler
from django.core.exceptions import ValidationError
Expand Down Expand Up @@ -59,6 +58,10 @@ class Meta:
'img_xpath',
'sound_xpath',
'dynamic_processing',

'create_trace_enabled',
'video_recording_enabled',

'browser_type',
'skip_iter_errors',
'browser_resolution_width',
Expand Down Expand Up @@ -432,6 +435,18 @@ class RawCrawlRequestForm(CrawlRequestForm):
skip_iter_errors = forms.BooleanField(
required=False, label="Pular iterações com erro"
)

# Optional checkbox (not required). Per its help text, enabling it generates a
# 'trace.zip' file for debugging the crawler with the Trace Viewer tool.
create_trace_enabled = forms.BooleanField(
required=False,
label="Criar arquivo trace.zip",
help_text="Gera o arquivo 'trace.zip' para depuração do coletor com a ferramenta Trace Viewer"
)

# Optional checkbox (not required). Per its help text, enabling it generates a
# video of the crawler's execution.
video_recording_enabled = forms.BooleanField(
required=False,
label="Gravar vídeo do coletor",
help_text="Gera um vídeo da execução do coletor."
)

explore_links = forms.BooleanField(required=False, label="Explorar links")

Expand Down
3 changes: 3 additions & 0 deletions main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ class CrawlRequest(TimeStamped):
browser_resolution_width = models.IntegerField(blank=True, null=True)
browser_resolution_height = models.IntegerField(blank=True, null=True)

# Debug mode: opt-in flags, both stored as booleans defaulting to False.
# NOTE(review): presumably these only take effect for crawlers that use
# dynamic processing (the form groups them under that section) — confirm
# against the crawler code.
create_trace_enabled = models.BooleanField(default=False)
video_recording_enabled = models.BooleanField(default=False)

# DETAILS #################################################################
explore_links = models.BooleanField(blank=True, null=True)
Expand Down
2 changes: 1 addition & 1 deletion main/staticfiles/css/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ table.dataTable thead .sorting_desc_disabled:before {
background-color: #E8F6EF;
}

#dynamic-processing-item-wrap.disabled, #dynamic-processing-skip-errors.disabled, #dynamic-processing-browser-type.disabled, #dynamic-processing-resolution.disabled {
#dynamic-processing-item-wrap.disabled, #dynamic-processing-skip-errors.disabled, #dynamic-processing-browser-type.disabled, #dynamic-processing-resolution.disabled, #dynamic-processing-debug-mode.disabled {
opacity: .5;
pointer-events: none;
}
Expand Down
4 changes: 4 additions & 0 deletions main/staticfiles/js/create_crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -413,19 +413,22 @@ function detailDynamicProcessing() {
dynamic_processing_skip_errors = document.getElementById("dynamic-processing-skip-errors")
dynamic_processing_resolution = document.getElementById("dynamic-processing-resolution")
dynamic_processing_browser_type = document.getElementById("dynamic-processing-browser-type")
dynamic_processing_debug_mode = document.getElementById("dynamic-processing-debug-mode")

if(getCheckboxState("id_dynamic_processing")){
dynamic_processing_check.classList.remove("disabled")
dynamic_processing_block.classList.remove("disabled")
dynamic_processing_skip_errors.classList.remove("disabled")
dynamic_processing_resolution.classList.remove("disabled")
dynamic_processing_browser_type.classList.remove("disabled")
dynamic_processing_debug_mode.classList.remove("disabled")
}else{
dynamic_processing_check.classList.add("disabled")
dynamic_processing_block.classList.add("disabled")
dynamic_processing_skip_errors.classList.add("disabled")
dynamic_processing_resolution.classList.add("disabled")
dynamic_processing_browser_type.classList.add("disabled")
dynamic_processing_debug_mode.classList.add("disabled")
}
}

Expand Down Expand Up @@ -682,3 +685,4 @@ function parseSettings(e) {

reader.readAsText(file);
}

5 changes: 5 additions & 0 deletions main/staticfiles/js/details.js
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,8 @@ function exit_crawler_queue(queue_item_id) {
}
});
}

// Once the DOM is ready (jQuery's $(fn) shorthand), activate a popover on
// every element on the page marked with data-toggle="popover".
// NOTE(review): .popover() is presumably the Bootstrap jQuery plugin — confirm
// it is loaded before this script.
$(function () {
$('[data-toggle="popover"]').popover()
})
2 changes: 1 addition & 1 deletion main/task_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def get_first_weekday_date_of_month(weekday: int, year: int, month: int) -> date
def get_last_weekday_date_of_month(weekday: int, year: int, month: int) -> datetime:
date = None
last_day_of_month = get_last_day_of_month(month, year)
for day in range(last_day_of_month - 7, last_day_of_month + 1):
for day in range(last_day_of_month - 6, last_day_of_month + 1):
date = get_date(day, month, year)
if WEEKDAY[date.weekday()] == weekday:
break
Expand Down
12 changes: 11 additions & 1 deletion main/templates/main/_create_06_dynamic.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,17 @@
<div id="dynamic-processing-skip-errors" class="hidden">
{{ form.skip_iter_errors | as_crispy_field}}
</div>
<div id="dynamic-processing-debug-mode" class="hidden">
Modo Debug
<p class="small">Ferramentas de depuração de coletores que utilizam processamento dinâmico</p>
<div id="dynamic-processing-trace-enabled" class="">
{{ form.create_trace_enabled | as_crispy_field}}
</div>
<div id="dynamic-processing-video-recording_enabled" class="">
{{ form.video_recording_enabled | as_crispy_field}}
</div>
</div>
<div id="dynamic-processing-item-wrap" style="padding:20px;" class="hidden">
{{ form.steps | as_crispy_field }}
</div>
</div>
</div>
23 changes: 18 additions & 5 deletions main/templates/main/detail_crawler.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,21 @@ <h4>Instances:</h4>
<th scope="col">Duração</th>
<th scope="col">Nº de arquivos</th>
<th scope="col">Tamanho</th>
<th scope="col">Arquivo de configurações</th>
{% if crawler.dynamic_processing %}
<th scope="col">Screenshots</th>
{# "Trace" column header. The info button opens an HTML popover (data-html="true") pointing users to the Playwright Trace Viewer web app. #}
<th scope="col">
Trace
<button type="button" class="popover-icon btn btn-link" data-toggle="popover" data-trigger="click" data-content=
"<p>Clique neste link para acessar a interface de execução do trace no seu navegador:</p>
<p>
<a href='https://trace.playwright.dev/' target='_blank' rel='noopener noreferrer'>https://trace.playwright.dev/</a>
</p>"
data-html="true">
<i class="fa fa-info-circle" aria-hidden="true"></i>
</button>
</th>
{% endif %}
<th scope="col">Arquivo de configurações</th>
</tr>
</thead>
<tbody>
Expand All @@ -102,10 +113,11 @@ <h4>Instances:</h4>
<td>{{instance.duration_readable}}</td>
<td>{{instance.num_data_files}} arquivos</td>
<td>{{instance.data_size_readable}}</td>
<td><a class="btn btn-primary" href="{% url 'export_config' instance.instance_id%}">Baixar</a></td>
{% if crawler.dynamic_processing %}
<td><button type="button" class="btn btn-primary" onclick="displayScreenshotModal({{instance.instance_id}})">Visualizar</button></td>
<td><a class="btn btn-primary" href="{% url 'export_trace' instance.instance_id%}">Baixar</a></td>
{% endif %}
<td><a class="btn btn-primary" href="{% url 'export_config' instance.instance_id%}">Baixar</a></td>
</tr>

{% if forloop.last and forloop.counter > 3 %}
Expand Down Expand Up @@ -239,7 +251,7 @@ <h5>Progresso de download de arquivos</h5>
<div class="row mb-3">
<div class="col pr-0 d-flex align-items-center justify-content-between">
<h5>Últimas linhas do stdout</h5>
<a class="btn btn-primary" href="{% url 'raw_log' last_instance.instance_id %}">Abrir Log Bruto</a>
<a class="btn btn-primary" target="_blank" href="{% url 'raw_log_out' last_instance.instance_id %}">Abrir Log Bruto</a>
</div>
</div>
<div class="row">
Expand All @@ -260,9 +272,10 @@ <h5>Últimas linhas do stdout</h5>
<!-- Tail stderr -->
<div class="row">
<div class="col">
<div class="row">
<div class="col">
<div class="row mb-3">
<div class="col pr-0 d-flex align-items-center justify-content-between">
<h5>Últimas linhas do stderr</h5>
<a class="btn btn-primary" target="_blank" href="{% url 'raw_log_err' last_instance.instance_id %}">Abrir Log Bruto</a>
</div>
</div>
<div class="row">
Expand Down
5 changes: 3 additions & 2 deletions main/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
path("detail/run_crawl/<int:crawler_id>", views.run_crawl, name="run_crawl"),
path("detail/stop_crawl/<int:crawler_id>", views.stop_crawl, name="stop_crawl"),
path("tail_log_file/<str:instance_id>", views.tail_log_file, name="tail_log_file"),
path("raw_log/<str:instance_id>", views.raw_log, name="raw_log"),
path("raw_log_out/<str:instance_id>", views.raw_log_out, name="raw_log_out"),
path("raw_log_err/<str:instance_id>", views.raw_log_err, name="raw_log_err"),

path("download/files/found/<str:instance_id>/<int:num_files>", views.files_found, name="files_found"),
path("download/file/success/<str:instance_id>", views.success_download_file, name="success_download_file"),
Expand All @@ -39,7 +40,7 @@
path("download/page/duplicated/<str:instance_id>", views.duplicated_download_page, name="duplicated_download_page"),

path("export_config/<str:instance_id>", views.export_config, name="export_config"),

path("export_trace/<str:instance_id>", views.export_trace, name="export_trace"),

path("info/screenshots/<str:instance_id>/<int:page>", views.view_screenshots, name="view_screenshots"),

Expand Down
Loading

0 comments on commit cac2014

Please sign in to comment.