Merge pull request #11022 from MPMG-DCC-UFMG/issue-10855
Issue 10855 - Collector detail logs read from the log files
aluiza22 authored Apr 27, 2023
2 parents cd67e92 + fd10930 commit 2d9042b
Showing 4 changed files with 64 additions and 29 deletions.
14 changes: 7 additions & 7 deletions crawler_manager/log_writer.py
@@ -59,15 +59,15 @@ def log_writer(log):
     This method writes log in database
     """
-    Log = apps.get_model('main', 'Log')
+    # Log = apps.get_model('main', 'Log')
 
-    new_log = Log(raw_log=log['raw'],
-                  log_level=log['lvl'],
-                  instance_id=log['iid'],
-                  log_message=log['msg'],
-                  logger_name=log['name'])
+    # new_log = Log(raw_log=log['raw'],
+    #               log_level=log['lvl'],
+    #               instance_id=log['iid'],
+    #               log_message=log['msg'],
+    #               logger_name=log['name'])
 
-    new_log.save()
+    # new_log.save()
 
     system_path = os.path.join(settings.OUTPUT_FOLDER, log["dtp"])
     filename = f'{system_path}/{log["iid"]}/log/{log["iid"]}.{log["lvl"]}'
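Note: the docstring above still says the method "writes log in database", even though this commit comments out the Log model write; only the log file path setup remains visible before the diff truncates. A minimal sketch of what the truncated remainder presumably does with filename (the append mode, directory creation, and trailing newline are assumptions, not shown in the diff):

import os

def append_log_line(filename: str, raw_log: str) -> None:
    # Hypothetical reconstruction of log_writer's truncated tail:
    # append one raw log line to the per-instance, per-level log file.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'a') as log_file:
        log_file.write(raw_log + '\n')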
7 changes: 4 additions & 3 deletions main/templates/main/detail_crawler.html
@@ -251,7 +251,7 @@ <h5>Progresso de download de arquivos</h5>
 <div class="row mb-3">
 <div class="col pr-0 d-flex align-items-center justify-content-between">
 <h5>Últimas linhas do stdout</h5>
-<a class="btn btn-primary" href="{% url 'raw_log' last_instance.instance_id %}">Abrir Log Bruto</a>
+<a class="btn btn-primary" target="_blank" href="{% url 'raw_log_out' last_instance.instance_id %}">Abrir Log Bruto</a>
 </div>
 </div>
 <div class="row">
@@ -272,9 +272,10 @@ <h5>Últimas linhas do stdout</h5>
 <!-- Tail stderr -->
 <div class="row">
 <div class="col">
-<div class="row">
-<div class="col">
+<div class="row mb-3">
+<div class="col pr-0 d-flex align-items-center justify-content-between">
 <h5>Últimas linhas do stderr</h5>
+<a class="btn btn-primary" target="_blank" href="{% url 'raw_log_err' last_instance.instance_id %}">Abrir Log Bruto</a>
 </div>
 </div>
 <div class="row">
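Both tail panels now carry an "Abrir Log Bruto" ("Open Raw Log") button: the stdout panel's button is repointed from the old raw_log route to raw_log_out, the stderr panel gains a matching button for raw_log_err, and target="_blank" opens either raw log in a new tab. The added mb-3 and flex utility classes bring the stderr header row in line with the stdout one.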
3 changes: 2 additions & 1 deletion main/urls.py
@@ -25,7 +25,8 @@
     path("detail/run_crawl/<int:crawler_id>", views.run_crawl, name="run_crawl"),
     path("detail/stop_crawl/<int:crawler_id>", views.stop_crawl, name="stop_crawl"),
     path("tail_log_file/<str:instance_id>", views.tail_log_file, name="tail_log_file"),
-    path("raw_log/<str:instance_id>", views.raw_log, name="raw_log"),
+    path("raw_log_out/<str:instance_id>", views.raw_log_out, name="raw_log_out"),
+    path("raw_log_err/<str:instance_id>", views.raw_log_err, name="raw_log_err"),
 
     path("download/files/found/<str:instance_id>/<int:num_files>", views.files_found, name="files_found"),
     path("download/file/success/<str:instance_id>", views.success_download_file, name="success_download_file"),
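The old raw_log route is removed outright, so any saved link to raw_log/<instance_id> now returns 404. If backward compatibility were a concern, the retired URL could be forwarded to the stdout view; a sketch using Django's RedirectView, not something this commit adds:

from django.urls import path
from django.views.generic import RedirectView

urlpatterns += [
    # Hypothetical alias: RedirectView reverses pattern_name with the
    # same URL kwargs, so instance_id is carried over automatically.
    path("raw_log/<str:instance_id>",
         RedirectView.as_view(pattern_name="raw_log_out", permanent=True),
         name="raw_log"),
]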
69 changes: 51 additions & 18 deletions main/views.py
@@ -4,11 +4,12 @@
 import multiprocessing as mp
 import os
 import time
+import subprocess
 from datetime import datetime
 from typing_extensions import Literal
 
 import crawler_manager.crawler_manager as crawler_manager
-from crawler_manager.settings import TASK_TOPIC
+from crawler_manager.settings import (TASK_TOPIC, OUTPUT_FOLDER)
 from crawler_manager.constants import *
 
 from django.conf import settings
@@ -662,15 +663,19 @@ def tail_log_file(request, instance_id):
     number_pages_duplicated_download = instance.number_pages_duplicated_download
     number_pages_previously_crawled = instance.number_pages_previously_crawled
 
-    logs = Log.objects.filter(instance_id=instance_id).order_by('-creation_date')
-
-    log_results = logs.filter(Q(log_level="out"))[:20]
-    err_results = logs.filter(Q(log_level="err"))[:20]
-
-    log_text = [f"[{r.logger_name}] {r.log_message}" for r in log_results]
-    log_text = "\n".join(log_text)
-    err_text = [f"[{r.logger_name}] [{r.log_level:^5}] {r.log_message}" for r in err_results]
-    err_text = "\n".join(err_text)
+    config = CrawlRequest.objects.filter(id=int(instance.crawler.id)).values()[0]
+    data_path = os.path.join(OUTPUT_FOLDER, config["data_path"])
+
+    out = subprocess.run(["tail",
+                          f"{data_path}/{instance_id}/log/{instance_id}.out",
+                          "-n",
+                          "20"],
+                         stdout=subprocess.PIPE).stdout
+    err = subprocess.run(["tail",
+                          f"{data_path}/{instance_id}/log/{instance_id}.err",
+                          "-n",
+                          "20"],
+                         stdout=subprocess.PIPE).stdout
 
     return JsonResponse({
         "files_found": files_found,
@@ -684,23 +689,51 @@ def tail_log_file(request, instance_id):
         "pages_duplicated": number_pages_duplicated_download,
         "pages_previously_crawled": number_pages_previously_crawled,
 
-        "out": log_text,
-        "err": err_text,
+        "out": out.decode('utf-8'),
+        "err": err.decode('utf-8'),
         "time": str(datetime.fromtimestamp(time.time())),
     })
 
 
-def raw_log(request, instance_id):
-    logs = Log.objects.filter(instance_id=instance_id)\
-        .order_by('-creation_date')
+def raw_log_out(request, instance_id):
+    instance = CrawlerInstance.objects.get(instance_id=instance_id)
+
+    config = CrawlRequest.objects.filter(id=int(instance.crawler.id)).values()[0]
+    data_path = os.path.join(OUTPUT_FOLDER, config["data_path"])
+
+    out = subprocess.run(["tail",
+                          f"{data_path}/{instance_id}/log/{instance_id}.out",
+                          "-n",
+                          "100"],
+                         stdout=subprocess.PIPE).stdout
+
+    raw_text = out.decode('utf-8')
+    raw_results = raw_text.splitlines(True)
+    resp = JsonResponse({str(instance_id): raw_results},
+                        json_dumps_params={'indent': 2})
+
+    if len(raw_results) > 0 and instance.running:
+        resp['Refresh'] = 5
+    return resp
+
+def raw_log_err(request, instance_id):
+    instance = CrawlerInstance.objects.get(instance_id=instance_id)
+
+    config = CrawlRequest.objects.filter(id=int(instance.crawler.id)).values()[0]
+    data_path = os.path.join(OUTPUT_FOLDER, config["data_path"])
 
-    raw_results = logs[:100]
-    raw_text = [json.loads(r.raw_log) for r in raw_results]
+    err = subprocess.run(["tail",
+                          f"{data_path}/{instance_id}/log/{instance_id}.err",
+                          "-n",
+                          "100"],
+                         stdout=subprocess.PIPE).stdout
 
-    resp = JsonResponse({str(instance_id): raw_text},
+    raw_text = err.decode('utf-8')
+    raw_results = raw_text.splitlines(True)
+    resp = JsonResponse({str(instance_id): raw_results},
                         json_dumps_params={'indent': 2})
 
-    if len(logs) > 0 and logs[0].instance.running:
+    if len(raw_results) > 0 and instance.running:
         resp['Refresh'] = 5
     return resp

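raw_log_out and raw_log_err are line-for-line copies of each other apart from the .out/.err suffix, and both repeat the config/data_path lookup from tail_log_file. A possible consolidation, reusing the tail_lines sketch above and assuming this module's existing imports and models (CrawlerInstance, CrawlRequest, JsonResponse, OUTPUT_FOLDER); all names here are hypothetical, not part of the commit:

def _raw_log(request, instance_id, stream):
    # Shared body for the two raw-log views; stream is "out" or "err".
    instance = CrawlerInstance.objects.get(instance_id=instance_id)
    config = CrawlRequest.objects.filter(id=int(instance.crawler.id)).values()[0]
    data_path = os.path.join(OUTPUT_FOLDER, config["data_path"])

    log_path = f"{data_path}/{instance_id}/log/{instance_id}.{stream}"
    raw_results = tail_lines(log_path, num_lines=100).splitlines(True)

    resp = JsonResponse({str(instance_id): raw_results},
                        json_dumps_params={'indent': 2})
    if raw_results and instance.running:
        # Refresh is a non-standard but widely honored header: the browser
        # reloads the page every 5 seconds while the crawl is still running.
        resp['Refresh'] = 5
    return resp


def raw_log_out(request, instance_id):
    return _raw_log(request, instance_id, "out")


def raw_log_err(request, instance_id):
    return _raw_log(request, instance_id, "err")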
