Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] System Status and Performance Info UI #6062

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions app/controllers/system_status_controller.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# frozen_string_literal: true

# Serves the system status page, exposing Sidekiq-wide statistics and
# per-queue metrics to the view.
class SystemStatusController < ApplicationController
  # Loads both metric sets from a single GetSystemMetrics instance and
  # assigns them to the instance variables the index template reads.
  def index
    metrics_service = GetSystemMetrics.new

    @sidekiq_stats = metrics_service.fetch_sidekiq_stats
    @queue_metrics = metrics_service.fetch_queue_management_metrics
  end
end
96 changes: 96 additions & 0 deletions app/services/get_system_metrics.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# frozen_string_literal: true

require 'sidekiq/api'

# Collects Sidekiq health data for the system status page: global job
# counts plus per-queue size, latency and backlog status.
class GetSystemMetrics
  SECONDS_PER_MINUTE = 60
  SECONDS_PER_HOUR = 60 * SECONDS_PER_MINUTE
  SECONDS_PER_DAY = 24 * SECONDS_PER_HOUR

  # Queue listed in config/sidekiq.yml that is left out of the report.
  EXCLUDED_QUEUE = 'very_long_update'

  # Latency, in seconds, at or above which a queue counts as backlogged.
  LATENCY_THRESHOLDS = {
    'default' => 1,
    'short_update' => 2 * SECONDS_PER_HOUR,
    'medium_update' => 12 * SECONDS_PER_HOUR,
    'long_update' => SECONDS_PER_DAY,
    'daily_update' => SECONDS_PER_DAY,
    'constant_update' => 15 * SECONDS_PER_MINUTE
  }.freeze

  # For a main unit name, the [size in seconds, name] of the next smaller
  # unit shown after it. Any other main unit falls back to plain seconds.
  SUB_UNITS = {
    'day' => [SECONDS_PER_HOUR, 'hour'].freeze,
    'hour' => [SECONDS_PER_MINUTE, 'minute'].freeze
  }.freeze
  DEFAULT_SUB_UNIT = [1, 'second'].freeze

  # Reads the queue names from config/sidekiq.yml, dropping EXCLUDED_QUEUE.
  # No Sidekiq data is fetched here; each fetch_* method queries on demand.
  def initialize
    @queues = YAML.load_file('config/sidekiq.yml')[:queues]
                  .reject { |queue_name| queue_name == EXCLUDED_QUEUE }
  end

  # @return [Hash] :enqueued_jobs (jobs waiting across all queues) and
  #   :active_jobs (jobs currently being processed).
  def fetch_sidekiq_stats
    stats = Sidekiq::Stats.new
    {
      enqueued_jobs: stats.enqueued,
      # workers_size counts jobs being processed right now; processes_size
      # would count Sidekiq processes instead.
      active_jobs: stats.workers_size
    }
  end

  # @return [Hash] :queues (one get_queue_data hash per configured queue),
  #   :paused_queues (names of paused queues) and :all_operational (true
  #   when no queue is paused).
  def fetch_queue_management_metrics
    sidekiq_queues = @queues.map { |queue_name| Sidekiq::Queue.new(queue_name) }
    paused_queues = sidekiq_queues.select(&:paused?).map(&:name)

    {
      queues: sidekiq_queues.map { |queue| get_queue_data(queue) },
      paused_queues: paused_queues,
      all_operational: paused_queues.empty?
    }
  end

  # @param queue [#name, #size, #latency] a Sidekiq queue
  # @return [Hash] :name, :size, :status and human-readable :latency
  def get_queue_data(queue)
    # Read latency once so status and display text describe the same sample.
    latency = queue.latency
    {
      name: queue.name,
      size: queue.size,
      status: get_queue_status(queue.name, latency),
      latency: convert_latency(latency)
    }
  end

  # @param queue_name [String]
  # @param latency [Numeric] seconds the oldest job has been waiting
  # @return [String] 'Normal' below the queue's threshold, 'Backlogged' at
  #   or above it, 'Unknown' when the queue has no configured threshold.
  def get_queue_status(queue_name, latency)
    threshold = LATENCY_THRESHOLDS[queue_name]
    return 'Unknown' unless threshold

    latency < threshold ? 'Normal' : 'Backlogged'
  end

  # Renders a latency as text such as "30 seconds" or "1 day 1 hour".
  # Fractions are truncated and negative values are treated as 0.
  #
  # @param seconds [Numeric]
  # @return [String]
  def convert_latency(seconds)
    whole_seconds = [seconds.to_i, 0].max

    case whole_seconds
    when 0...SECONDS_PER_MINUTE
      count_with_unit(whole_seconds, 'second')
    when SECONDS_PER_MINUTE...SECONDS_PER_HOUR
      format_time(whole_seconds, SECONDS_PER_MINUTE, 'minute', 'second')
    when SECONDS_PER_HOUR...SECONDS_PER_DAY
      format_time(whole_seconds, SECONDS_PER_HOUR, 'hour', 'minute')
    else
      format_time(whole_seconds, SECONDS_PER_DAY, 'day', 'hour')
    end
  end

  # Formats a duration as "<main> [<sub>]", omitting the sub-unit part when
  # it would be zero.
  #
  # @param seconds [Numeric] total duration in seconds
  # @param unit [Integer] size of the main unit in seconds
  # @param main_unit_name [String] singular name of the main unit
  # @param sub_unit_name [String] accepted for backward compatibility; the
  #   sub-unit is always derived from main_unit_name via SUB_UNITS.
  # @return [String]
  def format_time(seconds, unit, main_unit_name, sub_unit_name)
    main_count, remainder = seconds.to_i.divmod(unit)
    sub_unit_seconds, derived_sub_name = SUB_UNITS.fetch(main_unit_name, DEFAULT_SUB_UNIT)
    sub_count = remainder / sub_unit_seconds

    text = count_with_unit(main_count, main_unit_name)
    return text unless sub_count.positive?

    "#{text} #{count_with_unit(sub_count, derived_sub_name)}"
  end

  private

  # "<count> <unit>", with an "s" appended unless count is exactly 1.
  def count_with_unit(count, unit_name)
    "#{count} #{unit_name}#{'s' unless count == 1}"
  end
end
67 changes: 67 additions & 0 deletions app/views/system_status/index.html.haml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
-# System status page.
-# First section: queue overview driven by @queue_metrics — an operational
-# banner (with the paused queue names when any are paused) followed by a
-# table with one row per queue showing name, purpose, status, size, latency.
.container.queues
.module
.section-header
%h3= t("status.queues_overview")
.notification
.container
- if @queue_metrics[:all_operational]
%p= t("status.all_queues_operational")
- else
%p= t("status.all_queues_not_operational")

.notifications
.notice
.container
- @queue_metrics[:paused_queues].each do |queue_name|
%p= queue_name.humanize
%br/

%table.table.table--hoverable
%thead
%tr
%th= t("status.queue")
%th= t("status.purpose")
%th= t("status.status")
%th
.tooltip-trigger
= t("status.size")
%span.tooltip-indicator
.tooltip.dark
%p= t("status.size_doc")
%th
.tooltip-trigger
= t("status.latency")
%span.tooltip-indicator
.tooltip.dark
%p= t("status.latency_doc")
%tbody
- @queue_metrics[:queues].each do |queue|
%tr{ class: queue[:status] == "Normal" ? "table-row--success" : "table-row--warning" }
%td
.tooltip-trigger
= t("status.#{queue[:name]}")
%span.tooltip-indicator
.tooltip.dark
%p= t("status.#{queue[:name]}_description")
%td= t("status.#{queue[:name]}_doc")
%td= queue[:status]
%td= queue[:size]
%td= queue[:latency]

-# Second section: one stat tile per @sidekiq_stats entry; the tooltip text
-# is chosen by key, falling back to status.no_info for unrecognized keys.
.container.sidekiq_stats
%br/
%h3= t("status.sidekiq_stats")
.stat-display
- @sidekiq_stats.each do |key, value|
.stat-display__stat.tooltip-trigger
.stat-display__value= value
%small= key.to_s.humanize

.tooltip.dark
- case key
- when :enqueued_jobs
%p= t("status.enqueued_jobs_doc")
- when :active_jobs
%p= t("status.active_jobs_doc")
- else
%p= t("status.no_info")
36 changes: 36 additions & 0 deletions config/locales/en.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,42 @@ en:
already_is_not: "%{username} is not a Special User!"
demote_success: "%{username} is now just a user."

status:
active_jobs: Active jobs
active_jobs_doc: The number of currently processing jobs.
all_queues_operational: All Queues Operational
all_queues_not_operational: All Queues Operational except
constant_update: Constant update
constant_update_description: Constant updates are independent of the main course stats, pulling in revision metadata, generating alerts, and doing other data and network-intensive tasks, for all current courses.
constant_update_doc: Handles transactional jobs like wiki edits and sending email.
daily_update: Daily update
daily_update_description: This pulls in additional data and performs other tasks that do not need to be done many times per day.
daily_update_doc: Handles once-daily long-running data update tasks.
default: Default
default_description: Schedule course updates by sorting courses into queues depending on how long they run.
default_doc: Handles frequently-run tasks like adding courses to the update queues.
enqueued_jobs: Enqueued jobs
enqueued_jobs_doc: The number of currently enqueued jobs in all queues.
latency: Latency
latency_doc: The waiting time for jobs to start processing in the queue. High latency may indicate a busy system or processing delays.
long_update: Long update
long_update_description: Long updates process courses with more than 10,000 revisions.
long_update_doc: Handles updates for large courses.
medium_update: Medium update
medium_update_description: Medium updates process courses with fewer than 10,000 revisions.
medium_update_doc: Handles updates for typical-sized courses.
no_info: No info available.
purpose: Purpose
queues_overview: Queues Overview
queue: Queue
short_update: Short update
short_update_description: Short updates process courses with fewer than 1,000 revisions.
short_update_doc: Handles updates for small courses.
sidekiq_stats: Sidekiq Stats
size: Size
size_doc: The number of jobs within a queue.
status: Status

# Suggestions source: https://en.wikipedia.org/wiki/Template:Grading_scheme
suggestions:
editing: Editing Suggestions
Expand Down
2 changes: 2 additions & 0 deletions config/routes.rb
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,8 @@
get '/private_information' => 'about_this_site#private_information'
get '/styleguide' => 'styleguide#index'

get '/status' => 'system_status#index'

# Errors
match '/404', to: 'errors#file_not_found', via: :all
match '/422', to: 'errors#unprocessable', via: :all
Expand Down
29 changes: 29 additions & 0 deletions spec/controllers/system_status_controller_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# frozen_string_literal: true

require 'rails_helper'

# Request spec for the status page: every example issues the same GET, so
# the request is made once per example in a before hook.
describe SystemStatusController, type: :request do
  describe '#index' do
    before { get '/status' }

    it 'sets @sidekiq_stats' do
      sidekiq_stats = assigns(:sidekiq_stats)

      expect(sidekiq_stats).to be_a(Hash)
      expect(sidekiq_stats).to include(:enqueued_jobs, :active_jobs)
    end

    it 'sets @queue_metrics' do
      assigned_metrics = assigns(:queue_metrics)

      expect(assigned_metrics).to be_a(Hash)
      expect(assigned_metrics).to include(:queues, :paused_queues, :all_operational)
      expect(assigned_metrics[:queues]).to all(include(:name, :size, :status, :latency))
    end

    it 'renders the index template' do
      expect(response).to render_template(:index)
    end
  end
end
89 changes: 89 additions & 0 deletions spec/services/get_system_metrics_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# frozen_string_literal: true

require 'rails_helper'

# Unit spec for GetSystemMetrics. Repeated expectations are expressed as
# input => expected tables and checked in a loop.
describe GetSystemMetrics do
  let(:service) { described_class.new }

  describe '#initialize' do
    it 'initializes with valid Sidekiq queue data' do
      configured_queues = YAML.load_file('config/sidekiq.yml')[:queues]
      expected_queues = configured_queues.reject { |name| name == 'very_long_update' }

      expect(service.instance_variable_get(:@queues)).to eq(expected_queues)
    end
  end

  describe '#fetch_sidekiq_stats' do
    it 'returns Sidekiq stats with enqueued and active jobs' do
      sidekiq_stats = service.fetch_sidekiq_stats

      expect(sidekiq_stats).to be_a(Hash)
      expect(sidekiq_stats).to include(:enqueued_jobs, :active_jobs)
    end
  end

  describe '#fetch_queue_management_metrics' do
    it 'returns queue metrics with valid data' do
      queue_metrics = service.fetch_queue_management_metrics

      expect(queue_metrics).to be_a(Hash)
      expect(queue_metrics).to include(:queues, :paused_queues, :all_operational)

      queue_metrics[:queues].each do |queue_row|
        expect(queue_row).to include(:name, :size, :status, :latency)
        expect(queue_row[:latency]).to be_a(String)
      end
    end

    it 'identifies paused queues correctly' do
      allow_any_instance_of(Sidekiq::Queue).to receive(:paused?).and_return(true)

      queue_metrics = service.fetch_queue_management_metrics

      expect(queue_metrics[:paused_queues]).not_to be_empty
      expect(queue_metrics[:all_operational]).to be(false)
    end
  end

  describe '#get_queue_data' do
    it 'returns data for a single queue' do
      first_queue_name = service.instance_variable_get(:@queues).first
      queue_row = service.get_queue_data(Sidekiq::Queue.new(first_queue_name))

      expect(queue_row).to be_a(Hash)
      expect(queue_row).to include(:name, :size, :status, :latency)
    end
  end

  describe '#get_queue_status' do
    it 'returns Normal for queues under threshold latency' do
      {
        'short_update' => 1.hour,
        'medium_update' => 6.hours,
        'long_update' => 12.hours,
        'daily_update' => 12.hours,
        'constant_update' => 12.minutes,
        'default' => 0
      }.each do |queue_name, latency|
        expect(service.get_queue_status(queue_name, latency)).to eq('Normal')
      end
    end

    it 'returns Backlogged for queues exceeding threshold latency' do
      {
        'short_update' => 3.hours,
        'medium_update' => 13.hours,
        'long_update' => 26.hours,
        'daily_update' => 26.hours,
        'constant_update' => 16.minutes,
        'default' => 2
      }.each do |queue_name, latency|
        expect(service.get_queue_status(queue_name, latency)).to eq('Backlogged')
      end
    end
  end

  describe '#convert_latency' do
    it 'converts latency in seconds to more readable formats' do
      {
        30 => '30 seconds',
        90 => '1 minute 30 seconds',
        3600 => '1 hour',
        90000 => '1 day 1 hour'
      }.each do |seconds, readable|
        expect(service.convert_latency(seconds)).to eq(readable)
      end
    end
  end

  describe '#format_time' do
    it 'formats time into main and sub-units' do
      expect(service.format_time(3661, 3600, 'hour', 'minute')).to eq('1 hour 1 minute')
    end
  end
end
Loading