Skip to content

Commit

Permalink
Merge pull request sunbird-cb#138 from KB-iGOT/cbrelease-4.8.17
Browse files Browse the repository at this point in the history
Cbrelease 4.8.17
  • Loading branch information
Haritest authored Sep 28, 2024
2 parents 53b1ca8 + 673a3a4 commit 4657291
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 37 deletions.
74 changes: 73 additions & 1 deletion ansible/roles/kong-api/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ service_registry_prefix: /serviceregistry
surveys_service_template_prefix: /template
comment_prefix: /comment
organisation_prefix: /organisation
public_assessment_prefix: /public/assessment

# Service URLs
knowledge_mw_service_url: "http://knowledge-mw-service:5000"
Expand Down Expand Up @@ -184,6 +185,7 @@ cb_external_enrollment_service_url: "http://cb-enrollment-service:7002"
cb_service_registry_url: "http://cb-service-registry:8096"
ml_template_validation_survey_service_url: "http://ml-template-validation-backend-service:5000"
cb_comment_service_url: "http://cb-comment-service:8099"
# Base URL of the public assessment service; used as the upstream host for the
# publicAssessment* Kong API entries.
public_assessment_service_url: "http://public-assessment-service:8090"

premium_consumer_rate_limits:
- api: createContent
Expand Down Expand Up @@ -17792,6 +17794,76 @@ kong_apis:
config.limit_by: credential
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

# Kong API entry: public assessment read, forwarded to the v1 upstream path.
# Matches "{{ public_assessment_prefix }}/v1/read"; with strip_uri enabled Kong
# drops the matched prefix before proxying to upstream_url.
# NOTE(review): publicAssessmentRead declares the exact same `uris` value, so two
# API entries compete for one request path - confirm which upstream should own it.
# NOTE(review): upstream_url ends in a literal content id
# (do_11397690976124928011) - confirm this is intentional and not a test leftover.
- name: publicAssessmentReadVersion1
uris: "{{ public_assessment_prefix }}/v1/read"
upstream_url: "{{ public_assessment_service_url }}/v1/public/assessment/read/do_11397690976124928011"
strip_uri: true
plugins:
- name: cors
- "{{ statsd_pulgin }}"
# Hourly request quota, keyed on the consumer credential.
- name: rate-limiting
config.policy: local
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
# Caps the request body size at the shared "medium" limit.
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

# Kong API entry: public assessment question list, forwarded to the v1 upstream path.
# Matches "{{ public_assessment_prefix }}/v1/question/list"; with strip_uri
# enabled Kong drops the matched prefix before proxying to upstream_url.
# NOTE(review): publicAssessmentQuestionList declares the exact same `uris`
# value, so two API entries compete for one request path - confirm which
# upstream should own it.
- name: publicAssessmentQuestionListVersion1
uris: "{{ public_assessment_prefix }}/v1/question/list"
upstream_url: "{{ public_assessment_service_url }}/v1/public/assessment/question/list"
strip_uri: true
plugins:
- name: cors
- "{{ statsd_pulgin }}"
# Hourly request quota, keyed on the consumer credential.
- name: rate-limiting
config.policy: local
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
# Caps the request body size at the shared "medium" limit.
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

# Kong API entry: public assessment read, forwarded to the v5 upstream path.
# Matches "{{ public_assessment_prefix }}/v1/read"; with strip_uri enabled Kong
# drops the matched prefix before proxying to upstream_url.
# NOTE(review): the public path says v1 while the upstream is v5, and
# publicAssessmentReadVersion1 already claims this same `uris` value - confirm
# whether this entry should be exposed under a distinct (e.g. v5) path.
- name: publicAssessmentRead
uris: "{{ public_assessment_prefix }}/v1/read"
upstream_url: "{{ public_assessment_service_url }}/v5/public/user/assessment/read"
strip_uri: true
plugins:
- name: cors
- "{{ statsd_pulgin }}"
# Hourly request quota, keyed on the consumer credential.
- name: rate-limiting
config.policy: local
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
# Caps the request body size at the shared "medium" limit.
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

# Kong API entry: public assessment question list, forwarded to the v5 upstream path.
# Matches "{{ public_assessment_prefix }}/v1/question/list"; with strip_uri
# enabled Kong drops the matched prefix before proxying to upstream_url.
# NOTE(review): the public path says v1 while the upstream is v5, and
# publicAssessmentQuestionListVersion1 already claims this same `uris` value -
# confirm whether this entry should be exposed under a distinct (e.g. v5) path.
- name: publicAssessmentQuestionList
uris: "{{ public_assessment_prefix }}/v1/question/list"
upstream_url: "{{ public_assessment_service_url }}/v5/public/assessment/question/list"
strip_uri: true
plugins:
- name: cors
- "{{ statsd_pulgin }}"
# Hourly request quota, keyed on the consumer credential.
- name: rate-limiting
config.policy: local
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
# Caps the request body size at the shared "medium" limit.
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

# Kong API entry: public assessment submission, forwarded to the v5 upstream path.
# Matches "{{ public_assessment_prefix }}/v1/assessment/submit"; with strip_uri
# enabled Kong drops the matched prefix before proxying to upstream_url.
# NOTE(review): unlike the sibling v5 entries, the upstream path here has no
# "/public/" segment (/v5/user/assessment/submit) - confirm that this is the
# endpoint the public assessment service actually exposes.
- name: publicAssessmentSubmit
uris: "{{ public_assessment_prefix }}/v1/assessment/submit"
upstream_url: "{{ public_assessment_service_url }}/v5/user/assessment/submit"
strip_uri: true
plugins:
- name: cors
- "{{ statsd_pulgin }}"
# Hourly request quota, keyed on the consumer credential.
- name: rate-limiting
config.policy: local
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
# Caps the request body size at the shared "medium" limit.
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"

- name: sampleFileDownloadForOrganisationDesignationMapping
uris: "{{ designation_prefix }}/v1/orgMapping/sample"
Expand Down Expand Up @@ -17953,4 +18025,4 @@ kong_apis:
config.hour: "{{ medium_rate_limit_per_hour }}"
config.limit_by: credential
- name: request-size-limiting
config.allowed_payload_size: "{{ medium_request_size_limit }}"
config.allowed_payload_size: "{{ medium_request_size_limit }}"
5 changes: 5 additions & 0 deletions kubernetes/ansible/roles/sunbird-monitoring/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,22 +171,27 @@ alert_teams:
# Availability checks rendered into one scrape job per entry
# (job_name 'availability_<service_name>').
#   service_name - suffix of the generated job name
#   probe_module - value passed as the `module` scrape parameter
#   metricspath  - value rendered into the job's `metrics_path`
#   targets      - URL(s) to check
# NOTE(review): `metricspath` has no underscore, unlike service_name and
# probe_module; renaming it would also require updating the scrape-config
# template that reads item.metricspath.
service_blackbox_checks:
- service_name: 'analytics-service'
probe_module: http_2xx
metricspath: /metrics
targets:
- "{{sunbird_analytics_api_base_url}}/health"
- service_name: 'learning-service'
probe_module: http_2xx
metricspath: /metrics
targets:
- "{{sunbird_content_repo_api_base_url}}/health"
- service_name: 'search-service'
probe_module: http_2xx
metricspath: /metrics
targets:
- "{{sunbird_search_service_api_base_url}}/health"
- service_name: 'keycloak-service'
probe_module: http_2xx
metricspath: /metrics
targets:
- "{{proto}}://{{proxy_server_name}}/auth/realms/sunbird/protocol/openid-connect/auth?client_id=portal&state=foo&redirect_uri=https%3A%2F%2F{{proxy_server_name}}%2Fprivate%2Findex%3Fauth_callback%3D1&scope=openid&response_type=code"
# Only this entry uses /probe; the entries above keep /metrics.
- service_name: 'external-domain'
probe_module: http_2xx
metricspath: /probe
# NOTE(review): targets is a single templated value here while every other
# entry uses a list - presumably static_site_url expands to a list; confirm.
targets: "{{ static_site_url }}"

kafka_topic_prefix: sunbird
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ scrapeconfig:
{% for item in service_blackbox_checks %}
# This empty line ensures indentation is correct after ansible jinja2 template is materialized
- job_name: 'availability_{{ item.service_name }}'
metrics_path: /metrics
metrics_path: {{ item.metricspath }}
params:
module: [{{ item.probe_module }}]
static_configs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ environmentjs: |-
DEFAULT_MEETING_SERVICE="OFF"
DEFAULT_ORGANISATION_CODE="{{mentoring_default_organisation_code}}"
DEFAULT_ORG_ID="{{mentoring_default_organisation_code}}"
DEV_DATABASE_URL="postgres://{{ postgresql_username_mentoring }}:{{ postgresql_password_mentoring }}@{{ postgresql_host }}:{{ postgresql_port}}/elevate_mentoring"
DEV_DATABASE_URL="postgres://{{ postgresql_username_mentoring }}:{{ postgresql_password_mentoring }}@{{ sl_postgresql_host }}:{{ postgresql_port}}/elevate_mentoring"
DISABLE_LOG="false"
ENABLE_EMAIL_FOR_REPORT_ISSUE="true"
ERROR_LOG_LEVEL="silly"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,89 +18,89 @@ spec:
labels:
severity: warning
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.instance }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} - ({{ $labels.instance }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
summary: {{`'HIGH CPU USAGE warning ON {{ $labels.instance }}'`}}
- alert: high_cpu_usage_on_node_critical
expr: (avg by (instance, nodename) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode!="idle"}[5m])) * 100) >= {{ .Values.node_cpu_usage_percentage_threshold_Critical }} and (avg by (instance) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode!="idle"}[5m])) * 100) < {{ .Values.node_cpu_usage_percentage_threshold_Fatal }}
for: 1m
labels:
severity: critical
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.instance }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} - ({{ $labels.instance }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
summary: {{`'HIGH CPU USAGE warning ON {{ $labels.instance }}'`}}
- alert: high_cpu_usage_on_node_fatal
expr: (avg by (instance, nodename) (irate(node_cpu_seconds_total{job="vm-node-exporter",mode!="idle"}[5m])) * 100) >= {{ .Values.node_cpu_usage_percentage_threshold_Fatal }}
for: 1m
labels:
severity: fatal
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.instance }} is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} - ({{ $labels.instance }}) is using a LOT of CPU. CPU usage is {{ humanize $value}}%.'`}}
summary: {{`'HIGH CPU USAGE warning ON {{ $labels.instance }}'`}}
- alert: high_memory_usage_on_node_warning
expr: sum by(nodename, instance) ((((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) >= {{ .Values.node_memory_usage_percentage_threshold_Warning }} and (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) < {{ .Values.node_memory_usage_percentage_threshold_Critical }} )
for: 1m
labels:
severity: warning
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
summary: {{`'HIGH MEMORY USAGE warning TASK ON {{ $labels.nodename }}'`}}
- alert: high_memory_usage_on_node_critical
expr: sum by(nodename, instance) ((((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) >= {{ .Values.node_memory_usage_percentage_threshold_Critical }} and (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) < {{ .Values.node_memory_usage_percentage_threshold_Fatal }} )
for: 1m
labels:
severity: critical
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
summary: {{`'HIGH MEMORY USAGE warning TASK ON {{ $labels.nodename }}'`}}
- alert: high_memory_usage_on_node_fatal
expr: sum by(nodename, instance) (((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * on(instance) group_left(nodename) node_uname_info * 100) >= {{ .Values.node_memory_usage_percentage_threshold_Fatal }}
for: 1m
labels:
severity: fatal
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) is using a LOT of MEMORY. MEMORY usage is over {{ humanize $value}}.'`}}
summary: {{`'HIGH MEMORY USAGE warning TASK ON {{ $labels.nodename }}'`}}
- alert: high_load_on_node_warning
expr: sum by(instance, nodename) ((node_load1{job="vm-node-exporter"} / count by (cluster, job, instance)(count by(cluster, job, instance, cpu)(node_cpu_seconds_total{job="vm-node-exporter"})) * 100) >= {{ .Values.node_load_avg_threshold_Warning }} and (node_load1{job="vm-node-exporter"} / count by (cluster, job, instance)(count by(cluster, job, instance, cpu)(node_cpu_seconds_total{job="vm-node-exporter"})) * 100) < {{ .Values.node_load_avg_threshold_Critical }})
for: 5m
labels:
severity: warning
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) has a high load average. Load average is {{ $value }}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) has a high load average. Load average is {{ $value }}%.'`}}
summary: {{`'HIGH LOAD AVERAGE warning ON {{ $labels.nodename }}'`}}
- alert: high_load_on_node_critical
expr: sum by(instance, nodename) ((node_load1{job="vm-node-exporter"} / count by (cluster, job, instance)(count by(cluster, job, instance, cpu)(node_cpu_seconds_total{job="vm-node-exporter"})) * 100) >= {{ .Values.node_load_avg_threshold_Critical }} and (node_load1{job="vm-node-exporter"} / count by (cluster, job, instance)(count by(cluster, job, instance, cpu)(node_cpu_seconds_total{job="vm-node-exporter"})) * 100) < {{ .Values.node_load_avg_threshold_Fatal }})
for: 5m
labels:
severity: critical
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) has a high load average. Load average is {{ $value }}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) has a high load average. Load average is {{ $value }}%.'`}}
summary: {{`'HIGH LOAD AVERAGE warning ON {{ $labels.nodename }}'`}}
- alert: high_load_on_node_fatal
expr: sum by(instance, nodename) ((node_load1{job="vm-node-exporter"} / count by (cluster, job, instance)(count by(cluster, job, instance, cpu)(node_cpu_seconds_total{job="vm-node-exporter"})) * 100) >= {{ .Values.node_load_avg_threshold_Fatal }})
for: 5m
labels:
severity: fatal
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
message: {{`'{{ $labels.nodename }} ({{ $labels.host }}) has a high load average. Load average is {{ $value }}%.'`}}
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'{{ $labels.nodename }} ({{ $labels.instance }}) has a high load average. Load average is {{ $value }}%.'`}}
summary: {{`'HIGH LOAD AVERAGE warning ON {{ $labels.nodename }}'`}}
- alert: node_exporter_down_warning
expr: up == 0
Expand All @@ -109,15 +109,15 @@ spec:
severity: warning
annotations:
message: {{`The node exporter '{{ $labels.job }}' is down.`}}
summary: {{`'NODE EXPORTER SERVICE critical: NODE ''{{ $labels.host }}'''`}}
summary: {{`'NODE EXPORTER SERVICE critical: NODE ''{{ $labels.instance }}'''`}}
- alert: node_running_out_of_disk_space_warning
expr: sum by(nodename, instance) (((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) * 100 / node_filesystem_size_bytes{mountpoint="/"} * on(instance) group_left(nodename) node_uname_info) >= {{ .Values.node_disk_usage_percentage_threshold_Warning }} and ((node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) * 100 / node_filesystem_size_bytes{mountpoint="/"} * on(instance) group_left(nodename) node_uname_info) < {{ .Values.node_disk_usage_percentage_threshold_Critical }} )
for: 1m
labels:
severity: warning
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'Disk usage is {{ humanize $value }}%'`}}
summary: {{`'LOW DISK SPACE WARNING: NODE {{ $labels.nodename }}'`}}
- alert: node_running_out_of_disk_space_critical
Expand All @@ -126,8 +126,8 @@ spec:
labels:
severity: critical
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'Disk usage is {{ humanize $value }}%'`}}
summary: {{`'LOW DISK SPACE WARNING: NODE {{ $labels.nodename }}'`}}
- alert: node_running_out_of_disk_space_fatal
Expand All @@ -136,7 +136,7 @@ spec:
labels:
severity: fatal
annotations:
hostname: '{{ $labels.nodename }}'
ip: '{{ $labels.instance }}'
hostname: {{`'{{ $labels.nodename }}'`}}
ip: {{`'{{ $labels.instance }}'`}}
message: {{`'Disk usage is {{ humanize $value }}%'`}}
summary: {{`'LOW DISK SPACE WARNING: NODE {{ $labels.nodename }}'`}}

0 comments on commit 4657291

Please sign in to comment.