Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add health check for hgweb to match api version #679

Merged
merged 5 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions backend/LexBoxApi/LexBoxKernel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using LexCore.Config;
using LexCore.ServiceInterfaces;
using LexSyncReverseProxy;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Swashbuckle.AspNetCore.Swagger;

namespace LexBoxApi;
Expand All @@ -22,9 +23,9 @@ public static void AddLexBoxApi(this IServiceCollection services,
.ValidateDataAnnotations()
.ValidateOnStart();
// services.AddOptions<HasuraConfig>()
// .BindConfiguration("HasuraConfig")
// .ValidateDataAnnotations()
// .ValidateOnStart();
// .BindConfiguration("HasuraConfig")
// .ValidateDataAnnotations()
// .ValidateOnStart();
services.AddOptions<CloudFlareConfig>()
.BindConfiguration("CloudFlare")
.ValidateDataAnnotations()
Expand All @@ -50,18 +51,20 @@ public static void AddLexBoxApi(this IServiceCollection services,
services.AddScoped<TusService>();
services.AddScoped<TurnstileService>();
services.AddScoped<IHgService, HgService>();
services.AddTransient<HgWebHealthCheck>();
services.AddScoped<ILexProxyService, LexProxyService>();
services.AddSingleton<ISendReceiveService, SendReceiveService>();
services.AddSingleton<LexboxLinkGenerator>();
if (environment.IsDevelopment())
services.AddHostedService<SwaggerValidationService>();
services.AddScheduledTasks(configuration);
services.AddHealthChecks().AddCheck<HgWebHealthCheck>("hgweb", HealthStatus.Unhealthy, ["hg"], TimeSpan.FromSeconds(5));
services.AddSyncProxy();
AuthKernel.AddLexBoxAuth(services, configuration, environment);
services.AddLexGraphQL(environment);
}

private class SwaggerValidationService(IAsyncSwaggerProvider swaggerProvider): IHostedService
private class SwaggerValidationService(IAsyncSwaggerProvider swaggerProvider) : IHostedService
{
public async Task StartAsync(CancellationToken cancellationToken)
{
Expand Down
7 changes: 7 additions & 0 deletions backend/LexBoxApi/Services/HgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,13 @@ public async Task<HttpContent> ExecuteHgRecover(string code, CancellationToken t
return int.TryParse(str, out int result) ? result : null;
}

/// <summary>
/// Sends the <c>healthz</c> command to the hg command server and returns the
/// response body with leading and trailing whitespace removed.
/// </summary>
/// <returns>The trimmed text of the command server's response.</returns>
public async Task<string> HgCommandHealth()
{
    // No caller-supplied token is available here, so the request runs with the default (non-cancellable) token.
    var response = await ExecuteHgCommandServerCommand("health", "healthz", default);
    return (await response.ReadAsStringAsync()).Trim();
}

private async Task<HttpContent> ExecuteHgCommandServerCommand(string code, string command, CancellationToken token)
{
var httpClient = _hgClient.Value;
Expand Down
23 changes: 23 additions & 0 deletions backend/LexBoxApi/Services/HgWebHealthCheck.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using LexCore.ServiceInterfaces;
using Microsoft.Extensions.Diagnostics.HealthChecks;

namespace LexBoxApi.Services;

/// <summary>
/// Health check that asks hgweb for the version it reports and compares it with
/// <see cref="AppVersionService.Version"/>.
/// </summary>
public class HgWebHealthCheck(IHgService hgService) : IHealthCheck
{
    /// <summary>
    /// Queries hgweb through <see cref="IHgService.HgCommandHealth"/> and maps the answer to a health status.
    /// </summary>
    /// <param name="context">Registration context supplied by the health check service (not used by this check).</param>
    /// <param name="cancellationToken">Token signalled when the check should be abandoned, e.g. on timeout.</param>
    /// <returns>
    /// Unhealthy when no version is reported, Degraded when the reported version differs from the API version,
    /// otherwise Healthy.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context,
        CancellationToken cancellationToken = new())
    {
        // HgCommandHealth accepts no token, so cancellation is observed here instead. Without this the
        // check would keep waiting on an unresponsive hgweb even after the token has been cancelled.
        var version = await hgService.HgCommandHealth().WaitAsync(cancellationToken);
        if (string.IsNullOrEmpty(version))
        {
            return HealthCheckResult.Unhealthy("hgweb did not report a version");
        }
        if (version != AppVersionService.Version)
        {
            return HealthCheckResult.Degraded(
                $"api version: '{AppVersionService.Version}' hg version: '{version}' mismatch");
        }
        return HealthCheckResult.Healthy();
    }
}
1 change: 1 addition & 0 deletions backend/LexCore/ServiceInterfaces/IHgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ public interface IHgService
Task<string?> GetRepositoryIdentifier(Project project);
Task<HttpContent> ExecuteHgRecover(string code, CancellationToken token);
bool HasAbandonedTransactions(string projectCode);
Task<string> HgCommandHealth();
}
2 changes: 1 addition & 1 deletion deployment/base/app-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ metadata:
name: app-config
data:
environment-name: "Development"
hg-otel-enabled: "ON"
hg-otel-disabled: "false" # "true" to disable OpenTelemetry
hg-domain: "hg.localhost"

4 changes: 2 additions & 2 deletions deployment/base/hg-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ spec:
value: ".*"
- name: OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE
value: ".*"
- name: OTEL_ENABLED
- name: OTEL_SDK_DISABLED
valueFrom:
configMapKeyRef:
name: app-config
key: hg-otel-enabled
key: hg-otel-disabled
- name: ENABLE_DEMAND_IMPORT
value: "false"
ports:
Expand Down
1 change: 0 additions & 1 deletion deployment/develop/app-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ metadata:
name: app-config
data:
environment-name: "Development"
hg-otel-enabled: "ON"
hg-domain: "hg-develop.lexbox.org"

13 changes: 0 additions & 13 deletions deployment/local-dev/hg-deployment-patch.yaml

This file was deleted.

1 change: 0 additions & 1 deletion deployment/local-dev/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ patches:
path: change-storage-class.patch.yaml
- path: lexbox-deployment.patch.yaml
- path: ui-deployment.patch.yaml
- path: hg-deployment-patch.yaml
- path: hg-repos-pvc.patch.yaml
- path: lexbox-service.patch.yaml
- path: ingress-config.patch.yaml
Expand Down
1 change: 0 additions & 1 deletion deployment/staging/app-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ metadata:
name: app-config
data:
environment-name: "Staging"
hg-otel-enabled: "ON"
hg-domain: "hg-staging.languageforge.org"
2 changes: 1 addition & 1 deletion hgweb/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ VOLUME /var/hg/repos

ARG APP_VERSION
ENV APP_VERSION=$APP_VERSION
ENV OTEL_ENABLED=ON
ENV OTEL_RESOURCE_ATTRIBUTES="service.name=hgweb,service.version=${APP_VERSION}"
11 changes: 10 additions & 1 deletion hgweb/command-runner.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Define the list of allowed commands
allowed_commands=("verify" "tip" "lexentrycount" "recover")
allowed_commands=("verify" "tip" "lexentrycount" "recover" "healthz")

# Get the project code and command name from the URL
IFS='/' read -ra PATH_SEGMENTS <<< "$PATH_INFO"
Expand Down Expand Up @@ -29,6 +29,15 @@ if [[ ! " ${allowed_commands[@]} " =~ " ${command_name} " ]]; then
exit 1
fi

# healthz: answer with the app version (as a header and as the body) and stop
# here, before the generic command output below is started.
case "$command_name" in
    healthz)
        echo "lexbox-version: $APP_VERSION"
        echo "Status: 200 OK"
        echo "Content-type: text/plain"
        echo ""
        echo "$APP_VERSION"
        exit 0
        ;;
esac

# Start outputting the result right away so the HTTP connection won't be timed out
echo "Content-type: text/plain"
echo ""
Expand Down
1 change: 1 addition & 0 deletions hgweb/hg.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ ServerName localhost
</IfVersion>
</Directory>

SetEnv APP_VERSION ${APP_VERSION}
ScriptAliasMatch "^/command/(.*)" "/usr/local/www/commands/command-runner.sh/$1"
<Directory /usr/local/www/commands/>
Options +ExecCGI
Expand Down
6 changes: 1 addition & 5 deletions hgweb/hgweb.wsgi
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ if os.getenv('ENABLE_DEMAND_IMPORT', 'false').lower() in ['1', 'true', 'yes']:
else:
demandimport.disable()

from opentelemetry.sdk.resources import SERVICE_NAME, Resource
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
Expand All @@ -32,10 +31,7 @@ from opentelemetry.sdk.trace.export import (
ConsoleSpanExporter,
)

resource = Resource(attributes={
SERVICE_NAME: "hgweb"
})
provider = TracerProvider(resource=resource)
provider = TracerProvider()
processor = BatchSpanProcessor(OTLPSpanExporter())
provider.add_span_processor(processor)

Expand Down
2 changes: 0 additions & 2 deletions hgweb/opentelemetry_module.conf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ LoadFile /opt/opentelemetry-webserver-sdk/sdk_lib/lib/libopentelemetry_exporter_
LoadFile /opt/opentelemetry-webserver-sdk/sdk_lib/lib/libopentelemetry_webserver_sdk.so
#Load the Apache Module. In this example for Apache
LoadModule otel_apache_module /opt/opentelemetry-webserver-sdk/WebServerModule/Apache/libmod_apache_otel.so
ApacheModuleEnabled ${OTEL_ENABLED}

#ApacheModule Otel Exporter details
ApacheModuleOtelSpanExporter otlp
Expand All @@ -37,7 +36,6 @@ ApacheModuleServiceInstanceId ${APP_VERSION}

ApacheModuleResolveBackends ON
#https://github.com/open-telemetry/opentelemetry-cpp-contrib/blob/2a0db982f3d7ee91dfbe8150435e49e837bfb7ce/instrumentation/otel-webserver-module/src/apache/ApacheTracing.cpp#L90
ApacheModuleTraceAsError ${OTEL_ENABLED}
#ApacheModuleWebserverContext DemoService DemoServiceNamespace DemoInstanceId

#ApacheModuleSegmentType custom
Expand Down
2 changes: 2 additions & 0 deletions skaffold.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ build:
context: hgweb
docker:
dockerfile: Dockerfile
buildArgs:
APP_VERSION: dockerDev
local:
useBuildkit: true
concurrency: 2
Expand Down
Loading