-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Test whether incidents are restored correctly
- Loading branch information
Showing
3 changed files
with
302 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
name: Tests with database | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
pull_request: {} | ||
|
||
jobs: | ||
postgresql: | ||
name: ${{ matrix.database.name }} | ||
runs-on: ubuntu-latest | ||
|
||
strategy: | ||
fail-fast: false | ||
matrix: | ||
database: | ||
- {name: PostgreSQL 9.6, image: "postgres:9.6"} | ||
- {name: PostgreSQL 10, image: "postgres:10"} | ||
- {name: PostgreSQL 11, image: "postgres:11"} | ||
- {name: PostgreSQL 12, image: "postgres:12"} | ||
- {name: PostgreSQL 13, image: "postgres:13"} | ||
- {name: PostgreSQL 14, image: "postgres:14"} | ||
- {name: PostgreSQL 15, image: "postgres:15"} | ||
- {name: PostgreSQL latest, image: "postgres:latest"} | ||
|
||
env: | ||
NOTIFICATIONS_TESTS_DB_TYPE: pgsql | ||
NOTIFICATIONS_TESTS_DB: notifications | ||
NOTIFICATIONS_TESTS_DB_USER: postgres | ||
NOTIFICATIONS_TESTS_DB_PASSWORD: notifications | ||
NOTIFICATIONS_TESTS_DB_HOST: 127.0.0.1 | ||
NOTIFICATIONS_TESTS_DB_PORT: 5432 | ||
|
||
services: | ||
postgres: | ||
image: ${{ matrix.database.image }} | ||
env: | ||
POSTGRES_PASSWORD: ${{ env.NOTIFICATIONS_TESTS_DB_PASSWORD }} | ||
POSTGRES_DB: ${{ env.NOTIFICATIONS_TESTS_DB }} | ||
# Wait until postgres becomes ready | ||
options: >- | ||
--health-cmd pg_isready | ||
--health-interval 10s | ||
--health-timeout 5s | ||
--health-retries 5 | ||
ports: | ||
- ${{ env.NOTIFICATIONS_TESTS_DB_PORT }}:${{ env.NOTIFICATIONS_TESTS_DB_PORT }} | ||
|
||
steps: | ||
- name: Setup Go | ||
uses: actions/setup-go@v4 | ||
with: | ||
go-version: stable | ||
|
||
- name: Checkout code | ||
uses: actions/checkout@v4 | ||
|
||
- name: Importing Schema | ||
env: | ||
PGPASSWORD: ${{ env.NOTIFICATIONS_TESTS_DB_PASSWORD }} | ||
run: | | ||
psql -U postgres -w -h 127.0.0.1 -d ${{ env.NOTIFICATIONS_TESTS_DB }} < ${{ github.workspace }}/schema/pgsql/schema.sql | ||
- name: Download dependencies | ||
run: go get -v -t -d ./... | ||
|
||
- name: Run tests | ||
timeout-minutes: 10 | ||
run: go test -v -timeout 5m ./... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
package incident | ||
|
||
import ( | ||
"context" | ||
"crypto/rand" | ||
"fmt" | ||
"github.com/creasty/defaults" | ||
"github.com/icinga/icinga-notifications/internal/config" | ||
"github.com/icinga/icinga-notifications/internal/event" | ||
"github.com/icinga/icinga-notifications/internal/object" | ||
baseConfig "github.com/icinga/icingadb/pkg/config" | ||
"github.com/icinga/icingadb/pkg/icingadb" | ||
"github.com/icinga/icingadb/pkg/logging" | ||
"github.com/icinga/icingadb/pkg/types" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
"go.uber.org/zap/zaptest" | ||
"os" | ||
"strconv" | ||
"strings" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestIncidentLoading(t *testing.T) { | ||
ctx := context.Background() | ||
db := getTestDB(ctx, t) | ||
|
||
// Insert a dummy sources for our test cases! | ||
source := config.Source{ID: 1, Type: "notifications", Name: "Icinga Notifications", Icinga2InsecureTLS: types.Bool{Bool: false, Valid: true}} | ||
stmt, _ := db.BuildInsertStmt(source) | ||
_, err := db.NamedExecContext(ctx, stmt, source) | ||
require.NoError(t, err, "populating source table should not fail") | ||
|
||
// Reduce the default placeholders per statement to a meaningful number, so that we can | ||
// test some parallelism when loading the incidents. | ||
db.Options.MaxPlaceholdersPerStatement = 100 | ||
|
||
// Due to the 10*maxPlaceholders constraint, only 10 goroutines are going to process simultaneously. | ||
// Therefore, reduce the default maximum number of connections per table to 4 in order to fully simulate | ||
// semaphore lock wait cycles for a given table. | ||
db.Options.MaxConnectionsPerTable = 4 | ||
|
||
testData := make(map[string]*Incident, 10*db.Options.MaxPlaceholdersPerStatement) | ||
for j := 1; j <= 10*db.Options.MaxPlaceholdersPerStatement; j++ { | ||
i := makeIncident(ctx, db, t, false) | ||
testData[i.ObjectID.String()] = i | ||
} | ||
|
||
t.Run("WithNoRecoveredIncidents", func(t *testing.T) { | ||
assertIncidents(ctx, db, t, testData) | ||
}) | ||
|
||
t.Run("WithSomeRecoveredIncidents", func(t *testing.T) { | ||
tx, err := db.BeginTxx(ctx, nil) | ||
require.NoError(t, err, "starting a transaction should not fail") | ||
|
||
// Drop all cached incidents before re-loading them! | ||
for _, i := range GetCurrentIncidents() { | ||
RemoveCurrent(i.Object) | ||
|
||
// Mark some of the existing incidents as recovered. | ||
if i.Id%20 == 0 { // 1000 / 20 => 50 existing incidents will be marked as recovered! | ||
i.RecoveredAt = types.UnixMilli(time.Now()) | ||
|
||
require.NoError(t, i.Sync(ctx, tx), "failed to update/insert incident") | ||
|
||
// Drop it from our test data as it's recovered | ||
delete(testData, i.ObjectID.String()) | ||
} | ||
} | ||
require.NoError(t, tx.Commit(), "committing a transaction should not fail") | ||
|
||
assert.Len(t, GetCurrentIncidents(), 0, "there should be no cached incidents") | ||
|
||
for j := 1; j <= db.Options.MaxPlaceholdersPerStatement/2; j++ { | ||
// We don't need to cache recovered incidents in memory. | ||
_ = makeIncident(ctx, db, t, true) | ||
|
||
if j%2 == 0 { | ||
// Add some extra new not recovered incidents to fully simulate a daemon reload. | ||
i := makeIncident(ctx, db, t, false) | ||
testData[i.ObjectID.String()] = i | ||
} | ||
} | ||
|
||
assertIncidents(ctx, db, t, testData) | ||
}) | ||
} | ||
|
||
// assertIncidents restores all not recovered incidents from the database and asserts them based on the given testData. | ||
// | ||
// The incident loading process is limited to a maximum duration of 10 seconds and will be | ||
// aborted and causes the entire test suite to fail immediately, if it takes longer. | ||
func assertIncidents(ctx context.Context, db *icingadb.DB, t *testing.T, testData map[string]*Incident) { | ||
logger := logging.NewLogger(zaptest.NewLogger(t).Sugar(), time.Hour) | ||
|
||
// Since we have been using object.FromEvent() to persist the test objects to the database, | ||
// these will be automatically added to the objects cache as well. So clear the cache before | ||
// reloading the incidents, otherwise it will panic in object.RestoreObjects(). | ||
object.ClearCache() | ||
|
||
// The incident loading process may hang due to unknown bugs or semaphore lock waits. | ||
// Therefore, give it maximum time of 10s to finish normally, otherwise give up and fail. | ||
ctx, cancelFunc := context.WithDeadline(ctx, time.Now().Add(10*time.Second)) | ||
defer cancelFunc() | ||
|
||
err := LoadOpenIncidents(ctx, db, logger, &config.RuntimeConfig{}) | ||
require.NoError(t, err, "failed to load not recovered incidents") | ||
|
||
incidents := GetCurrentIncidents() | ||
assert.Len(t, incidents, len(testData), "failed to load all active incidents") | ||
|
||
for _, current := range incidents { | ||
i := testData[current.ObjectID.String()] | ||
assert.NotNilf(t, i, "found mysterious incident that's not part of our test data") | ||
assert.NotNil(t, current.Object, "failed to restore incident object") | ||
|
||
if i != nil { | ||
assert.Equal(t, i.Id, current.Id, "incidents linked to the same object don't have the same ID") | ||
assert.Equal(t, i.Severity, current.Severity, "failed to restore incident severity") | ||
assert.Equal(t, i.StartedAt, current.StartedAt, "failed to restore incident started at") | ||
assert.Equal(t, i.RecoveredAt, current.RecoveredAt, "failed to restore incident recovered at") | ||
|
||
assert.NotNil(t, current.EscalationState, "incident escalation state map should've initialised") | ||
assert.NotNil(t, current.Recipients, "incident recipients map should've initialised") | ||
assert.NotNil(t, current.Rules, "incident rules map should've initialised") | ||
|
||
if current.Object != nil { | ||
assert.Equal(t, i.Object, current.Object, "failed to fully restore incident") | ||
} | ||
} | ||
} | ||
} | ||
|
||
// makeIncident returns a fully initialised recovered/un-recovered incident. | ||
// | ||
// This will firstly create and synchronise a new object from a freshly generated dummy event with distinct | ||
// tags and name, and ensures that no error is returned, otherwise it will cause the entire test suite to fail. | ||
// Once the object has been successfully synchronised, an incident is created and synced with the database. | ||
func makeIncident(ctx context.Context, db *icingadb.DB, t *testing.T, recovered bool) *Incident { | ||
ev := &event.Event{ | ||
Time: time.Time{}, | ||
SourceId: 1, | ||
Name: makeRandomString(t), | ||
Tags: map[string]string{ // Always generate unique object tags not to produce same object ID! | ||
"host": makeRandomString(t), | ||
"service": makeRandomString(t), | ||
}, | ||
ExtraTags: map[string]string{ | ||
"hostgroup/database-server": "", | ||
"servicegroup/webserver": "", | ||
}, | ||
} | ||
|
||
o, err := object.FromEvent(ctx, db, ev) | ||
require.NoError(t, err) | ||
|
||
i := NewIncident(db, o, &config.RuntimeConfig{}, nil) | ||
i.StartedAt = types.UnixMilli(time.Unix(time.Now().Add(-2*time.Hour).Unix(), 0)) | ||
i.Severity = event.SeverityCrit | ||
if recovered { | ||
i.Severity = event.SeverityOK | ||
i.RecoveredAt = types.UnixMilli(time.Now()) | ||
} | ||
|
||
tx, err := db.BeginTxx(ctx, nil) | ||
require.NoError(t, err, "starting a transaction should not fail") | ||
require.NoError(t, i.Sync(ctx, tx), "failed to insert incident") | ||
require.NoError(t, tx.Commit(), "committing a transaction should not fail") | ||
|
||
return i | ||
} | ||
|
||
func makeRandomString(t *testing.T) string { | ||
buf := make([]byte, 20) | ||
_, err := rand.Read(buf) | ||
require.NoError(t, err, "failed to generate random string") | ||
|
||
return fmt.Sprintf("%x", buf) | ||
} | ||
|
||
// getTestDB retrieves the database config from env variables, opens a new database and returns it. | ||
// | ||
// The test suite will be skipped if no environment variable is set, otherwise fails fatally when | ||
// invalid configurations are specified. | ||
func getTestDB(ctx context.Context, t *testing.T) *icingadb.DB { | ||
c := &baseConfig.Database{} | ||
require.NoError(t, defaults.Set(c), "applying config default should not fail") | ||
|
||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB_TYPE"); ok { | ||
c.Type = strings.ToLower(v) | ||
} else { | ||
t.Skipf("Environment %q not set, skipping test!", "NOTIFICATIONS_TESTS_DB_TYPE") | ||
} | ||
|
||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB"); ok { | ||
c.Database = v | ||
} | ||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB_USER"); ok { | ||
c.User = v | ||
} | ||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB_PASSWORD"); ok { | ||
c.Password = v | ||
} | ||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB_HOST"); ok { | ||
c.Host = v | ||
} | ||
if v, ok := os.LookupEnv("NOTIFICATIONS_TESTS_DB_PORT"); ok { | ||
port, err := strconv.Atoi(v) | ||
require.NoError(t, err, "invalid port provided") | ||
|
||
c.Port = port | ||
} | ||
|
||
require.NoError(t, c.Validate(), "database config validation should not fail") | ||
|
||
db, err := c.Open(logging.NewLogger(zaptest.NewLogger(t).Sugar(), time.Hour)) | ||
require.NoError(t, err, "connecting to database should not fail") | ||
require.NoError(t, db.PingContext(ctx), "pinging the database should not fail") | ||
|
||
return db | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters