diff --git a/CHANGELOG.md b/CHANGELOG.md index 773ecac..39bdcfc 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ This project adheres to [Semantic Versioning](http://semver.org/). This CHANGELOG follows the format listed [here](https://github.com/sensu-plugins/community/blob/master/HOW_WE_CHANGELOG.md) ## [Unreleased] +### Fixed +- check-marathon-apps.rb: script should not fail on first faulty result (@bergerx) + +### Added +- check-marathon-apps.rb: introduced the `--check-config-overrides` flag (@bergerx) ## [2.4.0] - 2018-03-20 ### Changed diff --git a/README.md b/README.md index c7bdac8..6367670 100755 --- a/README.md +++ b/README.md @@ -38,7 +38,10 @@ another check result for the apps `status`. Check results can be customised by two ways: 1. Default check result fields thats applied to all will be provided by a - default check config. Please see th esource code to see the whole defaults. + default check config. Please see the source code for the complete set of defaults. + Since the whole default check config tends to be big, you can also use the + `--check-config-overrides` flag to provide just a few new fields or to override + existing defaults. 2. Application owners can override check results by using marathon labels. This allows each application to have different fields in the published result. e.g. per app escalation or aggregate can be controlled by applying Marathon diff --git a/bin/check-marathon-apps.rb b/bin/check-marathon-apps.rb index 8641a40..2ae0c3e 100755 --- a/bin/check-marathon-apps.rb +++ b/bin/check-marathon-apps.rb @@ -172,6 +172,13 @@ class MarathonAppsCheck < Sensu::Plugin::Check::CLI description: 'Similar to `--default-check-config` but read from given file. If both parameters are provided '\ '`--default-check-config` will override this one.'
+ option :check_config_overrides, + long: '--check-config-overrides CHECK_CONFIG_OVERRIDES', + description: 'Instead of providing whole default-check-config if you just want to introduce some new fields '\ + 'to the check config without having to provide whole config, this will be merged to the '\ + 'default-check-config.', + default: '{}' + option :sensu_client_url, description: 'Sensu client HTTP URL', long: '--sensu-client-url url', @@ -203,57 +210,78 @@ def run else DEFAULT_CHECK_CONFIG end - check_config = parse_json(check_config_str) + default_check_config = parse_json(check_config_str) + check_config_overrides = parse_json(config[:check_config_overrides]) + check_config = default_check_config.merge(check_config_overrides) # Filter apps, if both exists exclude pattern will override match pattern apps.keep_if { |app| app['id'][/#{config[:match_pattern]}/] } if config[:match_pattern] apps.delete_if { |app| app['id'][/#{config[:exclude_pat]}/] } if config[:exclude_pat] + failed_apps_to_be_reported = 0 apps.each do |app| - # Select app queue if any - app_queue = queue.select { |q| q['app']['id'][/^#{app['id']}$/] }.to_a.first - - # Build check result - check_result = check_result_scaffold(app) - - # Parse Marathon app labels - labels_config = parse_app_labels(app['labels'].to_h) - - REFERENCES.each do |reference| - # / is and invalid character - check_result['name'] = "check_marathon_app#{app['id'].tr('/', '_')}_#{reference}" - - state = case reference - when 'health' - get_marathon_app_health(app) - when 'status' - get_marathon_app_status(app, app_queue.to_h) - end - - # Merge user provided check config - check_result.merge!(check_config.dig('_').to_h) - check_result.merge!(check_config.dig(reference, '_').to_h) - check_result.merge!(check_config.dig(reference, state).to_h) - - # Merge Marathon parsed check config - check_result.merge!(labels_config.dig('_').to_h) - check_result.merge!(labels_config.dig(reference, '_').to_h) - 
check_result.merge!(labels_config.dig(reference, state).to_h) - - # Build check result output - check_result['output'] = "#{reference.upcase} #{state.capitalize} - "\ - "tasksRunning(#{app['tasksRunning'].to_i}), tasksStaged(#{app['tasksStaged'].to_i}), "\ - "tasksHealthy(#{app['tasksHealthy'].to_i}), tasksUnhealthy(#{app['tasksUnhealthy'].to_i})" - - # Make sure that check result data types are correct - enforce_sensu_field_types(check_result) - - # Send the result to sensu-client HTTP socket - post_check_result(check_result) - end + failed_apps_to_be_reported += 1 unless process_app_results(app, queue, check_config) + end + + if failed_apps_to_be_reported > 0 + critical "#{failed_apps_to_be_reported} apps are failed to be reported to sensu" + else + ok 'Marathon Apps Status and Health check is running properly' end + end + + def process_app_results(app, queue, check_config) + app_result_pushed = true + + # Select app queue if any + app_queue = queue.select { |q| q['app']['id'][/^#{app['id']}$/] }.to_a.first + + # Build check result + check_result = check_result_scaffold(app) - ok 'Marathon Apps Status and Health check is running properly' + # Parse Marathon app labels + labels_config = parse_app_labels(app['labels'].to_h) + + REFERENCES.each do |reference| + # / is an invalid character + check_result['name'] = "check_marathon_app#{app['id'].tr('/', '_')}_#{reference}" + + state = case reference + when 'health' + get_marathon_app_health(app) + when 'status' + get_marathon_app_status(app, app_queue.to_h) + end + + # Merge user provided check config + check_result.merge!(check_config.dig('_').to_h) + check_result.merge!(check_config.dig(reference, '_').to_h) + check_result.merge!(check_config.dig(reference, state).to_h) + + # Merge Marathon parsed check config + check_result.merge!(labels_config.dig('_').to_h) + check_result.merge!(labels_config.dig(reference, '_').to_h) + check_result.merge!(labels_config.dig(reference, state).to_h) + + # Build check result output
+ check_result['output'] = "#{reference.upcase} #{state.capitalize} - "\ + "tasksRunning(#{app['tasksRunning'].to_i}), tasksStaged(#{app['tasksStaged'].to_i}), "\ + "tasksHealthy(#{app['tasksHealthy'].to_i}), tasksUnhealthy(#{app['tasksUnhealthy'].to_i})" + + # Make sure that check result data types are correct + enforce_sensu_field_types(check_result) + + # Send the result to sensu-client HTTP socket + app_result = post_check_result(check_result) + + # mark if result can't be posted to sensu + app_result_pushed = if app_result_pushed && app_result + true + else + false + end + end + app_result_pushed end def check_result_scaffold(app) @@ -311,8 +339,11 @@ def post_check_result(data) data.to_json, content_type: 'application/json', timeout: config[:timeout]) + true rescue RestClient::ExceptionWithResponse => e - critical "Error while trying to POST check result (#{config[:sensu_client_url]}/results): #{e.response}" + # print a message about failing POST but keep going + STDERR.puts "Error while trying to POST check result for #{data} (#{config[:sensu_client_url]}/results): #{e.response}" + false end def parse_json(json) diff --git a/test/check_marathon_apps_spec.rb b/test/check_marathon_apps_spec.rb index a61492b..7e8fcca 100755 --- a/test/check_marathon_apps_spec.rb +++ b/test/check_marathon_apps_spec.rb @@ -24,10 +24,21 @@ def fetch_queue(*) end def post_check_result(res) - @check_results.push(res.dup) + # simulate failure from sensu agent, see the overridden method in MarathonAppsCheck + if res['name'] =~ /non-sensu-compliant-test/ + false + else + @check_results.push(res.dup) + true + end end def ok(*); end + + def critical(*args) + @status = 'CRITICAL' + output(*args) + end end describe 'MarathonTaskCheck' do @@ -43,7 +54,8 @@ def ok(*); end describe '#run' do it 'tests multiple applications with different states' do - @check.run + expect { @check.run }.to output("CheckMarathonApps CRITICAL: 1 apps are failed to be reported to sensu\n").to_stdout +
expect(@check.check_results).to contain_hash_with_keys( 'name' => 'check_marathon_app_sensu-test_health', 'output' => 'HEALTH Unknown - tasksRunning(1), tasksStaged(0), tasksHealthy(0), tasksUnhealthy(0)', diff --git a/test/fixtures/marathon_apps_with_embeds.json b/test/fixtures/marathon_apps_with_embeds.json index dc6c46e..34e76f7 100644 --- a/test/fixtures/marathon_apps_with_embeds.json +++ b/test/fixtures/marathon_apps_with_embeds.json @@ -589,5 +589,53 @@ "slaveId": "4c5cca36-8774-429e-a912-eaaad2765219-S1" }, "taskStats": {} + }, + { + "id": "/non-sensu-compliant-test", + "backoffFactor": 10, + "backoffSeconds": 10, + "cmd": "sleep 1000000", + "container": { + "type": "MESOS", + "volumes": [] + }, + "cpus": 0.1, + "disk": 0, + "executor": "", + "instances": 1, + "labels": { + "MARATHON_SENSU_AGGREGATE": "aggregate with space is not valid for sensu" + }, + "maxLaunchDelaySeconds": 36000000, + "mem": 128, + "gpus": 0, + "networks": [ + { + "mode": "host" + } + ], + "portDefinitions": [], + "requirePorts": false, + "upgradeStrategy": { + "maximumOverCapacity": 1, + "minimumHealthCapacity": 1 + }, + "version": "2018-03-08T14:47:25.3Z", + "versionInfo": { + "lastScalingAt": "2018-03-08T14:47:25.3Z", + "lastConfigChangeAt": "2018-03-08T14:47:25.3Z" + }, + "killSelection": "YOUNGEST_FIRST", + "unreachableStrategy": { + "inactiveAfterSeconds": 0, + "expungeAfterSeconds": 0 + }, + "tasksStaged": 0, + "tasksRunning": 0, + "tasksHealthy": 0, + "tasksUnhealthy": 0, + "deployments": [], + "tasks": [], + "taskStats": {} } ] \ No newline at end of file