diff --git a/.gitignore b/.gitignore index f64a08f..cd11943 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ stamp-h1 *.checked *.pyc *.c_checked +*.h_checked *.o # diff --git a/Makefile.am b/Makefile.am index e8f2637..8cef3bb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,12 @@ SUBDIRS = pybuild -SUBDIRS += barreleye pycheck bash_completion . +DIST_SUBDIRS = pybuild + +if ENABLE_BARRELE +SUBDIRS += barreleye +endif #ENABLE_BARRELE +SUBDIRS += pycheck bash_completion . +DIST_SUBDIRS += barreleye pycheck bash_completion . + build_dir = `pwd`/build rpmbuild_opt = @@ -23,7 +30,7 @@ EXTRA_DIST = \ coral.spec \ detect-distro.sh \ doc \ - example_configs/barreleye.toml \ + example_configs/barreleye.conf.example \ pybarrele/*.py \ pybuild/*.py \ pycoral/*.py \ @@ -48,6 +55,7 @@ clean-local: rm -f $$exe.spec; \ done rm -fr coral_build_* + rm -fr debian/.debhelper all: all-am $(CHECKS) @@ -57,46 +65,89 @@ mrproper: maintainer-clean rm -f Makefile.in aclocal.m4 configure rm -f compile depcomp install-sh missing -BARRELEYE_RPM = build/RPMS/$(target_cpu)/coral-barreleye-$(PACKAGE_VERSION)-$(CORAL_RELEASE).$(DISTRO_SHORT)*.$(target_cpu).rpm +CORAL_PACKAGES = -CORAL_RPMS = if ENABLE_BARRELE rpmbuild_opt += --with barrele -CORAL_RPMS += $(BARRELEYE_RPM) +CORAL_PACKAGES += barreleye else rpmbuild_opt += --without barrele endif -EXES = barrele +EXES = PYINSTALLER_CMD=PYTHONPATH=$(PIP3_PACKAGE_PATH) pyinstaller -F --clean + +if ENABLE_BARRELE +EXES += barrele dist/barrele: mkdir -p dist $(PYINSTALLER_CMD) barrele +endif + -EXE_BINS = $(addprefix dist/, $(EXES)) +EXE_BINS = @DOLLAR@(addprefix dist/, $(EXES)) exes: $(EXE_BINS) EXTRA_OPTION = -rpms: coral.spec dist +ISO_EXTRA = + + +if MAKE_RPMS +rpms: coral.spec dist Makefile mkdir -p $(build_dir)/BUILD $(build_dir)/SPECS $(build_dir)/SRPMS $(build_dir)/RPMS \ && rpmbuild $(rpmbuild_opt) --define="_topdir $(build_dir)" \ --define="_prefix $(prefix)" \ $(EXTRA_OPTION) -tb $(distdir).tar.gz \ && echo "RPMs successfully generated in $(build_dir)/RPMS" - -ISO_EXTRA = -coral-$(PACKAGE_VERSION).$(DISTRO_SHORT).$(target_cpu).iso: $(ISO_EXTRA) rpms +ISO_EXTRA += rpms +endif #MAKE_RPMS + +if MAKE_DEBS +debs: Makefile + VER=$$(echo @VERSION@ | tr '_' '-'); \ + changelog_version=$$(sed -ne '1s/^coral (\(.*\)).*$$/\1/p' debian/changelog); \ + if [ "$$changelog_version" != "$$VER" ]; then \ + echo -e "coral ($$VER) unstable; urgency=low\n\n * Automated changelog.\n\n -- Coral Packaging Team $$(date -R)\n" > debian/changelog; \ + fi; \ + rm -fr debs; \ + dpkg-buildpackage -us -uc -I.git || { \ + rc=$${PIPESTATUS[0]}; \ + [ $${rc} -gt 1 ] && exit $${rc}; \ + exit 0; \ + }; \ + mkdir -p debs && \ + mv ../coral-*_$${VER}_*.deb \ + ../coral_$${VER}_*.buildinfo \ + ../coral_$${VER}.tar.gz \ + ../coral_$${VER}.dsc \ + ../coral_$${VER}_*.changes \ + debs/ +ISO_EXTRA += debs +endif #MAKE_DEBS + +coral-$(PACKAGE_VERSION).$(DISTRO_SHORT).$(target_cpu).iso: $(ISO_EXTRA) rm $(ISO_PATH) -fr rm -f coral-*.iso rm -f coral-*_SHA256SUM cp -a $(ISO_CACHE_PATH) $(ISO_PATH) ./coral release_info save $(DISTRO_SHORT) $(target_cpu) coral_release_info.yaml cp coral_release_info.yaml $(ISO_PATH) - cp $(CORAL_RPMS) $(PACKAGE_PATH) - createrepo $(PACKAGE_PATH) + mkdir -p $(PACKAGE_PATH) + if [ "$(DISTRO_SHORT)" = "ubuntu2204" ]; then \ + DEB_PACKAGE_VERSION=$$(echo $(PACKAGE_VERSION) | tr '_' '-'); \ + for CORAL_PACKAGE in $(CORAL_PACKAGES); do \ + cp debs/coral-$${CORAL_PACKAGE}_$${DEB_PACKAGE_VERSION}_*.deb $(PACKAGE_PATH); \ + done; \ + fi + if [ "$(DISTRO_SHORT)" = "el7" -o 
"$(DISTRO_SHORT)" = "el8" ]; then \ + for CORAL_PACKAGE in $(CORAL_PACKAGES); do \ + cp build/RPMS/$(target_cpu)/coral-$${CORAL_PACKAGE}-$(PACKAGE_VERSION)-$(CORAL_RELEASE).$(DISTRO_SHORT)*.$(target_cpu).rpm $(PACKAGE_PATH); \ + done; \ + createrepo $(PACKAGE_PATH); \ + fi @if [ "$(DISTRO_SHORT)" = "el8" ]; then \ repo2module -s stable $(PACKAGE_PATH) $(PACKAGE_PATH)/modules.yaml; \ modifyrepo --mdtype=modules $(PACKAGE_PATH)/modules.yaml $(PACKAGE_PATH)/repodata; \ diff --git a/autogen.sh b/autogen.sh index bbf2366..f76b99f 100755 --- a/autogen.sh +++ b/autogen.sh @@ -18,7 +18,7 @@ EOF done } -check_for_application lex yacc autoheader aclocal automake autoconf +check_for_application autoheader aclocal automake autoconf # Actually we don't need the pkg-config executable, but we need the M4 macros. # We check for `pkg-config' here and hope that M4 macros will then be diff --git a/barreleye/Makefile.am b/barreleye/Makefile.am index 9b407e8..5ce6813 100644 --- a/barreleye/Makefile.am +++ b/barreleye/Makefile.am @@ -12,6 +12,7 @@ M4_DESTINE_FILES = ime-1.1.m4 \ lustre-b_es5_1.m4 \ lustre-b_es5_2.m4 \ lustre-b_es6_0.m4 \ + lustre-b_es6_1.m4 \ lustre-2.13.m4 \ sfa-3.0.m4 \ sfa-11.0.m4 \ diff --git a/barreleye/grafana_dashboards/cluster_status.json.template b/barreleye/grafana_dashboards/cluster_status.json.template index 73a5d9b..fdcc6e6 100644 --- a/barreleye/grafana_dashboards/cluster_status.json.template +++ b/barreleye/grafana_dashboards/cluster_status.json.template @@ -24,7 +24,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -293,7 +293,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is 
provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -564,7 +564,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -835,7 +835,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, 
"flipTime": 5, @@ -1106,7 +1106,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -1377,7 +1377,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -1648,7 +1648,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute 
average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -1919,7 +1919,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -2190,7 +2190,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -2461,7 +2461,7 @@ "ok": "rgba(50, 
128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -2732,7 +2732,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -3003,7 +3003,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   
if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -3274,7 +3274,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -3545,7 +3545,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -3816,7 +3816,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - 
"description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -4087,7 +4087,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, @@ -4358,7 +4358,7 @@ "ok": "rgba(50, 128, 45, 0.9)", "warn": "rgba(237, 129, 40, 0.9)" }, - "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n * Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critial(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", + "description": "#### _Warning(yellow)_\n######   if one or more of the following conditions are true:\n 
* Idle CPU is less than 20%.\n * Load is higher than 5\n * Free memory is less than 1000 MiB;\n * Free space of \"/\" is less than 10 GiB;\n#### _Critical(red)_\n ######   if one or more of the following conditions are true:\n + Idle CPU is less than 5%.\n + Load is higher than 10\n + Free space of \"/\" is less than 1 GiB;\n + Free memory is less than 100 MiB;\n\n----\n######   The system load is defined as the number of runnable tasks in the run-queue and is provided by many operating systems as a one minute average.", "displayName": "", "flipCard": false, "flipTime": 5, diff --git a/barreleye/grafana_dashboards/lustre_client.json.template b/barreleye/grafana_dashboards/lustre_client.json.template index 5f38b88..0e9adc8 100644 --- a/barreleye/grafana_dashboards/lustre_client.json.template +++ b/barreleye/grafana_dashboards/lustre_client.json.template @@ -16,45 +16,32 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1591336291942, + "iteration": 1693407689985, "links": [], "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 199, - "panels": [], - "repeat": "client_uuid", - "scopedVars": { - "client_uuid": { - "selected": true, - "text": "ffff8c277475e800", - "value": "ffff8c277475e800" - } - }, - "title": "Client operations latency", - "type": "row" - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$BARRELEYE_DATASOURCE_NAME", - "description": "", + "description": "The write throughput on this client.", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 6, - "w": 24, + "h": 8, + "w": 12, "x": 0, - "y": 1 + "y": 0 }, - "id": 126, + "hiddenSeries": false, + "id": 207, "legend": { "avg": false, "current": false, @@ -66,34 +53,26 @@ }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", - "paceLength": 10, + "options": { + "alertThreshold": true + }, "percentage": false, - "pointradius": 5, + "pluginVersion": "7.3.7", + "pointradius": 2, "points": false, "renderer": "flot", - "scopedVars": { - "client_uuid": { - "selected": true, - "text": "ffff8c277475e800", - "value": "ffff8c277475e800" - } - }, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "alias": "", - "dsType": "influxdb", + "alias": "Write Throughput", "groupBy": [], - "measurement": "client_stats_close_max", + "measurement": "client_stats_write_bytes_sum", "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_read_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, "refId": "A", "resultFormat": "time_series", "select": [ @@ -109,19 +88,108 @@ "tags": [ { "key": "client_uuid", - "operator": "=", - "value": "ffff8c277475e800" + "operator": "=~", + "value": "/^$client_uuid$/" } ] + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Write Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$BARRELEYE_DATASOURCE_NAME", + "description": "The read throughput on this client. Please note this number is NOT necessarily equal to the total read throughput from the OSTs to the client since the client might get the data directly from the client side read cache.", + "fieldConfig": { + "defaults": { + "custom": {} }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 206, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { + "alias": "Read Throughput", "groupBy": [], - "measurement": "client_stats_write_max", + "measurement": "client_stats_read_bytes_sum", "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_write_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -140,950 +208,104 @@ "value": "/^$client_uuid$/" } ] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_open_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_close_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mmap_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "E", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - 
"null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_page_fault_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "F", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_page_mkwrite_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "G", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_seek_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "H", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_fsync_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "I", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_readdir_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "J", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_setattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "K", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_truncate_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "L", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { 
- "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_flock_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "M", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_getattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "N", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_fallocate_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "O", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_create_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "P", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_link_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "Q", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_unlink_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "R", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_symlink_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "S", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - 
"type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mkdir_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "T", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_rmdir_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "U", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mknod_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "V", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_rename_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "W", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_statfs_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "X", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_setxattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "Y", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_getxattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "refId": "Z", 
- "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_listxattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$BARRELEYE_DATASOURCE_NAME", + "description": "The write operation rate on this client.", + "fieldConfig": { + "defaults": { + "custom": {} }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 205, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.3.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], + "alias": "Write Operation Rate", + "groupBy": [], + "measurement": "client_stats_write_bytes_samples", "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_removexattr_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -1092,57 +314,23 @@ "value" ], "type": "field" - }, - { - "params": [], - "type": "mean" } ] ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, + "tags": [ { - "params": [ - "null" - ], - "type": "fill" + "key": "client_uuid", + "operator": "=~", + "value": "/^$client_uuid$/" } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_inode_permission_max\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] + ] } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Max latency of client operations", + "title": "Write Operation Rate", "tooltip": { "shared": true, "sort": 0, @@ -1158,12 +346,11 @@ }, "yaxes": [ { - "decimals": 0, - "format": "short", + "format": "ops", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -1186,15 +373,23 @@ "dashLength": 10, "dashes": false, "datasource": "$BARRELEYE_DATASOURCE_NAME", - "description": "", + "description": "The read operation rate on this client.", + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + 
}, "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 7 + "h": 8, + "w": 12, + "x": 12, + "y": 8 }, - "id": 90, + "hiddenSeries": false, + "id": 208, "legend": { "avg": false, "current": false, @@ -1206,33 +401,151 @@ }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", - "paceLength": 10, + "options": { + "alertThreshold": true + }, "percentage": false, - "pointradius": 5, + "pluginVersion": "7.3.7", + "pointradius": 2, "points": false, "renderer": "flot", - "scopedVars": { - "client_uuid": { - "selected": true, - "text": "ffff8c277475e800", - "value": "ffff8c277475e800" - } - }, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "alias": "", + "alias": "Read Operation Rate", + "groupBy": [], + "measurement": "client_stats_read_bytes_samples", + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT \"value\" FROM \"client_stats_read_bytes_samples\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [ + { + "key": "client_uuid", + "operator": "=~", + "value": "/^$client_uuid$/" + } + ] + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Read Operation Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cacheTimeout": null, + "datasource": "$BARRELEYE_DATASOURCE_NAME", + "description": "The average latency of different kinds of client operations. 
High latency can be caused by poor specification of network, servers or/and storages.", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 90, + "interval": null, + "links": [], + "options": { + "displayMode": "gradient", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "7.3.7", + "targets": [ + { + "aggregation": "Last", + "alias": "Read", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "dsType": "influxdb", "groupBy": [], "measurement": "client_stats_close_mean", "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_read_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_read_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -1252,14 +565,22 @@ "operator": "=", "value": "ffff8c277475e800" } - ] + ], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Write", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [], "measurement": "client_stats_write_mean", "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_write_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_write_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -1279,9 +600,17 @@ "operator": "=~", "value": "/^$client_uuid$/" } - ] + ], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Open", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1298,8 +627,8 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_open_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", - "rawQuery": true, + "query": "SELECT \"value\" * 1000 FROM \"client_stats_open_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "rawQuery": false, "refId": "C", "resultFormat": "time_series", "select": [ @@ -1316,9 +645,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Close", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1335,7 +672,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_close_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_close_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -1353,9 +690,17 @@ } ] ], - "tags": [] + 
"tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Mmap", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1372,7 +717,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mmap_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_mmap_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "E", "resultFormat": "time_series", @@ -1390,9 +735,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Page Fault", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1409,7 +762,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_page_fault_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_page_fault_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "F", "resultFormat": "time_series", @@ -1427,9 +780,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Page Mkwrite", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1446,7 +807,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_page_mkwrite_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_page_mkwrite_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "G", "resultFormat": "time_series", @@ -1464,9 +825,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Seek", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1483,7 +852,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_seek_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_seek_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid $/) AND $timeFilter", "rawQuery": true, "refId": "H", "resultFormat": "time_series", @@ -1501,9 +870,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Fsync", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1520,7 +897,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_fsync_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_fsync_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "I", "resultFormat": "time_series", @@ -1538,9 +915,17 @@ } ] ], - "tags": [] + "tags": [], + "units": 
"none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Readdir", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1557,7 +942,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_readdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_readdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "J", "resultFormat": "time_series", @@ -1575,9 +960,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Setattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1594,7 +987,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_setattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_setattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "K", "resultFormat": "time_series", @@ -1612,9 +1005,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Truncate", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1631,7 +1032,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_truncate_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_truncate_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "L", "resultFormat": "time_series", @@ -1649,9 +1050,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Flock", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1668,7 +1077,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_flock_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_flock_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "M", "resultFormat": "time_series", @@ -1686,9 +1095,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Getattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1705,7 +1122,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_getattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_getattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "N", "resultFormat": "time_series", @@ -1723,9 +1140,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": 
"Number Threshold" }, { + "aggregation": "Last", + "alias": "Fallocate", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1742,7 +1167,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_fallocate_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_fallocate_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "O", "resultFormat": "time_series", @@ -1760,9 +1185,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Create", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1779,7 +1212,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_create_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_create_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "P", "resultFormat": "time_series", @@ -1797,9 +1230,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Link", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1816,7 +1257,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_link_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_link_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "Q", "resultFormat": "time_series", @@ -1834,9 +1275,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Unlink", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1853,7 +1302,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_unlink_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_unlink_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "R", "resultFormat": "time_series", @@ -1871,9 +1320,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Symlink", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1890,7 +1347,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_symlink_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_symlink_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "S", "resultFormat": "time_series", @@ -1908,9 +1365,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + 
"aggregation": "Last", + "alias": "Mkdir", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1927,7 +1392,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mkdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_mkdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "T", "resultFormat": "time_series", @@ -1945,9 +1410,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Rmdir", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -1964,7 +1437,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_rmdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_rmdir_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "U", "resultFormat": "time_series", @@ -1982,9 +1455,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Mknod", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2001,7 +1482,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_mknod_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_mknod_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "V", "resultFormat": "time_series", @@ -2019,9 +1500,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Rename", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2038,7 +1527,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_rename_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_rename_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "W", "resultFormat": "time_series", @@ -2056,9 +1545,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Statfs", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2075,7 +1572,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_statfs_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_statfs_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "X", "resultFormat": "time_series", @@ -2093,9 +1590,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": 
"Setxattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2112,7 +1617,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_setxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_setxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "Y", "resultFormat": "time_series", @@ -2130,9 +1635,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Getxattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2149,7 +1662,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_getxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_getxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, "refId": "Z", "resultFormat": "time_series", @@ -2167,9 +1680,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Listxattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2186,8 +1707,9 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_listxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_listxattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, + "refId": "NA", "resultFormat": "time_series", "select": [ [ @@ -2203,9 +1725,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Removexattr", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2222,8 +1752,9 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_removexattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_removexattr_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, + "refId": "NA", "resultFormat": "time_series", "select": [ [ @@ -2239,9 +1770,17 @@ } ] ], - "tags": [] + "tags": [], + "units": "none", + "valueHandler": "Number Threshold" }, { + "aggregation": "Last", + "alias": "Inode Permission", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "Never", "groupBy": [ { "params": [ @@ -2258,8 +1797,9 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT \"value\" FROM \"client_stats_inode_permission_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", + "query": "SELECT \"value\" * 1000 FROM \"client_stats_inode_permission_mean\" WHERE (\"client_uuid\" =~ /^$client_uuid$/) AND $timeFilter", "rawQuery": true, + "refId": "NA", "resultFormat": "time_series", "select": [ [ @@ -2275,54 +1815,19 @@ } ] ], - "tags": [] + "tags": [], + "units": 
"none", + "valueHandler": "Number Threshold" } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "Average latency of client operations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Latency of Client Operations", + "type": "bargauge" } ], "refresh": "1m", - "schemaVersion": 18, + "schemaVersion": 26, "style": "dark", "tags": [], "templating": { @@ -2330,18 +1835,20 @@ { "allValue": null, "current": { - "text": "lustre", - "value": "lustre" + "selected": false, + "text": "lustre0", + "value": "lustre0" }, "datasource": "$BARRELEYE_DATASOURCE_NAME", - "definition": "SHOW TAG VALUES FROM \"client_stats_open_max\" WITH KEY = fs_name", + "definition": "SHOW TAG VALUES FROM \"client_stats_statfs_max\" WITH KEY = fs_name", + "error": null, "hide": 0, "includeAll": false, "label": "Filesystem Name", "multi": false, "name": "fs_name", "options": [], - "query": "SHOW TAG VALUES FROM \"client_stats_open_max\" WITH KEY = fs_name", + "query": "SHOW TAG VALUES FROM \"client_stats_statfs_max\" WITH KEY = fs_name", "refresh": 1, "regex": "", "skipUrlSync": false, @@ -2355,18 +1862,47 @@ { "allValue": null, "current": { - "text": "ffff8c277475e800", - "value": "ffff8c277475e800" + "selected": false, + "text": "autotest-el7-vm313", + "value": "autotest-el7-vm313" + }, + "datasource": "$BARRELEYE_DATASOURCE_NAME", + "definition": "", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Client", + "multi": false, + "name": "client_host", + "options": [], + "query": "SHOW TAG VALUES FROM \"client_stats_statfs_max\" WITH KEY = fqdn WHERE \"fs_name\" = '$fs_name'", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ffff9cca7941f000", + "value": "ffff9cca7941f000" }, "datasource": "$BARRELEYE_DATASOURCE_NAME", - "definition": "SHOW TAG VALUES FROM \"client_stats_open_max\" WITH KEY = client_uuid WHERE \"fs_name\" = '$fs_name'", + "definition": "", + "error": null, "hide": 0, "includeAll": false, "label": "Client UUID", "multi": false, "name": "client_uuid", "options": [], - "query": "SHOW TAG VALUES FROM \"client_stats_open_max\" WITH KEY = client_uuid WHERE \"fs_name\" = '$fs_name'", + "query": "SHOW TAG VALUES FROM \"client_stats_statfs_max\" WITH KEY = client_uuid WHERE \"fs_name\" = '$fs_name' AND \"fqdn\" = '$client_host'", "refresh": 1, "regex": "", "skipUrlSync": false, @@ -2401,6 +1937,6 @@ }, "timezone": "browser", "title": "Lustre Client", - "uid": "sZOw9ZmGk", - "version": 1 -} + "uid": "0ToQbnkSk", + "version": 31 +} \ No newline at end of file diff --git a/barreleye/lustre-b_es6_0.m4 b/barreleye/lustre-b_es6_0.m4 index d188592..5c59a4f 100644 --- a/barreleye/lustre-b_es6_0.m4 +++ b/barreleye/lustre-b_es6_0.m4 @@ -1162,9 +1162,8 @@ recalc_timing +[[:digit:]]+ samples \[sec\] +([[:digit:]]+).+stats file - 0 - CLIENT_STATS_ITEM_FOUR(4, read_bytes, bytes) - CLIENT_STATS_ITEM_FOUR(4, write_bytes, bytes) 
+ CLIENT_STATS_ITEM_FOUR_BYTES(4, read_bytes, bytes, derive) + CLIENT_STATS_ITEM_FOUR_BYTES(4, write_bytes, bytes, derive) CLIENT_STATS_ITEM_FOUR(4, read, usec) CLIENT_STATS_ITEM_FOUR(4, write, usec) CLIENT_STATS_ITEM_ONE(4, ioctl, reqs) diff --git a/barreleye/lustre-b_es6_1.m4 b/barreleye/lustre-b_es6_1.m4 new file mode 100644 index 0000000..2bea07b --- /dev/null +++ b/barreleye/lustre-b_es6_1.m4 @@ -0,0 +1,1202 @@ +include(`lustre.m4')dnl +HEAD(Lustre-es6_0) + + es6_0 + CLIENT_STATS_MEAN(1, read, usecs) + CLIENT_STATS_MEAN(1, write, usecs) + CLIENT_STATS_MEAN(1, open, usecs) + CLIENT_STATS_MEAN(1, close, usecs) + CLIENT_STATS_MEAN(1, mmap, usecs) + CLIENT_STATS_MEAN(1, page_fault, usecs) + CLIENT_STATS_MEAN(1, page_mkwrite, usecs) + CLIENT_STATS_MEAN(1, seek, usecs) + CLIENT_STATS_MEAN(1, fsync, usecs) + CLIENT_STATS_MEAN(1, readdir, usecs) + CLIENT_STATS_MEAN(1, setattr, usecs) + CLIENT_STATS_MEAN(1, truncate, usecs) + CLIENT_STATS_MEAN(1, flock, usecs) + CLIENT_STATS_MEAN(1, getattr, usecs) + CLIENT_STATS_MEAN(1, fallocate, usecs) + CLIENT_STATS_MEAN(1, create, usecs) + CLIENT_STATS_MEAN(1, link, usecs) + CLIENT_STATS_MEAN(1, unlink, usecs) + CLIENT_STATS_MEAN(1, symlink, usecs) + CLIENT_STATS_MEAN(1, mkdir, usecs) + CLIENT_STATS_MEAN(1, rmdir, usecs) + CLIENT_STATS_MEAN(1, mknod, usecs) + CLIENT_STATS_MEAN(1, rename, usecs) + CLIENT_STATS_MEAN(1, statfs, usecs) + CLIENT_STATS_MEAN(1, setxattr, usecs) + CLIENT_STATS_MEAN(1, getxattr, usecs) + CLIENT_STATS_MEAN(1, listxattr, usecs) + CLIENT_STATS_MEAN(1, removexattr, usecs) + CLIENT_STATS_MEAN(1, inode_permission, usecs) + MATH_ENTRY(1, mdt_filesinfo_total, -, mdt_filesinfo_free, mdt_filesinfo_used, filesused, 1) + MATH_ENTRY(1, mdt_kbytesinfo_total, -, mdt_kbytesinfo_free, mdt_kbytesinfo_used, kbytesused, 1) + MATH_ENTRY(1, ost_filesinfo_total, -, ost_filesinfo_free, ost_filesinfo_used, filesused, 1) + MATH_ENTRY(1, ost_kbytesinfo_total, -, ost_kbytesinfo_free, ost_kbytesinfo_used, kbytesused, 1) + SERVICE_STATS_MEAN(1, mdt, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt, ldlm_ibits_enqueue, reqs) + SERVICE_STATS_MEAN(1, mdt, mds_getattr, usecs) + SERVICE_STATS_MEAN(1, mdt, mds_connect, usecs) + SERVICE_STATS_MEAN(1, mdt, mds_get_root, usecs) + SERVICE_STATS_MEAN(1, mdt, mds_statfs, usecs) + SERVICE_STATS_MEAN(1, mdt, mds_getxattr, usecs) + SERVICE_STATS_MEAN(1, mdt, obd_ping, usecs) + SERVICE_STATS_MEAN(1, mdt_readpage, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_readpage, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_readpage, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_readpage, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_readpage, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt_readpage, mds_close, usecs) + SERVICE_STATS_MEAN(1, mdt_readpage, mds_readpage, usecs) + SERVICE_STATS_MEAN(1, mdt_setattr, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_setattr, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_setattr, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_setattr, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_setattr, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt_fld, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_fld, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_fld, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_fld, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_fld, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt_out, 
req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_out, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_out, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_out, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_out, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt_seqm, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_seqm, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_seqm, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_seqm, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_seqm, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, mdt_seqs, req_waittime, usecs) + SERVICE_STATS_MEAN(1, mdt_seqs, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, mdt_seqs, req_active, reqs) + SERVICE_STATS_MEAN(1, mdt_seqs, req_timeout, sec) + SERVICE_STATS_MEAN(1, mdt_seqs, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ost, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ost, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ost, req_active, reqs) + SERVICE_STATS_MEAN(1, ost, req_timeout, sec) + SERVICE_STATS_MEAN(1, ost, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ost_io, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ost_io, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ost_io, req_active, reqs) + SERVICE_STATS_MEAN(1, ost_io, req_timeout, sec) + SERVICE_STATS_MEAN(1, ost_io, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ost_io, ost_read, usecs) + SERVICE_STATS_MEAN(1, ost_io, ost_write, usecs) + SERVICE_STATS_MEAN(1, ost_io, ost_punch, usecs) + SERVICE_STATS_MEAN(1, ost_create, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ost_create, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ost_create, req_active, reqs) + SERVICE_STATS_MEAN(1, ost_create, req_timeout, sec) + SERVICE_STATS_MEAN(1, ost_create, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ost_seq, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ost_seq, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ost_seq, req_active, reqs) + SERVICE_STATS_MEAN(1, ost_seq, req_timeout, sec) + SERVICE_STATS_MEAN(1, ost_seq, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ldlm_canceld, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ldlm_canceld, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ldlm_canceld, req_active, reqs) + SERVICE_STATS_MEAN(1, ldlm_canceld, req_timeout, sec) + SERVICE_STATS_MEAN(1, ldlm_canceld, reqbuf_avail, bufs) + SERVICE_STATS_MEAN(1, ldlm_cbd, req_waittime, usecs) + SERVICE_STATS_MEAN(1, ldlm_cbd, req_qdepth, reqs) + SERVICE_STATS_MEAN(1, ldlm_cbd, req_active, reqs) + SERVICE_STATS_MEAN(1, ldlm_cbd, req_timeout, sec) + SERVICE_STATS_MEAN(1, ldlm_cbd, reqbuf_avail, bufs) + + + constant + /proc/fs/lustre + + directory + + + constant + osd-ldiskfs + + directory + + + regular_expression + (^.+)-(MDT[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + mdt_index + + + directory + + + constant + quota_slave + + directory + + SUBPATH(6, constant, acct_user, 1) + MODE(6, file, 1) + + mdt_acctuser + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + MDT_ACCTUSER_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTUSER_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTUSER_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_group, 1) + MODE(6, file, 1) + + mdt_acctgroup + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + MDT_ACCTGROUP_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTGROUP_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTGROUP_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_project, 1) + MODE(6, file, 1) + + mdt_acctproject + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + 
MDT_ACCTPROJECT_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTPROJECT_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTPROJECT_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + + + + regular_expression + (^.+)-(OST[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + ost_index + + + directory + + + constant + brw_stats + + file + OST_BRW_STATS_ITEM(5, rpc_bulk, ^pages per bulk .+ +(.+ +)*$, [[:digit:]]+[KM]?, pages, 1) + OST_BRW_STATS_ITEM(5, page_discontiguous_rpc, ^discontiguous pages .+ +(.+ +)*$, [[:digit:]]+[KM]?, pages, 1) + OST_BRW_STATS_ITEM(5, block_discontiguous_rpc, ^discontiguous blocks .+ +(.+ +)*$, [[:digit:]]+[KM]?, blocks, 1) + OST_BRW_STATS_ITEM(5, fragmented_io, ^disk fragmented .+ +(.+ +)*$, [[:digit:]]+[KM]?, fragments, 1) + OST_BRW_STATS_ITEM(5, io_in_flight, ^disk I/Os .+ +(.+ +)*$, [[:digit:]]+[KM]?, ios, 1) + OST_BRW_STATS_ITEM(5, io_time, ^I/O time .+ +(.+ +)*$, [[:digit:]]+[KM]?, milliseconds, 1) + OST_BRW_STATS_ITEM(5, io_size, ^disk I/O size .+ +(.+ +)*$, [[:digit:]]+[KM]?, Bytes, 1) + + + SUBPATH(5, constant, quota_slave, 1) + MODE(5, directory, 1) + + SUBPATH(6, constant, acct_user, 1) + MODE(6, file, 1) + + ost_acctuser + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTUSER_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTUSER_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTUSER_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_group, 1) + MODE(6, file, 1) + + ost_acctgroup + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTGROUP_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTGROUP_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTGROUP_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_project, 1) + MODE(6, file, 1) + + ost_acctproject + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTPROJECT_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTPROJECT_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTPROJECT_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + + + + + constant + osd-zfs + + directory + + + regular_expression + (^.+)-(MDT[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + mdt_index + + + directory + + + constant + quota_slave + + directory + + SUBPATH(6, constant, acct_user, 1) + MODE(6, file, 1) + + zfs_mdt_acctuser + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + MDT_ACCTUSER_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTUSER_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTUSER_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_group, 1) + MODE(6, file, 1) + + zfs_mdt_acctgroup + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + MDT_ACCTGROUP_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTGROUP_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTGROUP_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_project, 1) + MODE(6, file, 1) + + zfs_mdt_acctproject + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + MDT_ACCTPROJECT_FIELD(7, 1, id, string, gauge, 1) + MDT_ACCTPROJECT_FIELD(7, 2, usage_inodes, number, gauge, 1) + MDT_ACCTPROJECT_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + + + + regular_expression + (^.+)-(OST[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + ost_index + + + directory + + SUBPATH(5, constant, quota_slave, 1) + MODE(5, directory, 1) + + SUBPATH(6, constant, acct_user, 1) + MODE(6, file, 1) + + zfs_ost_acctuser + - +id: +(.+) + 
usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTUSER_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTUSER_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTUSER_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_group, 1) + MODE(6, file, 1) + + zfs_ost_acctgroup + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTGROUP_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTGROUP_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTGROUP_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + SUBPATH(6, constant, acct_project, 1) + MODE(6, file, 1) + + zfs_ost_acctproject + - +id: +(.+) + usage: +\{ inodes: +([[:digit:]]+), kbytes: +([[:digit:]]+).+ + OST_ACCTPROJECT_FIELD(7, 1, id, string, gauge, 1) + OST_ACCTPROJECT_FIELD(7, 2, usage_inodes, number, gauge, 1) + OST_ACCTPROJECT_FIELD(7, 3, usage_kbytes, number, gauge, 1) + + + + + + + + constant + mdt + + directory + + + regular_expression + (^.+)-(MDT[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + mdt_index + + + directory + + + constant + recovery_status + + file + RECOVERY_STATUS_ITEM(5, recovery_start, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, recovery_duration, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_CONNECTED_ITEM(5, completed_clients, mdt, 1) + RECOVERY_STATUS_ITEM(5, replayed_requests, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, last_transno, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, time_remaining, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_CONNECTED_ITEM(5, connected_clients, mdt, 1) + RECOVERY_STATUS_ITEM(5, req_replay_clients, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, lock_replay_clients, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, evicted_clients, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, queued_requests, mdt, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, next_transno, mdt, ([[:digit:]]+), number, 1) + + + + + constant + md_stats + + file + MD_STATS_ITEM_V2(5, open, 1) + MD_STATS_ITEM_V2(5, close, 1) + MD_STATS_ITEM_V2(5, mknod, 1) + MD_STATS_ITEM_V2(5, link, 1) + MD_STATS_ITEM_V2(5, unlink, 1) + MD_STATS_ITEM_V2(5, mkdir, 1) + MD_STATS_ITEM_V2(5, rmdir, 1) + MD_STATS_ITEM_V2(5, rename, 1) + MD_STATS_ITEM_V2(5, getattr, 1) + MD_STATS_ITEM_V2(5, setattr, 1) + MD_STATS_ITEM_V2(5, getxattr, 1) + MD_STATS_ITEM_V2(5, setxattr, 1) + MD_STATS_ITEM_V2(5, statfs, 1) + MD_STATS_ITEM_V2(5, sync, 1) + + + SUBPATH(5, constant, exports, 1) + MODE(5, directory, 1) + + TWO_FIELD_SUBPATH(6, regular_expression, (.+)@(.+), mdt_exp_client, mdt_exp_type, 1) + MODE(6, directory, 1) + EXPORT_MD_STATS_ENTRY_V2(6, , 1) + + + + + constant + job_stats + + file + + mdt_jobstats + - +job_id: +(.+) + +snapshot_time: +.+ + open: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + close: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + mknod: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + link: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + unlink: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + mkdir: +\{ samples: +([[:digit:]]+), unit: usecs, min: 
*([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + rmdir: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + rename: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + getattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + setattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + getxattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + setxattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + statfs: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + sync: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + samedir_rename: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + crossdir_rename: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + JOBSTAT_FIELD(6, 1, job_id, string, derive, mdt, jobid, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 2, open, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 7, close, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 12, mknod, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 17, link, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 22, unlink, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 27, mkdir, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 32, rmdir, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 37, rename, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 42, getattr, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 47, setattr, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 52, getxattr, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 57, setxattr, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 62, statfs, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 67, sync, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 72, samedir_rename, number, derive, mdt, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 77, crossdir_rename, number, derive, mdt, 1) + + + + + + + constant + obdfilter + + directory + + + regular_expression + (^.+)-(OST[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + ost_index + + + directory + + + constant + recovery_status + + file + RECOVERY_STATUS_ITEM(5, recovery_start, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, recovery_duration, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_CONNECTED_ITEM(5, completed_clients, ost, 1) + RECOVERY_STATUS_ITEM(5, replayed_requests, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, last_transno, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, time_remaining, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_CONNECTED_ITEM(5, connected_clients, ost, 1) + RECOVERY_STATUS_ITEM(5, req_replay_clients, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, 
lock_replay_clients, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, evicted_clients, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, queued_requests, ost, ([[:digit:]]+), number, 1) + RECOVERY_STATUS_ITEM(5, next_transno, ost, ([[:digit:]]+), number, 1) + + + + + constant + stats + + file + OST_STATS_ITEM_RW(5, read, 1) + OST_STATS_ITEM_RW(5, write, 1) + OST_STATS_ITEM_V2(5, getattr, usecs, 1) + OST_STATS_ITEM_V2(5, setattr, usecs, 1) + OST_STATS_ITEM_V2(5, punch, usecs, 1) + OST_STATS_ITEM_V2(5, sync, usecs, 1) + OST_STATS_ITEM_V2(5, destroy, usecs, 1) + OST_STATS_ITEM_V2(5, create, usecs, 1) + OST_STATS_ITEM_V2(5, statfs, usecs, 1) + OST_STATS_ITEM_V2(5, get_info, usecs, 1) + OST_STATS_ITEM_V2(5, set_info_async, usecs, 1) + OST_STATS_ITEM_V2(5, quotactl, usecs, 1) + + + SUBPATH(5, constant, exports, 1) + MODE(5, directory, 1) + + TWO_FIELD_SUBPATH(6, regular_expression, (.+)@(.+), ost_exp_client, ost_exp_type, 1) + MODE(6, directory, 1) + EXPORT_OST_STATS_ENTRY_V2(6, , 1) + + + + + constant + job_stats + + file + + ost_jobstats + - +job_id: +(.+) + +snapshot_time: +.+ + read_bytes: +\{ samples: +([[:digit:]]+), unit: bytes, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+).+ } + write_bytes: +\{ samples: +([[:digit:]]+), unit: bytes, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+).+ } + read: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + write: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + getattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + setattr: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + punch: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + sync: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + destroy: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + create: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + statfs: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + get_info: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + set_info: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + quotactl: +\{ samples: +([[:digit:]]+), unit: usecs, min: *([[:digit:]]+), max: *([[:digit:]]+), sum: *([[:digit:]]+), sumsq: *([[:digit:]]+) } + JOBSTAT_FIELD(6, 1, job_id, string, derive, ost, jobid, 1) + OST_JOBSTAT_FIELD(6, 2, read_samples, number, derive, 1) + OST_JOBSTAT_FIELD_BYTES(6, 3, min_read_bytes, number, derive, 1) + OST_JOBSTAT_FIELD_BYTES(6, 4, max_read_bytes, number, derive, 1) + OST_JOBSTAT_FIELD_BYTES(6, 5, sum_read_bytes, number, derive, 1) + OST_JOBSTAT_FIELD(6, 6, write_samples, number, derive, 1) + OST_JOBSTAT_FIELD_BYTES(6, 7, min_write_bytes, number, derive, 1) + 
OST_JOBSTAT_FIELD_BYTES(6, 8, max_write_bytes, number, derive, 1) + OST_JOBSTAT_FIELD_BYTES(6, 9, sum_write_bytes, number, derive, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 10, read, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 15, write, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 20, getattr, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 25, setattr, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 30, punch, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 35, sync, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 40, destroy, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 45, create, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 50, statfs, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 55, get_info, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 60, set_info, number, derive, ost, 1) + JOBSTAT_FIELD_META_OPERATIONS(6, 65, quotactl, number, derive, ost, 1) + + + + + + + constant + mdc + + directory + + + regular_expression + (^.+)-(MDT.)+-(mdc.+)$ + + 1 + fs_name + + + 2 + mdt_index + + + 3 + mdc_tag + + + directory + MDC_MDT_CONSTANT_FILE_ENTRY(4, max_rpcs_in_flight, (.+), mdc_rpcs, gauge, max_rpcs_in_flight, max_rpcs_in_flight, 1) + + + + + + constant + /sys/fs/lustre + + directory + + + constant + osd-ldiskfs + + directory + + + regular_expression + (^.+)-(MDT[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + mdt_index + + + directory + FILES_KBYTES_INFO_ENTRIES(4, mdt, ${subpath:fs_name}-${subpath:mdt_index}, 1) + + + + regular_expression + (^.+)-(OST[0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + ost_index + + + directory + FILES_KBYTES_INFO_ENTRIES(4, ost, ${subpath:fs_name}-${subpath:ost_index}, 1) + + + + + constant + ldlm + + directory + + + constant + namespaces + + directory + + + regular_expression + ^filter-(.+)-(OST[0-9a-fA-F]+)_UUID$ + + 1 + fs_name + + + 2 + ost_index + + + directory + LDLM_LOCK_INFO_ENTRIES(5, ost, ${subpath:fs_name}-${subpath:ost_index}, 1) + + + + regular_expression + ^mdt-(.+)-(MDT[0-9a-fA-F]+)_UUID$ + + 1 + fs_name + + + 2 + mdt_index + + + directory + LDLM_LOCK_INFO_ENTRIES(5, mdt, ${subpath:fs_name}-${subpath:mdt_index}, 1) + + + + + + + constant + /sys/kernel/debug/lustre + + directory + + + constant + mds + + directory + + + constant + MDS + + directory + + + constant + mdt + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt, reqbuf_avail, bufs, 1) + SERVICE_STATS_ITEM(6, mdt, ldlm_ibits_enqueue, reqs, 1) + SERVICE_STATS_ITEM(6, mdt, mds_getattr, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, mds_connect, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, mds_get_root, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, mds_statfs, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, mds_getxattr, usecs, 1) + SERVICE_STATS_ITEM(6, mdt, obd_ping, usecs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, normal_metadata_ops, gauge, 1) + + + + constant + mdt_readpage + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_readpage, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_readpage, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_readpage, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_readpage, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_readpage, reqbuf_avail, bufs, 1) + SERVICE_STATS_ITEM(6, mdt_readpage, mds_close, usecs, 1) + 
SERVICE_STATS_ITEM(6, mdt_readpage, mds_readpage, usecs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, readpage, gauge, 1) + + + + constant + mdt_setattr + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_setattr, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_setattr, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_setattr, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_setattr, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_setattr, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, setattr_service, gauge, 1) + + + + constant + mdt_fld + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_fld, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_fld, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_fld, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_fld, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_fld, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, fld_service, gauge, 1) + + + + constant + mdt_out + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_out, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_out, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_out, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_out, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_out, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, metadata_out_service, gauge, 1) + + + + constant + mdt_seqm + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_seqm, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_seqm, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_seqm, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_seqm, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_seqm, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, metadata_seqm_service, gauge, 1) + + + + constant + mdt_seqs + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, mdt_seqs, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, mdt_seqs, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_seqs, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, mdt_seqs, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, mdt_seqs, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, mds, mds, metadata_seqs_service, gauge, 1) + + + + + + constant + ost + + directory + + + constant + OSS + + directory + + + constant + ost + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ost, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ost, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ost, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, ost, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ost, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, ost, ost, normal_data, gauge, 1) + + + + constant + ost_io + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ost_io, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ost_io, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ost_io, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, ost_io, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ost_io, reqbuf_avail, bufs, 1) + SERVICE_STATS_ITEM(6, ost_io, ost_read, usecs, 1) + SERVICE_STATS_ITEM(6, ost_io, ost_write, usecs, 1) + SERVICE_STATS_ITEM(6, ost_io, ost_punch, usecs, 1) + + THREAD_INFO_ENTRIES(5, ost_io, ost, bulk_data_IO, gauge, 1) + + + + constant + ost_create + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ost_create, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ost_create, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ost_create, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, 
ost_create, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ost_create, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, ost_create, ost, obj_pre-creation_service, gauge, 1) + + + + constant + ost_seq + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ost_seq, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ost_seq, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ost_seq, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, ost_seq, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ost_seq, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, ost_seq, ost, seq_service, gauge, 1) + + + + + + constant + ldlm + + directory + + + constant + services + + directory + + + constant + ldlm_canceld + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ldlm_canceld, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ldlm_canceld, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ldlm_canceld, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, ldlm_canceld, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ldlm_canceld, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, ldlm_cancel, ldlm_service, lock_cancel, gauge, 1) + + + + constant + ldlm_cbd + + directory + + + constant + stats + + file + 0 + SERVICE_STATS_ITEM(6, ldlm_cbd, req_waittime, usecs, 1) + SERVICE_STATS_ITEM(6, ldlm_cbd, req_qdepth, reqs, 1) + SERVICE_STATS_ITEM(6, ldlm_cbd, req_active, reqs, 1) + SERVICE_STATS_ITEM(6, ldlm_cbd, req_timeout, sec, 1) + SERVICE_STATS_ITEM(6, ldlm_cbd, reqbuf_avail, bufs, 1) + + THREAD_INFO_ENTRIES(5, ldlm_cbd, ldlm_service, lock_grant, gauge, 1) + + + + + constant + namespaces + + directory + + + regular_expression + ^filter-(.+)-(OST[0-9a-fA-F]+)_UUID$ + + 1 + fs_name + + + 2 + ost_index + + + directory + + + constant + pool + + directory + + + + constant + stats + + file + + ost_ldlm_stats + snapshot_time +([[:digit:]]+).+ +granted +[[:digit:]]+ samples \[locks\] +([[:digit:]]+).+ +grant +[[:digit:]]+ samples \[locks\] +([[:digit:]]+).+ +cancel +[[:digit:]]+ samples \[locks\] +([[:digit:]]+).+ +grant_rate +[[:digit:]]+ samples \[locks\/s\] +([[:digit:]]+).+ +cancel_rate +[[:digit:]]+ samples \[locks\/s\] +([[:digit:]]+).+ +grant_plan +[[:digit:]]+ samples \[locks\/s\] +([[:digit:]]+).+ +slv +[[:digit:]]+ samples \[slv\] +([[:digit:]]+).+ +recalc_freed +[[:digit:]]+ samples \[locks\] +([[:digit:]]+).+ +recalc_timing +[[:digit:]]+ samples \[sec\] +([[:digit:]]+).+ + LDLM_STATS_FIELD(8, 1, snapshot_time, number, gauge) + LDLM_STATS_FIELD(8, 2, granted, number, gauge) + LDLM_STATS_FIELD(8, 3, grant, number, gauge) + LDLM_STATS_FIELD(8, 4, cancel, number, gauge) + LDLM_STATS_FIELD(8, 5, grant_rate, number, gauge) + LDLM_STATS_FIELD(8, 6, cancel_rate, number, gauge) + LDLM_STATS_FIELD(8, 7, grant_plan, number, gauge) + LDLM_STATS_FIELD(8, 8, slv, number, gauge) + LDLM_STATS_FIELD(8, 9, recalc_freed, number, gauge) + LDLM_STATS_FIELD(8, 10, recalc_timing, number, gauge) + + + + + + + + + + constant + /sys/kernel/debug/lustre/llite + + directory + + + regular_expression + (^.+)-([0-9a-fA-F]+$) + + 1 + fs_name + + + 2 + client_uuid + + + directory + + + constant + stats + + file + CLIENT_STATS_ITEM_FOUR_BYTES(4, read_bytes, bytes, derive) + CLIENT_STATS_ITEM_FOUR_BYTES(4, write_bytes, bytes, derive) + CLIENT_STATS_ITEM_FOUR(4, read, usecs) + CLIENT_STATS_ITEM_FOUR(4, write, usecs) + CLIENT_STATS_ITEM_ONE(4, ioctl, reqs) + CLIENT_STATS_ITEM_FOUR(4, open, usecs) + CLIENT_STATS_ITEM_FOUR(4, close, usecs) + CLIENT_STATS_ITEM_FOUR(4, mmap, usecs) + CLIENT_STATS_ITEM_FOUR(4, page_fault, usecs) + 
CLIENT_STATS_ITEM_FOUR(4, page_mkwrite, usecs) + CLIENT_STATS_ITEM_FOUR(4, seek, usecs) + CLIENT_STATS_ITEM_FOUR(4, fsync, usecs) + CLIENT_STATS_ITEM_FOUR(4, readdir, usecs) + CLIENT_STATS_ITEM_FOUR(4, setattr, usecs) + CLIENT_STATS_ITEM_FOUR(4, truncate, usecs) + CLIENT_STATS_ITEM_FOUR(4, flock, usecs) + CLIENT_STATS_ITEM_FOUR(4, getattr, usecs) + CLIENT_STATS_ITEM_FOUR(4, fallocate, usecs) + CLIENT_STATS_ITEM_FOUR(4, create, usecs) + CLIENT_STATS_ITEM_FOUR(4, link, usecs) + CLIENT_STATS_ITEM_FOUR(4, unlink, usecs) + CLIENT_STATS_ITEM_FOUR(4, symlink, usecs) + CLIENT_STATS_ITEM_FOUR(4, mkdir, usecs) + CLIENT_STATS_ITEM_FOUR(4, rmdir, usecs) + CLIENT_STATS_ITEM_FOUR(4, mknod, usecs) + CLIENT_STATS_ITEM_FOUR(4, rename, usecs) + CLIENT_STATS_ITEM_FOUR(4, statfs, usecs) + CLIENT_STATS_ITEM_FOUR(4, setxattr, usecs) + CLIENT_STATS_ITEM_FOUR(4, getxattr, usecs) + CLIENT_STATS_ITEM_ONE(4, getxattr_hits, reqs) + CLIENT_STATS_ITEM_FOUR(4, listxattr, usecs) + CLIENT_STATS_ITEM_FOUR(4, removexattr, usecs) + CLIENT_STATS_ITEM_FOUR(4, inode_permission, usecs) + + + + + diff --git a/barreleye/lustre.m4 b/barreleye/lustre.m4 index 8ceaa4e..935f525 100644 --- a/barreleye/lustre.m4 +++ b/barreleye/lustre.m4 @@ -603,6 +603,23 @@ OPTION($1 + 1, type_instance, $3, 0) OPTION($1 + 1, tsdb_name, client_stats_$3, 0) OPTION($1 + 1, tsdb_tags, fs_name=${subpath:fs_name} client_uuid=${subpath:client_uuid}, 0)', 0)')dnl dnl +dnl CLIENT_STATS_ITEM_FOUR_BYTES is differnet with CLIENT_STATS_ITEM_FOUR +dnl in the sense that it saves derive of sum field rather than gauge. This +dnl is useful to get read or write bandwidths. +dnl $1: number of INDENT +dnl $2: name of CLIENT_STATS_ITEM_FOUR_BYTES +dnl $3: unit of ITEM +define(`CLIENT_STATS_ITEM_FOUR_BYTES', + `ELEMENT($1, item, + `NAME($1 + 1, client_stats_$2, 1) +PATTERN($1 + 1, `$2 +([[:digit:]]+) samples \[$3\] ([[:digit:]]+) ([[:digit:]]+) ([[:digit:]]+)', 0) +CLIENT_STATS_FIELD($1 + 1, 1, $2_samples, number, derive) +CLIENT_STATS_FIELD($1 + 1, 2, $2_min, number, gauge) +CLIENT_STATS_FIELD($1 + 1, 3, $2_max, number, gauge) +CLIENT_STATS_FIELD($1 + 1, 4, $2_sum, number, derive) +', 1)')dnl +dnl +dnl dnl $1: number of INDENT dnl $2: name of CLIENT_STATS_ITEM dnl $3: unit of ITEM diff --git a/bash_completion/Makefile.am b/bash_completion/Makefile.am index 9b3ca95..d0c1fc0 100644 --- a/bash_completion/Makefile.am +++ b/bash_completion/Makefile.am @@ -1,12 +1,13 @@ # Somehow rpmbuild does not include pip3's library to Python3's sys.path # which will cause missing module. So add it explicitly here. PIP3_PACKAGE_PATH = /usr/local/lib/python3.6/site-packages:/usr/local/lib64/python3.6/site-packages - +COMPLETIONS = +if ENABLE_BARRELE +COMPLETIONS += barrele barrele: PYTHONPATH=$(PIP3_PACKAGE_PATH) ../barrele -- --completion > barrele.tmp || exit 1; \ mv barrele.tmp barrele - -COMPLETIONS = barrele +endif clean-local: rm -f $(COMPLETIONS) diff --git a/configure.ac b/configure.ac index 934cffe..2dd1af4 100644 --- a/configure.ac +++ b/configure.ac @@ -1,9 +1,12 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT([coral], [m4_esyscmd(./coral version show)], [Coralfs], [coral]) +AC_INIT([coral],[m4_esyscmd(./coral version show)],[Coralfs],[coral]) AC_CONFIG_SRCDIR(./pycoral/ssh_host.py) -AC_CANONICAL_SYSTEM +AC_CANONICAL_TARGET AC_CONFIG_HEADERS(config.h) +dnl To avoid warning about $(wildcard ...) 
+AC_SUBST([DOLLAR],[$]) + AM_INIT_AUTOMAKE([tar-pax dist-bzip2 foreign subdir-objects]) AM_EXTRA_RECURSIVE_TARGETS([check_clean]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) @@ -19,10 +22,7 @@ AC_PROG_MAKE_SET AM_PROG_CC_C_O AM_CONDITIONAL(COMPILER_IS_GCC, test "x$GCC" = "xyes") -AC_DISABLE_STATIC -AC_PROG_LIBTOOL -AC_PROG_LEX -AC_PROG_YACC +LT_INIT([disable-static]) PKG_PROG_PKG_CONFIG # @@ -84,20 +84,29 @@ AC_DEFUN([MB_ARG_CANON_PATH], [ __MB_ARG_CANON_PATH([$1], m4_translit([$1], [-.], [__]), [$2]) ]) -dnl We may be able to use older versions, but I have not verified that -PKG_CHECK_MODULES([ext2fs], [ext2fs >= 1.42.7 com_err >= 1.42.7]) PKG_CHECK_MODULES([json_c], [json-c >= 0.11]) # -------- check for distro version -------- AC_MSG_CHECKING([for distro version]) DISTRO=$(sh detect-distro.sh) DISTRO_NAME=$(echo $DISTRO | awk -F '-' '{print $1}') -if [[[ "$DISTRO_NAME" != "rhel" ]]]; then +if [[[ "$DISTRO_NAME" = "rhel" ]]]; then + DISTRO_RELEASE=$(echo $DISTRO | awk -F 'rhel-' '{print $2}' | awk -F '.' '{print $1}') + DISTRO_SHORT=el$DISTRO_RELEASE + make_rpms="yes" +elif [[[ "$DISTRO_NAME" = "ubuntu" ]]]; then + DISTRO_RELEASE=$(echo $DISTRO | awk -F 'ubuntu-' '{print $2}' | awk -F '.' '{print $1}') + if [[[ "$DISTRO_RELEASE" != "22" ]]]; then + AC_MSG_ERROR([$DISTRO_RELEASE of Ubuntu is not a supported distro.]) + fi + DISTRO_SHORT=ubuntu2204 + make_debs="yes" +else AC_MSG_ERROR([$DISTRO_NAME is not a supported distro.]) fi -DISTRO_RELEASE=$(echo $DISTRO | awk -F 'rhel-' '{print $2}' | awk -F '.' '{print $1}') -DISTRO_SHORT=el$DISTRO_RELEASE AC_MSG_RESULT([$DISTRO_SHORT]) +AM_CONDITIONAL(MAKE_DEBS, test "$make_debs" = "yes") +AM_CONDITIONAL(MAKE_RPMS, test "$make_rpms" = "yes") AC_SUBST(DISTRO_SHORT) # ------- check for target_cpu -------- @@ -112,7 +121,7 @@ fi # -------- check whether enable zfs support -------- AC_MSG_CHECKING([whether enable zfs support]) AC_ARG_ENABLE([zfs], - AC_HELP_STRING([--enable-zfs], + AS_HELP_STRING([--enable-zfs], [enable zfs support]), [], [enable_zfs="no"]) AC_MSG_RESULT([$enable_zfs]) @@ -179,7 +188,7 @@ AS_IF([test "x$enable_zfs" = xyes ], [ # -------- whether enable Barreleye plugin -------- AC_MSG_CHECKING([whether enable Barreleye plugin]) AC_ARG_ENABLE([barrele], - AC_HELP_STRING([--disable-barrele], + AS_HELP_STRING([--disable-barrele], [disable Barreleye plugin]), [], [enable_barrele="yes"]) AC_MSG_RESULT([$enable_barrele]) @@ -190,10 +199,8 @@ AM_CONDITIONAL(ENABLE_BARRELE, test "$enable_barrele" = "yes") # -------- check for ISO cache dir -------- AC_MSG_CHECKING([for ISO cache dir]) ISO_CACHE_PATH="/var/log/coral/build_cache/release/iso_cache" -AS_IF([test "$enable_devel" != "no"], - [ISO_CACHE_PATH="/var/log/coral/build_cache/devel/iso_cache"]) AC_ARG_WITH([iso-cache], - AC_HELP_STRING([--with-iso-cache=path], + AS_HELP_STRING([--with-iso-cache=path], [set path of ISO cache dir]), [MB_ARG_CANON_PATH([iso-cache], [ISO_CACHE_PATH])], []) diff --git a/coral.spec.in b/coral.spec.in index 603e34b..60a70de 100644 --- a/coral.spec.in +++ b/coral.spec.in @@ -64,8 +64,8 @@ mkdir -p $RPM_BUILD_ROOT%{_datadir}/bash-completion/completions %if %{with barrele} cp dist/barrele $RPM_BUILD_ROOT%{_bindir} -cp example_configs/barreleye.toml \ - $RPM_BUILD_ROOT%{_sysconfdir}/coral/barreleye.conf.example +cp example_configs/barreleye.conf.example \ + $RPM_BUILD_ROOT%{_sysconfdir}/coral install -g 0 -o 0 -m 0644 bash_completion/barrele \ $RPM_BUILD_ROOT%{_datadir}/bash-completion/completions mkdir -p 
$RPM_BUILD_ROOT%{_sharedstatedir}/coral/barrele/xmls diff --git a/detect-distro.sh b/detect-distro.sh index 8bbdd36..ecff91e 100755 --- a/detect-distro.sh +++ b/detect-distro.sh @@ -30,6 +30,9 @@ if which lsb_release >/dev/null 2>&1; then "Fedora") name="fc" ;; + "Ubuntu") + name="ubuntu" + ;; *) fatal 1 "I don't know what distro name $name and version $version is.\nEither update autodetect_distro() or use the --distro argument." ;; diff --git a/example_configs/barreleye.toml b/example_configs/barreleye.conf.example similarity index 98% rename from example_configs/barreleye.toml rename to example_configs/barreleye.conf.example index 7fe0942..2ff535e 100644 --- a/example_configs/barreleye.toml +++ b/example_configs/barreleye.conf.example @@ -36,7 +36,7 @@ jobstat_pattern = "unknown" # The Lustre version to use, if the Lustre RPMs installed on the agent(s) # is not with the supported version. # -# To get the list of suppoerted versions, please run "barrele lustre_versions". +# To get the list of supported versions, please run "barrele lustre_versions". # # Default value: "2.12" lustre_fallback_version = "2.12" diff --git a/pybarrele/barrele.py b/pybarrele/barrele.py index 1f66f4a..b8823bc 100644 --- a/pybarrele/barrele.py +++ b/pybarrele/barrele.py @@ -803,6 +803,21 @@ def stop(self, host): ret = barreleye_instance.bei_stop_agents(log, hostnames) cmd_general.cmd_exit(log, ret) + def install(self): + """ + Install the Barreleye agent on the local host. + + This command can be run on a host that has not been configured + as agent in barreleye.conf. It is useful when installing standalone + agents on hosts with distro of Ubuntu. + """ + log, barreleye_instance = init_env(self._bac_config_fpath, + self._bac_logdir, + self._bac_log_to_file, + self._bac_iso) + ret = barreleye_instance.bei_install_agent_locally(log) + cmd_general.cmd_exit(log, ret) + class BarreleCommand(): """ diff --git a/pybarrele/barrele_agent.py b/pybarrele/barrele_agent.py index 01b15c2..e191b3b 100644 --- a/pybarrele/barrele_agent.py +++ b/pybarrele/barrele_agent.py @@ -19,7 +19,7 @@ def __init__(self, host, barreleye_server, enable_disk=False, enable_lustre_oss=True, enable_lustre_mds=True, enable_lustre_client=False, enable_infiniband=False): - # Barreleye server with thye of BarreleServer + # Barreleye server with type of BarreleServer self.bea_barreleye_server = barreleye_server # Host to run commands. 
self.bea_host = host @@ -75,7 +75,9 @@ def _bea_sanity_check(self, log): return -1 distro = self.bea_host.sh_distro(log) - if distro not in [ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8]: + if distro not in (ssh_host.DISTRO_RHEL7, + ssh_host.DISTRO_RHEL8, + ssh_host.DISTRO_UBUNTU2204): log.cl_error("host [%s] has unsupported distro [%s]", self.bea_host.sh_hostname, distro) return -1 @@ -112,7 +114,7 @@ def _bea_sanity_check(self, log): return -1 return 0 - def _bea_check_lustre_version(self, log, lustre_fallback_version): + def _bea_check_lustre_version_rpm(self, log, lustre_fallback_version): """ Check the Lustre version according to the installed RPMs """ @@ -149,18 +151,97 @@ def _bea_check_lustre_version(self, log, lustre_fallback_version): skip_kernel=True, skip_test=True) if version is None: - log.cl_warning("failed to match Lustre version according to RPM " - "names on host [%s], using default [%s]", - self.bea_host.sh_hostname, - lustre_fallback_version.lv_name) - self.bea_lustre_version = lustre_fallback_version - else: + version, _ = lustre_version.match_lustre_version_from_rpms(log, + rpm_fnames, + client=True) + if version is None: + log.cl_warning("failed to match Lustre version according to RPM " + "names on host [%s], using default [%s]", + self.bea_host.sh_hostname, + lustre_fallback_version.lv_name) + self.bea_lustre_version = lustre_fallback_version + if version is not None: log.cl_info("detected Lustre version [%s] on host [%s]", version.lv_name, self.bea_host.sh_hostname) self.bea_lustre_version = version return 0 + def _bea_check_lustre_version_deb(self, log, lustre_fallback_version): + """ + Check the Lustre version according to the installed debs + """ + # pylint: disable=too-many-return-statements,too-many-branches + command = ("apt list --installed | grep lustre-client-modules") + retval = self.bea_host.sh_run(log, command) + if (retval.cr_exit_status == 1 and retval.cr_stdout == ""): + log.cl_info("Lustre deb is not installed on host [%s], " + "using default [%s]", + self.bea_host.sh_hostname, + lustre_fallback_version.lv_name) + self.bea_lustre_version = lustre_fallback_version + return 0 + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + self.bea_host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + deb_lines = retval.cr_stdout.splitlines() + if len(deb_lines) != 1: + log.cl_error("multiple lines outputed by command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + self.bea_host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + deb_line = deb_lines[0] + fields = deb_line.split() + if len(fields) != 4: + log.cl_error("unexpected field number outputed by command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + self.bea_host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + version = fields[1] + lversion = lustre_version.match_lustre_version_from_deb(log, version) + if lversion is None: + log.cl_error("failed to detect Lustre version on host [%s]", + self.bea_host.sh_hostname) + return -1 + + log.cl_info("Lustre version [%s] detected on host [%s]", + lversion.lv_name, + self.bea_host.sh_hostname) + self.bea_lustre_version = lversion + return 0 + + def _bea_check_lustre_version(self, log, lustre_fallback_version): + """ + Check the Lustre version according to the installed RPMs or 
debs + """ + host = self.bea_host + distro = host.sh_distro(log) + + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + return self._bea_check_lustre_version_rpm(log, lustre_fallback_version) + if distro in (ssh_host.DISTRO_UBUNTU2204): + return self._bea_check_lustre_version_deb(log, lustre_fallback_version) + + log.cl_error("distro [%s] of host [%s] is not supported", + distro, host.sh_hostname) + return -1 + def _bea_generate_collectd_config(self, log, barreleye_instance, collectd_test=False): """ @@ -223,14 +304,6 @@ def bea_generate_configs(self, log, barreleye_instance): "usage") return -1 self.bea_collectd_config_for_production = collectd_config - - # Check that needed collectd RPMs are installed - for rpm_type in self.bea_needed_collectd_rpm_types: - if rpm_type not in barreleye_instance.bei_collectd_rpm_type_dict: - log.cl_error("needed Collectd RPM [%s] of agent [%s] does not " - "exist", - rpm_type, self.bea_host.sh_hostname) - return -1 return 0 def _bea_influxdb_measurement_check(self, log, measurement_name, tags): @@ -350,6 +423,19 @@ def bea_collectd_send_config(self, log, barreleye_instance, Dump and send the collectd.conf to the agent host """ host = self.bea_host + local_host = barreleye_instance.bei_local_host + command = "mkdir -p %s" % barreleye_instance.bei_workspace + retval = local_host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + local_host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + fpath = barreleye_instance.bei_workspace + "/" if test_config: fpath += barrele_collectd.COLLECTD_CONFIG_TEST_FNAME @@ -361,7 +447,11 @@ def bea_collectd_send_config(self, log, barreleye_instance, collectd_config.cdc_dump(fpath) - etc_path = "/etc/collectd.conf" + distro = self.bea_host.sh_distro(log) + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + etc_path = "/etc/collectd.conf" + elif distro in (ssh_host.DISTRO_UBUNTU2204): + etc_path = "/etc/collectd/collectd.conf" ret = host.sh_send_file(log, fpath, etc_path) if ret: log.cl_error("failed to send file [%s] on local host [%s] to " diff --git a/pybarrele/barrele_collectd.py b/pybarrele/barrele_collectd.py index 5b51134..c621ba1 100644 --- a/pybarrele/barrele_collectd.py +++ b/pybarrele/barrele_collectd.py @@ -26,6 +26,7 @@ XML_FNAME_ES5_1 = "lustre-b_es5_1.xml" XML_FNAME_ES5_2 = "lustre-b_es5_2.xml" XML_FNAME_ES6_0 = "lustre-b_es6_0.xml" +XML_FNAME_ES6_1 = "lustre-b_es6_1.xml" XML_FNAME_2_13 = "lustre-2.13.xml" XML_FNAME_IME_1_1 = "ime-1.1.xml" XML_FNAME_IME_1_2 = "ime-1.2.xml" @@ -33,7 +34,7 @@ SUPPORTED_ZFS_XML_FNAMES = [XML_FNAME_ES3, XML_FNAME_ES4, XML_FNAME_2_12, XML_FNAME_ES5_1, XML_FNAME_ES5_2, XML_FNAME_2_13, - XML_FNAME_ES6_0] + XML_FNAME_ES6_0, XML_FNAME_ES6_1] def lustre_version_xml_fname(log, version, quiet=False): @@ -48,6 +49,8 @@ def lustre_version_xml_fname(log, version, quiet=False): xml_fname = XML_FNAME_ES5_2 elif version.lv_name == lustre_version.LUSTRE_VERSION_NAME_ES6_0: xml_fname = XML_FNAME_ES6_0 + elif version.lv_name == lustre_version.LUSTRE_VERSION_NAME_ES6_1: + xml_fname = XML_FNAME_ES6_1 else: if not quiet: log.cl_error("unsupported Lustre version of [%s]", @@ -937,7 +940,7 @@ def cdc_plugin_lustre(self, log, version, enable_lustre_oss=False, self.cdc_jobstat_pattern) return -1 - config += """ + config += """ Type "mdt_stats_req_waittime" @@ -1295,24 +1298,106 @@ def cdc_plugin_infiniband(self): 
barreleye_agent.bea_needed_collectd_rpm_types.append(rpm_name) -def collectd_rpm_type_from_name(log, name): +def collectd_package_type_from_name(log, name): """ - Return Collectd type from full RPM name or RPM fname - The Collectd RPM types. The RPM type is the minimum string + Return Collectd type from full RPM/deb name or RPM/deb fname + The Collectd RPM/deb types. The RPM type is the minimum string that yum could understand and find the RPM. For example: libcollectdclient-5.11.0...rpm has a type of libcollectdclient; collectd-5.11.0...rpm has a type of collectd; collectd-disk-5.11.0...rpm has a type of collectd-disk. + + Example debs: + collectd_5.12.0.brl3_amd64.deb + collectd-core_5.12.0.brl3_amd64.deb + collectd-dev_5.12.0.brl3_all.deb + collectd-utils_5.12.0.brl3_amd64.deb + libcollectdclient1_5.12.0.brl3_amd64.deb + libcollectdclient-dev_5.12.0.brl3_amd64.deb """ if ((not name.startswith("collectd")) and (not name.startswith("libcollectdclient"))): return None - collectd_pattern = (r"^(?P<type>\S+)-(\d+)\.(\d+).+") + collectd_pattern = (r"^(?P<type>\S+)[-_](\d+)\.(\d+).+") collectd_regular = re.compile(collectd_pattern) match = collectd_regular.match(name) if match is None: log.cl_error("name [%s] starts with [collectd] but does not match " - "the RPM pattern", name) + "the package pattern", name) return None return match.group("type") + + +def get_collectd_package_type_dict(log, host, packages_dir): + """ + Return a dict. Key is the RPM/deb type, value is the file name. + + The RPM type is the minimum string that yum could understand and + find the RPM. + + For example: + libcollectdclient-5.11.0...rpm has a type of libcollectdclient; + collectd-5.11.0...rpm has a type of collectd; + collectd-disk-5.11.0...rpm has a type of collectd-disk. + """ + fnames = host.sh_get_dir_fnames(log, packages_dir) + if fnames is None: + log.cl_error("failed to get fnames under dir [%s] on " + "host [%s]", packages_dir, host.sh_hostname) + return None + collectd_package_type_dict = {} + for fname in fnames: + if ((not fname.startswith("collectd")) and + (not fname.startswith("libcollectdclient"))): + continue + package_type = collectd_package_type_from_name(log, fname) + if package_type is None: + log.cl_error("failed to get the package type from name [%s]", + fname) + return None + if package_type in collectd_package_type_dict: + log.cl_error("both Collectd packages [%s] and [%s] match " + "type [%s]", fname, + collectd_package_type_dict[package_type], + package_type) + return None + + collectd_package_type_dict[package_type] = fname + log.cl_debug("Collectd package [%s] is found under dir [%s] on local " + "host [%s]", package_type, packages_dir, + host.sh_hostname) + return collectd_package_type_dict + + +def collectd_debs_install(log, host, packages_dir): + """ + Install all the Collectd debs under the package dir + """ + # Remove the existing collectd configuration to avoid failure of starting + # collectd service when installing Collectd deb file. 
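A standalone sketch (not part of the patch) of how the package-type pattern above is expected to split RPM and deb file names; the file names are hypothetical examples in the spirit of the docstring, and the group name "type" mirrors the match.group("type") call.

import re

# Same pattern as in collectd_package_type_from_name(): the longest leading part
# that is followed by "-" or "_" and a numeric <major>.<minor> version is the type.
PACKAGE_TYPE_PATTERN = re.compile(r"^(?P<type>\S+)[-_](\d+)\.(\d+).+")

for fname in ("collectd-disk-5.12.0.brl3-1.el8.x86_64.rpm",
              "libcollectdclient1_5.12.0.brl3_amd64.deb"):
    match = PACKAGE_TYPE_PATTERN.match(fname)
    print(fname, "->", match.group("type") if match else None)
# Prints "collectd-disk" and "libcollectdclient1" respectively.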
+ command = ("rm -f /etc/collectd/collectd.conf") + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + command = ("dpkg -i %s/collectd*.deb %s/libcollectdclient*.deb" % + (packages_dir, packages_dir)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 diff --git a/pybarrele/barrele_constant.py b/pybarrele/barrele_constant.py index e41738e..9a7a03c 100644 --- a/pybarrele/barrele_constant.py +++ b/pybarrele/barrele_constant.py @@ -30,6 +30,8 @@ # RPMs needed to download for Barreleye server and agent BARRELE_DOWNLOAD_DEPENDENT_RPMS = (BARRELE_SERVER_DOWNLOAD_DEPENDENT_RPMS + BARRELE_AGENT_DEPENDENT_RPMS) +# Debs needed to download for Barreleye server and agent (Ubuntu) +BARRELE_DOWNLOAD_DEPENDENT_DEBS = ["librrd8"] # RPMS needed by Barreleye servers. BARRELE_SERVER_DEPENDENT_RPMS = ["influxdb", "grafana"] BARRELE_SERVER_DEPENDENT_RPMS += BARRELE_DOWNLOAD_DEPENDENT_RPMS diff --git a/pybarrele/barrele_instance.py b/pybarrele/barrele_instance.py index 46701e7..dd4352d 100644 --- a/pybarrele/barrele_instance.py +++ b/pybarrele/barrele_instance.py @@ -70,67 +70,39 @@ def __init__(self, workspace, config, config_fpath, log_to_file, self.bei_iso_dir = constant.CORAL_ISO_DIR # The server of barreleye self.bei_barreleye_server = barreleye_server - # The Collectd RPM types. The RPM type is the minimum string - # that yum could understand and find the RPM. - # For example: - # libcollectdclient-5.11.0...rpm has a type of libcollectdclient; - # collectd-5.11.0...rpm has a type of collectd; - # collectd-disk-5.11.0...rpm has a type of collectd-disk. - # - # Key is RPM type. Value is RPM fname. - self.bei_collectd_rpm_type_dict = None # ISO file path self.bei_iso_fpath = iso_fpath - def _bei_get_collectd_rpm_types(self, log): + def _bei_get_collectd_package_type_dict(self, log): """ - Get Collectd RPMs from ISO dir on local host + Return a dict. Key is the RPM/deb type, value is the file name. 
""" packages_dir = self.bei_iso_dir + "/" + constant.BUILD_PACKAGES - fnames = self.bei_local_host.sh_get_dir_fnames(log, packages_dir) - if fnames is None: - log.cl_error("failed to get fnames under dir [%s] on local " - "host [%s]", self.bei_iso_dir, - self.bei_local_host.sh_hostname) - return -1 - self.bei_collectd_rpm_type_dict = {} - for fname in fnames: - if ((not fname.startswith("collectd")) and - (not fname.startswith("libcollectdclient"))): - continue - rpm_type = \ - barrele_collectd.collectd_rpm_type_from_name(log, fname) - if rpm_type is None: - log.cl_error("failed to get the RPM type from name [%s]", - fname) - return -1 - if rpm_type in self.bei_collectd_rpm_type_dict: - log.cl_error("both Collectd RPMs [%s] and [%s] matches " - "type [%s]", fname, - self.bei_collectd_rpm_type_dict[rpm_type], - rpm_type) - return -1 - - self.bei_collectd_rpm_type_dict[rpm_type] = fname - log.cl_debug("Collectd RPM [%s] is found under dir [%s] on local " - "host [%s]", rpm_type, self.bei_iso_dir, - self.bei_local_host.sh_hostname) - return 0 + return barrele_collectd.get_collectd_package_type_dict(log, + self.bei_local_host, + packages_dir) def _bei_cluster_install_rpms(self, log): """ Install RPMs on the cluster """ - ret = self._bei_get_collectd_rpm_types(log) - if ret: + rpm_type_dict = self._bei_get_collectd_package_type_dict(log) + if rpm_type_dict is None: log.cl_error("failed to get Collectd RPM types") return -1 + for rpm_type in (barrele_collectd.LIBCOLLECTDCLIENT_TYPE_NAME, + barrele_collectd.COLLECTD_TYPE_NAME): + if rpm_type not in rpm_type_dict: + log.cl_error("failed to find Collectd RPM [%s]", + rpm_type) + return -1 + for agent in self.bei_agent_dict.values(): ret = agent.bea_generate_configs(log, self) if ret: - log.cl_error("failed to detect the Lustre version on host [%s]", + log.cl_error("failed to generate Barreleye agent configs on host [%s]", agent.bea_host.sh_hostname) return -1 @@ -269,6 +241,90 @@ def bei_start_agents(self, log, hostnames): return -1 return 0 + def bei_install_agent_locally(self, log): + """ + Install Barreleye agent on local host. + """ + distro = self.bei_local_host.sh_distro(log) + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + log.cl_error("please install agent on host [%s] by using [barrele cluster install] " + "command", + self.bei_local_host.sh_hostname) + return -1 + if distro not in (ssh_host.DISTRO_UBUNTU2204): + log.cl_error("distro [%s] of host [%s] is not supported", + distro, self.bei_local_host.sh_hostname) + return -1 + + iso = self.bei_iso_fpath + if iso is not None: + ret = install_common.sync_iso_dir(log, self.bei_workspace, + self.bei_local_host, iso, + self.bei_iso_dir) + if ret: + log.cl_error("failed to sync ISO files from [%s] to dir [%s] " + "on local host [%s]", + iso, self.bei_iso_dir, + self.bei_local_host.sh_hostname) + return -1 + + # Install Barreleye deb file. 
+ package_dir = self.bei_iso_dir + "/" + constant.BUILD_PACKAGES + command = ("dpkg -i %s/coral-*.deb" % (package_dir)) + retval = self.bei_local_host.sh_run(log, command, timeout=None) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + self.bei_local_host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + package_type_dict = self._bei_get_collectd_package_type_dict(log) + if package_type_dict is None: + log.cl_error("failed to get Collectd package types") + return -1 + + for deb_type in ("collectd-core", "collectd-utils", + "collectd", "libcollectdclient1"): + if deb_type not in package_type_dict: + log.cl_error("failed to find Collectd deb [%s]", + deb_type) + return -1 + + packages_dir = self.bei_iso_dir + "/" + constant.BUILD_PACKAGES + ret = barrele_collectd.collectd_debs_install(log, self.bei_local_host, + packages_dir) + if ret: + log.cl_error("failed to install Collectd debs on local host [%s]", + self.bei_local_host.sh_hostname) + return -1 + + agent = barrele_agent.BarreleAgent(self.bei_local_host, + self.bei_barreleye_server, + enable_disk=False, + enable_lustre_oss=False, + enable_lustre_mds=False, + enable_lustre_client=True, + enable_infiniband=False) + ret = agent.bea_generate_configs(log, self) + if ret: + log.cl_error("failed to generate configs for Barreleye agent [%s]", + agent.bea_host.sh_hostname) + return -1 + + ret = agent.bea_config_agent(log, self) + if ret: + log.cl_error("failed to configure Barreleye agent [%s]", + agent.bea_host.sh_hostname) + return -1 + + log.cl_info("Barreleye agent has been installed successfully on host [%s]", + agent.bea_host.sh_hostname) + return 0 + def parse_server_config(log, config, config_fpath, host_dict): """ diff --git a/pybarrele/barrele_server.py b/pybarrele/barrele_server.py index 6655780..3061c0f 100644 --- a/pybarrele/barrele_server.py +++ b/pybarrele/barrele_server.py @@ -60,6 +60,8 @@ GRAFANA_FOLDER_DISABLED = "Disabled" # Grafana folders GRAFANA_FOLDERS = [GRAFANA_FOLDER_DISABLED] +# The name of Grafana service +GRAFANA_SERVICE_NAME = "grafana-server" def sed_replacement_escape(path): @@ -421,17 +423,16 @@ def _bes_grafana_service_restart_and_enable(self, log): Reinstall Grafana """ host = self.bes_server_host - service_name = "grafana-server" - ret = host.sh_service_restart(log, service_name) + ret = host.sh_service_restart(log, GRAFANA_SERVICE_NAME) if ret: log.cl_error("failed to restart service [%s] on host [%s]", - service_name, host.sh_hostname) + GRAFANA_SERVICE_NAME, host.sh_hostname) return -1 - ret = host.sh_service_enable(log, service_name) + ret = host.sh_service_enable(log, GRAFANA_SERVICE_NAME) if ret: log.cl_error("failed to start service [%s] on host [%s]", - service_name, host.sh_hostname) + GRAFANA_SERVICE_NAME, host.sh_hostname) return -1 ret = utils.wait_condition(log, self._bes_grafana_try_connect, @@ -991,7 +992,7 @@ def _bes_grafana_user_add(self, log, name, email_address, login, return -1 return 0 - def _bes_grafana_user_info(self, log, name): + def _bes_grafana_user_info(self, log, login): """ Add viewer user """ @@ -1000,19 +1001,19 @@ def _bes_grafana_user_info(self, log, name): "Accept": "application/json"} url = self.bes_grafana_admin_url("/api/users/lookup?loginOrEmail=%s" % - (slugify(name))) + (slugify(login))) try: response = requests.get(url, headers=headers) except: - log.cl_error("not able to get users through [%s]: %s", + log.cl_error("failed to get 
user info through [%s]: %s", url, traceback.format_exc()) return -1, None if response.status_code == HTTPStatus.OK: return 1, response.json() if response.status_code == HTTPStatus.NOT_FOUND: return 0, None - log.cl_error("got status [%d] when getting user info from Grafana", - response.status_code) + log.cl_error("got status [%d] when getting user info through [%s]", + response.status_code, url) return -1, None def _bes_grafana_user_recreate(self, log, name, email_address, login, @@ -1021,7 +1022,7 @@ def _bes_grafana_user_recreate(self, log, name, email_address, login, If user doesn't exist, add the user. If user exists, remove it first. """ - ret, json_info = self._bes_grafana_user_info(log, "viewer") + ret, json_info = self._bes_grafana_user_info(log, login) if ret < 0: return -1 if ret == 1: @@ -1044,13 +1045,14 @@ def _bes_grafana_reinstall(self, log, barreleye_instance): Reinstall Grafana """ host = self.bes_server_host - service_name = "grafana-server" log.cl_info("restarting and enabling service [%s] on host [%s]", - service_name, host.sh_hostname) + GRAFANA_SERVICE_NAME, host.sh_hostname) + + ret = self._bes_grafana_service_restart_and_enable(log) if ret: - log.cl_error("failed to restart or enable service [%s] on " - "host [%s]", service_name, + log.cl_error("failed to restart and enable Grafana service on " + "host [%s]", host.sh_hostname) return -1 diff --git a/pybuild/Makefile.am b/pybuild/Makefile.am index 5cfb248..978402b 100644 --- a/pybuild/Makefile.am +++ b/pybuild/Makefile.am @@ -9,11 +9,12 @@ AM_CFLAGS = -Wall -Werror -g $(json_c_CFLAGS) $(json_c_LIBS) \ PIP3_PACKAGE_PATH = /usr/local/lib/python3.6/site-packages:/usr/local/lib64/python3.6/site-packages CORAL_CMD=PYTHONPATH=$(PIP3_PACKAGE_PATH) ../coral +../pycoral/version.py: build_version.py + $(CORAL_CMD) version save ../pycoral/version.py + SOURCES = ../pycoral/version.py CHECKS = -../pycoral/version.py: build_version.py - $(CORAL_CMD) version save ../pycoral/version.py clean-local: rm -fr $(SOURCES) $(CHECKS) diff --git a/pybuild/build_barrele.py b/pybuild/build_barrele.py index f0b2596..0a54f11 100644 --- a/pybuild/build_barrele.py +++ b/pybuild/build_barrele.py @@ -1,6 +1,7 @@ """ Library for building Barreleye """ +# pylint: disable=too-many-lines import os import stat from pycoral import constant @@ -13,12 +14,15 @@ PACAKGE_URL_DICT = {} # The URL of Collectd tarball COLLECTD_URL = ("https://github.com/LiXi-storage/collectd/releases/download/" - "collectd-5.12.0.brl2/collectd-5.12.0.brl2.tar.bz2") + "collectd-5.12.0.brl3/collectd-5.12.0.brl3.tar.bz2") # The sha1sum of Collectd tarball. 
Need to update together with # COLLECTD_URL -COLLECTD_SHA1SUM = "9fb8be9d7c0bf7c84b93ef5bf441d393b081e7d9" -PACAKGE_URL_DICT["collectd"] = COLLECTD_URL +COLLECTD_SHA1SUM = "7469694df09576b9e5460a0a6ee8d429af962bda" +PACAKGE_URL_DICT["collectd"] = COLLECTD_URL# The RPM names of Collectd to check +# The deb names of Collectd to check +COLLECTD_DEB_NAMES = ["collectd", "collectd-core", + "collectd-utils", "libcollectdclient1"] # The RPM names of Collectd to check COLLECTD_RPM_NAMES = ["collectd", "collectd-disk", "collectd-filedata", "collectd-sensors", "collectd-ssh", @@ -102,8 +106,90 @@ ["postgresql-devel", "python-devel"]) COLLECTD_BUILD_DEPENDENT_RHEL8_RPMS = (BARRELEYE_BUILD_DEPENDENT_COMMON_RPMS + ["libpq-devel", "python36-devel"]) +COLLECTD_BUILD_DEPENDENT_UBUNTU2204_DEBS = ["bison", + "default-jdk", + "default-libmysqlclient-dev", + "flex", + "intel-cmt-cat", + "javahelper", + "libatasmart-dev", + "libcap-dev", + "libcurl4-gnutls-dev", + "libcurl4-gnutls-dev", + "libcurl4-gnutls-dev", + "libdbi-dev", + "libdpdk-dev", + "libesmtp-dev", + "libganglia1-dev", + "libgcrypt20-dev", + "libglib2.0-dev", + "libgps-dev", + "libgrpc++-dev", + "libhiredis-dev", + "libi2c-dev", + "libip4tc-dev", + "libip6tc-dev", + "libiptc-dev", + "libiptc-dev", + "libldap2-dev", + "liblua5.3-dev", + "libmemcached-dev", + "libmicrohttpd-dev", + "libmnl-dev", + "libmodbus-dev", + "libmongoc-dev", + "libmosquitto-dev", + "libnotify-dev", + "libntirpc-dev", + "libopenipmi-dev", + "liboping-dev", + "libpcap0.8-dev", + "libpcap-dev", + "libperl-dev", + "libpq-dev", + "libprotobuf-c-dev", + "libprotobuf-dev", + "libqpid-proton11-dev", + "librabbitmq-dev", + "librdkafka-dev", + "libriemann-client-dev", + "librrd-dev", + "libsensors-dev", + "libslurm-dev", + "libsnmp-dev", + "libssh2-1-dev", + "libudev-dev", + "libupsclient-dev", + "libupsclient-dev", + "libvarnishapi-dev", + "libvirt-dev", + "libxen-dev", + "libxml2-dev", + "libyajl-dev", + "libzmq3-dev", + "perl", + "protobuf-c-compiler", + "protobuf-compiler", + "protobuf-compiler-grpc", + "python3-dev", + "riemann-c-client", + "uthash-dev"] +BARRELE_BUILD_DEPENDENT_UBUNTU2204_DEBS = (COLLECTD_BUILD_DEPENDENT_UBUNTU2204_DEBS + + ["bzip2", + "libattr1-dev", + "libext2fs-dev", + "pylint", + "python3-slugify", + "python3-pip", + "genisoimage"]) + BARRELEYE_BUILD_DEPENDENT_PIPS = ["requests", "python-slugify"] +# The URL of Collectd debian tarball +COLLECTD_DEBIAN_URL = "http://deb.debian.org/debian/pool/main/c/collectd/collectd_5.12.0-14.debian.tar.xz" +# The sha1sum of Collectd debian tarball. 
Need to update together with +# COLLECTD_DEBIAN_URL +COLLECTD_DEBIAN_SHA1SUM = "b78277bee7e55d5a58da9d75a8d1eab9de3ef733" def get_collectd_rpm_suffix(distro_number, target_cpu, collectd_version_release): @@ -184,11 +270,11 @@ def get_and_clean_collectd_rpms(log, host, packages_dir, return collectd_fnames -def remove_collectd_rpms(log, host, packages_dir): +def remove_collectd_packages(log, host, packages_dir): """ - Remove old Collectd RPMs + Remove old Collectd RPMs or debs """ - patterns = ["collectd-*", "libcollectdclient-*"] + patterns = ["collectd*", "libcollectdclient*"] for pattern in patterns: command = "rm -f %s/%s" % (packages_dir, pattern) retval = host.sh_run(log, command) @@ -336,10 +422,10 @@ def build_collectd_rpms(log, host, target_cpu, packages_dir, return 0 -def collectd_build_and_check(log, host, target_cpu, packages_dir, - collectd_src_dir, collectd_version, - collectd_version_release, - tarball_fpath, extra_package_fnames): +def collectd_build_and_check_rhel(log, host, target_cpu, packages_dir, + collectd_src_dir, collectd_version, + collectd_version_release, + tarball_fpath, extra_package_fnames): """ Check and build Collectd RPMs """ @@ -356,7 +442,7 @@ def collectd_build_and_check(log, host, target_cpu, packages_dir, elif distro == ssh_host.DISTRO_RHEL8: distro_number = "8" else: - log.cl_error("build on distro [%s] is not supported yet", distro) + log.cl_error("build Barreleye on distro [%s] is not supported yet", distro) return -1 ret = check_collectd_rpms_integrity(log, existing_rpm_fnames, @@ -377,7 +463,7 @@ def collectd_build_and_check(log, host, target_cpu, packages_dir, return 0 log.cl_debug("building Collectd RPMs") - ret = remove_collectd_rpms(log, host, packages_dir) + ret = remove_collectd_packages(log, host, packages_dir) if ret: log.cl_error("failed to remove old Collectd RPMs") return -1 @@ -419,7 +505,349 @@ def collectd_build_and_check(log, host, target_cpu, packages_dir, return 0 -def build_collectd_tarball(log, workspace, host, target_cpu, packages_dir, +def get_collectd_deb_suffix(log, target_cpu, + collectd_version): + """ + Return the suffix of Collectd debs. + The suffix starts from "_", e.g. + "_5.12.0.brl3_amd64.deb" + """ + if target_cpu == "x86_64": + deb_target_cpu = "amd64" + else: + log.cl_error("unsupported target CPU [%s]", target_cpu) + return None + return ("_%s_%s.deb" % + (collectd_version, deb_target_cpu)) + + +def get_collectd_dev_deb_suffix(collectd_version): + """ + Return the suffix of Collectd debs. + The suffix starts from "_", e.g. + "_5.12.0.brl3_all.deb" + """ + return ("_%s_all.deb" % + (collectd_version)) + + +def check_collectd_debs_integrity(log, deb_fnames, target_cpu, + collectd_version, quiet=True): + """ + Check whether the existing Collectd debs are complete. + """ + suffix = get_collectd_deb_suffix(log, target_cpu, + collectd_version) + if suffix is None: + return -1 + for collect_deb_name in COLLECTD_DEB_NAMES: + collect_deb_full = collect_deb_name + suffix + if collect_deb_full not in deb_fnames: + if not quiet: + log.cl_error("Deb [%s] does not exist", + collect_deb_full) + else: + log.cl_debug("Deb [%s] does not exist", + collect_deb_full) + return -1 + return 0 + + +def get_and_clean_collectd_debs(log, host, packages_dir, + deb_fnames, target_cpu, + collectd_version, + expect_clean=False): + """ + Return a list of Collectd debs under a directory. + If there are other version of Collectd debs, remove them. 
+ """ + # pylint: disable=too-many-locals + suffix = get_collectd_deb_suffix(log, target_cpu, + collectd_version) + if suffix is None: + return None + dev_suffix = get_collectd_dev_deb_suffix(collectd_version) + + prefixes = ["collectd", "libcollectdclient"] + collectd_fnames = [] + for deb_fname in deb_fnames: + found = False + for prefix in prefixes: + if deb_fname.startswith(prefix): + found = True + if not found: + continue + + if not deb_fname.endswith(suffix) and not deb_fname.endswith(dev_suffix): + if expect_clean: + log.cl_error("Collectd deb [%s] has different suffix, " + "expected [%s]", + deb_fname, suffix) + return None + + log.cl_info("Collectd deb [%s] has different suffix, " + "expected [%s], removing", + deb_fname, suffix) + fpath = packages_dir + "/" + deb_fname + command = ("rm -f %s" % (fpath)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return None + continue + collectd_fnames.append(deb_fname) + return collectd_fnames + + +def collectd_tarball_fpath2version(log, tarball_fpath): + """ + Return the Collectd version by parsing the tarball fpath. + """ + collectd_tarball_fname = os.path.basename(tarball_fpath) + suffix = ".tar.bz2" + if not collectd_tarball_fname.endswith(suffix): + log.cl_error("tarball [%s] does not end with suffix [%s]", + collectd_tarball_fname, suffix) + return None + + prefix = "collectd-" + if not collectd_tarball_fname.startswith(prefix): + log.cl_error("tarball [%s] does not start with prefix [%s]", + collectd_tarball_fname, prefix) + return None + collectd_version = collectd_tarball_fname[len(prefix):-len(suffix)] + return collectd_version + + +def build_collectd_debs(log, host, source_dir, type_cache, + packages_dir, collectd_build_dir, + collectd_src_dir, tarball_fpath, + collectd_version): + """ + Build Collectd debs on a host + """ + # pylint: disable=too-many-locals + collectd_version = collectd_tarball_fpath2version(log, tarball_fpath) + if collectd_version is None: + return -1 + + tarball_fname = os.path.basename(COLLECTD_DEBIAN_URL) + tarball_fpath = type_cache + "/" + tarball_fname + ret = host.sh_download_file(log, COLLECTD_DEBIAN_URL, tarball_fpath, + COLLECTD_DEBIAN_SHA1SUM) + if ret: + log.cl_error("failed to download Collectd debian tarball") + return -1 + + command = "tar xf %s -C %s" % (tarball_fpath, collectd_src_dir) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + patch_dir = source_dir + "/barreleye/collectd/ubuntu_build_patches" + rc = build_common.apply_patches(log, host, collectd_src_dir, + patch_dir) + if rc: + log.cl_error("failed to apply ubuntu build patches to [%s]", + collectd_src_dir) + return -1 + + command = "date -R" + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + changelog_date = retval.cr_stdout.strip() + + changelog_lines = ["collectd (%s) unstable; urgency=medium" % collectd_version, + "", + " * Automated changelog.",
+ "" + "Coral Packaging Team %s" % changelog_date] + changelog_fpath = collectd_src_dir + "/debian/changelog" + command = "> %s" % changelog_fpath + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + for changelog_line in changelog_lines: + command = ("echo \"%s\" >> %s" % + (changelog_line, + changelog_fpath)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + command = "cd %s && dpkg-buildpackage -us -uc -I.git -I.github" % collectd_src_dir + log.cl_info("running command [%s] on host [%s]", + command, host.sh_hostname) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + command = ("mv %s/*.deb %s" % + (collectd_build_dir, packages_dir)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 + + +def collectd_build_and_check_ubuntu(log, host, source_dir, + type_cache, target_cpu, + packages_dir, + collectd_build_dir, + collectd_src_dir, + collectd_version, + tarball_fpath, + extra_package_fnames): + """ + If existing Collectd debs are not complete, build them. 
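For reference, a minimal sketch (maintainer name is taken from the patch, the email address is a placeholder) of the debian/changelog entry that build_collectd_debs() above assembles before running dpkg-buildpackage; dpkg expects roughly this shape, with the trailer date coming from "date -R".

collectd_version = "5.12.0.brl3"                      # derived from the tarball name
changelog_date = "Mon, 01 Jan 2024 00:00:00 +0000"    # output of "date -R"
entry = "\n".join([
    "collectd (%s) unstable; urgency=medium" % collectd_version,
    "",
    "  * Automated changelog.",
    "",
    " -- Coral Packaging Team <placeholder@example.com>  %s" % changelog_date,
    "",
])
print(entry)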
+ """ + existing_deb_fnames = host.sh_get_dir_fnames(log, packages_dir) + if existing_deb_fnames is None: + log.cl_error("failed to get fnames under dir [%s] on host [%s]", + packages_dir, + host.sh_hostname) + return -1 + + ret = check_collectd_debs_integrity(log, existing_deb_fnames, + target_cpu, + collectd_version) + if ret == 0: + log.cl_debug("Collectd debs already exist") + collectd_deb_fnames = get_and_clean_collectd_debs(log, host, + packages_dir, + existing_deb_fnames, + target_cpu, + collectd_version) + if collectd_deb_fnames is None: + log.cl_error("failed to get the Collectd deb names") + return -1 + extra_package_fnames += collectd_deb_fnames + return 0 + + log.cl_debug("building Collectd debs") + ret = remove_collectd_packages(log, host, packages_dir) + if ret: + log.cl_error("failed to remove old Collectd debs") + return -1 + + ret = build_collectd_debs(log, host, source_dir, + type_cache, packages_dir, + collectd_build_dir, + collectd_src_dir, tarball_fpath, + collectd_version) + if ret: + log.cl_error("failed to build Collectd debs from src [%s]", + collectd_src_dir) + return -1 + + existing_deb_fnames = host.sh_get_dir_fnames(log, packages_dir) + if existing_deb_fnames is None: + log.cl_error("failed to get fnames under dir [%s] on host [%s]", + packages_dir, + host.sh_hostname) + return -1 + + ret = check_collectd_debs_integrity(log, existing_deb_fnames, + target_cpu, + collectd_version) + if ret == 0: + collectd_deb_fnames = get_and_clean_collectd_debs(log, host, + packages_dir, + existing_deb_fnames, + target_cpu, + collectd_version) + if collectd_deb_fnames is None: + log.cl_error("failed to get the Collectd deb names") + return -1 + extra_package_fnames += collectd_deb_fnames + return 0 + return 0 + + +def collectd_build_and_check(log, host, source_dir, type_cache, + target_cpu, packages_dir, + collectd_build_dir, + collectd_src_dir, collectd_version, + collectd_version_release, + tarball_fpath, extra_package_fnames): + """ + If existing Collectd packages are not complete, build them. 
+ """ + distro = host.sh_distro(log) + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + return collectd_build_and_check_rhel(log, host, target_cpu, + packages_dir, + collectd_src_dir, + collectd_version, + collectd_version_release, + tarball_fpath, + extra_package_fnames) + if distro in (ssh_host.DISTRO_UBUNTU2204): + return collectd_build_and_check_ubuntu(log, host, + source_dir, + type_cache, + target_cpu, + packages_dir, + collectd_build_dir, + collectd_src_dir, + collectd_version, + tarball_fpath, + extra_package_fnames) + return 0 + + +def build_collectd_tarball(log, workspace, host, source_dir, type_cache, + target_cpu, packages_dir, tarball_fpath, extra_package_fnames, known_collectd_version=None): """ @@ -487,23 +915,29 @@ def build_collectd_tarball(log, workspace, host, target_cpu, packages_dir, "source dir") return -1 - ret = collectd_build_and_check(log, host, target_cpu, packages_dir, - collectd_src_dir, collectd_version, + ret = collectd_build_and_check(log, host, source_dir, + type_cache, + target_cpu, + packages_dir, + collectd_build_dir, + collectd_src_dir, + collectd_version, collectd_version_release, tarball_fpath, extra_package_fnames) if ret: - log.cl_error("failed to build and check Collectd RPMs") + log.cl_error("failed to build and check Collectd packages") return -1 return 0 -def download_and_build_collectd(log, workspace, host, type_cache, target_cpu, +def download_and_build_collectd(log, workspace, host, source_dir, + type_cache, target_cpu, packages_dir, collectd_url, expected_sha1sum, extra_package_fnames): """ Download Collectd source code tarball and build """ - log.cl_info("building Collectd RPMs from URL [%s] on host [%s]", + log.cl_info("building Collectd packages from URL [%s] on host [%s]", collectd_url, host.sh_hostname) tarball_fname = os.path.basename(collectd_url) tarball_fpath = type_cache + "/" + tarball_fname @@ -513,8 +947,8 @@ def download_and_build_collectd(log, workspace, host, type_cache, target_cpu, log.cl_error("failed to download Collectd sourcecode tarball") return -1 - ret = build_collectd_tarball(log, workspace, host, target_cpu, - packages_dir, tarball_fpath, + ret = build_collectd_tarball(log, workspace, host, source_dir, type_cache, + target_cpu, packages_dir, tarball_fpath, extra_package_fnames) if ret: log.cl_error("failed to build Collectd tarball [%s]", @@ -523,8 +957,10 @@ def download_and_build_collectd(log, workspace, host, type_cache, target_cpu, return 0 -def build_collectd_dir(log, workspace, host, target_cpu, packages_dir, - origin_collectd_dir, extra_package_fnames): +def build_collectd_dir(log, workspace, host, source_dir, + type_cache, target_cpu, + packages_dir, origin_collectd_dir, + extra_package_fnames): """ Build Collectd from src dir """ @@ -604,7 +1040,8 @@ def build_collectd_dir(log, workspace, host, target_cpu, packages_dir, return -1 collectd_tarball_fpath = collectd_dir + "/" + collectd_tarball_fname - ret = build_collectd_tarball(log, workspace, host, target_cpu, packages_dir, + ret = build_collectd_tarball(log, workspace, host, source_dir, + type_cache, target_cpu, packages_dir, collectd_tarball_fpath, extra_package_fnames, known_collectd_version=collectd_version) if ret: @@ -614,23 +1051,29 @@ def build_collectd_dir(log, workspace, host, target_cpu, packages_dir, return 0 -def build_collectd(log, workspace, host, type_cache, target_cpu, packages_dir, +def build_collectd(log, workspace, host, source_dir, + type_cache, target_cpu, packages_dir, collectd, extra_package_fnames): """ Build Collectd 
""" if collectd is None: - return download_and_build_collectd(log, workspace, host, type_cache, - target_cpu, packages_dir, COLLECTD_URL, + return download_and_build_collectd(log, workspace, host, + source_dir, + type_cache, + target_cpu, + packages_dir, + COLLECTD_URL, COLLECTD_SHA1SUM, extra_package_fnames) stat_result = host.sh_stat(log, collectd) if stat_result is not None: if stat.S_ISREG(stat_result.st_mode): - log.cl_info("building Collectd RPMs from tarball [%s] on host [%s]", + log.cl_info("building Collectd packages from tarball [%s] on host [%s]", collectd, host.sh_hostname) - ret = build_collectd_tarball(log, workspace, host, target_cpu, + ret = build_collectd_tarball(log, workspace, host, source_dir, + type_cache, target_cpu, packages_dir, collectd, extra_package_fnames) if ret: @@ -638,9 +1081,11 @@ def build_collectd(log, workspace, host, type_cache, target_cpu, packages_dir, collectd) return -1 elif stat.S_ISDIR(stat_result.st_mode): - log.cl_info("building Collectd RPMs from dir [%s] on host [%s]", + log.cl_info("building Collectd packages from dir [%s] on host [%s]", collectd, host.sh_hostname) - ret = build_collectd_dir(log, workspace, host, target_cpu, + ret = build_collectd_dir(log, workspace, host, source_dir, + type_cache, + target_cpu, packages_dir, collectd, extra_package_fnames) if ret: @@ -653,8 +1098,15 @@ def build_collectd(log, workspace, host, type_cache, target_cpu, packages_dir, return -1 return 0 - return download_and_build_collectd(log, workspace, host, type_cache, - target_cpu, packages_dir, collectd, None, + return download_and_build_collectd(log, + workspace, + host, + source_dir, + type_cache, + target_cpu, + packages_dir, + collectd, + None, extra_package_fnames) @@ -819,36 +1271,45 @@ def build_grafana_plugins(log, host, type_cache, iso_cache, extra_iso_fnames): return 0 -def build_barreleye(log, workspace, host, type_cache, target_cpu, iso_cache, +def build_barreleye(log, workspace, host, source_dir, + type_cache, target_cpu, iso_cache, packages_dir, collectd, extra_iso_fnames, - extra_package_fnames, extra_rpm_names): + extra_package_fnames, extra_package_names): """ Build barreleye """ - rc = build_collectd(log, workspace, host, type_cache, target_cpu, - packages_dir, collectd, extra_package_fnames) + rc = build_collectd(log, workspace, host, source_dir, type_cache, + target_cpu, packages_dir, collectd, + extra_package_fnames) if rc: log.cl_error("failed to build Collectd RPMs") return -1 - rc = build_grafana_plugins(log, host, type_cache, iso_cache, - extra_iso_fnames) - if rc: - log.cl_error("failed to download Grafana") - return -1 + distro = host.sh_distro(log) + if distro == ssh_host.DISTRO_UBUNTU2204: + log.cl_info("skip building server packages for Barreleye " + "since distro [%s] has no server support", + distro) + extra_package_names += barrele_constant.BARRELE_DOWNLOAD_DEPENDENT_DEBS + else: + rc = build_grafana_plugins(log, host, type_cache, iso_cache, + extra_iso_fnames) + if rc: + log.cl_error("failed to download Grafana") + return -1 - rc = build_grafana(log, host, target_cpu, packages_dir, extra_package_fnames) - if rc: - log.cl_error("failed to download Grafana") - return -1 + rc = build_grafana(log, host, target_cpu, packages_dir, extra_package_fnames) + if rc: + log.cl_error("failed to download Grafana") + return -1 - rc = build_influxdb(log, host, target_cpu, packages_dir, - extra_package_fnames) - if rc: - log.cl_error("failed to download Influxdb") - return -1 + rc = build_influxdb(log, host, target_cpu, packages_dir, + 
extra_package_fnames) + if rc: + log.cl_error("failed to download Influxdb") + return -1 - extra_rpm_names += barrele_constant.BARRELE_DOWNLOAD_DEPENDENT_RPMS + extra_package_names += barrele_constant.BARRELE_DOWNLOAD_DEPENDENT_RPMS return 0 @@ -863,28 +1324,32 @@ def __init__(self): is_devel=False, need_collectd=True) - def cpt_build_dependent_rpms(self, distro): + def cpt_build_dependent_packages(self, distro): """ Return the RPMs needed to install before building """ + # pylint: disable=no-self-use if distro == ssh_host.DISTRO_RHEL7: return COLLECTD_BUILD_DEPENDENT_RHEL7_RPMS if distro == ssh_host.DISTRO_RHEL8: return COLLECTD_BUILD_DEPENDENT_RHEL8_RPMS + if distro == ssh_host.DISTRO_UBUNTU2204: + return BARRELE_BUILD_DEPENDENT_UBUNTU2204_DEBS return None def cpt_build(self, log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, option_dict): + extra_package_fnames, extra_package_names, option_dict): """ Build the plugin """ # pylint: disable=unused-argument,no-self-use collectd = option_dict["collectd"] - ret = build_barreleye(log, workspace, local_host, type_cache, + ret = build_barreleye(log, workspace, local_host, source_dir, + type_cache, target_cpu, iso_cache, packages_dir, collectd, extra_iso_fnames, extra_package_fnames, - extra_rpm_names) + extra_package_names) if ret: log.cl_error("failed to build Barreleye") return -1 @@ -975,8 +1440,8 @@ def build(self, collectd=None): local_host.sh_hostname) cmd_general.cmd_exit(log, -1) - rc = build_collectd(log, workspace, local_host, type_cache, - target_cpu, packages_dir, collectd, + rc = build_collectd(log, workspace, local_host, source_dir, + type_cache, target_cpu, packages_dir, collectd, extra_package_fnames) if rc: log.cl_error("failed to build Collectd RPMs") diff --git a/pybuild/build_common.py b/pybuild/build_common.py index f862795..145e015 100644 --- a/pybuild/build_common.py +++ b/pybuild/build_common.py @@ -122,7 +122,7 @@ def __init__(self, plugin_name, else: self.cpt_plugins = plugins - def cpt_build_dependent_rpms(self, distro): + def cpt_build_dependent_packages(self, distro): """ Return the RPMs needed to install before building. Return None on failure. @@ -140,7 +140,7 @@ def cpt_install_build_dependency(self, log, workspace, host, def cpt_build(self, log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, option_dict): + extra_package_fnames, extra_package_names, option_dict): """ Build the plugin """ @@ -176,7 +176,7 @@ def __init__(self, package_name, depend_package_names=None): # useful when need to install other newly buildt packages. self.cpb_depend_package_names = depend_package_names - def cpb_build_dependent_rpms(self, distro): + def cpb_build_dependent_packages(self, distro): """ Return the RPMs needed to install before building """ @@ -430,3 +430,38 @@ def get_shared_build_cache(log, host, workspace, shared_cache): "than 10 minutes, aborting", lock_file) return ret + + +def apply_patches(log, host, target_source_dir, patch_dir): + """ + Apply a series of patches. The patches shall be in patch_dir. And they + shall have sorted file names like: + 0000-fname.patch + 0001-fname.patch + ... 
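A self-contained sketch (stdlib only, local execution instead of the repository's remote-host helpers) of the same idea the helper below implements: apply every *.patch under a directory in sorted 0000-, 0001-, ... order with patch -p1 from inside the target source tree.

import glob
import subprocess

def apply_patch_series(target_source_dir, patch_dir):
    """Apply *.patch files in sorted order, as apply_patches() does over ssh."""
    for patch_fpath in sorted(glob.glob(patch_dir + "/*.patch")):
        with open(patch_fpath, "rb") as patch_file:
            # patch -p1 strips the leading path component of each file name.
            subprocess.run(["patch", "-p1"], stdin=patch_file,
                           cwd=target_source_dir, check=True)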
+ """ + patch_fnames = host.sh_get_dir_fnames(log, patch_dir) + if patch_fnames is None: + log.cl_error("failed to get patches of Mpifileutils under dir [%s] " + "on host [%s]", + patch_dir, host.sh_hostname) + return -1 + + patch_fnames.sort() + + for patch_fname in patch_fnames: + if not patch_fname.endswith(".patch"): + continue + patch_fpath = patch_dir + "/" + patch_fname + command = "cd %s && patch -p1 < %s" % (target_source_dir, patch_fpath) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 diff --git a/pybuild/coral.py b/pybuild/coral.py index 49cf077..f2df02a 100644 --- a/pybuild/coral.py +++ b/pybuild/coral.py @@ -24,9 +24,9 @@ def coral_command_bootstrap(tsinghua_mirror=False): local_host.sh_hostname) sys.exit(-1) - missing_rpms, missing_pips = \ + missing_packages, missing_pips = \ install_common.command_missing_packages(distro) - if missing_rpms is None: + if missing_packages is None: log.cl_error("failed to get the missing packages of host [%s]", local_host.sh_hostname) sys.exit(-1) @@ -41,7 +41,8 @@ def coral_command_bootstrap(tsinghua_mirror=False): retval.cr_stderr) sys.exit(-1) - ret = install_common.bootstrap_from_internet(log, local_host, missing_rpms, + ret = install_common.bootstrap_from_internet(log, local_host, + missing_packages, missing_pips, constant.CORAL_BUILD_CACHE_PIP_DIR, tsinghua_mirror=tsinghua_mirror) diff --git a/pybuild/coral_build.py b/pybuild/coral_build.py index 48704d8..cd77904 100644 --- a/pybuild/coral_build.py +++ b/pybuild/coral_build.py @@ -25,6 +25,8 @@ PYINSTALLER_TARBALL_SHA1SUM = "60c595f5cbe66223d33c6edf1bb731ab9f02c3de" # "v4.10.tar.gz" is not a good name, specify the fname to save. 
PYINSTALLER_TABALL_FNAME = "pyinstaller-4.10.tar.gz" +REPLACE_DEB_DICT = {} +REPLACE_DEB_DICT["debconf-2.0"] = "debconf" def merge_list(list_x, list_y): @@ -212,7 +214,7 @@ def check_package_rpms(log, host, packages_dir, dependent_rpms, def download_dependent_rpms(log, host, distro, target_cpu, packages_dir, extra_package_fnames, - extra_rpm_names): + extra_package_names): """ Download dependent RPMs """ @@ -233,7 +235,7 @@ def download_dependent_rpms(log, host, distro, target_cpu, return -1 dependent_rpms = merge_list(constant.CORAL_DEPENDENT_RPMS, - extra_rpm_names) + extra_package_names) if distro == ssh_host.DISTRO_RHEL7: ret = download_dependent_rpms_rhel7(log, host, target_cpu, @@ -243,6 +245,9 @@ def download_dependent_rpms(log, host, distro, target_cpu, ret = download_dependent_rpms_rhel8(log, host, packages_dir, dependent_rpms, extra_package_fnames) + else: + log.cl_error("unsupported distro [%s]", distro) + return -1 if ret: log.cl_error("failed to download dependent RPMs on host [%s]", host.sh_hostname) @@ -257,6 +262,100 @@ def download_dependent_rpms(log, host, distro, target_cpu, return 0 +def download_dependent_debs(log, host, packages_dir, extra_package_fnames, + extra_package_names): + """ + Download dependent debs + """ + # pylint: disable=consider-using-get + if len(extra_package_names) == 0: + return 0 + log.cl_info("downloading dependency debs") + command = 'apt-rdepends' + for extra_package_name in extra_package_names: + command += " " + extra_package_name + command = command + ' | grep -v "^ "' + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + packages = retval.cr_stdout.splitlines() + + downloading_dir = packages_dir + "/downloading." + cmd_general.get_identity() + command = "mkdir %s" % downloading_dir + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + for package in packages: + if package in REPLACE_DEB_DICT: + package = REPLACE_DEB_DICT[package] + command = ("cd %s && apt download %s" % + (downloading_dir, package)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + downloaded_fnames = host.sh_get_dir_fnames(log, downloading_dir) + if downloaded_fnames is None: + log.cl_error("failed to get the fnames under [%s]", + downloading_dir) + return -1 + extra_package_fnames += downloaded_fnames + + command = ("mv %s/* %s && rmdir %s" % + (downloading_dir, packages_dir, downloading_dir)) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 + + +def download_dependent_packages(log, host, distro, target_cpu, + packages_dir, extra_package_fnames, + extra_package_names): + """ + Download packages for Barreleye. 
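A condensed local sketch (stdlib only; "librrd8" is the example dependency from BARRELE_DOWNLOAD_DEPENDENT_DEBS, and apt-rdepends must be installed) of the deb download flow used below: expand the dependency closure, map virtual packages such as "debconf-2.0" to a real package, then fetch each .deb with "apt download".

import subprocess

REPLACE_DEB_DICT = {"debconf-2.0": "debconf"}   # virtual package -> real package

closure = subprocess.run("apt-rdepends librrd8 | grep -v '^ '",
                         shell=True, capture_output=True, text=True, check=True)
for package in closure.stdout.splitlines():
    package = REPLACE_DEB_DICT.get(package, package)
    # Downloads <package>_<version>_<arch>.deb into the current directory.
    subprocess.run(["apt", "download", package], check=True)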
+ """ + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + return download_dependent_rpms(log, host, distro, target_cpu, + packages_dir, extra_package_fnames, + extra_package_names) + if distro in (ssh_host.DISTRO_UBUNTU2204): + return download_dependent_debs(log, host, + packages_dir, extra_package_fnames, + extra_package_names) + log.cl_error("unsupported distro [%s]", distro) + return -1 + + def install_pyinstaller(log, host, type_cache, tsinghua_mirror=False): """ Install pyinstaller @@ -299,9 +398,29 @@ def prepare_install_modulemd_tools(log, host): return ["modulemd-tools"] -def install_build_dependency(log, workspace, host, distro, target_cpu, - type_cache, plugins, package_dict, pip_dir, - tsinghua_mirror=False): +def get_build_dependent_packages(log, distro, plugins, package_dict): + """ + Return a list of dependent packages (RPMs/debs) and pips + """ + dependent_packages = [] + dependent_pips = [] + for plugin in plugins: + packages = plugin.cpt_build_dependent_packages(distro) + if packages is None: + log.cl_error("failed to get the dependet packages for building [%s]", + plugin.cpt_plugin_name) + return None, None + dependent_packages += packages + dependent_pips += plugin.cpt_build_dependent_pips + + for package in package_dict.values(): + dependent_packages += package.cpb_build_dependent_packages(distro) + return dependent_packages, dependent_pips + + +def install_build_dependency_rhel(log, workspace, host, distro, target_cpu, + type_cache, plugins, package_dict, pip_dir, + tsinghua_mirror=False): """ Install the dependency of building Coral """ @@ -366,20 +485,11 @@ def install_build_dependency(log, workspace, host, distro, target_cpu, retval.cr_stderr) return -1 - # We need all dependency for all plugins since no matter they - # are needed or not, the Python codes will be checked, and the Python - # codes might depend on the RPMs. 
- for plugin in build_common.CORAL_PLUGIN_DICT.values(): - rpms = plugin.cpt_build_dependent_rpms(distro) - if rpms is None: - log.cl_error("failed to get the dependet RPMs for building [%s]", - plugin.cpt_plugin_name) - return -1 - dependent_rpms += rpms - dependent_pips += plugin.cpt_build_dependent_pips - - for package in package_dict.values(): - dependent_rpms += package.cpb_build_dependent_rpms(distro) + rpms, pips = get_build_dependent_packages(log, distro, plugins, package_dict) + if rpms is None or pips is None: + return -1 + dependent_rpms += rpms + dependent_pips += pips ret = install_common.bootstrap_from_internet(log, host, dependent_rpms, dependent_pips, @@ -407,6 +517,74 @@ def install_build_dependency(log, workspace, host, distro, target_cpu, return 0 +def install_build_dependency_ubuntu(log, workspace, host, distro, + target_cpu, type_cache, plugins, + package_dict, pip_dir, + tsinghua_mirror=False): + """ + Install the dependency of building Coral for Ubuntu + """ + # pylint: disable=unused-argument,too-many-locals + if tsinghua_mirror: + ret = install_common.ubuntu2204_apt_mirror_replace_to_tsinghua(log, host) + if ret: + log.cl_error("failed to replace deb mirrors to Tsinghua University") + return -1 + + command = 'apt update' + log.cl_info("running command [%s] on host [%s]", + command, host.sh_hostname) + retval = host.sh_watched_run(log, command, None, None, + return_stdout=False, + return_stderr=False) + if retval.cr_exit_status != 0: + log.cl_error("failed to run command [%s] on host [%s]", + command, host.sh_hostname) + return -1 + + dependent_debs = ["libjson-c-dev", "apt-rdepends"] + dependent_pips = ["PyInstaller", "tinyaes", "pycryptodome"] + + debs, pips = get_build_dependent_packages(log, distro, plugins, + package_dict) + if debs is None or pips is None: + return -1 + dependent_pips += pips + dependent_debs += debs + + ret = install_common.bootstrap_from_internet(log, host, + dependent_debs, + dependent_pips, + pip_dir, + tsinghua_mirror=tsinghua_mirror) + if ret: + log.cl_error("failed to install missing packages on host [%s] " + "from Internet", host.sh_hostname) + return -1 + return 0 + + +def install_build_dependency(log, workspace, host, distro, target_cpu, + type_cache, plugins, package_dict, pip_dir, + tsinghua_mirror=False): + """ + Install the dependency of building Coral + """ + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + return install_build_dependency_rhel(log, workspace, host, distro, + target_cpu, type_cache, plugins, + package_dict, pip_dir, + tsinghua_mirror=tsinghua_mirror) + + if distro in (ssh_host.DISTRO_UBUNTU2204): + return install_build_dependency_ubuntu(log, workspace, host, distro, + target_cpu, type_cache, plugins, + package_dict, pip_dir, + tsinghua_mirror=tsinghua_mirror) + log.cl_error("unsupported distro [%s]", distro) + return -1 + + def sync_shared_build_cache(log, host, private_cache, shared_parent): """ Sync from the local cache to shared cache @@ -628,7 +806,7 @@ def get_needed_packages(log, plugins): def build_packages(log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, option_dict, + extra_package_fnames, extra_package_names, option_dict, package_dict): """ Build the packages in cpt_packages. 
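The Ubuntu port repeats a per-distro dispatch in several helpers (collectd_build_and_check, download_dependent_packages, install_build_dependency). A condensed, hypothetical sketch of that idiom follows; the distro strings stand in for the ssh_host.DISTRO_* constants, which are assumed to be plain strings. Note that a one-element tuple needs the trailing comma, since "distro in (SOME_STRING)" degrades to a substring test against the bare string.

def dispatch_by_distro(distro, rhel_handler, ubuntu_handler):
    """Hypothetical helper illustrating the dispatch pattern used by the patch."""
    if distro in ("rhel7", "rhel8"):       # placeholders for DISTRO_RHEL7/DISTRO_RHEL8
        return rhel_handler()
    if distro in ("ubuntu2204",):          # trailing comma: one-element tuple
        return ubuntu_handler()
    return -1                              # unsupported distro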
@@ -650,7 +828,7 @@ def build_packages(log, workspace, local_host, source_dir, target_cpu, ret = package.cpb_build(log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, + extra_package_fnames, extra_package_names, option_dict) if ret: log.cl_error("failed to build package [%s]", @@ -754,7 +932,8 @@ def build(log, source_dir, workspace, local_host = ssh_host.get_local_host(ssh=False) distro = local_host.sh_distro(log) - if distro not in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + if distro not in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8, + ssh_host.DISTRO_UBUNTU2204): log.cl_error("build on distro [%s] is not supported yet", distro) return -1 @@ -762,7 +941,7 @@ def build(log, source_dir, workspace, # Shared cache for this build type shared_type_cache = shared_cache + "/" + type_fname # Extra RPMs to download - extra_rpm_names = [] + extra_package_names = [] # Extra RPM file names under package directory extra_package_fnames = [] # Extra file names under ISO directory @@ -851,7 +1030,7 @@ def build(log, source_dir, workspace, ret = build_packages(log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, option_dict, + extra_package_fnames, extra_package_names, option_dict, package_dict) if ret: log.cl_error("failed to build packages") @@ -861,18 +1040,19 @@ def build(log, source_dir, workspace, ret = plugin.cpt_build(log, workspace, local_host, source_dir, target_cpu, type_cache, iso_cache, packages_dir, extra_iso_fnames, - extra_package_fnames, extra_rpm_names, + extra_package_fnames, extra_package_names, option_dict) if ret: log.cl_error("failed to build plugin [%s]", plugin.cpt_plugin_name) return -1 - ret = download_dependent_rpms(log, local_host, distro, - target_cpu, packages_dir, - extra_package_fnames, extra_rpm_names) + ret = download_dependent_packages(log, local_host, distro, + target_cpu, packages_dir, + extra_package_fnames, + extra_package_names) if ret: - log.cl_error("failed to download dependent rpms") + log.cl_error("failed to download dependent packages") return -1 contents = ([constant.BUILD_PACKAGES] + diff --git a/pybuild/coral_command.py b/pybuild/coral_command.py index e909cd8..4431324 100644 --- a/pybuild/coral_command.py +++ b/pybuild/coral_command.py @@ -4,10 +4,12 @@ # pylint: disable=unused-import import sys import os +import traceback from fire import Fire from pycoral import constant from pycoral import cmd_general from pycoral import ssh_host +from pycoral import lustre_version from pybuild import coral_build from pybuild import build_common @@ -101,6 +103,58 @@ def plugins(coral_command): build_common.coral_command_register("plugins", plugins) +def detect_lustre(coral_command, fpath): + """ + Detect the Lustre version from RPM names. + :param fpath: The file path that saves RPM names with or with out .rpm suffix. 
+ """ + # pylint: disable=protected-access + source_dir = os.getcwd() + identity = build_common.get_build_path() + logdir_is_default = True + log, _ = cmd_general.init_env_noconfig(source_dir, + coral_command._cc_log_to_file, + logdir_is_default, + identity=identity) + + try: + with open(fpath, "r", encoding='utf-8') as fd: + lines = fd.readlines() + except: + log.cl_error("failed to read file [%s]: %s", + fpath, traceback.format_exc()) + cmd_general.cmd_exit(log, -1) + + rpm_fnames = [] + for line in lines: + line = line.strip() + fields = line.split() + for rpm_fname in fields: + if not rpm_fname.endswith(".rpm"): + rpm_fname += ".rpm" + rpm_fnames.append(rpm_fname) + log.cl_info("RPM: %s", rpm_fname) + + version, _ = lustre_version.match_lustre_version_from_rpms(log, + rpm_fnames, + skip_kernel=True, + skip_test=True) + if version is None: + version, _ = lustre_version.match_lustre_version_from_rpms(log, + rpm_fnames, + client=True) + if version is None: + log.cl_error("failed to match Lustre version according to RPM names") + cmd_general.cmd_exit(log, -1) + log.cl_stdout("Lustre client: %s", version.lv_name) + cmd_general.cmd_exit(log, 0) + log.cl_stdout("Lustre server: %s", version) + cmd_general.cmd_exit(log, 0) + + +build_common.coral_command_register("detect_lustre", detect_lustre) + + def main(): """ main routine diff --git a/pycheck/Makefile.am b/pycheck/Makefile.am index 061246d..2760b80 100644 --- a/pycheck/Makefile.am +++ b/pycheck/Makefile.am @@ -1,8 +1,23 @@ PYTHON_COMMANDS = \ - ../barrele \ ../coral -PYTHON_LIB_FILES = $(wildcard ../pybuild/*.py ../pybarrele/*.py ../pycoral/*.py) +PYTHON_LIB_FILES = $(wildcard ../pycoral/*.py) \ + ../pybuild/build_common.py \ + ../pybuild/build_constant.py \ + ../pybuild/build_doc.py \ + ../pybuild/build_release_info.py \ + ../pybuild/build_version.py \ + ../pybuild/coral_build.py \ + ../pybuild/coral_command.py \ + ../pybuild/coral.py \ + ../pybuild/__init__.py + +if ENABLE_BARRELE +PYTHON_COMMANDS += ../barrele +PYTHON_LIB_FILES += $(wildcard ../pybarrele/*.py) \ + ../pybuild/build_barrele.py +endif + PYTHON_FILES = $(PYTHON_LIB_FILES) $(PYTHON_COMMANDS) PYTHON_CHECKS = $(PYTHON_FILES:%=%.python_checked) diff --git a/pycoral/cmd_general.py b/pycoral/cmd_general.py index 60e11e5..622c97d 100644 --- a/pycoral/cmd_general.py +++ b/pycoral/cmd_general.py @@ -70,6 +70,9 @@ def check_argument_fpath(log, local_host, fpath): elif len(fpath) == 0: log.cl_error("empty file path") cmd_exit(log, 1) + if fpath[0] != '/': + cwd = os.getcwd() + fpath = cwd + "/" + fpath real_path = local_host.sh_real_path(log, fpath) if real_path is None: log.cl_error("failed to get the real path of [%s]", fpath) @@ -112,7 +115,7 @@ def init_env_noconfig(logdir, log_to_file, logdir_is_default, if not isinstance(log_to_file, bool): print("ERROR: invalid debug option [%s], should be a bool type" % - (log_to_file), file=sys.stderr) + str(log_to_file), file=sys.stderr) sys.exit(1) if log_to_file: @@ -787,9 +790,9 @@ def check_argument_str(log, name, value): return value -def lustre_release_name_is_valid(value): +def name_is_valid(value): """ - Check whether Lustre release string is valid. + Check a name is valid """ for char in value: if char.isalnum() or char in ["_", "@", ".", "-"]: @@ -800,6 +803,13 @@ def lustre_release_name_is_valid(value): return 0 +def lustre_release_name_is_valid(value): + """ + Check whether Lustre release string is valid. 
+ """ + return name_is_valid(value) + + def check_lustre_release_name(log, name, value): """ Check the argument is valid Lustre release name. If not, exit. @@ -816,13 +826,7 @@ def coral_release_name_is_valid(value): """ Check whether Coral release string is valid. """ - for char in value: - if char.isalnum() or char in ["_", "."]: - continue - return -1 - if value in (".", ".."): - return -1 - return 0 + return name_is_valid(value) def check_coral_release_name(log, name, value): diff --git a/pycoral/install_common.py b/pycoral/install_common.py index db04fa2..99e8d3d 100644 --- a/pycoral/install_common.py +++ b/pycoral/install_common.py @@ -952,7 +952,7 @@ def cic_install(self, log, parallelism=10): def yum_install_rpm_from_internet(log, host, rpms, tsinghua_mirror=False): """ - Check whether a RPM installed or not. If not, use yum to install + Install RPMs by downloading it from Internet. """ # pylint: disable=too-many-branches if tsinghua_mirror: @@ -1101,15 +1101,113 @@ def yum_replace_to_tsinghua(log, host): return 0 -def bootstrap_from_internet(log, host, rpms, pip_packages, pip_dir, +def ubuntu2204_apt_mirror_replace_to_tsinghua(log, host): + """ + Replace apt mirror. + """ + # See https://mirror.tuna.tsinghua.edu.cn/help/ubuntu/ for more information. + source_list = "/etc/apt/sources.list" + command = "grep tsinghua %s" % source_list + retval = host.sh_run(log, command) + if retval.cr_exit_status == 0: + return 0 + + if retval.cr_exit_status != 1 or retval.cr_stdout != "": + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + command = "echo '# Configured by Coral' > %s" % source_list + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + + lines = ["deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy main restricted universe multiverse", + "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-updates main restricted universe multiverse", + "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ jammy-backports main restricted universe multiverse", + "deb http://security.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse"] + for line in lines: + command = "sed -i '$a\%s' %s" % (line, source_list) + retval = host.sh_run(log, command) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 + + +def ubuntu2204_install_deb_from_internet(log, host, debs, tsinghua_mirror=False): + """ + Install debs by downloading it from Internet. 
+ """ + # pylint: disable=too-many-branches + if tsinghua_mirror: + ret = ubuntu2204_apt_mirror_replace_to_tsinghua(log, host) + if ret: + log.cl_error("failed to replace apt mirror to Tsinghua " + "University on host [%s]", host.sh_hostname) + return -1 + + if len(debs) == 0: + return 0 + + command = "apt install -y" + for deb in debs: + command += " " + deb + + retval = host.sh_run(log, command, timeout=None) + if retval.cr_exit_status: + log.cl_error("failed to run command [%s] on host [%s], " + "ret = [%d], stdout = [%s], stderr = [%s]", + command, + host.sh_hostname, + retval.cr_exit_status, + retval.cr_stdout, + retval.cr_stderr) + return -1 + return 0 + + +def bootstrap_from_internet(log, host, packages, pip_packages, pip_dir, tsinghua_mirror=False): """ Install the dependent RPMs and pip packages from Internet """ - ret = yum_install_rpm_from_internet(log, host, rpms, - tsinghua_mirror=tsinghua_mirror) + distro = host.sh_distro(log) + if distro is None: + log.cl_error("failed to get distro of host [%s]", + host.sh_hostname) + return -1 + + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + ret = yum_install_rpm_from_internet(log, host, packages, + tsinghua_mirror=tsinghua_mirror) + elif distro == ssh_host.DISTRO_UBUNTU2204: + ret = ubuntu2204_install_deb_from_internet(log, host, packages, + tsinghua_mirror=tsinghua_mirror) + else: + log.cl_error("unsupported distro [%s] of host [%s]", + distro, host.sh_hostname) + return -1 if ret: - log.cl_error("failed to install missing RPMs on host [%s]", + log.cl_error("failed to install missing packages on host [%s]", host.sh_hostname) return -1 @@ -1247,17 +1345,76 @@ def command_missing_packages_rhel7(): return missing_rpms, missing_pips +def command_missing_packages_ubuntu2204(): + """ + Add the missing debs and pip packages for RHEL7 + """ + # pylint: disable=unused-import,bad-option-value,import-outside-toplevel + # pylint: disable=unused-variable + missing_debs = [] + try: + import fire + except ImportError: + missing_debs.append("python3-fire") + + try: + import prettytable + except ImportError: + missing_debs.append("python3-prettytable") + + try: + import toml + except ImportError: + missing_debs.append("python3-toml") + + try: + import dateutil + except ImportError: + missing_debs.append("python3-dateutil") + + try: + import filelock + except ImportError: + missing_debs.append("python3-filelock") + + try: + import psutil + except ImportError: + missing_debs.append("python3-psutil") + + missing_pips = [] + return missing_debs, missing_pips + + def command_missing_packages(distro): """ - Add the missing RPMs and pip packages + Add the missing RPMs/debs and pip packages """ if distro == ssh_host.DISTRO_RHEL7: return command_missing_packages_rhel7() if distro == ssh_host.DISTRO_RHEL8: return command_missing_packages_rhel8() + if distro == ssh_host.DISTRO_UBUNTU2204: + return command_missing_packages_ubuntu2204() return None, None +def install_packages_from_internet(log, host, packages, tsinghua_mirror=False): + """ + Install packages from Internet. 
+ """ + distro = host.sh_distro(log) + if distro in (ssh_host.DISTRO_RHEL7, ssh_host.DISTRO_RHEL8): + return yum_install_rpm_from_internet(log, host, packages, + tsinghua_mirror=tsinghua_mirror) + if distro == ssh_host.DISTRO_UBUNTU2204: + return ubuntu2204_install_deb_from_internet(log, host, packages, + tsinghua_mirror=tsinghua_mirror) + log.cl_error("unsupported distro [%s] of host [%s]", + distro, host.sh_hostname) + return -1 + + def download_pip3_packages(log, host, pip_dir, pip_packages, tsinghua_mirror=False): """ @@ -1271,10 +1428,11 @@ def download_pip3_packages(log, host, pip_dir, pip_packages, message = "" log.cl_info("downloading pip3 packages %s to dir [%s] on host [%s]%s", pip_packages, pip_dir, host.sh_hostname, message) - ret = yum_install_rpm_from_internet(log, host, ["python3-pip"], - tsinghua_mirror=tsinghua_mirror) + + ret = install_packages_from_internet(log, host, ["python3-pip"], + tsinghua_mirror=tsinghua_mirror) if ret: - log.cl_error("failed to install [python3-pip] RPM") + log.cl_error("failed to install [python3-pip] package") return -1 command = ("mkdir -p %s && cd %s && pip3 download" % (pip_dir, pip_dir)) diff --git a/pycoral/lustre_version.py b/pycoral/lustre_version.py index 857fcb5..c89c59f 100644 --- a/pycoral/lustre_version.py +++ b/pycoral/lustre_version.py @@ -6,8 +6,10 @@ RPM_KERNEL = "kernel" RPM_KERNEL_FIRMWARE = "kernel-firmware" RPM_LUSTRE = "lustre" +RPM_LUSTRE_CLIENT = "lustre_client" RPM_IOKIT = "iokit" RPM_KMOD = "kmod" +RPM_KMOD_LUSTRE_CLENT = "kmod_lustre_client" RPM_OSD_LDISKFS = "osd_ldiskfs" RPM_OSD_LDISKFS_MOUNT = "osd_ldiskfs_mount" RPM_OSD_ZFS = "osd_zfs" @@ -32,6 +34,7 @@ # fail. LUSTRE_REQUIRED_RPM_TYPES = (LUSTRE_VERSION_DETECTION_RPM_TYPES + LUSTRE_TEST_RPM_TYPES) +LUSTRE_CLIENT_REQUIRED_RPM_TYPES = (RPM_LUSTRE_CLIENT, RPM_KMOD_LUSTRE_CLENT) class LustreVersion(): @@ -72,10 +75,10 @@ def __init__(self, name, rpm_patterns, priority): LUSTRE_VERSION_NAME_2_12 = "2.12" LUSTRE_VERSION_2_12 = LustreVersion(LUSTRE_VERSION_NAME_2_12, B2_12_PATTERNS, - 0) + 0 # Priority + ) LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_2_12] = LUSTRE_VERSION_2_12 - ES5_1_PATTERNS = { RPM_IOKIT: r"^(lustre-iokit-2\.12\.3.+\.rpm)$", RPM_KERNEL: r"^(kernel-3.+\.rpm)$", @@ -91,7 +94,8 @@ def __init__(self, name, rpm_patterns, priority): LUSTRE_VERSION_NAME_ES5_1 = "es5.1" LUSTRE_VERSION_ES5_1 = LustreVersion(LUSTRE_VERSION_NAME_ES5_1, ES5_1_PATTERNS, - 1) + 1 # Priority + ) LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_ES5_1] = LUSTRE_VERSION_ES5_1 @@ -110,11 +114,41 @@ def __init__(self, name, rpm_patterns, priority): LUSTRE_VERSION_NAME_ES5_2 = "es5.2" LUSTRE_VERSION_ES5_2 = LustreVersion(LUSTRE_VERSION_NAME_ES5_2, ES5_2_PATTERNS, - 1) + 1 # Priority + ) LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_ES5_2] = LUSTRE_VERSION_ES5_2 +# Since 2.14.0-ddn87 (include), the following patch is included and the +# format of proc entires has been changed dramatically. Thus, consider the +# versions between 2.14.0-ddn0 and 2.14.0-ddn86 as the ES6.0 and the +# 2.14.0-ddn87+ as ES6.1. 
+# +# LU-15642 obdclass: use consistent stats units +# ES6_0_PATTERNS = { + RPM_IOKIT: r"^(lustre-iokit-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_KERNEL: r"^(kernel-3.+\.rpm)$", + RPM_KMOD: r"^(kmod-lustre-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_LUSTRE: r"^(lustre-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_OSD_LDISKFS: r"^(kmod-lustre-osd-ldiskfs-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_OSD_LDISKFS_MOUNT: r"^(lustre-osd-ldiskfs-mount-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_OSD_ZFS: r"^(kmod-lustre-osd-zfs-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_OSD_ZFS_MOUNT: r"^(lustre-osd-zfs-mount-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_TESTS: r"^(lustre-tests-2.+\.rpm)$", + RPM_TESTS_KMOD: r"^(kmod-lustre-tests-2.+\.rpm)$", + RPM_LUSTRE_CLIENT: r"^(lustre-client-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", + RPM_KMOD_LUSTRE_CLENT: r"^(kmod-lustre-client-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$", +} +LUSTRE_VERSION_NAME_ES6_0 = "es6.0" +LUSTRE_VERSION_ES6_0 = LustreVersion(LUSTRE_VERSION_NAME_ES6_0, + ES6_0_PATTERNS, + 1 # Priority + ) +LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_ES6_0] = LUSTRE_VERSION_ES6_0 + + +ES6_1_PATTERNS = { RPM_IOKIT: r"^(lustre-iokit-2\.14\.[0].+\.rpm)$", RPM_KERNEL: r"^(kernel-3.+\.rpm)$", RPM_KMOD: r"^(kmod-lustre-2\.14\.[0]_ddn.+\.rpm)$", @@ -125,12 +159,15 @@ def __init__(self, name, rpm_patterns, priority): RPM_OSD_ZFS_MOUNT: r"^(lustre-osd-zfs-mount-2\.14\.[0].+\.rpm)$", RPM_TESTS: r"^(lustre-tests-2.+\.rpm)$", RPM_TESTS_KMOD: r"^(kmod-lustre-tests-2.+\.rpm)$", + RPM_LUSTRE_CLIENT: r"^(lustre-client-2\.14\.[0]_ddn.+\.rpm)$", + RPM_KMOD_LUSTRE_CLENT: r"^(kmod-lustre-client-2\.14\.[0]_ddn.+\.rpm)$", } -LUSTRE_VERSION_NAME_ES6_0 = "es6.0" -LUSTRE_VERSION_ES6_0 = LustreVersion(LUSTRE_VERSION_NAME_ES6_0, - ES6_0_PATTERNS, - 1) -LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_ES6_0] = LUSTRE_VERSION_ES6_0 +LUSTRE_VERSION_NAME_ES6_1 = "es6.1" +LUSTRE_VERSION_ES6_1 = LustreVersion(LUSTRE_VERSION_NAME_ES6_1, + ES6_1_PATTERNS, + 0 # Priority should be lower than ES6.0 + ) +LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_ES6_1] = LUSTRE_VERSION_ES6_1 B2_15_PATTERNS = { @@ -148,16 +185,23 @@ def __init__(self, name, rpm_patterns, priority): LUSTRE_VERSION_NAME_2_15 = "2.15" LUSTRE_VERSION_2_15 = LustreVersion(LUSTRE_VERSION_NAME_2_15, B2_15_PATTERNS, - 0) + 0 # Priority + ) LUSTRE_VERSION_DICT[LUSTRE_VERSION_NAME_2_15] = LUSTRE_VERSION_2_15 def match_lustre_version_from_rpms(log, rpm_fnames, skip_kernel=False, - skip_test=False): + skip_test=False, client=False): """ Match the Lustre version from RPM names """ # pylint: disable=too-many-locals,too-many-branches + # pylint: disable=too-many-statements + rpm_types = LUSTRE_REQUIRED_RPM_TYPES + definition_message = "server " + if client: + rpm_types = LUSTRE_CLIENT_REQUIRED_RPM_TYPES + definition_message = "client " # Key is version name, type is matched_rpm_type_dict matched_version_dict = {} for version in LUSTRE_VERSION_DICT.values(): @@ -166,15 +210,27 @@ def match_lustre_version_from_rpms(log, rpm_fnames, skip_kernel=False, # Key is RPM fname, value is RPM type used_rpm_fname_dict = {} version_matched = True - for rpm_type in LUSTRE_REQUIRED_RPM_TYPES: + + if client: + # Check whether client support has been added. 
+ supported = True + for rpm_type in rpm_types: + if rpm_type not in version.lv_rpm_pattern_dict: + supported = False + break + if not supported: + continue + + for rpm_type in rpm_types: if rpm_type == RPM_KERNEL and skip_kernel: continue if rpm_type in LUSTRE_TEST_RPM_TYPES and skip_test: continue if rpm_type not in version.lv_rpm_pattern_dict: - log.cl_error("Lustre version [%s] does not have required RPM" + log.cl_error("Lustre %sversion [%s] does not have required RPM " "pattern for type [%s]", - version.lv_name, rpm_type) + definition_message, version.lv_name, + rpm_type) return None, None pattern = version.lv_rpm_pattern_dict[rpm_type] @@ -187,36 +243,39 @@ def match_lustre_version_from_rpms(log, rpm_fnames, skip_kernel=False, matched = True if rpm_type in matched_rpm_type_dict: log.cl_error("both RPM [%s] and [%s] can be matched to " - "type [%s] of Lustre version [%s]", + "type [%s] of Lustre %sversion [%s]", rpm_fname, matched_rpm_type_dict[rpm_type], rpm_type, + definition_message, version.lv_name) return None, None if rpm_fname in used_rpm_fname_dict: log.cl_error("RPM [%s] can be matched to both type [%s] " - "and [%s] of Lustre version [%s]", + "and [%s] of Lustre %sversion [%s]", rpm_fname, used_rpm_fname_dict[rpm_fname], rpm_type, + definition_message, version.lv_name) return None, None used_rpm_fname_dict[rpm_fname] = rpm_type matched_rpm_type_dict[rpm_type] = rpm_fname if not matched: - log.cl_debug("not able to match to Lustre version " - "[%s] because of missing RPM type [%s]", - version.lv_name, rpm_type) + log.cl_debug("unmatched Lustre %sversion " + "[%s] due to unmatched RPM type [%s]", + definition_message, version.lv_name, + rpm_type) version_matched = False break if version_matched: matched_version_dict[version.lv_name] = matched_rpm_type_dict if len(matched_version_dict) == 0: - log.cl_debug("no Lustre version is matched by RPMs %s", - rpm_fnames) + log.cl_debug("no Lustre %sversion is matched by RPMs %s", + definition_message, rpm_fnames) return None, None highest_priority = 0 @@ -239,7 +298,37 @@ def match_lustre_version_from_rpms(log, rpm_fnames, skip_kernel=False, version_string += ", " + version.lv_name if len(matched_versions) > 1: - log.cl_error("multiple Lustre versions [%s] are matched by RPMs", - version_string) + log.cl_error("multiple Lustre %sversions [%s] are matched by RPMs", + definition_message, version_string) return None, None return matched_versions[0], matched_rpm_type_dicts[0] + +# See the commend of ES6_0_PATTERNS for the difference between es6.0 +# and es6.1. 
+DEB_ES6_0_PATTERN = r"^(2\.14\.[0]-ddn([0-9]|[1-7][0-9]|8[0-6])\D.+)$"
+DEB_ES6_1_PATTERN = r"^(2\.14\.[0]-ddn.+)$"
+DEB_2_15_PATTERN = r"^(2\.15\..+)$"
+
+def match_lustre_version_from_deb(log, deb_version):
+    """
+    The version of the deb package usually comes from the command
+    apt list --installed | grep lustre-client-modules
+    or
+    apt show lustre-client-modules-5.15.0-69-generic | grep Version
+    Example:
+    2.15.2-70-gb74560d-1
+    """
+    match = re.search(DEB_ES6_0_PATTERN, deb_version)
+    if match is not None:
+        return LUSTRE_VERSION_ES6_0
+
+    match = re.search(DEB_ES6_1_PATTERN, deb_version)
+    if match is not None:
+        return LUSTRE_VERSION_ES6_1
+
+    match = re.search(DEB_2_15_PATTERN, deb_version)
+    if match is not None:
+        return LUSTRE_VERSION_2_15
+
+    log.cl_error("unsupported Lustre version [%s]", deb_version)
+    return None
diff --git a/pycoral/ssh_host.py b/pycoral/ssh_host.py
index 9abb28d..796bba6 100644
--- a/pycoral/ssh_host.py
+++ b/pycoral/ssh_host.py
@@ -30,6 +30,8 @@ DISTRO_RHEL7 = "rhel7"
 
 # OS distribution RHEL8/CentOS8
 DISTRO_RHEL8 = "rhel8"
+# OS distribution Ubuntu 22.04
+DISTRO_UBUNTU2204 = "ubuntu2204"
 # The shortest time that a reboot could finish. It is used to check whether
 # a host has actually rebooted or not.
 SHORTEST_TIME_REBOOT = 10
@@ -386,6 +388,8 @@ def sh_distro(self, log):
             log.cl_error("unsupported version [%s] of [%s] on host [%s]",
                          version, "fc", self.sh_hostname)
             return None
+        if name == "Ubuntu":
+            return DISTRO_UBUNTU2204
         log.cl_error("unsupported version [%s] of [%s] on host [%s]",
                      version, name, self.sh_hostname)
         return None
@@ -3509,7 +3513,7 @@ def sh_check_dir_content(self, log, directory, contents,
 
         for fname in contents:
             if fname not in existing_fnames:
-                log.cl_error("can not find necessary content [%s] under "
+                log.cl_error("failed to find necessary content [%s] under "
                              "directory [%s] of host [%s]",
                              fname, directory, self.sh_hostname)
                 return -1
diff --git a/pycoral/utils.py b/pycoral/utils.py
index 6d5cc7c..aae1109 100644
--- a/pycoral/utils.py
+++ b/pycoral/utils.py
@@ -363,6 +363,7 @@ def thread_start(target, args):
     """
     Wrap the target function and start a thread to run it
    """
+    # pylint: disable=deprecated-method
     run_thread = threading.Thread(target=target,
                                   args=args)
     run_thread.setDaemon(True)
@@ -541,6 +542,7 @@ def lr_release(self, number, info_string):
         """
         Release resource
         """
+        # pylint: disable=deprecated-method
         self.lr_condition.acquire()
         self.lr_number += number
         self.lr_condition.notifyAll()
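Note on the ES6.0/ES6.1 split in pycoral/lustre_version.py above: it hinges on the alternation ([0-9]|[1-7][0-9]|8[0-6]), which accepts only ddn0 through ddn86. Anything newer is rejected by the ES6.0 patterns and can only be claimed by the broader ES6.1 patterns; because the ES6.1 patterns also match ddn0..ddn86 names, ES6.1 is given the lower priority and only wins when ES6.0 does not match. The standalone snippet below is illustrative only (the package names and version strings are made up) and simply demonstrates that boundary.

    # Standalone illustration of the ddn0..ddn86 boundary; not part of the patch.
    import re

    ES6_0_LUSTRE_RPM = r"^(lustre-2\.14\.[0]_ddn([0-9]|[1-7][0-9]|8[0-6])\D.+\.rpm)$"
    DEB_ES6_0 = r"^(2\.14\.[0]-ddn([0-9]|[1-7][0-9]|8[0-6])\D.+)$"
    DEB_ES6_1 = r"^(2\.14\.[0]-ddn.+)$"

    # ddn86 is accepted by the ES6.0 pattern, ddn87 is not.
    assert re.search(ES6_0_LUSTRE_RPM, "lustre-2.14.0_ddn86-1.el8.x86_64.rpm")
    assert not re.search(ES6_0_LUSTRE_RPM, "lustre-2.14.0_ddn87-1.el8.x86_64.rpm")

    # The same boundary applies to the deb version strings handled by
    # match_lustre_version_from_deb().
    assert re.search(DEB_ES6_0, "2.14.0-ddn86-1")
    assert not re.search(DEB_ES6_0, "2.14.0-ddn87-1")
    assert re.search(DEB_ES6_1, "2.14.0-ddn87-1")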
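Note on command_missing_packages_ubuntu2204() in pycoral/install_common.py: it probes one Python module per try/except block and maps each missing module to an Ubuntu deb. A compact, table-driven equivalent is sketched below purely as an illustration; it is not part of the patch, and the helper name missing_debs_ubuntu2204 is made up. The module-to-deb mapping is the one the patch adds.

    # Illustrative alternative to the repeated try/except probes; not part of
    # the patch.
    import importlib.util

    _MODULE_TO_DEB = {
        "fire": "python3-fire",
        "prettytable": "python3-prettytable",
        "toml": "python3-toml",
        "dateutil": "python3-dateutil",
        "filelock": "python3-filelock",
        "psutil": "python3-psutil",
    }

    def missing_debs_ubuntu2204():
        """
        Return the list of debs whose Python modules cannot be imported.
        """
        missing = []
        for module, deb in _MODULE_TO_DEB.items():
            # find_spec() returns None when the module is not installed.
            if importlib.util.find_spec(module) is None:
                missing.append(deb)
        return missing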