diff --git a/.gitignore b/.gitignore index c2336b48..910b9467 100644 --- a/.gitignore +++ b/.gitignore @@ -3,10 +3,20 @@ .*.swp conf/systemd/deploy ~ + .env -.DS_Store -*.pyc -data +docker-compose.yml +docker-compose.override.yml +userConfig +bin/docker-netsage-downloads.sh +cron.d/docker-netsage-downloads.cron +bin/restart-logstash-container.sh +cron.d/restart-logstash-container.cron +conf-pmacct/*_1* +conf-pmacct/*_2* +conf-logstash/support/sensor_groups.json +conf-logstash/support/sensor_types.json + # Dependencies /website/node_modules @@ -29,10 +39,16 @@ yarn-debug.log* yarn-error.log* build +*.pyc .vscode .history .idea replayData -userConfig -docker-compose.override.yml node_modules + +Makefile +grnoc-netsage-pipeline-2.0.0.tar.gz +blib +blib/* +pm_to_blib + diff --git a/CHANGES.md b/CHANGES.md index 708a664b..8e13aaf3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,99 @@ +------------------------------------------------------ +## GRNOC NetSage Pipeline 2.0.0 --, 2022 +NEW PACKAGE NAME; USING PMACCT INSTEAD OF NFDUMP AND IMPORTER +Docker will pull 7Jun2022 images we made for nfacctd and sfacctd images from Github Container Registry +------------------------------------------------------ +Features: + * Renamed package to grnoc-netsage-pipeline + * Got rid of old importer references, requirements, files, etc. + * Used the %post section in the spec file to check to see if pmacct is installed. + * Added systemd unit files for bare-metal sfacctd and nfacctd (default will be 1 sflow, 1 netflow source, for docker installs) + * Revised docker-compose.yml file, etc. to work with pmacct containers. + * Revised parts of the .env file, including adding variables for number of sflow and netflow sensors. + * Added example/default sfacct and nfacct config files in conf-pmacct/ (.ORIG files to be copied) + * Added setup-pmacct-compose.sh script which the user runs to create pmacct config files and docker-compose.yml, based on + docker-compose.example.yml and .env files. 
(pmacct configs cannot use env vars directly, so script fills them in.) + * The number of sflow or netflow sensors can be 0. In this case, the setup script does not include any of the unneeded services + in the docker-compose.yml file. + * Added 05-translate-pmacct.conf logstash config to translate pmacct fields to ones the pipeline uses. + * Revised 40-aggregation.conf to deal with pmacct; there are separate sections for sflow and netflow. + * For netflow, in 40-aggregation.conf, the start time of incoming flows will be adjusted if duration is greater than the active timeout + (ie, for "updates" to long lasting flows) + * The default inactive timeout for logstash aggregation has been set to 6 minutes (to go with 5 minute sflow aggregation by sfacctd) + * Added 41-thresholds.conf - applies size threshold of 10 MB (will drop smaller flows) and duration threshold of 1.0 sec (will set + duration and rates to 0 if shorter) after aggregation is finished. + * Added new field: @sampling_corrected = yes/no. If sampling rate correction has been applied by pmacct or logstash, value will be yes. + * Sampling rate corrections will be done in logstash when requested (ie, flag is set in the env file) but + ONLY IF a correction has not yet been applied (by pmacct). + * Sensor list for sampling rate corrections in the env file is now semicolon-delimited. + * Allowed "ALL" when specifying sensors for sampling rate corrections. + * When a sampling rate correction is applied by logstash, add a tag with the rate. + * Added an option to skip de-identification. Set it in .env. + * 0.0.0.x and 0.0.0.0 flows are tagged and dropped by default. (Unadvertised option to keep them is available in the env file.) + * Added setup-cron.sh script which copies .ORIG .cron and .sh files and writes in username and the location of the git checkout. + The user must copy cron files to /etc/cron.d/. 
+ * One cron file runs a script to download all files (caida, maxmind, etc) from scienceregistry.grnoc once/wk. + * Another cron file restarts the logstash container each day. + * Changed sensor_groups.json and sensor_types.json in the git checkout to .example files. From now on, our particular files/regexes + will be downloaded from scienceregistry.grnoc by a cron job (different cron jobs for docker and bare-metal installations). + * Docker-compose.yml ensures logstash runs with uid 1000, while setup-cron.sh sets the owner of logstash-temp/ to 1000, + so logstash can write and read aggregation map files when it stops and starts. (User 1000 could be anyone on the host; name doesn't matter.) + * AARNET privatization is no longer needed, so added .disabled to 80-privatize-org.conf, and made it into a generalized version as an + example. Moved lines making the AARNET org name consistent to 95-cleanup.conf. + + * Documentation updates + * Dependabot automatic remediations of vulnerabilities (for docusaurus) + +Bugs: + * Fixed ifindex filtering to be able to filter only specified sensors and keep all flows for other sensors; allow "ALL" for sensor names or interfaces. + +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.12 -- Jan 4, 2022 +------------------------------------------------------ +Usage note: With this release, we will move to using logstash 7.16.2 to fix a Log4j vulnerability. +Bare-metal installations will need to upgrade logstash manually. 
+(Dec 14,2021- original 1.2.12 release with logstash 7.16.1 in the pipeline_logstash Dockerfile) + +Features: + * In the dockerfile, increased the version of logstash on which the pipeline_logstash container is based + * Added LEARN to the regexes in the sensor groups and types support files + +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.11 -- Sept 3, 2021 +------------------------------------------------------ +Features: + * Made filtering by ifindex (optionally) sensor-specific + * Added tags to flows with src and dst IPs = 0.0.0.x (user can set outputs filter to do something based on tags) + * When duration <= 0.002 sec, set duration, bits/s, and packets/s to 0 as rates are inaccurate for small durations + * Added NORDUnet* and tacc_netflows to sensor group and type regexes + * Added onenet-members-list.rb to the members-list files to download + * Increased version numbers for some website-related packages in response to Dependabot + * Documentation improvements + +Bugs: + * Fixed es_doc_id. The hash had been missing meta.id due to a bug. + * At the beginning of the pipeline, set missing IPs to 0.0.0.0, missing ifindexes to -10, missing durations to 0. + +------------------------------------------------------ +## GRNOC NetSage Deidentfier 1.2.10 -- May 10 2021 +------------------------------------------------------ +Usage note: With this release, we will move to using nfdump v1.6.23. +This includes a fix for IPs not being parsed in MPLS flows, as well as the fix for missing ASNs from April. + * docker-compose.override_example.yml has been updated to refer to this version. + +Features: + * 15-sensor-specific-changes.conf can now be used to drop all flows from a certain sensor except those from listed ifindexes. 
+ * 0.0.0.0 flows are no longer dropped + * Will now tag flows with the pipeline version number (@pipeline_ver) + * Added a script (to util/) that can be used to process as-org files from CAIDA into the ASN lookup files that we need. + * Documentation updates + ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.9 -- Apr 7 2021 ------------------------------------------------------ -Usage note: With this release, we are also moving to using a version of nfdump built from github master which includes commits through Feb 20, 2021. This includes a fix for incorrect ASNs being added to flows when the ASN data is actually missing. - * To go along with this, docker-compose.override_example.yml refers to a "nightly" tag of nfdump (this is not updated nightly!) +Usage note: With this release, we are also moving to using a version of nfdump built from github master which includes +commits through Feb 20, 2021. This includes a fix for incorrect ASNs being added to flows when the ASN data is actually missing. + * To go along with this, docker-compose.override_example.yml refers to a "nightly" tag of nfdump (this is not actually updated nightly!) Features: * The installed version of 15-sensor-specific-changes.conf now accomodates environment variables for @@ -17,13 +108,14 @@ Bugs * Flow-filter changes have been made to accomodate changes to simp * Flows with IPs of 0.0.0.0 are dropped * For Docker installs, rabbit host name will be fixed - * Docusaurus and some packages flagged by depndabot were upgraded + * Docusaurus and some packages flagged by dependabot were upgraded ------------------------------------------------------ ## GRNOC NetSage Deidentfier 1.2.8 -- Jan 28 2021 ------------------------------------------------------ Features: - * Added 15-sensor-specific-changes.conf with multiplication by mirroring-sampling rate for a pacificwave sensor and changing of the sensor name for NEAAR flows using a certain ifindex. 
+ * Added 15-sensor-specific-changes.conf with multiplication by mirroring-sampling rate for a pacificwave sensor and changing of + the sensor name for NEAAR flows using a certain ifindex. * Started saving ifindexes to ES (at least for now) * Added consideration of continents to possibly get a country_scope value when a country is missing. * Stopped saving old 'projects' array field to ES diff --git a/MANIFEST b/MANIFEST index 05510a4e..8cad8bdf 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,77 +1,45 @@ -bin/netsage-flow-filter-daemon -bin/netsage-netflow-importer-daemon -bin/restart-logstash.sh +grnoc-netsage-pipeline.spec CHANGES.md +bin/restart-logstash-service.sh +conf-pmacct/sfacctd.conf.ORIG +conf-pmacct/nfacctd.conf.ORIG +conf-pmacct/sfacctd-pretag.map.ORIG +conf-pmacct/nfacctd-pretag.map.ORIG conf-logstash/01-input-jsonfile.conf.disabled conf-logstash/01-input-multiline-json-file.conf.disabled conf-logstash/01-input-rabbit.conf +conf-logstash/05-translate-pmacct.conf conf-logstash/10-preliminaries.conf conf-logstash/15-sensor-specific-changes.conf conf-logstash/20-add-id.conf conf-logstash/40-aggregation.conf +conf-logstash/41-thresholds.conf conf-logstash/45-geoip-tagging.conf conf-logstash/50-asn.conf conf-logstash/53-caida-org.conf conf-logstash/55-member-orgs.conf conf-logstash/60-scireg-tagging-fakegeoip.conf conf-logstash/70-deidentify.conf -conf-logstash/80-privatize-org.conf +conf-logstash/80-privatize-org.conf.disabled conf-logstash/88-preferred-location-org.conf conf-logstash/90-additional-fields.conf conf-logstash/95-cleanup.conf conf-logstash/98-post-process.conf +conf-logstash/99-output-rabbit.conf +conf-logstash/99-output-file.conf.disabled conf-logstash/99-output-elastic.conf.disabled -conf-logstash/99-output-jsonlog.conf.disabled -conf-logstash/99-output-multiline-json.conf.disabled conf-logstash/99-output-stdout.conf.disabled -conf-logstash/99-output-rabbit.conf conf-logstash/ruby/anonymize_ipv6.rb conf-logstash/ruby/domestic.rb 
-conf-logstash/support/sensor_groups.json -conf-logstash/support/sensor_types.json +conf-logstash/support/sensor_groups.json.example +conf-logstash/support/sensor_types.json.example conf-logstash/support/networkA-members-list.rb.example -conf/logging-debug.conf -conf/logging.conf -conf/netsage_flow_filter.xml -conf/netsage_netflow_importer.xml -conf/netsage_shared.xml -cron.d/netsage-maxmind-update.cron -cron.d/netsage-caida-update.cron -cron.d/netsage-scireg-update.cron -cron.d/netsage-logstash-restart.cron -cron.d/netsage-memberlists-update.cron -grnoc-netsage-deidentifier.spec -init.d/netsage-flow-filter-daemon -init.d/netsage-netflow-importer-daemon -systemd/netsage-flow-filter.service -systemd/netsage-netflow-importer.service +cron.d/baremetal-netsage-downloads.cron +cron.d/restart-logstash-service.cron systemd/logstash.service -lib/GRNOC/NetSage/Deidentifier.pm -lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm -lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm -lib/GRNOC/NetSage/Deidentifier/Pipeline.pm -lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm +systemd/sfacctd.service +systemd/nfacctd.service Makefile.PL MANIFEST README.md website/docs/deploy/bare_metal_install.md -reporting/flow-mongo-stats.pl -reporting/getdata-cron.pl -reporting/getdata.pl -reporting/queues.txt -reporting/queuestats.pl -test-data/data1.json -test-data/data2.json -test-data/scireg2.json -util/export-tsds -util/generate_data.pl -util/header.pl -util/hist-export.pl -util/json2lines -util/json_to_rabbit.pl -util/lines2json -util/netsage-raw-data-importer -util/netsage_raw_data_importer.xml.example -util/nfcache -util/RawDataImporter.pm -util/tstat-flow-copier diff --git a/Makefile.PL b/Makefile.PL index ef129cf6..14ed353b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -6,29 +6,31 @@ use ExtUtils::MakeMaker; sub MY::postamble { <<'END'; } rpm: dist - rpmbuild -ta grnoc-netsage-deidentifier-$(VERSION).tar.gz + rpmbuild -ta grnoc-netsage-pipeline-$(VERSION).tar.gz END -sub MY::test - { - q( 
-TEST_VERBOSE=1 +#sub MY::test +# { +# q( +#TEST_VERBOSE=1 +# +#test : pure_all +# $(FULLPERL) t/TEST $(TEST_VERBOSE) +# +#test_jenkins : pure_all +# $(FULLPERL) t/TEST $(TEST_VERBOSE) --formatter=TAP::Formatter::Console +# ); +#} -test : pure_all - $(FULLPERL) t/TEST $(TEST_VERBOSE) - -test_jenkins : pure_all - $(FULLPERL) t/TEST $(TEST_VERBOSE) --formatter=TAP::Formatter::Console - ); -} +# VERSION_FROM => 'lib/GRNOC/NetSage/Deidentifier.pm', WriteMakefile( - NAME => 'grnoc-netsage-deidentifier', + NAME => 'grnoc-netsage-pipeline', AUTHOR => 'GRNOC Software Engineering ', - VERSION_FROM => 'lib/GRNOC/NetSage/Deidentifier.pm', + VERSION => '2.0.0', PL_FILES => {}, PREREQ_PM => { }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, - clean => { FILES => 'grnoc-netsage-deidentifier-*' }, + clean => { FILES => 'grnoc-netsage-pipeline-*' }, ); diff --git a/README.md b/README.md index c2775c40..2fea9971 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ [![Build Status](https://travis-ci.com/netsage-project/netsage-pipeline.svg?branch=master)](https://travis-ci.com/netsage-project/netsage-pipeline) -The Netsage Flow Processing Pipeline includes several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +The Netsage Flow Processing Pipeline includes several components for processing network flow data, including collection, deidentification, metadata tagging, flow stitching, etc. -Detailed documentation is available [here](https://netsage-project.github.io/netsage-pipeline/) +Detailed documentation is available [here]. 
(https://netsage-project.github.io/netsage-pipeline/) diff --git a/bin/docker-netsage-downloads.sh.ORIG b/bin/docker-netsage-downloads.sh.ORIG new file mode 100755 index 00000000..98cb2e98 --- /dev/null +++ b/bin/docker-netsage-downloads.sh.ORIG @@ -0,0 +1,42 @@ +#!/bin/bash + +# Download possibly-updated files required by the Netsage Pipeline +# Use touch to change the file time to the time of download + +# -for docker installations - +# DOWNLOAD_PATH="/PATH-TO-GIT-CHECKOUT/logstash-downloads" +# SUPPORT_PATH="/PATH-TO-GIT-CHECKOUT/conf-logstash/support" + +# -for bare metal installations - run with sudo and use +# DOWNLOAD_PATH="/var/lib/grnoc/netsage" +# SUPPORT_PATH="/etc/logstash/conf.d/support" + +DOWNLOAD_PATH="-PATH-TO-GIT-CHECKOUT-/logstash-downloads" +SUPPORT_PATH="-PATH-TO-GIT-CHECKOUT-/conf-logstash/support" + +# MAXMIND ASN +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O $DOWNLOAD_PATH/GeoLite2-ASN.mmdb && touch $DOWNLOAD_PATH/GeoLite2-ASN.mmdb + +# MAXMIND CITY +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O $DOWNLOAD_PATH/GeoLite2-City.mmdb && touch $DOWNLOAD_PATH/GeoLite2-City.mmdb + +# CAIDA file +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O $DOWNLOAD_PATH/CAIDA-org-lookup.csv && touch $DOWNLOAD_PATH/CAIDA-org-lookup.csv + +# SCIENCE REGISTRY +/usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O $DOWNLOAD_PATH/scireg.mmdb && touch $DOWNLOAD_PATH/scireg.mmdb + +# FRGP MEMBER LIST +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O $SUPPORT_PATH/FRGP-members-list.rb && touch $SUPPORT_PATH/FRGP-members-list.rb + +# ILIGHT MEMBER LIST +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O $SUPPORT_PATH/ilight-members-list.rb && touch $SUPPORT_PATH/ilight-members-list.rb + +# ONENET MEMBER LIST +/usr/bin/wget 
https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O $SUPPORT_PATH/onenet-members-list.rb && touch $SUPPORT_PATH/onenet-members-list.rb + +# SENSOR_GROUPS +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_groups.json -q -O $SUPPORT_PATH/sensor_groups.json && touch $SUPPORT_PATH/sensor_groups.json + +# SENSOR_TYPES +/usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_types.json -q -O $SUPPORT_PATH/sensor_types.json && touch $SUPPORT_PATH/sensor_types.json diff --git a/bin/netsage-flow-filter-daemon b/bin/netsage-flow-filter-daemon deleted file mode 100755 index bb3a2624..00000000 --- a/bin/netsage-flow-filter-daemon +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::FlowFilter; -use GRNOC::NetSage::Deidentifier::WorkerManager; - -use Getopt::Long; -use Data::Dumper; - -### constants ### -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml'; -use constant DEFAULT_SHARED_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_shared.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $shared_config = DEFAULT_SHARED_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $nofork; -my $help; - -# TODO: change jsonfile to flow data directory -GetOptions( 'config=s' => \$config, - 'sharedconfig=s' => \$shared_config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'help|h|?' => \$help ); - -# did they ask for help? 
-usage() if $help; - -# start/daemonize filter -my $flow_importer = GRNOC::NetSage::Deidentifier::FlowFilter->new( config_file => $config, - shared_config_file => $shared_config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_flow_filter' ); - -my $worker = GRNOC::NetSage::Deidentifier::WorkerManager->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_flow_filter', - worker => $flow_importer ); - - -$worker->start(); -print (" ** Check ps or /var/log/messages to be sure the processes have started successfully. **\n"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--logging ] [--flowpath ]\n"; - - exit( 1 ); -} diff --git a/bin/netsage-netflow-importer-daemon b/bin/netsage-netflow-importer-daemon deleted file mode 100755 index a07925f9..00000000 --- a/bin/netsage-netflow-importer-daemon +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::NetflowImporter; -use GRNOC::NetSage::Deidentifier::WorkerManager; - -use Getopt::Long; -use Data::Dumper; - -### constants ### -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml'; -use constant DEFAULT_SHARED_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_shared.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $shared_config = DEFAULT_SHARED_CONFIG_FILE; -my $nofork; -my $flowpath; -my $cachefile; -my $help; - -GetOptions( 'config=s' => \$config, - 'sharedconfig=s' => \$shared_config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'flowpath=s' => \$flowpath, - 'cachefile=s' => \$cachefile, - 'help|h|?' => \$help ); - -# did they ask for help? 
-usage() if $help; - -# start/daemonize importer -my $flow_importer = GRNOC::NetSage::Deidentifier::NetflowImporter->new( config_file => $config, - shared_config_file => $shared_config, - logging_file => $logging, - daemonize => !$nofork, - cache_file => $cachefile, - process_name => 'netsage_netflow_importer', - flow_path => $flowpath ); - -my $worker = GRNOC::NetSage::Deidentifier::WorkerManager->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - process_name => 'netsage_netflow_importer', - worker => $flow_importer ); - - -$worker->start("no_input_queue"); -print (" ** Check ps or /var/log/messages to be sure the processes have started successfully. **\n"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--sharedconfig ] [--logging ] [--flowpath ]\n"; - - exit( 1 ); -} diff --git a/bin/restart-logstash-container.sh.ORIG b/bin/restart-logstash-container.sh.ORIG new file mode 100755 index 00000000..c7ac7ca4 --- /dev/null +++ b/bin/restart-logstash-container.sh.ORIG @@ -0,0 +1,35 @@ +#!/bin/bash + +# restart logstash container only if it's already running + +cd -PATH-TO-GIT-CHECKOUT- +date +echo " " + +nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` +if [[ $nlogstash -eq 1 ]] +then + docker-compose stop logstash + echo "Contents of logstash-temp/ after stopping:" + ls -l logstash-temp + echo " " + + nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` + if [[ $nlogstash -eq 0 ]] + then + docker-compose start logstash + sleep 30 # give it plenty of time + echo "Contents of logstash-temp/ after starting:" + ls -l logstash-temp + + nlogstash=`docker-compose ps | grep logstash | grep " Up " | wc -l` + if [[ $nlogstash -eq 0 ]] + then + echo " " + echo "Logstash restart failed?! Check on it! " + fi + fi +else + echo "Logstash is not running so no restart." 
+fi + diff --git a/bin/restart-logstash.sh b/bin/restart-logstash-service.sh old mode 100644 new mode 100755 similarity index 100% rename from bin/restart-logstash.sh rename to bin/restart-logstash-service.sh diff --git a/compose/importer/Dockerfile b/compose/importer/Dockerfile deleted file mode 100644 index e2ffe463..00000000 --- a/compose/importer/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM centos:7 - -## Stage 1 Build the PRMs -## Setup baseline -RUN \ - yum -y update && \ - yum install -y epel-release && \ - yum install -y rpm-build perl-ExtUtils-MakeMaker make - -COPY . /root/code -WORKDIR /root/code - -RUN mkdir rpmbuild && cd rpmbuild && \ - mkdir BUILD BUILDROOT RPMS SOURCES SPECS SRPMS TMP && \ - cd /root/code/ && perl Makefile.PL - - -#RUN chown -R coder /home/coder -RUN make rpm - -## Stage 2 -FROM centos:7 - -COPY --from=0 /root/rpmbuild/RPMS/noarch/*.rpm /tmp - -COPY compose/importer/grnoc7.repo /etc/yum.repos.d/grnoc7.repo -COPY compose/importer/docker_init.sh /tmp/ -COPY compose/importer/run.sh /tmp/ -COPY compose/importer/netsage_shared.xml /etc/grnoc/netsage/deidentifier/ -COPY compose/importer/logging.conf /etc/grnoc/netsage/deidentifier/ - -## Setup baseline -RUN \ - yum -y update && \ - yum install -y dnf epel-release && \ - yum install -y python-pip nfdump wget && \ - dnf install -y /tmp/*.rpm && \ - pip install --upgrade pip pika && \ - yum clean all && \ - rm -rf /var/cache/yum - -RUN mkdir /data; chown 777 /data - -## Exposed but likely not needed -VOLUME /var/cache/netsage/ -VOLUME /etc/grnoc/netsage/deidentifier/ -VOLUME /var/lib/grnoc/netsage/ - -#Data volume -VOLUME /data -## Config exposed -VOLUME /etc/grnoc/netsage/ - -CMD /tmp/run.sh diff --git a/compose/importer/docker_init.sh b/compose/importer/docker_init.sh deleted file mode 100755 index 599938ce..00000000 --- a/compose/importer/docker_init.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -#DATA_DIR=/var/lib/grnoc/netsage/ -DATA_DIR=/data/cache/ 
-LOGSTASH_DIR=/usr/share/logstash/pipeline/support -mkdir -p $DATA_DIR && echo "Cache directory ${DATA_DIR} created" || echo "cache dir ${DATA_DIR} already exists" - -FILES="GeoLite2-ASN scireg GeoLite2-City" -CAIDA_FILES="CAIDA-org-lookup" -RUBY_DATA="FRGP-members-list ilight-members-list" - -function downloadFiles() { - ext=$1 - shift 1 - ## Download all files to temporary destination - for f in $@; do - wget https://scienceregistry.grnoc.iu.edu/exported/${f}.${ext} --no-use-server-timestamps -q -O ${DATA_DIR}/$f.tmp - done - - ## Rename the temporary files to replace the production ones. - for f in $@; do - mv ${DATA_DIR}/$f.tmp ${DATA_DIR}/${f}.${ext} - done - -} - -echo "Download ScienceRegistry and maxmind" -downloadFiles mmdb $FILES -echo "Download Caida Files" -downloadFiles csv $CAIDA_FILES -echo "Download Ruby files" -DATA_DIR=$LOGSTASH_DIR -downloadFiles rb $RUBY_DATA diff --git a/compose/importer/grnoc7.repo b/compose/importer/grnoc7.repo deleted file mode 100644 index 082a3671..00000000 --- a/compose/importer/grnoc7.repo +++ /dev/null @@ -1,6 +0,0 @@ -[grnoc7] -name=GlobalNOC Public el7 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 diff --git a/compose/importer/logging.conf b/compose/importer/logging.conf deleted file mode 100644 index 79c48b75..00000000 --- a/compose/importer/logging.conf +++ /dev/null @@ -1,4 +0,0 @@ -log4j.rootLogger=INFO, CONSOLE -log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender -log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout -log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p %c %x - %m%n diff --git a/compose/importer/netsage_shared.xml b/compose/importer/netsage_shared.xml deleted file mode 100644 index fd6117ee..00000000 --- a/compose/importer/netsage_shared.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - /data/input_data/netflow - $netflowSensorName - netflow - - - - 
/data/input_data/sflow - $sflowSensorName - sflow - - - - - - - 1 - 3 - - - - - rabbit - 5672 - guest - guest - 0 - 100 - / - 1 - - - rabbit - 5672 - guest - guest - 0 - 100 - / - 1 - - - diff --git a/compose/importer/run.sh b/compose/importer/run.sh deleted file mode 100755 index aba024b9..00000000 --- a/compose/importer/run.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -/tmp/docker_init.sh - -netsage-netflow-importer-daemon --nofork --config /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml \ No newline at end of file diff --git a/compose/logstash.repo b/compose/logstash.repo deleted file mode 100644 index db5793b0..00000000 --- a/compose/logstash.repo +++ /dev/null @@ -1,8 +0,0 @@ -[elastic-7.x] -name=Elastic repository for 7.x packages -baseurl=https://artifacts.elastic.co/packages/7.x/yum -gpgcheck=1 -gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch -enabled=1 -autorefresh=1 -type=rpm-md \ No newline at end of file diff --git a/compose/logstash/Dockerfile b/compose/logstash/Dockerfile deleted file mode 100644 index 1d935220..00000000 --- a/compose/logstash/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM docker.elastic.co/logstash/logstash:7.10.1 - -#Create symlink so can use paths from production with logstash docker defaults -USER root -RUN mkdir -p /etc/logstash && \ - ln -s /usr/share/logstash/pipeline /etc/logstash/conf.d - -COPY --chown=logstash:root compose/logstash/pipelines.yml /usr/share/logstash/config/ - -USER logstash - -VOLUME /var/cache/netsage -VOLUME /var/lib/grnoc/netsage/ -VOLUME /usr/share/logstash/config/ \ No newline at end of file diff --git a/compose/logstash/pipelines.yml b/compose/logstash/pipelines.yml deleted file mode 100644 index 464b2c7e..00000000 --- a/compose/logstash/pipelines.yml +++ /dev/null @@ -1,2 +0,0 @@ -- pipeline.id: elastiflow - path.config: "/usr/share/logstash/pipeline/*.conf" \ No newline at end of file diff --git a/conf-logstash/01-input-rabbit.conf 
b/conf-logstash/01-input-rabbit.conf index b333b13c..7f575da5 100644 --- a/conf-logstash/01-input-rabbit.conf +++ b/conf-logstash/01-input-rabbit.conf @@ -1,10 +1,9 @@ -##### COPY NEEDED CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. input { - # Normally, input events are flows from the named rabbit queue on LOCALHOST - # (The 'netsage_deidentfier_raw' rabbit queue may contain flows from netsage-netflow-importer-daemon and/or tstat_send.) - # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. - # Change the queue and key name, if needed. rabbitmq{ host => "${rabbitmq_input_host:localhost}" user => "${rabbitmq_input_username:guest}" diff --git a/conf-logstash/05-translate-pmacct.conf b/conf-logstash/05-translate-pmacct.conf new file mode 100644 index 00000000..c4c89a95 --- /dev/null +++ b/conf-logstash/05-translate-pmacct.conf @@ -0,0 +1,131 @@ +# Translate pmacct fields to those the rest of the pipeline uses + # NOTE: pmacct (nfacctd and sfacctd) must be run with + # pretag files wherein 'label' must be set to 'sfacct--' or 'nfacct--' + # followed by the sensor name with spaces replaced by #s. + +filter { + # skip all this for tstat! 
+ if [label] { + + # FOR SFLOW - + if [label] =~ /^sfacct--/ { + mutate { + add_field => { "[meta][flow_type]" => "sflow" } + # Assuming some aggregation over time by sfacctd: + rename => {'timestamp_min' => 'start'} + rename => {'timestamp_max' => 'end'} + id => "05-1" + } + } + + # FOR NETFLOW - + if [label] =~ /^nfacct--/ { + mutate { + add_field => { "[meta][flow_type]" => "netflow" } + # Assuming no aggregation over time by nfacctd: + rename => {'timestamp_start' => 'start'} + rename => {'timestamp_end' => 'end'} + id => "05-2" + } + } + + # FOR ALL PMACCT FLOWS - + # Save sampling correction flag and Tag flows without a sampling rate. + # (router may not be sending it, template may not have arrived yet, there may be no sampling) + # Here sampling_rate from pmacct is just a flag, 0 or 1. + if [sampling_rate] == 0 { + mutate { + id => "05-3" + add_tag => ["No pre-ingest sampling correction"] + add_field => { "@sampling_corrected" => "no" } + } + } else { + mutate { + id => "05-4" + add_field => { "@sampling_corrected" => "yes" } + } + } + # In case pmacct starts sending the actual sampling rate + if [sampling_rate] > 1 { + mutate { + id => "05-3.1" + add_tag => ["Pre-ingest sampling rate = %{sampling_rate}."] + } + } + + # Get sensor name + # Note: In the pmacct pretag file, label must be set to sfacct-- or nfacct-- + # followed by the real sensor name with spaces replaced by #s. + ruby { + code => ' + sensor = event.get("label") + sensor = sensor.gsub("sfacct--", "") + sensor = sensor.gsub("nfacct--", "") + sensor = sensor.gsub("#", " ") + event.set( "[meta][sensor_id]", sensor ) + ' + tag_on_exception => '_rubyexception getting sensor from label in 05-translate-pmacct. 
' + id => "05-5" + } + # Do field name translations + mutate { + rename => {'ip_src' => '[meta][src_ip]'} + rename => {'ip_dst' => '[meta][dst_ip]'} + rename => {'port_src' => '[meta][src_port]'} + rename => {'port_dst' => '[meta][dst_port]'} + rename => {'ip_proto' => '[meta][protocol]'} + rename => {'iface_in' => '[meta][src_ifindex]'} + rename => {'iface_out' => '[meta][dst_ifindex]'} + rename => {'as_src' => '[meta][src_asn]'} + rename => {'as_dst' => '[meta][dst_asn]'} + rename => {'packets' => '[values][num_packets]'} + id => "05-6" + } + # Start and end are timestamps at this point. Make sure they are floats. + mutate { + convert => { + 'start' => 'float' + 'end' => 'float' + } + id => "05-7" + } + # Calculations + ruby { + code => ' + event.set( "[values][num_bits]", event.get("bytes") * 8 ) + event.set( "[values][duration]", event.get("end") - event.get("start") ) + if event.get("[values][duration]") <= 0.001 ## == 0 to within roundoff error + event.set( "[values][bits_per_second]", 0.0 ) + event.set( "[values][packets_per_second]", 0.0 ) + else + bps = event.get("[values][num_bits]") / event.get("[values][duration]") + pps = event.get("[values][num_packets]") / event.get("[values][duration]") + event.set( "[values][bits_per_second]" , bps ) + event.set( "[values][packets_per_second]", pps ) + end + ' + tag_on_exception => '_rubyexception in 05-translate-pmacct. ' + id => "05-8" + } + # Make sure these are numeric types. We need to use them in calculations and comparisons later. 
+ mutate { + convert => { + '[values][num_bits]' => 'integer' + '[values][num_packets]' => 'integer' + '[values][duration]' => 'float' + '[values][bits_per_second]' => 'float' + '[values][packets_per_second]' => 'float' + '[meta][src_port]' => 'integer' + '[meta][dst_port]' => 'integer' + } + id => "05-9" + } + # Remove unneeded fields + mutate { + remove_field => [ 'sampling_rate', 'event_type', 'writer_id' ] + remove_field => [ 'label', 'bytes' ] + id => "05-10" + } + + } +} diff --git a/conf-logstash/10-preliminaries.conf b/conf-logstash/10-preliminaries.conf index f9b6c967..4b7dbb8a 100644 --- a/conf-logstash/10-preliminaries.conf +++ b/conf-logstash/10-preliminaries.conf @@ -2,12 +2,48 @@ filter { - # 1. Drop flows to or from private addresses (or other ranges we want to drop) - # Apr 2021 - dropping 0.0.0.0/32 flows for now. There is an nfdump bug affecting MPLS flows. (src and dst) + # 1. Check for missing fields that can cause logstash to crash + if ![meta][src_ip] { + mutate{ + id => "10-01" + add_tag => [ "src_ip was missing in flow header!?" ] + add_field => { "[meta][src_ip]" => "0.0.0.0" } + } + } + if ![meta][dst_ip] { + mutate{ + id => "10-02" + add_tag => [ "dst_ip was missing in flow header!?" ] + add_field => { "[meta][dst_ip]" => "0.0.0.0" } + } + } + if ![meta][src_ifindex] { + mutate{ + id => "10-03" + add_tag => [ "src_ifindex was missing in flow header!?" ] + add_field => { "[meta][src_ifindex]" => -10 } + } + } + if ![meta][dst_ifindex] { + mutate{ + id => "10-04" + add_tag => [ "dst_ifindex was missing in flow header!?" ] + add_field => { "[meta][dst_ifindex]" => -10 } + } + } + if ![values][duration] { + mutate{ + id => "10-05" + add_tag => [ "duration was missing!?" ] + add_field => { "[values][duration]" => 0.0 } + } + } + + # 2. 
Drop flows to or from private addresses (or other ranges we want to drop) cidr { id => "10-1" address => [ "%{[meta][src_ip]}" ] - network => [ "0.0.0.0/32", "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] + network => [ "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] add_field => { "@private_src" => "yes" } } # can skip dst if src is private @@ -15,50 +51,33 @@ filter { cidr { id => "10-2" address => [ "%{[meta][dst_ip]}" ] - network => [ "0.0.0.0/32", "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] + network => [ "10.0.0.0/8", "100.64.0.0/10", "192.168.0.0/16", "172.16.0.0/12", "fc00::/7", "169.254.0.0/16", "fe80::/10", "::1/128" ] add_field => { "@private_dst" => "yes" } } } + # drop if [@private_src] == "yes" or [@private_dst] == "yes" { - drop { } + drop { id => "10-3" } } - # 2. Add ingest_time here in case aggregation isn't done (eg, for tstat). - # (Copy makes a string; have to convert it to a date.) + # 3. Add @ingest_time field (useful for debugging) mutate { - id => "10-3" + id => "10-4" add_field => { '@ingest_time' => "%{@timestamp}" } } date { - id => "10-4" + id => "10-5" match => [ "@ingest_time", "ISO8601" ] target => "@ingest_time" } - # 3. Convert strings to numeric types where appropriate. We need to use these in calculations later. - # Start and end are timestamps at this point. Make sure they are floats. - mutate { - id => "10-5" - convert => { - 'start' => 'float' - 'end' => 'float' - '[values][duration]' => 'float' - '[values][num_bits]' => 'integer' - '[values][num_packets]' => 'integer' - '[values][bits_per_second]' => 'float' - '[values][packets_per_second]' => 'float' - } - } - - # 4. Convert any ms timestamps to s - # 5. Drop any events with start or end times in the future or too far in the past + # 4. 
Convert any timestamps in ms to s ruby { id => "10-6" code => " flow_ts = event.get('start').to_f flow_te = event.get('end').to_f - # Convert any timestamps in ms to s if flow_ts > 9999999999.0 flow_ts = flow_ts / 1000.0 event.set('start', flow_ts) @@ -68,8 +87,8 @@ filter { event.set('end', flow_te) end - # DROP any event with a strange start or end time - # > 10 sec in the future or > 1 year in the past, or end < start + # 5. DROP any event with a strange start or end time + # > 10 sec in the future or > 1 year in the past, or end < start current_t = Time.now.to_f age_s = current_t - flow_ts age_e = current_t - flow_te diff --git a/conf-logstash/15-sensor-specific-changes.conf b/conf-logstash/15-sensor-specific-changes.conf index d9341587..d6651401 100644 --- a/conf-logstash/15-sensor-specific-changes.conf +++ b/conf-logstash/15-sensor-specific-changes.conf @@ -1,11 +1,24 @@ -# Make changes required for specific sensors -# Parameters are obtained from an environment file (default: /etc/logstash/logstash-env-vars - see the logstash systemd file). -# If values are not provided, the defaults following the :'s are used (flags will be False, so nothing will happen). -# With a bare-metal installation, you may also just edit this file and fill in the values you want. +# Make any desired changes for flows from specific sensors + +# Values for ${variable-name:default-value} are obtained from the environment. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars is specified in the logstash systemd file) +# If values are not provided by the environment, the defaults/examples following the :'s are used. +# +# Note that in a bare-metal installation, all logstash-pipelines use the same version of this file, so be sure options will +# apply only to those intended. You may just replace this with hardcoded "if" statements and "code" for what you want to happen. 
+ +# Using env vars in conditionals has been an open issue for logstash since 2016! Workaround is to add a "flag" field. +# (@metadata fields are not saved to elasticsearch) filter { + # IFINDEX FILTERING #---- Drop flows that do not have src or dst ifindex in a specified list of ifindexes + # Specifying a sensor name is optional. If not given, the ifindex list will apply to all sensors. + # ALL can refer to all sensors or all interfaces. + # If a sensor is not referred to, keep all its flows. + # Example settings in env file: ifindex_filter_keep="500; Sensor 1: 123,456; Sensor 2 :ALL" + # (If specified, the sensor name must be exact, otherwise spaces don't matter. Separate lists with semicolons.) mutate { add_field => { "[@metadata][ifindex_filter_flag]" => "${ifindex_filter_flag:False}" } } @@ -15,72 +28,176 @@ filter { id => "15-1" } mutate { - # Split the field into an array (in a separate mutate, since in mutate, split happens before all add_fields) - # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file, - # otherwise 'in' will search for a substring in a string, which may not do what we want. - split => { "[@metadata][ifindex_filter_keep]" => "," } + # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, + # split happens before all add_fields) + # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. + split => { "[@metadata][ifindex_filter_keep]" => ";" } add_field => { "[@metadata][ifindex_filter_keep]" => "dummy" } id => "15-2" } - if [meta][src_ifindex] not in [@metadata][ifindex_filter_keep] and [meta][dst_ifindex] not in [@metadata][ifindex_filter_keep] { - drop { } - } + # Each (non-dummy) array element should have 'sensor-name: list-of-approved-ifindexes' ("sensor-name:" optional) + ruby { + id => "15-3" + tag_on_exception => "_rubyexception A in 15-sensor-specific-changes. 
" + code => " + # keep any flows that the filter list does not mention + action = 'keep' + # loop over the array of ifindex filters + filters = event.get('[@metadata][ifindex_filter_keep]') + filters.each do |f| + next if f == 'dummy' + # if filter-sensor=flow-sensor or the filter applies to all sensors, check the ifindex list (sensor name is regex-escaped so it is matched literally) + # Once 'keep' is determined, quit loop and move on to next flow. + if (f =~ /^\s*ALL\s*:/) or (f =~ /^\s*#{Regexp.escape(event.get('[meta][sensor_id]').to_s)}\s*:/) or (! f.include? ':') + f.sub!(/.*:/, '') # remove : and everything before it + f.gsub!(/\s/, '') # get rid of spaces in ifindex list + indexes = f.split(',') # split on commas into an array + # only if the ifindex-list is ALL or includes the current ifindex, keep this flow. + action = 'drop' + if (indexes.include? 'ALL') or (indexes.include? event.get('[meta][src_ifindex]').to_s) or (indexes.include? event.get('[meta][dst_ifindex]').to_s) + action = 'keep' + break + end + end + end + event.cancel if action == 'drop' + " + } } + # SENSOR NAME CHANGE BY IFINDEX #---- Change the sensor name for flows from a certain interface (ifindex) mutate { add_field => { "[@metadata][ifindex_sensor_rename_flag]" => "${ifindex_sensor_rename_flag:False}" } - id => "15-3" + id => "15-4" } if [@metadata][ifindex_sensor_rename_flag] == "True" { mutate { add_field => { "[@metadata][ifindex_sensor_rename_old_name]" => "${ifindex_sensor_rename_old_name:oldname}" } add_field => { "[@metadata][ifindex_sensor_rename_new_name]" => "${ifindex_sensor_rename_new_name:newname}" } add_field => { "[@metadata][ifindex_sensor_rename_ifindex]" => "${ifindex_sensor_rename_ifindex:1}" } - id => "15-4" + id => "15-5" + } + # src and dst ifindexes are integers, so we need to convert this field before comparing! 
+ mutate { + convert => { '[@metadata][ifindex_sensor_rename_ifindex]' => 'integer' } + id => "15-6" } + if [meta][sensor_id] == [@metadata][ifindex_sensor_rename_old_name] - and ( [meta][src_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] or [meta][dst_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] ) { + and ( [meta][src_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] + or [meta][dst_ifindex] == [@metadata][ifindex_sensor_rename_ifindex] ) { mutate { replace => { "[meta][sensor_id]" => "%{[@metadata][ifindex_sensor_rename_new_name]}" } - id => "15-5" + id => "15-7" } } } - #---- Manually apply a sampling correction to listed sensors. Use only in special cases when the flow exporter or collector is providing corrections. - # For netflow, a sampling rate correction can be done here or in the nfsen config or nfcapd command using the -s option. - # For sflow, there is no such option, so it must be done here. - + # SAMPLING RATE CORRECTIONS + #---- Manually apply a sampling correction to listed sensors + # but ONLY IF there was no pre-logstash sampling correction applied by pmacct + # ALL can be used to apply the same correction to all sensors. 
mutate { - add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } - id => "15-6" + add_field => { "[@metadata][sampling_correction_flag]" => "${sampling_correction_flag:False}" } + id => "15-8" } - if [@metadata][sampling_correction_flag] == "True" { + if [@metadata][sampling_correction_flag] == "True" and [@sampling_corrected] == "no" { mutate { - add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1,sensor2}" } + add_field => { "[@metadata][sampling_correction_sensors]" => "${sampling_correction_sensors:sensor1;sensor2}" } add_field => { "[@metadata][sampling_correction_factor]" => "${sampling_correction_factor:1}" } - id => "15-7" + id => "15-9" } + # make the field into an array (see comments about split above) mutate { - # make the field into an array (see comments about split above) - split => { "[@metadata][sampling_correction_sensors]" => "," } + split => { "[@metadata][sampling_correction_sensors]" => ";" } add_field => { "[@metadata][sampling_correction_sensors]" => "dummy" } - id => "15-8" + id => "15-10" + } + ruby { + id => "15-11" + tag_on_exception => "_rubyexception B in 15-sensor-specific-changes. " + code => ' + # strip any leading or trailing spaces from sensor names + sensors = event.get("[@metadata][sampling_correction_sensors]").map! { |e| e.strip } + # if event sensor is in the list, apply corrections + if (sensors.include? "ALL") or (sensors.include? 
event.get("[meta][sensor_id]")) + correction_factor = event.get("[@metadata][sampling_correction_factor]") + event.set("[values][num_bits]", correction_factor.to_i * event.get("[values][num_bits]").to_i) + event.set("[values][num_packets]", correction_factor.to_i * event.get("[values][num_packets]").to_i) + event.set("[values][bits_per_second]", correction_factor.to_f * event.get("[values][bits_per_second]").to_f) + event.set("[values][packets_per_second]", correction_factor.to_f * event.get("[values][packets_per_second]").to_f) + event.set("@sampling_corrected", "yes") + newtags = event.get("tags") + newtags ||= [] # if undefined, set to empty array + newtags.push( "Logstash sampling correction = #{correction_factor}" ) + event.set("[tags]", newtags ) + end + ' + } + } + + # SUBNET FILTERING + #---- For named sensors, drop all flows except those that have src or dst IP in a specified list of subnets. + # But keep all flows if a sensor is not referenced at all in the list (by name or ALL)! + # Example setting in env file: subnet_filter_keep="ALL: 123.45.6.0/24; Sensor 1: 98.765.43.0/24, 100.222.33.0/24" + # "ALL:" or a sensor name must be specified before each subnet list. If specified, the sensor name must be exact. + # Separate subnets with commas and lists with semicolons. + mutate { + add_field => { "[@metadata][subnet_filter_flag]" => "${subnet_filter_flag:False}" } + id => "15-12" + } + if [@metadata][subnet_filter_flag] == "True" { + mutate { + add_field => { "[@metadata][subnet_filter_keep]" => "${subnet_filter_keep:Some-Sensor:134.456.78.0/24}" } + id => "15-13" + } + mutate { + # Split the string on ';' into an array of the same name (in a separate mutate, since in mutate, split happens before all add_fields) + # Add a dummy array element to force it to be an array, in case there is just 1 value in the env file. 
+ split => { "[@metadata][subnet_filter_keep]" => ";" } + add_field => { "[@metadata][subnet_filter_keep]" => "dummy" } + id => "15-14" } - if [meta][sensor_id] in [@metadata][sampling_correction_sensors] { - ruby { - code => " - correction_factor = event.get('[@metadata][sampling_correction_factor]').to_i - event.set('[values][num_bits]', correction_factor * event.get('[values][num_bits]').to_i) - event.set('[values][num_packets]', correction_factor * event.get('[values][num_packets]').to_i) - event.set('[values][bits_per_second]', correction_factor * event.get('[values][bits_per_second]').to_i) - event.set('[values][packets_per_second]', correction_factor * event.get('[values][packets_per_second]').to_i) - " - id => "15-9" - } + # Each (non-dummy) array element should have 'sensor-name: list-of-approved-subnets' + # Use Ruby to loop and test + ruby { + id => "15-15" + tag_on_exception => "_rubyexception C in 15-sensor-specific-changes. " + code => ' + require "ipaddr" + # Keep any flows that the filter list does not mention + action = "keep" + flow_sensor = event.get("[meta][sensor_id]") + flow_src = event.get("[meta][src_ip]") + flow_dst = event.get("[meta][dst_ip]") + filters = event.get("[@metadata][subnet_filter_keep]").map! { |e| e.strip } # already an array; strip leading and trailing spaces + # Loop over array of filters + filters.each do |f| + next if f == "dummy" + # If filter f specifies a sensor that is not the current sensor, we can skip it. + # Otherwise, parse f to remove the sensor name and get the subnet list. (The ALL or sensor-name prefix must start the entry; the sensor name is regex-escaped so it is matched literally.) + if (f =~ /^ALL\s*:/) or (f =~ /^#{Regexp.escape(flow_sensor.to_s)}\s*:/) + f.sub!(/^#{Regexp.escape(flow_sensor.to_s)}\s*:/, "") + f.sub!(/^ALL\s*:/, "") + f.gsub!(/\s/, "") + subnets = f.split(",") + # default is now to drop the flow + action = "drop" + # Loop over the subnets in the list + subnets.each do |net| + netobj = IPAddr.new(net) + if ( netobj.include? flow_src ) or ( netobj.include? 
flow_dst ) + action = "keep" + break + end + end + end # end if this filter list applies + end + event.cancel if action == "drop" + ' } } diff --git a/conf-logstash/40-aggregation.conf b/conf-logstash/40-aggregation.conf index e5cff902..628a853b 100644 --- a/conf-logstash/40-aggregation.conf +++ b/conf-logstash/40-aggregation.conf @@ -1,135 +1,249 @@ -##### COPY ANY CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### +# This filter stitches together incoming flows that go together. -## Fields most likely to be specific to a pipeline: -## These may be set via environment variables. -## aggregate_maps_path - must be unique for each pipeline. Aggregation info is written here if logstash exits. Default is /tmp/logstash-aggregation-maps. -## inactivity_timeout - value depends on timespan of nfcapd files. Default is 630 sec. -## timeout - the maximum length of a flow. Default is 1 day. -## (trial - this may be useful for testing. Commented out by default.) +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. -# This filter stitches together flows from different nfcapd files, each (usually) spanning a 5 min. period. -# Note: netflow keeps the start time the same for all flows with the same fingerprint, even across different nfcapd files; -# duration is cumulative but counts are not. Sflow ends each flow as it is written out, as one would expect. -# If only 1 packet is seen, end time will = start time and duration will be 0. +## Fields most likely to be specific to a logstash pipeline: +## aggregate_maps_path - must be unique for each logstash pipeline. Default is /tmp/logstash-aggregation-maps. 
+## inactivity_timeout - when to declare a flow ended. +## timeout - the maximum length of a flow. -# NOTE: tags added to events before this point in the pipeline aren't kept. +## NOTE THAT THERE ARE SEPARATE SECTIONS FOR SFLOW AND NETFLOW, +## EDIT BOTH !!!! filter { - # TSTAT - tstat only reports complete flows, so no stitching is needed! - # Just add stitched_flows=0 (means no stitching attempted) and the fingerprint as meta.id + # === TSTAT === + # Tstat only reports complete flows, so no stitching is needed! + # Just add stitched_flows=0 (means no stitching attempted) if [meta][flow_type] == 'tstat' { - # on tstat flows, just add the fields we would have had during aggregation - mutate { - id => "40-1" - add_field => { 'stitched_flows' => 0 } - rename => { 'flow_fingerprint' => '[meta][id]' } - } - - } + mutate { + id => "40-1" + add_field => { 'stitched_flows' => 0 } + } + } - # SFLOW AND NETFLOW - aggregate flows spanning more than 1 nfcapd file else { - # We need the 'start' time as a date, as well as as a timestamp - date { - id => "40-2" - match => [ '[start]', 'UNIX' ] - target => '[start_date]' - } - - aggregate { - id => "40-3" - # unique ID used to aggregate events - task_id => '%{[flow_fingerprint]}' - - # save the fingerprint value as [meta][id] on timeout - timeout_task_id_field => "[meta][id]" - - # use event's start time rather than system time to determine whether a timeout has occured (must be type 'date') - timeout_timestamp_field => '[start_date]' - - # If more than inactivity_timeout seconds have passed between the 'start' of this event and the 'start' - # of the LAST matching event, OR if no matching flow has coming in for inactivity_timeout seconds - # on the clock, assume the flow has ended. - ## Use 630 sec = 10.5 min for 5-min files, 960 sec = 16 min for 15-min files. - ## (For 5-min files, this allows one 5 min gap or period during which the no. 
of bits transferred don't meet the cutoff) - inactivity_timeout => "${inactivity_timeout:630}" - - # Maximum possible flow length. Stop aggregating even if we're still seeing matching events coming in. - ## Use 86400 sec = 1 day - timeout => "${max_flow_timeout:86400}" + # for aggregation, we need the 'start' or 'end' date, as well as as timestamp + date { + id => "40-2" + match => [ '[start]', 'UNIX' ] + target => '[start_date]' + } + date { + id => "40-3" + match => [ '[end]', 'UNIX' ] + target => '[end_date]' + } + } - # send the aggregation map as a new event upon timeout - push_map_as_event_on_timeout => true + # === SFLOW === + # Aggregate on hash of 5-tuple + sensor + # Incoming events may be single samples or results from partial aggregation/stitching by sfacctd. + if [meta][flow_type] == "sflow" { + aggregate { + id => "40-4" + # Events that have matching task_id's will be aggregated. + task_id => '%{[flow_fingerprint]}' + + # Save the task_id value to this field in the aggregated event on timeout + timeout_task_id_field => "[flow_fingerprint]" + + # Use this field when determining if timeouts have occurred, in case we are processing historical data. + # It'll actually look at values of this field AND the clock times at which events come in. (Must be type 'date') + timeout_timestamp_field => "[start_date]" + + # Inactive timeout + # A flow is assumed to have ended if more than inactivity_timeout seconds have passed since the last matching event. + # (Aggregator compares timeout_timestamp_field of the current matching event and of the last matching event. If the diff is + # greater than inactivity_timeout, it ends the current flow and starts a new one. + # ALSO, every 5 sec, it compares the ingest clock time of the last matching event to NOW. + # If more than inactivity_timeout seconds have passed, it declares the flow finished.) 
+ ## default 360 sec = 6 min + inactivity_timeout => "${inactivity_timeout:360}" + + # Active timeout + # = maximum possible flow duration + # (Aggregator compares timeout_timestamp_field of the current event to that of the FIRST event in the map. If the + # diff is greater than timeout, it ends the current flow and starts a new one, even if matching events are still coming in. + # ALSO, every 5 sec, it compares the ingest clock time of the first event in the map to NOW. + # If more than timeout seconds have passed, it declares the flow finished, even if matching events are still coming in.) + ## default 3600 sec = 1 hour + timeout => "${max_flow_timeout:3600}" + + # Save the aggregation map as a new event upon timeout + push_map_as_event_on_timeout => true + + # Save all the in-progress aggregation maps to this file when logstash shuts down, to be read back in when it restarts. + ## (use a different file for each logstash pipeline!) + aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' + + # Ruby code to run for each event. + # (The event will be added to the correct map (hash) according to its task_id. + # ||= assigns the value only if the variable does not yet exist. Only map values are included in the final event.) + code => " + # keep track of how many events we aggregate + map['stitched_flows'] ||= 0 + map['stitched_flows'] += 1 - # save the aggregation maps here in case logstash dies - ## (use a different file for each logstash pipeline!) - aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' - - - # ruby code to run each time we see an event - # (||= assigns the value only if the variable does not yet exist. 'map' values are included in the final event.) 
- code => " - # keep track of how many events we aggregate - map['stitched_flows'] ||= 0 - map['stitched_flows'] += 1 - - # map[start and end] are start and end times of the full stitched flow (timestamps) - map['start'] ||= event.get('start') - map['end'] ||= event.get('end') - - # save info from the first subflow - # values will be updated as we stitch on other flows - map['meta'] ||= event.get('meta') - map['values'] ||= event.get('values') - - # essentially the time the flow entered the pipeline - map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. - # but this works. - # An @timestamp will be added when the map is finally pushed as an event. - - #### FOR TESTING - # map['trial'] = 1 - # map['values']['durations_sum'] ||= 0; - # map['values']['durations_sum'] += event.get('[values][duration]') - # map['values']['durations'] ||= ' ' - # map['values']['durations'] += event.get('[values][duration]').to_s - # map['values']['durations'] += '; ' - #### - - # if we are seeing a subsequent flow event - if map['stitched_flows'] > 1 - - # be very sure we are getting the correct start and end times, even if events are out of order - map['start'] = [ map['start'], event.get('start') ].min - map['end'] = [ map['end'], event.get('end') ].max - - # sum the packet and bit counters - map['values']['num_packets'] += event.get('[values][num_packets]') - map['values']['num_bits'] += event.get('[values][num_bits]') - - # recalculate total duration - map['values']['duration'] = map['end'] - map['start'] - - # recalculate average pps and bps - if map['values']['duration'] > 0 - map['values']['packets_per_second'] = (map['values']['num_packets'] / map['values']['duration']).to_i; - map['values']['bits_per_second'] = (map['values']['num_bits'] / map['values']['duration']).to_i; - else - # can't calculate so set to 0 # - map['values']['packets_per_second'] = 0; - map['values']['bits_per_second'] = 0; + # map[start and 
end] are start and end times of the full stitched flow (timestamps) + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') + + # Save these fields from the FIRST event. + # Only 'values' will be updated as we stitch events or at the very end !!!!! + map['meta'] ||= event.get('meta') + map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') + map['@sampling_corrected'] ||= event.get('@sampling_corrected') + + # Essentially the time the flow entered the pipeline + map['@ingest_time'] ||= Time.now # Saving @timestamp caused problems when aggregate map was saved to a file then read. + # but this works. + # An @timestamp will be added when the map is finally pushed as an event. + + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) + #map['trial'] = 1 + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' + #### + + # If we are seeing a subsequent flow event... (assumes all events are in order!) + if map['stitched_flows'] > 1 + map['end'] = event.get('end') + # sum the packet and bit counters + map['values']['num_packets'] += event.get('[values][num_packets]') + map['values']['num_bits'] += event.get('[values][num_bits]') end - # round off after calculations - map['values']['duration'] = (map['values']['duration']).round(3) + # Discard the original events. We only care about the aggregation. + event.cancel() + " - end + # Code to run on the new aggregated event before it's pushed out + timeout_code => " + # recalculate total duration + duration = event.get('end') - event.get('start') + event.set( '[values][duration]', duration.round(3) ) - # discard the original event. we only care about the aggregation. 
- event.cancel() - " - } + # recalculate average pps and bps (say duration < .001 is 0 within roundoff error) + if duration >= 0.001 + event.set( '[values][packets_per_second]', event.get('[values][num_packets]') / duration ) + event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) + else + # can't calculate (accurate) rates so set to 0 + event.set( '[values][duration]', 0.0 ) + event.set( '[values][packets_per_second]', 0.0 ) + event.set( '[values][bits_per_second]', 0.0 ) + end + " + } } + # === NETFLOW === + # Aggregate on hash of 5-tuple + sensor + start time + # + # Before aggregating, we have to do special start-time adjustments due to the fact that netflow sensors send "updates" + # about active flows, all with the same start time, but bytes and packet counts are only for the time since the last update. + # We will aggregate the updates up to max_flow_timeout (1 hr by default) then start a new aggregated flow. + # If a flow (update) comes in with a duration over max_flow_timeout, the start time will be adjusted. Multiples of + # max_flow_timeout (eg whole hours) will be cut off, since bits from those times should have already been accounted for in + # a previous aggregated flow. + # Note that if there's a timeout at the router (default inactive timeout is usually only 15 sec), the incoming flows will stay + # separate and not be stitched here, even though they have the same 5-tuple, since the start time will change. + else if [meta][flow_type] == "netflow" { + mutate { + add_field => { "[@metadata][max_dur]" => "${max_flow_timeout:3600}" } + id => "40-5" + } + ruby { + # if duration is > timeout (1 hr), adjust start time to cut off n*timeout (whole hours). + # That part of the flow should have already been processed and pushed out. 
+ id => "40-6" + tag_on_exception => "_rubyexception in 40-aggregation.conf" + code => " + max_dur = event.get( '[@metadata][max_dur]' ).to_f + duration = event.get( '[values][duration]' ).to_f + start = event.get( 'start' ) + cuts = 0 # how many times the start time got cut + while max_dur > 0 && duration > max_dur # guard: a zero, negative or non-numeric max_flow_timeout would otherwise loop forever + start = start + max_dur # move start forward + duration -= max_dur + cuts += 1 + end + if cuts > 0 + event.set( 'start', start ) + event.set( '[values][duration]', duration ) + event.set( '@dur_cuts', cuts ) #### no. of max_dur's cut off - FOR TESTING + end + " + } + aggregate { + id => "40-7" + # unique ID used to aggregate events ## A second agg filter must have different task_id "pattern" + # For Netflow, include start time so only "updates" with the same start time are aggregated, not + # continuations after short gaps that the router considers timeouts. + task_id => '%{[flow_fingerprint]}-%{[start]}' + + # see comments above. MAKE SURE THE VALUES/DEFAULTS ARE THE SAME HERE. + timeout_timestamp_field => "[start_date]" + inactivity_timeout => "${inactivity_timeout:360}" + timeout => "${max_flow_timeout:3600}" + push_map_as_event_on_timeout => true + + ## can only set this in 1 agg. filter and it is set above! + ## aggregate_maps_path => '${aggregation_maps_path:/tmp/logstash-aggregation-maps}' + + # Ruby code to run for each event. + code => " + # we have to save flow_fingerprint explicitly for netflow + map['flow_fingerprint'] ||= event.get('flow_fingerprint') + + map['stitched_flows'] ||= 0 + map['stitched_flows'] += 1 + map['start'] ||= event.get('start') + map['end'] ||= event.get('end') + map['meta'] ||= event.get('meta') + map['values'] ||= event.get('values') + map['tags'] ||= event.get('tags') # saves first aggregated event only! + map['@sampling_corrected'] ||= event.get('@sampling_corrected') # saves first aggregated event only! + map['@ingest_time'] ||= Time.now + + #### FOR TESTING (EDIT IN BOTH SFLOW AND NETFLOW SECTIONS !!!) 
+ #map['trial'] = 1 + # For netflow updates, indiv durations will be the cumulative duration of the aggregated flow as it aggregates + #map['values']['indivDurations'] ||= ' ' + #map['values']['indivDurations'] += event.get('[values][duration]').to_f.round(3).to_s + #map['values']['indivDurations'] += '; ' + #### + + if map['stitched_flows'] > 1 + map['end'] = event.get('end') + map['values']['num_packets'] += event.get('[values][num_packets]') + map['values']['num_bits'] += event.get('[values][num_bits]') + map['@dur_cuts'] = event.get('@dur_cuts') #### FOR TESTING + end + + event.cancel() + " + + timeout_code => " + duration = event.get('end') - event.get('start') + event.set( '[values][duration]', duration.round(3) ) + + if duration >= 0.001 + event.set( '[values][packets_per_second]', event.get('[values][num_packets]') / duration ) + event.set( '[values][bits_per_second]', event.get('[values][num_bits]') / duration ) + else + event.set( '[values][duration]', 0.0 ) + event.set( '[values][packets_per_second]', 0.0 ) + event.set( '[values][bits_per_second]', 0.0 ) + end + " + } + } # end if netflow + } diff --git a/conf-logstash/41-thresholds.conf b/conf-logstash/41-thresholds.conf new file mode 100644 index 00000000..80e63659 --- /dev/null +++ b/conf-logstash/41-thresholds.conf @@ -0,0 +1,23 @@ +# Apply various thresholds after aggregating/stitching (if any) + +filter { + + # The minimum flow size threshold is 10 MB = 80,000,000 bits. + # Drop any flows still smaller than that. + if [values][num_bits] < 80000000 { + drop { id => "41-1" } + } + + # If duration is too small, it's almost certainly inaccurate and it will make rates inaccurate. + # Netsage is also not interested in very small flows and we don't want to see them listed as fastest. + # For durations under the threshold, set duration and rates to 0. 
+ if [values][duration] < 1.0 { + mutate { + id => "41-2" + replace => {"[values][duration]" => 0.0} + replace => {"[values][bits_per_second]" => 0.0} + replace => {"[values][packets_per_second]" => 0.0} + } + } + +} diff --git a/conf-logstash/53-caida-org.conf b/conf-logstash/53-caida-org.conf index c88e08b3..e290b4e6 100644 --- a/conf-logstash/53-caida-org.conf +++ b/conf-logstash/53-caida-org.conf @@ -8,8 +8,8 @@ filter { if [meta][src_asn] != -1 { translate { id => "53-1" - field => "[meta][src_asn]" - destination => "[meta][src_organization]" + source => "[meta][src_asn]" + target => "[meta][src_organization]" dictionary_path => "/var/lib/grnoc/netsage/CAIDA-org-lookup.csv" fallback => "Unknown" override => true @@ -26,8 +26,8 @@ filter { if [meta][dst_asn] != -1 { translate { id => "53-3" - field => "[meta][dst_asn]" - destination => "[meta][dst_organization]" + source => "[meta][dst_asn]" + target => "[meta][dst_organization]" dictionary_path => "/var/lib/grnoc/netsage/CAIDA-org-lookup.csv" fallback => "Unknown" override => true diff --git a/conf-logstash/60-scireg-tagging-fakegeoip.conf b/conf-logstash/60-scireg-tagging-fakegeoip.conf index 3a1d7367..e1af7e5f 100644 --- a/conf-logstash/60-scireg-tagging-fakegeoip.conf +++ b/conf-logstash/60-scireg-tagging-fakegeoip.conf @@ -40,6 +40,7 @@ filter { if [meta][scireg][src][projects][0] { ruby { id => "60-5" + tag_on_exception => '_rubyexception A in 60-scireg-tagging-fakegeoip.conf' code => " event.set('[meta][scireg][src][project_names]', event.get('[meta][scireg][src][projects]').map{ |n| n['project_name'] }) " @@ -48,6 +49,7 @@ filter { if [meta][scireg][dst][projects][0] { ruby { id => "60-6" + tag_on_exception => '_rubyexception B in 60-scireg-tagging-fakegeoip.conf' code => " event.set('[meta][scireg][dst][project_names]', event.get('[meta][scireg][dst][projects]').map{ |n| n['project_name'] }) " diff --git a/conf-logstash/70-deidentify.conf b/conf-logstash/70-deidentify.conf index 8d8e1374..40d273f0 
100644 --- a/conf-logstash/70-deidentify.conf +++ b/conf-logstash/70-deidentify.conf @@ -11,6 +11,17 @@ filter { # For IPV6 addresses, # In anonymize_ipv6.rb script, expand to full format with 8 hextets, then replace the last 4 with x:x:x:x. +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. + +# By default, IP addresses are deidentified + mutate { + add_field => { "[@metadata][full_IPs_flag]" => "${full_IPs_flag:False}" } + } + if [@metadata][full_IPs_flag] == "False" { + # source ip's grok { id => "70-1" @@ -70,4 +81,6 @@ filter { mutate { update => { "[meta][dst_ip]" => "INVALID IP" } } } + } # end if deidentifying + } diff --git a/conf-logstash/80-privatize-org.conf b/conf-logstash/80-privatize-org.conf deleted file mode 100644 index 19332cf7..00000000 --- a/conf-logstash/80-privatize-org.conf +++ /dev/null @@ -1,114 +0,0 @@ -# Remove information about any organizations that have privacy rules that require us to not identify them. 
-filter { - - ruby { - id => "80-1" - code => ' - # Australian ASNs to privatize - # 7/11/19 - ASNs from "/usr/bin/whois AS7575:AS-RNO" and "/usr/bin/whois AS7575:AS-EDGE" - # (state based networks connected to AARNet and customers on AARNet using public AS numbers): - asn_array = ["AS4738", "AS7569", "AS7571", "AS7570", "AS7572", "AS7573", "AS7574", "AS1851", "AS4822", "AS6262", "AS7475", "AS7476", "AS7573", "AS7575", "AS7637", "AS7645", "AS9348", "AS4608", "AS9383", "AS9517", "AS10106", "AS10148", "AS17807", "AS20144", "AS22556", "AS23654", "AS23719", "AS23859", "AS23935", "AS24032", "AS24101", "AS24313", "AS24390", "AS24431", "AS24433", "AS24434", "AS24436", "AS24437", "AS24490", "AS24510", "AS37978", "AS38076", "AS38083", "AS38280", "AS38307", "AS38474", "AS38568", "AS38568", "AS38795", "AS38858", "AS45128", "AS45158", "AS45213", "AS45797", "AS45962", "AS55354", "AS55363", "AS55491", "AS55773", "AS55813", "AS56065", "AS56132", "AS56210", "AS56219", "AS56303", "AS58422", "AS58528", "AS58582", "AS58584", "AS58611", "AS58686", "AS58698", "AS58877", "AS59206", "AS64090", "AS131294", "AS137188", "AS132129", "AS132158", "AS132345", "AS132693", "AS132728", "AS132868", "AS133019", "AS134096", "AS134111", "AS134115", "AS134197", "AS134197", "AS134700", "AS134748", "AS137965", "AS135350", "AS135520", "AS135892", "AS135893", "AS136013", "AS136016", "AS136135", "AS136247", "AS136549", "AS136753", "AS136770", "AS136912", "AS136921", "AS136621", "AS137073", "AS137400", "AS138017", "AS137837", "AS137529", "AS138201", "AS138390", "AS138447", "AS138468", "AS138537", "AS137429"] - - # Convert array to hash with values of true - asn_hash = asn_array.map {|x| [x,true]}.to_h - - # event values - src_asn = "AS" + event.get("[meta][src_asn]").to_s - dst_asn = "AS" + event.get("[meta][dst_asn]").to_s - src_country = event.get("[meta][src_country_name]") - dst_country = event.get("[meta][dst_country_name]") - - # Are flow src or dst in the list? 
- # Redact only if src or dst is also physically IN Australia - if asn_hash[src_asn] and src_country == "Australia" - event.set( "[@metadata][REDACT-SRC]" , "YES" ) - end - if asn_hash[dst_asn] and dst_country == "Australia" - event.set( "[@metadata][REDACT-DST]" , "YES" ) - end - - return [event] - ' - tag_on_exception => '_rubyexception in 80-privatize-org' - } - - # Australian SRCs: Copy some info to [private] and replace sensitive info with AARnet values. - # (Copy then replace in the same mutate filter results in both private and public values being privatized, - # because copy is always done last, so use separate mutates. Update will not create a field if one doesn't exist.) - if [@metadata][REDACT-SRC] == "YES" { - mutate { - id => "80-2" - copy => { "[meta][src_organization]" => "[PRIVATE][src_organization]" } - copy => { "[meta][src_asn]" => "[PRIVATE][src_asn]" } - copy => { "[meta][src_ip]" => "[PRIVATE][src_ip]" } - copy => { "[meta][scireg][src][org_name]" => "[PRIVATE][scireg_src_org_name]" } - copy => { "[meta][scireg][src][resource]" => "[PRIVATE][scireg_src_resource]" } - } - - mutate { - id => "80-3" - replace => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } - replace => { "[meta][src_asn]" => -1 } - replace => { "[meta][src_ip]" => "xx.xx.xx.xx" } - replace => { "[meta][src_location][lat]" => -25 } - replace => { "[meta][src_location][lon]" => 135 } - - update => { "[meta][scireg][src][org_name]" => "Australian Academic and Research Network (AARNet)" } - update => { "[meta][scireg][src][org_abbr]" => "AARNet.au" } - update => { "[meta][scireg][src][resource]" => "AARNet member" } - update => { "[meta][scireg][src][resource_abbr]" => "AARNet" } - update => { "[meta][scireg][src][latitude]" => "-25" } - update => { "[meta][scireg][src][longitude]" => "135" } - - remove_field => [ "[meta][scireg][src][project_names]" ] - } - - } # end SRC - - # Australian DSTs: Copy some info to [private] and replace sensitive 
info with AARnet values - if [@metadata][REDACT-DST] == "YES" { - mutate { - id => "80-5" - copy => { "[meta][dst_organization]" => "[PRIVATE][dst_organization]" } - copy => { "[meta][dst_asn]" => "[PRIVATE][dst_asn]" } - copy => { "[meta][dst_ip]" => "[PRIVATE][dst_ip]" } - copy => { "[meta][scireg][dst][org_name]" => "[PRIVATE][scireg_dst_org_name]" } - copy => { "[meta][scireg][dst][resource]" => "[PRIVATE][scireg_dst_resource]" } - } - - mutate { - id => "80-6" - replace => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } - replace => { "[meta][dst_asn]" => -1 } - replace => { "[meta][dst_ip]" => "xx.xx.xx.xx" } - replace => { "[meta][dst_location][lat]" => -25 } - replace => { "[meta][dst_location][lon]" => 135 } - - update => { "[meta][scireg][dst][org_name]" => "Australian Academic and Research Network (AARNet)" } - update => { "[meta][scireg][dst][org_abbr]" => "AARNet.au" } - update => { "[meta][scireg][dst][resource]" => "AARNet member" } - update => { "[meta][scireg][dst][resource_abbr]" => "AARNet" } - update => { "[meta][scireg][dst][latitude]" => "-25" } - update => { "[meta][scireg][dst][longitude]" => "135" } - - remove_field => [ "[meta][scireg][dst][project_names]" ] - } - - } # end DST - - - # Make sure we have consistent AARNET names even if no redaction (case insensitive) - if [meta][src_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { - mutate { - id => "80-8" - update => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } - } - } - if [meta][dst_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { - mutate { - id => "80-9" - update => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } - } - } - -} diff --git a/conf-logstash/80-privatize-org.conf.disabled b/conf-logstash/80-privatize-org.conf.disabled new file mode 100644 index 00000000..450a60bc --- /dev/null +++ 
b/conf-logstash/80-privatize-org.conf.disabled @@ -0,0 +1,107 @@ +# Remove information about any organizations that have privacy rules that require us to not identify them. + +### The .disabled file has a fictional example +# To enable, set the following in the code below and remove the .disabled suffix. +# List the ASNs to privatize (asn_array) and specify the country the ASNs are in (called "countryA" below). +# Specify what to use when overwriting org names (eg, "NetworkA"), org abbreviations ("NetA"), +# latitudes and longitudes (set to -25 and 25 below), +# scireg resource names ("NetworkA member"), and scireg resource abbreviations ("NetA member"). + +filter { + + ruby { + id => "80-1" + code => ' + # ASNs to privatize + asn_array = ["AS0001", "AS0002", "AS0003"] + + # Convert array to hash with values of true + asn_hash = asn_array.map {|x| [x,true]}.to_h + + # event values + src_asn = "AS" + event.get("[meta][src_asn]").to_s + dst_asn = "AS" + event.get("[meta][dst_asn]").to_s + src_country = event.get("[meta][src_country_name]") + dst_country = event.get("[meta][dst_country_name]") + + # Are flow src or dst in the list?
+ # Redact only if src or dst is also physically IN countryA + if asn_hash[src_asn] and src_country == "countryA" + event.set( "[@metadata][REDACT-SRC]" , "YES" ) + end + if asn_hash[dst_asn] and dst_country == "countryA" + event.set( "[@metadata][REDACT-DST]" , "YES" ) + end + + return [event] + ' + tag_on_exception => '_rubyexception in 80-privatize-org' + } + + # CountryA SRCs: replace sensitive info + if [@metadata][REDACT-SRC] == "YES" { + + # Save original values if needed or desired +## mutate { +## id => "80-2" +## copy => { "[meta][src_organization]" => "[PRIVATE][src_organization]" } +## copy => { "[meta][src_asn]" => "[PRIVATE][src_asn]" } +## copy => { "[meta][src_ip]" => "[PRIVATE][src_ip]" } +## copy => { "[meta][scireg][src][org_name]" => "[PRIVATE][scireg_src_org_name]" } +## copy => { "[meta][scireg][src][resource]" => "[PRIVATE][scireg_src_resource]" } +## } + + mutate { + id => "80-3" + replace => { "[meta][src_organization]" => "NetworkA" } + replace => { "[meta][src_asn]" => -1 } + replace => { "[meta][src_ip]" => "xx.xx.xx.xx" } + replace => { "[meta][src_location][lat]" => -25 } + replace => { "[meta][src_location][lon]" => 25 } + + update => { "[meta][scireg][src][org_name]" => "NetworkA" } + update => { "[meta][scireg][src][org_abbr]" => "NetA" } + update => { "[meta][scireg][src][resource]" => "NetworkA member" } + update => { "[meta][scireg][src][resource_abbr]" => "NetA member" } + update => { "[meta][scireg][src][latitude]" => "-25" } + update => { "[meta][scireg][src][longitude]" => "25" } + + remove_field => [ "[meta][scireg][src][project_names]" ] + } + + } # end SRC + + # CountryA DSTs: replace sensitive info + if [@metadata][REDACT-DST] == "YES" { + +## mutate { +## id => "80-5" +## copy => { "[meta][dst_organization]" => "[PRIVATE][dst_organization]" } +## copy => { "[meta][dst_asn]" => "[PRIVATE][dst_asn]" } +## copy => { "[meta][dst_ip]" => "[PRIVATE][dst_ip]" } +## copy => { "[meta][scireg][dst][org_name]" => 
"[PRIVATE][scireg_dst_org_name]" } +## copy => { "[meta][scireg][dst][resource]" => "[PRIVATE][scireg_dst_resource]" } +## } + + mutate { + id => "80-6" + replace => { "[meta][dst_organization]" => "NetworkA" } + replace => { "[meta][dst_asn]" => -1 } + replace => { "[meta][dst_ip]" => "xx.xx.xx.xx" } + replace => { "[meta][dst_location][lat]" => -25 } + replace => { "[meta][dst_location][lon]" => 25 } + + update => { "[meta][scireg][dst][org_name]" => "NetworkA" } + update => { "[meta][scireg][dst][org_abbr]" => "NetA" } + update => { "[meta][scireg][dst][resource]" => "NetworkA member" } + update => { "[meta][scireg][dst][resource_abbr]" => "NetA member" } + update => { "[meta][scireg][dst][latitude]" => "-25" } + update => { "[meta][scireg][dst][longitude]" => "25" } + + remove_field => [ "[meta][scireg][dst][project_names]" ] + } + + } # end DST + + +} diff --git a/conf-logstash/90-additional-fields.conf b/conf-logstash/90-additional-fields.conf index 4dabfa00..84b7cc49 100644 --- a/conf-logstash/90-additional-fields.conf +++ b/conf-logstash/90-additional-fields.conf @@ -5,8 +5,8 @@ filter { # sensor_group: Use dictionary to group together sensor IDs translate { id => "90-1" - field => "[meta][sensor_id]" - destination => "[meta][sensor_group]" + source => "[meta][sensor_id]" + target => "[meta][sensor_group]" dictionary_path => "/etc/logstash/conf.d/support/sensor_groups.json" regex => true } @@ -14,8 +14,8 @@ filter { # sensor_type: Use dictionary to set sensor_type such as Archive, Circuit, Exchange Point, etc. translate { id => "90-2" - field => "[meta][sensor_id]" - destination => "[meta][sensor_type]" + source => "[meta][sensor_id]" + target => "[meta][sensor_type]" dictionary_path => "/etc/logstash/conf.d/support/sensor_types.json" regex => true } @@ -51,8 +51,8 @@ filter { } } - # Unique id based on the meta.id (five-tuple-plus-sensor) + start time. 
- # Can be used as the document id in elasticsearch to avoid duplicate records (see ES output filter) + # Unique id based on five-tuple-plus-sensor + start time. + # Can possibly be used as the document id in elasticsearch to avoid duplicate records (see ES output filter) # (use for sflow only). fingerprint { id => '90-6' diff --git a/conf-logstash/95-cleanup.conf b/conf-logstash/95-cleanup.conf index 7e11903b..81e5ccc9 100644 --- a/conf-logstash/95-cleanup.conf +++ b/conf-logstash/95-cleanup.conf @@ -1,30 +1,53 @@ filter { - # make sure this has been renamed (in case aggregation conf has not been used) + # Tag flows with 2 missing IPs (0.0.0.0s). + # Check/edit the 99-outputs file for any action to be taken based on this tag. + if ([meta][src_ip] == "0.0.0.x" and [meta][dst_ip] == "0.0.0.x") + or ([meta][src_ip] == "0.0.0.0" and [meta][dst_ip] == "0.0.0.0") { + mutate { + id => "95-1" + add_tag => ["Missing IPs"] + } + } + + # rename the 5-tuple+sensor hash to meta.id if [flow_fingerprint] { mutate { - id => "95-1" + id => "95-2" rename => { 'flow_fingerprint' => '[meta][id]' } } } # replace start and end timestamps with date fields date { - id => "95-2" + id => "95-3" match => [ '[start]', 'UNIX' ] target => '[start]' } date { - id => "95-3" + id => "95-4" match => [ '[end]' ,'UNIX' ] target => '[end]' } # remove unneeded fields mutate { - id => "95-4" + id => "95-5" remove_field => "[interval]" remove_field => "[type]" } + # Make sure we have consistent AARNET names (case insensitive) + if [meta][src_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { + mutate { + id => "95-6" + update => { "[meta][src_organization]" => "Australian Academic and Research Network (AARNet)" } + } + } + if [meta][dst_organization] =~ /(?i)Australian Academic and Research Network|AARNET/ { + mutate { + id => "95-7" + update => { "[meta][dst_organization]" => "Australian Academic and Research Network (AARNet)" } + } + } } diff --git 
a/conf-logstash/98-post-process.conf b/conf-logstash/98-post-process.conf index dd70c55d..a6053906 100644 --- a/conf-logstash/98-post-process.conf +++ b/conf-logstash/98-post-process.conf @@ -1,11 +1,14 @@ # info useful for monitoring what logstash is doing filter { + ruby { id => '98-1' code => ' event.set( "@exit_time", Time.now ); event.set( "@processing_time", event.get("@exit_time") - event.get("@ingest_time") ); + event.set( "@pipeline_ver", "2.0.0" ); ' - tag_on_exception => '_rubyexception in 98-outputs, failed to set @processing_time' + tag_on_exception => '_rubyexception in 98-post-process.conf' } + } diff --git a/conf-logstash/99-output-file.conf.disabled b/conf-logstash/99-output-file.conf.disabled new file mode 100644 index 00000000..040841ce --- /dev/null +++ b/conf-logstash/99-output-file.conf.disabled @@ -0,0 +1,27 @@ +output { + file { + path => "/logstash-temp/test-output" + + #codec => json + + # json with newlines (default) + #codec => json_lines + + # one line per field (x => v) + codec => rubydebug + + # one line per event (no field names?) 
+ #codec => plain + + # formatted output + #codec => line {format => + #'{ "start":%{start}, "end":%{end}, "interval":%{interval}, + # "meta":{ "sensor_id":"%{[meta][sensor_id]}", "protocol":"%{[meta][protocol]}", "flow_type":"%{[meta][flow_type]}", + # "src_ip":"%{[meta][src_ip]}", "src_port":%{[meta][src_port]}, "src_asn":%{[meta][src_asn]}, "src_ifindex":%{[meta][src_ifindex]}, + # "dst_ip":"%{[meta][dst_ip]}", "dst_port":%{[meta][dst_port]}, "dst_asn":%{[meta][dst_asn]}, "dst_ifindex":%{[meta][dst_ifindex]} }, + # "values":%{values} } '} + + # append (default) or overwrite - use append to write all events, but delete file between runs + write_behavior => append + } +} diff --git a/conf-logstash/99-output-jsonlog.conf.disabled b/conf-logstash/99-output-jsonlog.conf.disabled deleted file mode 100644 index 98d9ce8e..00000000 --- a/conf-logstash/99-output-jsonlog.conf.disabled +++ /dev/null @@ -1,8 +0,0 @@ -output { - file { - path => "/data/all.json" - codec => json_lines - #codec => json - # write_behavior => overwrite - } -} \ No newline at end of file diff --git a/conf-logstash/99-output-multiline-json.conf.disabled b/conf-logstash/99-output-multiline-json.conf.disabled deleted file mode 100644 index 6e6f8a18..00000000 --- a/conf-logstash/99-output-multiline-json.conf.disabled +++ /dev/null @@ -1,17 +0,0 @@ -# Mainly for dev work. -# Write events as readable json (ie, with new-lines; only fields as specified below between single quotes). -# The output file can be read with 01-input-multiline-json.conf. -# We need "append" to write all events to the file, but delete the file in-between runs or it'll keep appending. -# (The example output fields are to directly write out events read from the "raw" rabbit queue.) 
-output { - file { - path => "/testdir/test-data.json" - codec => line {format => - '{ "start":%{start}, "end":%{end}, "interval":%{interval}, - "meta":{ "sensor_id":"%{[meta][sensor_id]}", "protocol":"%{[meta][protocol]}", "flow_type":"%{[meta][flow_type]}", - "src_ip":"%{[meta][src_ip]}", "src_port":%{[meta][src_port]}, "src_asn":%{[meta][src_asn]}, "src_ifindex":%{[meta][src_ifindex]}, - "dst_ip":"%{[meta][dst_ip]}", "dst_port":%{[meta][dst_port]}, "dst_asn":%{[meta][dst_asn]}, "dst_ifindex":%{[meta][dst_ifindex]} }, - "values":%{values} } '} - write_behavior => append - } -} diff --git a/conf-logstash/99-output-rabbit.conf b/conf-logstash/99-output-rabbit.conf index 28bd0fb1..6519274a 100644 --- a/conf-logstash/99-output-rabbit.conf +++ b/conf-logstash/99-output-rabbit.conf @@ -1,10 +1,22 @@ -##### COPY ANY CHANGES TO YOUR EXISTING VERSION AFTER AN UPGRADE ##### +# Values for ${variable-name:default-value} are obtained from an environment file. For docker, from the .env file; +# for bare-metal installations, /etc/logstash/logstash-env-vars - specified in the logstash systemd file) +# If values are not provided in an env file, the defaults/examples following the :'s are used. +# With a bare-metal installation, you may also just edit this file and fill in the values you want. + +# By default, flows with missing IPs are dropped. This can be changed in the env file. +filter { + mutate { + add_field => { "[@metadata][drop_missing_IPs_flag]" => "${drop_missing_IPs_flag:True}" } + } + if [@metadata][drop_missing_IPs_flag] == "True" and "Missing IPs" in [tags] { + drop{ id => "99-1" } + } +} -output { +output { #-- To send results to rabbitmq - # "${env-var:default-value}" will be replaced by the env-var environment variable value, or default-value if that is not set. - # Rabbitmq host may be, eg, "localhost", "xx.xx.xx.xx", "["hostname1", "hostname2"]. + # Rabbitmq host may be localhost, xx.xx.xx.xx, ["hostname1", "hostname2"], etc. 
# Change the queue key and exchange, if needed. rabbitmq { host => "${rabbitmq_output_host:localhost}" @@ -16,6 +28,6 @@ output { connection_timeout => 10000 durable => true persistent => false + id => "99-2" } - } diff --git a/conf-logstash/support/networkA-members-list.rb.example b/conf-logstash/support/networkA-members-list.rb.example index 8a6caebb..63fbf80a 100644 --- a/conf-logstash/support/networkA-members-list.rb.example +++ b/conf-logstash/support/networkA-members-list.rb.example @@ -1,12 +1,14 @@ -# This is an example of how to set up member or customer netblock mappings -# The name of the file must be networkA-members-list.rb -# (replace networkA with the name of the network in the filename and below) +# This is an example of how to set up member or customer netblock mappings. +# The name of the file must be networkA-members-list.rb. +# (replace "networkA" with the name of the actual network in the filename and 2 places below) # List of ASNs that include all the member netblocks (integers) +# An IP will be looked up only if the ASN is in this list. @asn_list['networkA'] = [1234, 4567] # List of netblocks and the desired organization name for each -# Best to put the biggest blocks/those with most flows at top +# The first match is returned, so it'll be fastest if you put the biggest blocks/those with most flows at top, +# but it is vital to put the most specific netblocks first. Put any "catch alls" at the bottom. 
@members['networkA'] = { "111.22.33.0/24" => "Member A", "444.55.66.0/32" => "Customer B", diff --git a/conf-logstash/support/sensor_groups.json b/conf-logstash/support/sensor_groups.json deleted file mode 100644 index 28ed5f24..00000000 --- a/conf-logstash/support/sensor_groups.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "^AMPATH.*": "AMPATH", - "^.*cenic.*": "CENIC", - "^FRGP.*": "FRGP", - "^GEANT.*": "GEANT", - "^gpn-.*": "GPN", - "^GPN-.*": "GPN", - "^GPN .*": "GPN", - "^.*Hawaii.*": "University of Hawaii", - "^i-Light.*": "I-Light", - "^GigaPOP.*": "I-Light", - "^NEAAR.*": "NEAAR", - "^NEA3R.*": "NEAAR", - "^.*nersc.*": "NERSC", - "^.*pacificwave.*": "PacWave", - "^.*pnw-gigapop\\.net$": "PacWave", - "^PennREN.*": "PennREN", - "^SANReN.*": "SANReN", - "^SingAREN.*": "SingAREN", - "^.*sox.*": "SOX", - "^.*SoX.*": "SOX", - "^Sun Corridor.*": "Sun Corridor", - "^TACC.*": "TACC", - "^tacc.*": "TACC", - "^TransPAC.*": "TransPAC", - "^.*UCAR.*": "UCAR" -} diff --git a/conf-logstash/support/sensor_groups.json.example b/conf-logstash/support/sensor_groups.json.example new file mode 100644 index 00000000..8706bcb5 --- /dev/null +++ b/conf-logstash/support/sensor_groups.json.example @@ -0,0 +1,4 @@ +{ + "^networkA-.*": "group A", + "^networkB-.*": "group B" +} diff --git a/conf-logstash/support/sensor_types.json b/conf-logstash/support/sensor_types.json deleted file mode 100644 index 206f4491..00000000 --- a/conf-logstash/support/sensor_types.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "^.*Tstat$": "Data Archive", - "^.*nersc\\.gov$": "Data Archive", - "^GEANT.*$": "Circuit", - "^Hawaii.*netflow$": "Circuit", - "^NEAAR.*": "Circuit", - "^NEA3R.*": "Circuit", - "^TransPAC.*": "Circuit", - "^AMPATH.*$": "Exchange Point", - "^SingAREN.*$": "Exchange Point", - "^.*pacificwave\\.net$": "Exchange Point", - "^.*pnw-gigapop\\.net$": "Exchange Point", - "^.*cenic.*$": "Regional Network", - "^gpn-.*$": "Regional Network", - "^GPN-.*$": "Regional Network", - "^GPN .*$": "Regional 
Network", - "^FRGP.*$": "Regional Network", - "^GigaPOP.*$": "Regional Network", - "^i-Light.*$": "Regional Network", - "^PennREN.*$": "Regional Network", - "^SANReN.*$": "Regional Network", - "^.*sox.*$": "Regional Network", - "^.*SoX.*$": "Regional Network", - "^Sun Corridor.*$": "Regional Network", - "^tacc_sflows$": "Regional Network" -} diff --git a/conf-logstash/support/sensor_types.json.example b/conf-logstash/support/sensor_types.json.example new file mode 100644 index 00000000..9337d60b --- /dev/null +++ b/conf-logstash/support/sensor_types.json.example @@ -0,0 +1,5 @@ +{ + "^.*Tstat$": "Data Archive", + "^Network A -.*": "Circuit", + "^Network B -.*": "Regional Network" +} diff --git a/conf-pmacct/nfacctd-pretag.map.ORIG b/conf-pmacct/nfacctd-pretag.map.ORIG new file mode 100644 index 00000000..65bce704 --- /dev/null +++ b/conf-pmacct/nfacctd-pretag.map.ORIG @@ -0,0 +1,10 @@ +! This file is referenced in the nfacctd config file and used to set the "label" field +! which is parsed to obtain and set the sensor name. +! +! After the setup script runs, label should be "nfacct--" for netflow +! followed by the sensor name with spaces replaced by #'s. +! No commas are allowed in the sensor name! +! eg, set_label=nfacct--Netflow#Sensor + +set_label=${netflowSensorName_1} + diff --git a/conf-pmacct/nfacctd.conf.ORIG b/conf-pmacct/nfacctd.conf.ORIG new file mode 100644 index 00000000..ea4bc88b --- /dev/null +++ b/conf-pmacct/nfacctd.conf.ORIG @@ -0,0 +1,74 @@ +! PMACCT CONFIG FOR NETFLOW +! Settings most likely to need changes: NFACCTD_PORT, PRE_TAG_MAP, and possibly AMQP_ROUTING_KEY + +!# debug: true + +! Port nfacctd should listen to +nfacctd_port: ${netflowContainerPort_1} + +! Get a value for 'label' from the pre_tag_map file. +! We use this to encode the sensor name for each port. +pre_tag_map: /etc/pmacct/nfacctd-pretag_1.map + +! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) +! Default is tab-separated output.
If 'label' or any variable length field is in the aggregation list, you have to use csv format. +!# plugins: print +!# print_output_file: /tmp/nfacct/%Y/%m/%d/nfacct.%Y%m%d-%H%M +!# print_output: csv +! How often to write to file (default is 60 sec) +!# print_refresh_time: 60 + +! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE + plugins: amqp + amqp_host: ${rabbitmq_input_host} + amqp_user: ${rabbitmq_input_username} + amqp_passwd: ${rabbitmq_input_pw} + amqp_exchange_type: direct + amqp_exchange: amq.direct + amqp_persistent_msg: true + amqp_routing_key: netsage_deidentifier_raw + amqp_vhost: / +! How often to write to the rabbit queue (default is 60 sec) + amqp_refresh_time: 60 + +! To aggregate flows over time (x sec)- +! 1. Set print_refresh_time or amqp_refresh_time to x sec. This is the period over which it will aggregate / how often to output. +! 2. Do not include times (timestamp_start and timestamp_end) in aggregate list. +! 3. Set nfacctd_stitching: true +! FOR NETSAGE, DO NOT AGGREGATE NETFLOW OVER TIME WITH PMACCT! + +! Fields to aggregate on - include all fields we want in the output. See pmacct docs for field meanings. +! For netflow, the available timestamps are timestamp_start and timestamp_end (since the netflow exporter itself aggregates over time). +!! Note: Sampling_rate is just a flag to say whether it is known and applied. +!! Before a netflow template arrives, sampling_rate = 0 and no correction is applied! +!! There may be a logstash conf that drops flows with sampling_rate = 0. This should be removed if there really IS no sampling! + aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, timestamp_start, timestamp_end, label + +! Stitching - determine and add timestamp_min and timestamp_max fields + nfacctd_stitching: false + +! Output timestamps as epochs rather than strings that we need to parse + timestamps_since_epoch: true +!
Don't round off to whole seconds + timestamps_secs: false + +! Gets the sampling rate from flow packet and automatically applies it +! Example: If sample_rate is 1000 (meaning 1/1000) then it multiplies +! packets and bytes by 1000. + nfacctd_renormalize: true + +! Get AS info from the netflow datagrams + nfacctd_as: netflow + +! write to /var/log/messages + syslog: local0 + +! save template file for netflow to use on next startup +!## nfacctd_templates_file: /path/file + +! Increase buffer size for larger numbers of flows. +! I'M NOT SURE WHAT THIS SHOULD BE! + plugin_buffer_size: 10240 + plugin_pipe_size: 10240000 + +! additional aggregate_primitives fields for netflow ? diff --git a/conf-pmacct/sfacctd-pretag.map.ORIG b/conf-pmacct/sfacctd-pretag.map.ORIG new file mode 100644 index 00000000..179aa22f --- /dev/null +++ b/conf-pmacct/sfacctd-pretag.map.ORIG @@ -0,0 +1,10 @@ +! This file is referenced in the sfacctd config file and used to set the "label" field +! which is parsed to obtain and set the sensor name. +! +! After the setup script runs, label should be "sfacct--" for sflow +! followed by the sensor name with spaces replaced by #'s. +! No commas are allowed in the sensor name! +! eg, set_label=sfacct--Sflow#Sensor + +set_label=${sflowSensorName_1} + diff --git a/conf-pmacct/sfacctd.conf.ORIG b/conf-pmacct/sfacctd.conf.ORIG new file mode 100644 index 00000000..6e6c3d14 --- /dev/null +++ b/conf-pmacct/sfacctd.conf.ORIG @@ -0,0 +1,75 @@ +! PMACCT CONFIG FOR SFLOW +! Settings most likely to need changes: SFACCTD_PORT, PRE_TAG_MAP, and possibly AMQP_ROUTING_KEY + +!# debug: true + +! Port sfacctd should listen to +sfacctd_port: ${sflowContainerPort_1} + +! Get a value for 'label' from the pre_tag_map file. +! We use this to encode the sensor name for each port. +pre_tag_map: /etc/pmacct/sfacctd-pretag_1.map + +! FOR PRINTING TO FILES instead of writing to rabbit queue (comment out amqp_* lines) +! Default format is tab-separated. 
If 'label' or any variable length field is in the aggregation list, you have to use csv format. +!## plugins: print +!## print_output_file: /tmp/sfacct/%Y/%m/%d/sfacct.%Y%m%d-%H%M +!## print_output: csv +! How often to write to file (default = 60 sec) +!## print_refresh_time: 300 + +! FOR SENDING FLOWS TO THE LOCAL RABBIT QUEUE + plugins: amqp + amqp_host: ${rabbitmq_input_host} + amqp_user: ${rabbitmq_input_username} + amqp_passwd: ${rabbitmq_input_pw} + amqp_exchange_type: direct + amqp_exchange: amq.direct + amqp_persistent_msg: true + amqp_routing_key: netsage_deidentifier_raw + amqp_vhost: / +! How often to write to rabbit queue = time to aggregate over (default = 60 sec) + amqp_refresh_time: 300 + +! To aggregate flows over time (x sec)- +! 1. Set print_refresh_time or amqp_refresh_time to x sec. +! 2. Do not include timestamp_arrival in aggregate list. +! Timestamp_min and timestamp_max will be added to output automatically, do not include in aggregate list. +! 3. Set sfacctd_stitching: true +! FOR NETSAGE, AGGREGATE SFLOW OVER TIME (5 minutes) WITH PMACCT + +! Fields to aggregate on - include all fields we want in the output. See pmacct docs for field meanings +! For sflow, the only available timestamp is timestamp_arrival (arrival at collector). Do not include it to aggregate over time. +!! Note: Sampling_rate is just a flag to say whether it is known and applied (could be a pmacct bug). +!! There may be a logstash conf that drops flows with Sampling_rate = 0, which needs to be removed if there really is no sampling. +! FOR SFLOW AGGREGATION: + aggregate: src_host, dst_host, src_port, dst_port, proto, src_as, dst_as, in_iface, out_iface, sampling_rate, label + +! Stitching - determine and add timestamp_min and timestamp_max fields + sfacctd_stitching: true + +! Output timestamps as epochs rather than strings that we need to parse + timestamps_since_epoch: true +! Don't round off to whole seconds + timestamps_secs: false + +!
Gets the sampling rate from flow packet and automatically applies it +! Example: If sample_rate is 1000 (meaning 1/1000) then it multiplies +! packets and bytes by 1000. + sfacctd_renormalize: true + +! Get AS info from the sflow datagrams + sfacctd_as: sflow + +! write to /var/log/messages + syslog: local0 + +! Increase buffer size for larger numbers of flows. +! I'M NOT SURE WHAT THIS SHOULD BE! + plugin_buffer_size: 10240 + plugin_pipe_size: 10240000 + +! there's no template file for sflow +! no additional aggregate_primitives fields for sflow ? + + diff --git a/conf/logging-debug.conf b/conf/logging-debug.conf deleted file mode 100644 index b5897a27..00000000 --- a/conf/logging-debug.conf +++ /dev/null @@ -1,21 +0,0 @@ -log4perl.rootLogger = DEBUG, SYSLOG, screen -log4perl.appender.SYSLOG = Log::Dispatch::Syslog -# uncomment this next line if you want to restrict syslog to 'info' and above -#log4perl.appender.SYSLOG.min_level = info -log4perl.appender.SYSLOG.ident = sub { \ - my $process = $0; \ - if ( $process =~ /netsage-(.+)-daemon/ ) { \ - my $ident = $1; \ - return "NETSAGE-".uc($ident); \ - } else { \ - return "NETSAGE-".$process; \ - } \ -} -log4perl.appender.SYSLOG.facility = LOCAL0 -log4perl.appender.SYSLOG.layout = PatternLayout -log4perl.appender.SYSLOG.layout.ConversionPattern=[%d] %F %L %c - %m%n - -log4perl.appender.screen = Log::Log4perl::Appender::Screen -log4perl.appender.screen.stderr = 0 -log4perl.appender.screen.layout = PatternLayout -log4perl.appender.screen.layout.ConversionPattern = %d %p> %F{1}:%L %M - %m%n diff --git a/conf/logging.conf b/conf/logging.conf deleted file mode 100644 index d8f89915..00000000 --- a/conf/logging.conf +++ /dev/null @@ -1,16 +0,0 @@ -log4perl.rootLogger = INFO, SYSLOG - -log4perl.appender.SYSLOG = Log::Dispatch::Syslog -log4perl.appender.SYSLOG.min_level = info -log4perl.appender.SYSLOG.facility = LOCAL0 -log4perl.appender.SYSLOG.ident = sub { \ - my $process = $0; \ - if ( $process =~ /netsage-(.+)-daemon/ ) 
{ \ - my $ident = $1; \ - return "NETSAGE-".uc($ident); \ - } else { \ - return "NETSAGE-".$process; \ - } \ -} -log4perl.appender.SYSLOG.layout = PatternLayout -log4perl.appender.SYSLOG.layout.ConversionPattern=%F[%L] %p: %m%n diff --git a/conf/netsage_flow_filter.xml b/conf/netsage_flow_filter.xml deleted file mode 100644 index 882f59a8..00000000 --- a/conf/netsage_flow_filter.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - netsage_deidentifier_raw_prefilter - 2 - - - 3 - netsage_deidentifier_raw - - - - 127.0.0.1 - 5672 - guest - guest - Simp - 60 - Simp.Data - - - - 100 - - 1 - - - - - - /var/run/netsage-flow-filter-daemon.pid - - diff --git a/conf/netsage_netflow_importer.xml b/conf/netsage_netflow_importer.xml deleted file mode 100644 index 7bdf7410..00000000 --- a/conf/netsage_netflow_importer.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - netsage_deidentifier_netflow_fake - 2 - - - 3 - netsage_deidentifier_raw - - - - - 100 - - - 1 - - - - - - /var/cache/netsage/netflow_importer.cache - - - - 10000000 - - - 10m - - - - - - - - - - - - - - /var/run/netsage-netflow-importer-daemon.pid - - - diff --git a/conf/netsage_shared.xml b/conf/netsage_shared.xml deleted file mode 100644 index 63c60c99..00000000 --- a/conf/netsage_shared.xml +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - sensorname - - - /path/to/flow-files - - - sflow - - - - - - - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - diff --git a/cron.d/baremetal-netsage-downloads.cron b/cron.d/baremetal-netsage-downloads.cron new file mode 100644 index 00000000..6eb6a3f9 --- /dev/null +++ b/cron.d/baremetal-netsage-downloads.cron @@ -0,0 +1,40 @@ +SHELL=/bin/sh +PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin +MAILTO=root + +# Download possibly-updated files required by the Netsage Pipeline from scienceregistry.grnoc.iu.edu +# This cron file is to be used for bare-metal installations 
+DOWNLOAD_PATH="/var/lib/grnoc/netsage" +SUPPORT_PATH="/etc/logstash/conf.d/support" + + +# MAXMIND ASN on Saturdays at 23:30 UTC +30 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O $DOWNLOAD_PATH/newASN.mmdb && mv $DOWNLOAD_PATH/newASN.mmdb $DOWNLOAD_PATH/GeoLite2-ASN.mmdb && touch $DOWNLOAD_PATH/GeoLite2-ASN.mmdb + +# MAXMIND CITY on Saturdays at 23:35 UTC +35 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O $DOWNLOAD_PATH/newCity.mmdb && mv $DOWNLOAD_PATH/newCity.mmdb $DOWNLOAD_PATH/GeoLite2-City.mmdb && touch $DOWNLOAD_PATH/GeoLite2-City.mmdb + +# CAIDA file on Saturdays at 23:40 UTC +40 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O $DOWNLOAD_PATH/newCAIDA.mmdb && mv $DOWNLOAD_PATH/newCAIDA.mmdb $DOWNLOAD_PATH/CAIDA-org-lookup.csv && touch $DOWNLOAD_PATH//CAIDA-org-lookup.csv + +# SCIENCE REGISTRY Saturdays at 23:45 UTC +45 23 * * 6 root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O $DOWNLOAD_PATH/scireg.mmdb.new && mv $DOWNLOAD_PATH/scireg.mmdb.new $DOWNLOAD_PATH/scireg.mmdb && touch $DOWNLOAD_PATH/scireg.mmdb + +# FRGP MEMBER LIST on Saturdays at 23:50 UTC +50 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O $SUPPORT_PATH/newFRGP.rb && mv $SUPPORT_PATH/newFRGP.rb $SUPPORT_PATH/FRGP-members-list.rb && touch $SUPPORT_PATH/FRGP-members-list.rb + +# ILIGHT MEMBER LIST on Saturdays at 23:52 UTC +52 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O $SUPPORT_PATH/newilight.rb && mv $SUPPORT_PATH/newilight.rb $SUPPORT_PATH/ilight-members-list.rb && touch $SUPPORT_PATH/ilight-members-list.rb + +# ONENET MEMBER LIST on Saturdays at 23:54 UTC +54 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/onenet-members-list.rb -q -O $SUPPORT_PATH/newonenet.rb && mv 
$SUPPORT_PATH/newonenet.rb $SUPPORT_PATH/onenet-members-list.rb && touch $SUPPORT_PATH/onenet-members-list.rb + +# SENSOR_GROUPS on Saturdays at 23:15 UTC +15 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_groups.json -q -O $SUPPORT_PATH/newsensor_groups.json && mv $SUPPORT_PATH/newsensor_groups.json $SUPPORT_PATH/sensor_groups.json && touch $SUPPORT_PATH/sensor_groups.json + +# SENSOR_TYPES on Saturdays at 23:20 UTC +20 23 * * 6 root /usr/bin/wget https://scienceregistry.grnoc.iu.edu/exported/sensor_types.json -q -O $SUPPORT_PATH/newsensor_types.json && mv $SUPPORT_PATH/newsensor_types.json $SUPPORT_PATH/sensor_types.json && touch $SUPPORT_PATH/sensor_types.json + + + + diff --git a/cron.d/docker-netsage-downloads.cron.ORIG b/cron.d/docker-netsage-downloads.cron.ORIG new file mode 100644 index 00000000..62f68539 --- /dev/null +++ b/cron.d/docker-netsage-downloads.cron.ORIG @@ -0,0 +1,9 @@ +SHELL=/bin/sh + +# This cron file is to be used with Docker installations. Fill in missing info manually or use the setup-cron.sh script. + +# Use wget to download possibly-updated files required for the Netsage Pipeline from scienceregistry.grnoc.iu.edu. +# Put them in directories in the git checkout of the pipeline code. + +# Get all on Saturdays at 23:00 UTC +00 23 * * 6 -USER- -PATH-TO-GIT-CHECKOUT-/bin/docker-netsage-downloads.sh > /dev/null 2>&1 diff --git a/cron.d/netsage-caida-update.cron b/cron.d/netsage-caida-update.cron deleted file mode 100644 index 0ddd42f9..00000000 --- a/cron.d/netsage-caida-update.cron +++ /dev/null @@ -1,13 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated CAIDA asn-to-org csv file from scienceregistry.grnoc.iu.edu -# It will be updated only quartly but can be downloaded weekly to be able to monitor its freshness -# -q for quiet - so no email if no output. 
-# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time -# - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:40 UTC -##40 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/CAIDA-org-lookup.csv -q -O /var/lib/grnoc/netsage/newCAIDA.mmdb && mv /var/lib/grnoc/netsage/newCAIDA.mmdb /var/lib/grnoc/netsage/CAIDA-org-lookup.csv && touch /var/lib/grnoc/netsage/CAIDA-org-lookup.csv diff --git a/cron.d/netsage-maxmind-update.cron b/cron.d/netsage-maxmind-update.cron deleted file mode 100644 index f9d8aca0..00000000 --- a/cron.d/netsage-maxmind-update.cron +++ /dev/null @@ -1,14 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated MaxMind GeoLite2-ASN and GeoLite2-City databases from scienceregistry.grnoc.iu.edu -# -q for quiet - so no email if no output. -# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:30 UTC -## 30 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-ASN.mmdb -q -O /var/lib/grnoc/netsage/newASN.mmdb && mv /var/lib/grnoc/netsage/newASN.mmdb /var/lib/grnoc/netsage/GeoLite2-ASN.mmdb && touch /var/lib/grnoc/netsage/GeoLite2-ASN.mmdb -# -# # on Wednesdays at 23:35 UTC -## 35 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/GeoLite2-City.mmdb -q -O /var/lib/grnoc/netsage/newCity.mmdb && mv /var/lib/grnoc/netsage/newCity.mmdb /var/lib/grnoc/netsage/GeoLite2-City.mmdb && touch /var/lib/grnoc/netsage/GeoLite2-City.mmdb diff --git a/cron.d/netsage-memberlists-update.cron b/cron.d/netsage-memberlists-update.cron deleted file mode 100644 index 0d834cde..00000000 --- 
a/cron.d/netsage-memberlists-update.cron +++ /dev/null @@ -1,16 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get updated member-org lists from scienceregistry.grnoc.iu.edu -# These will be updated randomly and rarely, but can be downloaded weekly to be able to monitor their freshness -# -q for quiet - so no email if no output. -# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time -# - -## UNCOMMENT AFTER FILLING IN USERNAME AND PW -# on Wednesdays at 23:45 UTC -##45 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/FRGP-members-list.rb -q -O /etc/logstash/conf.d/support/newFRGP.rb && mv /etc/logstash/conf.d/support/newFRGP.rb /etc/logstash/conf.d/support/FRGP-members-list.rb && touch /etc/logstash/conf.d/support/FRGP-members-list.rb - -# on Wednesdays at 23:50 UTC -##50 23 * * 3 root /usr/bin/wget --user xxx --password xxx https://scienceregistry.grnoc.iu.edu/exported/ilight-members-list.rb -q -O /etc/logstash/conf.d/support/newilight.rb && mv /etc/logstash/conf.d/support/newilight.rb /etc/logstash/conf.d/support/ilight-members-list.rb && touch /etc/logstash/conf.d/support/ilight-members-list.rb diff --git a/cron.d/netsage-scireg-update.cron b/cron.d/netsage-scireg-update.cron deleted file mode 100644 index 4e6a7de1..00000000 --- a/cron.d/netsage-scireg-update.cron +++ /dev/null @@ -1,16 +0,0 @@ -SHELL=/bin/sh -PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin -MAILTO=root - -# Get Science Registry info from scienceregistry.netsage.global (scienceregistry.grnoc.iu.edu) -# This "fake geoip" mmdb file is used by logstash -# -q for quiet - no email if no output. 
-# Instead of touch, if your wget version has it, you can use --no-use-server-timestamps which sets the file's time to the download time - -## UNCOMMENT AFTER CONFIRMING A TIME TO RUN -# daily at 00:00 UTC -##00 00 * * * root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.mmdb -q -O /var/lib/grnoc/netsage/scireg.mmdb.new && mv /var/lib/grnoc/netsage/scireg.mmdb.new /var/lib/grnoc/netsage/scireg.mmdb && touch /var/lib/grnoc/netsage/scireg.mmdb - -# get yaml file in case a human wants to view the data. csv and json are also available. -##05 00 * * * root /usr/bin/wget https://scienceregistry.netsage.global/exported/scireg.yaml -q -O /var/lib/grnoc/netsage/scireg.yaml.new && mv /var/lib/grnoc/netsage/scireg.yaml.new /var/lib/grnoc/netsage/scireg.yaml && touch /var/lib/grnoc/netsage/scireg.yaml - diff --git a/cron.d/restart-logstash-container.cron.ORIG b/cron.d/restart-logstash-container.cron.ORIG new file mode 100644 index 00000000..55f24767 --- /dev/null +++ b/cron.d/restart-logstash-container.cron.ORIG @@ -0,0 +1,7 @@ +SHELL=/bin/sh +PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin +MAILTO=root + +# restart logstash so caida, geoip, and scireg db's are reread, in case they've been updated +# daily at 11:00 UTC +00 11 * * * root -PATH-TO-GIT-CHECKOUT-/bin/restart-logstash-container.sh > -PATH-TO-GIT-CHECKOUT-/logstash-temp/restart-output.txt 2>&1 diff --git a/cron.d/netsage-logstash-restart.cron b/cron.d/restart-logstash-service.cron similarity index 100% rename from cron.d/netsage-logstash-restart.cron rename to cron.d/restart-logstash-service.cron diff --git a/data/.place_holder b/data/.place_holder deleted file mode 100644 index e69de29b..00000000 diff --git a/docker-compose.develop.yml b/docker-compose.ES-Kibana.yml similarity index 100% rename from docker-compose.develop.yml rename to docker-compose.ES-Kibana.yml diff --git a/docker-compose.build.yml b/docker-compose.build.yml deleted file mode 100644 index 
d1d60cf3..00000000 --- a/docker-compose.build.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: "3.7" -services: - importer: - image: netsage/pipeline_importer:latest - build: - context: . - dockerfile: compose/importer/Dockerfile - logstash: - image: netsage/pipeline_logstash:latest - build: - context: . - dockerfile: ./compose/logstash/Dockerfile diff --git a/docker-compose.example.yml b/docker-compose.example.yml new file mode 100644 index 00000000..77855815 --- /dev/null +++ b/docker-compose.example.yml @@ -0,0 +1,89 @@ +version: "3.7" + +# Docker services and settings. +# Setup-pmacct-compose.sh uses this file to create the non-example version docker-compose.yml. +# Optionally, use the docker-compose.override.yml file for any further manual overrides. + +# Shared network for the containers. Processes will be able to communicate over their default ports. +networks: + netsage-network: + +# Reusable blocks of settings +x-default-pmacct-settings: + &pmacct-defaults + volumes: + # location of configs on host : location in container : read-only + - ./conf-pmacct:/etc/pmacct:ro + networks: + - netsage-network + depends_on: + - rabbit + +x-default-sfacct-settings: + &sflow-defaults + image: ghcr.io/netsage-project/sfacctd:7Jun2022 + +x-default-nfacct-settings: + &netflow-defaults + image: ghcr.io/netsage-project/nfacctd:7Jun2022 + +# The containers (the setup script will replace those for sfacctd and nfacctd with the correct number and names) +services: + + sfacctd_1: + container_name: sfacctd_1 + << : *pmacct-defaults + << : *sflow-defaults + command: + # parameters for the sfacctd command + - -f + - /etc/pmacct/sfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" + + nfacctd_1: + container_name: nfacctd_1 + << : *pmacct-defaults + << : *netflow-defaults + command: + # parameters for the nfacctd command + - -f + - /etc/pmacct/nfacctd_1.conf + ports: + # port on host receiving flow data : port 
in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" + + rabbit: + container_name: rabbit + hostname: rabbit + image: rabbitmq:3.9-management + env_file: .env + ports: + # The port for the UI needs to be mapped to that on the host + # To view, go to https:///rabbit + - "15672:15672" + networks: + - netsage-network + + logstash: + container_name: logstash + image: docker.elastic.co/logstash/logstash:7.16.2 + env_file: .env + # user uid is expected to be 1000 + user: 1000:1000 + # Explicitly specify *.conf to be sure logstash doesn't use *.disabled configs. + command: logstash -f /etc/logstash/conf.d/*.conf + volumes: + # location of logstash configs on host : location within container : read-only + - ./conf-logstash/:/etc/logstash/conf.d/:ro + # location of downloaded maxmind and caida files on host : location within container : read-only + - ./logstash-downloads/:/var/lib/grnoc/netsage/:ro + # location for aggregation map files, which save flows being aggregated when logstash shuts down + - ./logstash-temp/:/logstash-temp/ + networks: + - netsage-network + depends_on: + - rabbit + # restart is not included since if logstash dies, there may be an error and we don't want it to keep restarting over and over + diff --git a/docker-compose.override_example.yml b/docker-compose.override_example.yml index 2d943578..643c8998 100644 --- a/docker-compose.override_example.yml +++ b/docker-compose.override_example.yml @@ -1,31 +1,19 @@ version: "3.7" + +# Settings in this file override or add to those in docker-compose.yml. +# Add anything that needs to be changed manually. (This is unusual). +# Docker-compose.yml will not be overwritten on upgrade.
+# For example: + services: - logstash: - image: netsage/pipeline_logstash:latest - ## If you need to allocate more than 1GB (default) override the JMV options - # volumes: - # - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options - importer: - image: netsage/pipeline_importer:latest - ## If you are using custom collectors you need to create this file and specify any addition ENV flags to identify the collector source - # volumes: - # - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml - ## Please remove or add any additional collectors here. Default setup should work fine without any custom config. - ## You may remove any collectors that are not needed. - sflow-collector: - image: netsage/nfdump-collector:alpine-nightly - restart: always - command: sfcapd -T all -l /data -S 1 -w -z -p 9998 - volumes: - - ./data/input_data/sflow:/data + sfacctd_1: ports: - - "9998:9998/udp" - netflow-collector: - image: netsage/nfdump-collector:alpine-nightly - command: nfcapd -T all -l /data -S 1 -w -z -p 9999 + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" + + nfacctd_1: ports: - - "9999:9999/udp" - restart: always - volumes: - - ./data/input_data/netflow:/data + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" + diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 9430fd0c..00000000 --- a/docker-compose.yml +++ /dev/null @@ -1,44 +0,0 @@ -version: "3.7" -services: - rabbit: - image: rabbitmq:3.8-management - env_file: .env - hostname: rabbit - volumes: - - ./data/rabbit:/var/lib/rabbitmq - ports: - - "15672:15672" - - "5671:5671" - - "5672:5672" - importer: - image: netsage/pipeline_importer:latest - env_file: .env - depends_on: - - rabbit - restart: always - volumes: - - ./data:/data - - ./data/importer_cache:/var/cache/netsage - - ./conf-logstash:/usr/share/logstash/pipeline/ - 
labels: - ofelia.enabled: "true" - ofelia.job-exec.dataUpdate.schedule: "@weekly" - ofelia.job-exec.dataUpdate.command: "/tmp/docker_init.sh" - logstash: - image: netsage/pipeline_logstash:latest - env_file: .env - depends_on: - - importer - ports: - - "5044:5044" - volumes: - - ./conf-logstash:/usr/share/logstash/pipeline/ - - ./data:/data - - ./data/cache:/var/lib/grnoc/netsage/ - ofelia: ## Scheduler Task - image: mcuadros/ofelia:v0.3.0 - command: daemon --docker - depends_on: - - importer - volumes: - - /var/run/docker.sock:/var/run/docker.sock:ro diff --git a/env.example b/env.example index b4645404..2c4e34d6 100644 --- a/env.example +++ b/env.example @@ -1,60 +1,101 @@ -# Local RabbitMQ Server config -RABBITMQ_ERLANG_COOKIE='secret cookie' -RABBIT_HOST=rabbit -RABBITMQ_DEFAULT_USER=guest -RABBITMQ_DEFAULT_PASS=guest -discovery.type=single-node +# FOR PMACCT CONFIGS: +# Number of sensors of each type +#--- UPDATE IF THERE ARE NOT 1 OF EACH TYPE --- +sflowSensors=1 +netflowSensors=1 + +# Env variables for one sensor should all end in the same suffix, +# and there should be a sequence (_1, _2, etc) for sflow and a sequence for netflow. +# For each sensor, list the following: +# The sensor name to assign to flows (cannot contain commas with pmacct) +# The port on the pipeline host to which the router is sending flows +#--- REPLACE EXAMPLE VALUES --- +# sflow sensors: +sflowSensorName_1=The Sflow Sensor Name +sflowPort_1=8000 + +# netflow sensors: +netflowSensorName_1=The Netflow Sensor Name +netflowPort_1=9000 -# For importer output queue / logstash input queue +# FOR LOGSTASH AND PMACCT CONFIGS: +# pmacct will write and logstash will read flows from this rabbit host rabbitmq_input_host=rabbit rabbitmq_input_username=guest rabbitmq_input_pw=guest -# For logstash output queue +# FOR LOGSTASH CONFIGS: +# By default, processed flows are sent to a rabbit queue. +# The example settings write to the rabbitmq container, where they will accumulate, by default.
+#--- TO SEND PROCESSED FLOWS TO GlobalNOC, ASK FOR THE PROPER SETTINGS --- rabbitmq_output_host=rabbit rabbitmq_output_username=guest rabbitmq_output_pw=guest -rabbitmq_output_key=netsage_archive_input - -# To drop all flows except those using the specfied interfaces -# (see the Docker Advanced documentation) - ifindex_filter_flag=False -# ifindex_filter_keep=123,456 - -# To change the sensor name for flows using a certain interface -# (see the Docker Advanced documentation) - ifindex_sensor_rename_flag=False -# ifindex_sensor_rename_old_name=oldname -# ifindex_sensor_rename_new_name=newname -# ifindex_sensor_rename_ifindex=0 - -# To correct flow sizes and rates for sampling for certain sensors -# (see the Docker Advanced documentation) - sampling_correction_flag=False -# sampling_correction_sensors=sensor1,sensor2 -# sampling_correction_factor=1 - -# Logstash settings -# set this to false so we don't install elasticsearch locally -XPACK_MONITORING_ENABLED=false -# java heap size for logstash -LS_JAVA_OPTS=-Xmx2g -Xms2g -# for the logstash aggregation filter, ensure only one logstash worker is running +rabbitmq_output_key=processed_flows + +# Aggregation Filter settings +# Default inactivity_timeout is 6 minutes. If no matching flows have come in for 6 minutes, end the aggregated flow. +# Default max_flow_timeout is 1 hour. This is the maximum allowed flow duration; longer flows will be broken up. +# Aggregation_maps_path is the file where flows undergoing aggregation are saved if logstash shuts down. The default is for Docker installs. +# These should not normally be changed. +inactivity_timeout=360 +max_flow_timeout=3600 +aggregation_maps_path=/logstash-temp/logstash-aggregation-maps + +# Advanced Processing Options - see the "Docker Advanced" documentation: + +# To do ifindex (interface) filtering of flows from specified sensors: +# Flows from listed sensors will be dropped unless src or dst interface is in the list of ifindexes to keep.
+# "ALL" can refer to all sensors or all interfaces of a sensor. +# If a sensor is not referenced, all its flows will be kept. +ifindex_filter_flag=False +##ifindex_filter_keep=Sensor A Name: 456,789; Sensor B Name: ALL + +# To change the sensor name for flows from a specified sensor and interface: +# Provide the ifindex, old and new sensor names. +ifindex_sensor_rename_flag=False +##ifindex_sensor_rename_ifindex=123 +##ifindex_sensor_rename_old_name=Old Sensor Name +##ifindex_sensor_rename_new_name=New Sensor Name + +# To correct for sampling in the logstash pipeline: +# Normally, sampling corrections are applied before ingest into logstash, but in certain cases, +# it may need to be done in logstash. Logstash will do corrections only if pmacct reports that it has not! +# List affected sensors and the correction factor. "ALL" can refer to all sensors. +sampling_correction_flag=False +##sampling_correction_sensors=Sensor A Name; Sensor B Name +##sampling_correction_factor=100 + +# To do subnet filtering of flows: +# Flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. +# "ALL" can refer to all sensors. +# If a sensor is not referenced, all its flows will be kept. +subnet_filter_flag=False +##subnet_filter_keep=Sensor A Name: 123.45.6.0/16; Sensor B Name: 123.33.33.0/24, 456.66.66.0/24 + +# To NOT deidentify flows: +# Deidentification of IP addresses is done by default. +# To keep full IP addresses, set this parameter to True. +full_IPs_flag=False + +# LOGSTASH PROCESS SETTINGS: +# memory - java heap size. Keep Xmx=Xms! +LS_JAVA_OPTS=-Xmx4g -Xms4g +# The aggregation filter requires there be only one logstash worker! Do not change. 
PIPELINE_WORKERS=1 -# for debugging -## LOG_LEVEL=debug - -# Importer settings -# == EXAMPLE VALUES MUST BE REPLACED == -sflowSensorName=The Sflow Sensor Name -netflowSensorName=The Netflow Sensor Name - -# Logstash Aggregation Filter settings -# default inactivity_timeout is 630 sec for 5-minute nfcapd files; for 15-minute files, use 960 sec. -# max_flow_timeout is the maximum flow duration; longer flows will be broken up. -inactivity_timeout=630 -max_flow_timeout=86400 -aggregation_maps_path=/data/logstash-aggregation-maps - -# In case you run elasticsearch and kibana +PIPELINE_ORDERED=true +# other +PIPELINE_ECS_COMPATIBILITY=disabled + +# LOCAL RABBITMQ SERVER SETTINGS: +# (for the post-pmacct/pre-logstash queue) +# (when running outside of docker, input_host should be localhost) +RABBIT_HOST=rabbit +RABBITMQ_DEFAULT_USER=guest +RABBITMQ_DEFAULT_PASS=guest +RABBITMQ_ERLANG_COOKIE='secret cookie' + +# In case you run an elasticsearch container ELASTIC_HOSTNAME='elastic' +discovery.type=single-node +XPACK_MONITORING_ENABLED=false diff --git a/grnoc-netsage-deidentifier.spec b/grnoc-netsage-deidentifier.spec deleted file mode 100644 index 78104187..00000000 --- a/grnoc-netsage-deidentifier.spec +++ /dev/null @@ -1,213 +0,0 @@ -Summary: GRNOC NetSage Flow-Processing Pipeline -Name: grnoc-netsage-deidentifier -Version: 1.2.9 -Release: 1%{?dist} -License: GRNOC -Group: Measurement -URL: http://globalnoc.iu.edu -Source0: %{name}-%{version}.tar.gz -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root -BuildArch: noarch -Requires: perl >= 5.8.8 -# these are part of perl with centos6, not with centos7. Could just require perl-core package? 
-%if 0%{?rhel} >= 7 -Requires: perl-Data-Dumper -Requires: perl-Getopt-Long -Requires: perl-Storable -%endif -Requires: perl-AnyEvent -Requires: perl-Clone -Requires: perl-Data-Validate-IP -Requires: perl-TimeDate -Requires: perl-Digest-SHA -Requires: perl-GRNOC-Config -Requires: perl-GRNOC-Log -Requires: perl-GRNOC-RabbitMQ -Requires: perl-Hash-Merge -Requires: perl-IPC-ShareLite -Requires: perl-JSON-SL -Requires: perl-JSON-XS -Requires: perl-List-MoreUtils -Requires: perl-Math-Round -Requires: perl-Moo -Requires: perl-Net-AMQP-RabbitMQ -Requires: perl-Net-IP -Requires: perl-Number-Bytes-Human -Requires: perl-Parallel-ForkManager -Requires: perl-Path-Class -Requires: perl-Path-Tiny -Requires: perl-Proc-Daemon -Requires: perl-TimeDate -Requires: perl-Time-Duration -Requires: perl-Time-HiRes -Requires: perl-Try-Tiny -Requires: perl-Type-Tiny -Requires: wget - -Requires: rubygem-ipaddress - -%description -GRNOC NetSage Flow-Processing Pipeline - -%prep -%setup -q -n grnoc-netsage-deidentifier-%{version} - -%build -%{__perl} Makefile.PL PREFIX="%{buildroot}%{_prefix}" INSTALLDIRS="vendor" -make - -%install -rm -rf $RPM_BUILD_ROOT -make pure_install - -%{__install} -d -p %{buildroot}/etc/grnoc/netsage/deidentifier/ -%{__install} -d -p %{buildroot}/var/lib/grnoc/netsage/ -%{__install} -d -p %{buildroot}/var/cache/netsage/ -%{__install} -d -p %{buildroot}/usr/bin/ -%{__install} -d -p %{buildroot}/etc/init.d/ -%{__install} -d -p %{buildroot}/etc/systemd/system/ -%{__install} -d -p %{buildroot}/etc/cron.d/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ruby/ -%{__install} -d -p %{buildroot}/etc/logstash/conf.d/support/ -%{__install} -d -p %{buildroot}/usr/share/logstash/config/ -%{__install} -d -p %{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/ - -%{__install} CHANGES.md %{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/CHANGES.md -%{__install} website/docs/deploy/bare_metal_install.md 
%{buildroot}/usr/share/doc/grnoc/netsage-deidentifier/INSTALL.md - -%{__install} conf/logging.conf %{buildroot}/etc/grnoc/netsage/deidentifier/logging.conf -%{__install} conf/logging-debug.conf %{buildroot}/etc/grnoc/netsage/deidentifier/logging-debug.conf -%{__install} conf/netsage_shared.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_shared.xml -%{__install} conf/netsage_flow_filter.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml -%{__install} conf/netsage_netflow_importer.xml %{buildroot}/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml -%{__install} conf-logstash/*.conf %{buildroot}/etc/logstash/conf.d/ -%{__install} conf-logstash/*.conf.disabled %{buildroot}/etc/logstash/conf.d/ -%{__install} conf-logstash/ruby/* %{buildroot}/etc/logstash/conf.d/ruby/ -%{__install} conf-logstash/support/* %{buildroot}/etc/logstash/conf.d/support/ - -%if 0%{?rhel} >= 7 -%{__install} systemd/netsage-netflow-importer.service %{buildroot}/etc/systemd/system/netsage-netflow-importer.service -%{__install} systemd/netsage-flow-filter.service %{buildroot}/etc/systemd/system/netsage-flow-filter.service -%{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service -%else -%{__install} init.d/netsage-flow-filter-daemon %{buildroot}/etc/init.d/netsage-flow-filter-daemon -%{__install} init.d/netsage-netflow-importer-daemon %{buildroot}/etc/init.d/netsage-netflow-importer-daemon -%endif - -%{__install} cron.d/netsage-scireg-update.cron %{buildroot}/etc/cron.d/netsage-scireg-update.cron -%{__install} cron.d/netsage-maxmind-update.cron %{buildroot}/etc/cron.d/netsage-maxmind-update.cron -%{__install} cron.d/netsage-caida-update.cron %{buildroot}/etc/cron.d/netsage-caida-update.cron -%{__install} cron.d/netsage-memberlists-update.cron %{buildroot}/etc/cron.d/netsage-memberlists-update.cron -%{__install} cron.d/netsage-logstash-restart.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron - -%{__install} 
bin/netsage-flow-filter-daemon %{buildroot}/usr/bin/netsage-flow-filter-daemon -%{__install} bin/netsage-netflow-importer-daemon %{buildroot}/usr/bin/netsage-netflow-importer-daemon - -%{__install} bin/restart-logstash.sh %{buildroot}/usr/bin/restart-logstash.sh - -# clean up buildroot -find %{buildroot} -name .packlist -exec %{__rm} {} \; - -%{_fixperms} $RPM_BUILD_ROOT/* - -%clean -rm -rf $RPM_BUILD_ROOT - -%files - -%defattr(644, root, root, 755) - -# Don't overwrite cron files. Create .rpmnew files if needed. -%config(noreplace) /etc/cron.d/netsage-scireg-update.cron -%config(noreplace) /etc/cron.d/netsage-maxmind-update.cron -%config(noreplace) /etc/cron.d/netsage-caida-update.cron -%config(noreplace) /etc/cron.d/netsage-memberlists-update.cron -%config(noreplace) /etc/cron.d/netsage-logstash-restart.cron - -# Don't overwrite importer configs. Create .rpmnew files if needed. -%config(noreplace) /etc/grnoc/netsage/deidentifier/logging.conf -%config(noreplace) /etc/grnoc/netsage/deidentifier/logging-debug.conf -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_shared.xml -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml -%config(noreplace) /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - -# We don't want to overwrite these .confs. Create .rpmnew files if needed. 
-%config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled -%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf -%config(noreplace) /etc/logstash/conf.d/99-output-jsonlog.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-multiline-json.conf.disabled -%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled -%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf -%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf -# logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) -%config /etc/logstash/conf.d/10-preliminaries.conf -%config /etc/logstash/conf.d/20-add-id.conf -%config /etc/logstash/conf.d/45-geoip-tagging.conf -%config /etc/logstash/conf.d/50-asn.conf -%config /etc/logstash/conf.d/53-caida-org.conf -%config /etc/logstash/conf.d/55-member-orgs.conf -%config /etc/logstash/conf.d/60-scireg-tagging-fakegeoip.conf -%config /etc/logstash/conf.d/70-deidentify.conf -%config /etc/logstash/conf.d/80-privatize-org.conf -%config /etc/logstash/conf.d/88-preferred-location-org.conf -%config /etc/logstash/conf.d/90-additional-fields.conf -%config /etc/logstash/conf.d/95-cleanup.conf -%config /etc/logstash/conf.d/98-post-process.conf -%config /etc/logstash/conf.d/99-output-stdout.conf.disabled -%config /etc/logstash/conf.d/ruby/anonymize_ipv6.rb -%config /etc/logstash/conf.d/ruby/domestic.rb -%config /etc/logstash/conf.d/support/sensor_groups.json -%config /etc/logstash/conf.d/support/sensor_types.json -%config /etc/logstash/conf.d/support/networkA-members-list.rb.example - -/usr/share/doc/grnoc/netsage-deidentifier/CHANGES.md -/usr/share/doc/grnoc/netsage-deidentifier/INSTALL.md - -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/Pipeline.pm 
-%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/WorkerManager.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/FlowFilter.pm -%{perl_vendorlib}/GRNOC/NetSage/Deidentifier/NetflowImporter.pm - -%defattr(754, root, root, -) -/usr/bin/netsage-flow-filter-daemon -/usr/bin/netsage-netflow-importer-daemon -/usr/bin/restart-logstash.sh - -%if 0%{?rhel} >= 7 -%defattr(644, root, root, -) -/etc/systemd/system/netsage-flow-filter.service -/etc/systemd/system/netsage-netflow-importer.service -/etc/systemd/system/logstash.service -%else -%defattr(754, root, root, -) -/etc/init.d/netsage-flow-filter-daemon -/etc/init.d/netsage-netflow-importer-daemon -%endif - -%defattr(-, root, root, 755) -/var/lib/grnoc/netsage/ -/var/cache/netsage/ - -%post -echo " " -echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" -echo "AFTER UPGRADING..." -echo " " -echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." -echo " * Logstash configs 01, 15, 40, and 99 are not replaced by updated versions, so check to see if there are changes. " -echo " * If using 55-member-orgs.conf, make sure you have the required files in support/. See comments in the conf file. " -echo " " -echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." -echo " * Nor does it manage multiple init.d files for sensor- or network-specific importers." -echo " " -echo " * IMPORTANT: Be sure the number of logstash pipeline workers is 1, or flow stitching (aggregation) won't work right. **" -echo " * and be sure logstash configs are specified by *.conf in the right directory." 
-echo " " -echo " * [Re]start logstash, netsage netflow importers (and netsage flow filters for cenic sensors only) " -echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" -echo " " - diff --git a/grnoc-netsage-pipeline.spec b/grnoc-netsage-pipeline.spec new file mode 100644 index 00000000..5c2644a8 --- /dev/null +++ b/grnoc-netsage-pipeline.spec @@ -0,0 +1,200 @@ +Summary: GRNOC NetSage Flow-Processing Pipeline +Name: grnoc-netsage-pipeline +Version: 2.0.0 + # update Version here, in Makefile.PL, conf-logstash/98-post-process.conf +Release: 1%{?dist} +License: GRNOC +Group: Measurement +URL: http://globalnoc.iu.edu +Source0: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root +BuildArch: noarch +#Requires: perl >= 5.8.8 +# these are part of perl with centos6, not with centos7. Could just require perl-core package? +#%if 0%{?rhel} >= 7 +#Requires: perl-Data-Dumper +#Requires: perl-Getopt-Long +#Requires: perl-Storable +#%endif +#Requires: perl-AnyEvent +#Requires: perl-Clone +#Requires: perl-Data-Validate-IP +#Requires: perl-TimeDate +#Requires: perl-Digest-SHA +#Requires: perl-GRNOC-Config +#Requires: perl-GRNOC-Log +#Requires: perl-GRNOC-RabbitMQ +#Requires: perl-Hash-Merge +#Requires: perl-IPC-ShareLite +#Requires: perl-JSON-SL +#Requires: perl-JSON-XS +#Requires: perl-List-MoreUtils +#Requires: perl-Math-Round +#Requires: perl-Moo +#Requires: perl-Net-AMQP-RabbitMQ +#Requires: perl-Net-IP +#Requires: perl-Number-Bytes-Human +#Requires: perl-Parallel-ForkManager +#Requires: perl-Path-Class +#Requires: perl-Path-Tiny +#Requires: perl-Proc-Daemon +#Requires: perl-TimeDate +#Requires: perl-Time-Duration +#Requires: perl-Time-HiRes +#Requires: perl-Try-Tiny +#Requires: perl-Type-Tiny +Requires: wget +Requires: logstash >= 7.16.2 +Requires: rubygem-ipaddress +#Requires: pmacct > 1.7.7 (really date > 10/12/21. Not installed by rpm; see post section below for a check. Update ver num there!) 
+ +%description +GRNOC NetSage Flow-Processing Pipeline + +%prep +%setup -q -n grnoc-netsage-pipeline-%{version} + +%build +%{__perl} Makefile.PL PREFIX="%{buildroot}%{_prefix}" INSTALLDIRS="vendor" +make + +%install +rm -rf $RPM_BUILD_ROOT +make pure_install + +# for lookup files (maxmind, etc) +%{__install} -d -p %{buildroot}/var/lib/grnoc/netsage/ + +#%{__install} -d -p %{buildroot}/var/cache/netsage/ +#%{__install} -d -p %{buildroot}/etc/init.d/ + +%{__install} -d -p %{buildroot}/usr/bin/ +%{__install} -d -p %{buildroot}/etc/cron.d/ +%{__install} -d -p %{buildroot}/etc/systemd/system/ +%{__install} -d -p %{buildroot}/etc/pmacct/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/ruby/ +%{__install} -d -p %{buildroot}/etc/logstash/conf.d/support/ +%{__install} -d -p %{buildroot}/usr/share/logstash/config/ +%{__install} -d -p %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/ + +%{__install} bin/restart-logstash-service.sh %{buildroot}/usr/bin/restart-logstash.sh + +%{__install} cron.d/restart-logstash-service.cron %{buildroot}/etc/cron.d/netsage-logstash-restart.cron +%{__install} cron.d/baremetal-netsage-downloads.cron %{buildroot}/etc/cron.d/netsage-downloads.cron + +%{__install} systemd/logstash.service %{buildroot}/etc/systemd/system/logstash.service +%{__install} systemd/sfacctd.service %{buildroot}/etc/systemd/system/sfacctd.service +%{__install} systemd/nfacctd.service %{buildroot}/etc/systemd/system/nfacctd.service + +%{__install} conf-pmacct/* %{buildroot}/etc/pmacct/ + +%{__install} conf-logstash/*.conf %{buildroot}/etc/logstash/conf.d/ +%{__install} conf-logstash/*.conf.disabled %{buildroot}/etc/logstash/conf.d/ +%{__install} conf-logstash/ruby/* %{buildroot}/etc/logstash/conf.d/ruby/ +%{__install} conf-logstash/support/* %{buildroot}/etc/logstash/conf.d/support/ + +%{__install} CHANGES.md %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/CHANGES.md +%{__install} 
website/docs/deploy/bare_metal_install.md %{buildroot}/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md + + +# clean up buildroot +find %{buildroot} -name .packlist -exec %{__rm} {} \; + +%{_fixperms} $RPM_BUILD_ROOT/* + +%clean +rm -rf $RPM_BUILD_ROOT + +%files + +%defattr(644, root, root, 755) + +# Don't overwrite cron files. Create .rpmnew files if needed. +%config(noreplace) /etc/cron.d/netsage-downloads.cron +%config(noreplace) /etc/cron.d/netsage-logstash-restart.cron + +# Don't overwrite these .confs. Create .rpmnew files if needed. +%config(noreplace) /etc/logstash/conf.d/01-input-rabbit.conf +%config(noreplace) /etc/logstash/conf.d/15-sensor-specific-changes.conf +%config(noreplace) /etc/logstash/conf.d/40-aggregation.conf +%config(noreplace) /etc/logstash/conf.d/99-output-rabbit.conf +%config(noreplace) /etc/logstash/conf.d/01-input-jsonfile.conf.disabled +%config(noreplace) /etc/logstash/conf.d/01-input-multiline-json-file.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-file.conf.disabled +%config(noreplace) /etc/logstash/conf.d/99-output-elastic.conf.disabled +# logstash files that can be updated automatically (if there are updates, the old ver will be in .rpmsave) +%config /etc/logstash/conf.d/05-translate-pmacct.conf +%config /etc/logstash/conf.d/10-preliminaries.conf +%config /etc/logstash/conf.d/20-add-id.conf +%config /etc/logstash/conf.d/41-thresholds.conf +%config /etc/logstash/conf.d/45-geoip-tagging.conf +%config /etc/logstash/conf.d/50-asn.conf +%config /etc/logstash/conf.d/53-caida-org.conf +%config /etc/logstash/conf.d/55-member-orgs.conf +%config /etc/logstash/conf.d/60-scireg-tagging-fakegeoip.conf +%config /etc/logstash/conf.d/70-deidentify.conf +%config /etc/logstash/conf.d/80-privatize-org.conf.disabled +%config /etc/logstash/conf.d/88-preferred-location-org.conf +%config /etc/logstash/conf.d/90-additional-fields.conf +%config /etc/logstash/conf.d/95-cleanup.conf +%config /etc/logstash/conf.d/98-post-process.conf
+%config /etc/logstash/conf.d/99-output-stdout.conf.disabled +%config /etc/logstash/conf.d/ruby/anonymize_ipv6.rb +%config /etc/logstash/conf.d/ruby/domestic.rb +%config /etc/logstash/conf.d/support/sensor_groups.json.example +%config /etc/logstash/conf.d/support/sensor_types.json.example +%config /etc/logstash/conf.d/support/networkA-members-list.rb.example + +%config /etc/pmacct/sfacctd.conf.ORIG +%config /etc/pmacct/nfacctd.conf.ORIG +%config /etc/pmacct/sfacctd-pretag.map.ORIG +%config /etc/pmacct/nfacctd-pretag.map.ORIG + +/usr/share/doc/grnoc/netsage-pipeline/CHANGES.md +/usr/share/doc/grnoc/netsage-pipeline/INSTALL.md + +%defattr(754, root, root, -) +/usr/bin/restart-logstash.sh + +%defattr(644, root, root, -) +/etc/systemd/system/logstash.service +/etc/systemd/system/sfacctd.service +/etc/systemd/system/nfacctd.service + +%defattr(-, root, root, 755) +/var/lib/grnoc/netsage/ +#/var/cache/netsage/ + +%post +# make sure pmacct is installed (no rpm so can't just require it) +echo " " +echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" +if [ -f /usr/local/sbin/nfacctd ]; then + echo "PLEASE CHECK: " + echo "It looks like pmacct has been installed." + echo "Check the version with sfacctd -V and nfacctd -V." + echo "The NetSage Pipeline has been tested with version 1.7.8-git from 2022/06/02. (>1.7.7 is required.)" +else + echo "WARNING: " + echo "Required package pmacct does not appear to have been installed. " + echo "See the NDCA doc or pmacct on github for instructions." + echo "The NetSage Pipeline has been tested with version 1.7.8-git from 2022/06/02. (>1.7.7 is required.)" +fi + +echo " " +echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" +echo "AFTER UPGRADING..." +echo " " +echo " * Check config and cron files with .rpmnew and .rpmsave versions to see if any need manual updates." +echo " * Pmacct configs: You must create or copy to /etc/pmacct/. Examples are provided. See if any changes are required to existing files."
+echo " * Logstash configs: /etc/logstash/conf.d/. 01, 15, 40, and 99 are not replaced by updated versions, so check for changes. " +echo " * Make sure you have any required member organization files in support/. See comments in the conf file. " +echo " " +echo " * Note that this rpm puts logstash config files in /etc/logstash/conf.d/ and doesn't manage multiple pipelines in pipelines.yml." +echo " * Nor does it manage multiple pmacct processes." +echo " " +echo " * [Re]start logstash and pmacct processes " +echo "-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" +echo " " + diff --git a/init.d/netsage-flow-filter-daemon b/init.d/netsage-flow-filter-daemon deleted file mode 100644 index b87dee65..00000000 --- a/init.d/netsage-flow-filter-daemon +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/sh -# -# netsage-flow-filter-daemon init file for starting up the NetSage FlowFilter daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage FlowFilter daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-flow-filter-daemon" -exec="/usr/bin/$name" -## I believe the pid file name is actually set in the deidentifier config! -## This is just using that name. -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_flow_filter.xml" -SHAREDCONFIG="/etc/grnoc/netsage/deidentifier/netsage_shared.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG" --sharedconfig $SHAREDCONFIG - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown haven't work, try old way - killproc -p $pidfile $name - retval=$? 
- else - success "$name shutdown" - fi - echo -n $"Use ps aux to be sure the worker has stopped also" - echo - return $retval -} - -restart() { - echo -n $"Use stop, check with ps, then start" - echo -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? diff --git a/init.d/netsage-netflow-importer-daemon b/init.d/netsage-netflow-importer-daemon deleted file mode 100644 index 4989bac5..00000000 --- a/init.d/netsage-netflow-importer-daemon +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh -# -# netsage-netflow-importer-daemon init file for starting up the NetSage Netflow importer daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage Netflow Importer daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-netflow-importer-daemon" -exec="/usr/bin/$name" -## The pid file name is actually set in the deidentifier config file! -## This is just using that name. -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml" -SHAREDCONFIG="/etc/grnoc/netsage/deidentifier/netsage_shared.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG --sharedconfig $SHAREDCONFIG" - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown doesn't work, try old way - killproc -p $pidfile $name - retval=$? - else - success "$name shutdown" - fi - echo -n $"Use ps aux to be sure the daemon and worker both stopped !!" - echo - return $retval -} - -# workers don't always quit, certainly not quickly! 
-restart() { - echo -n $"Use stop, check ps aux, then start" - echo -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? diff --git a/lib/GRNOC/NetSage/Deidentifier.pm b/lib/GRNOC/NetSage/Deidentifier.pm deleted file mode 100644 index 395bec49..00000000 --- a/lib/GRNOC/NetSage/Deidentifier.pm +++ /dev/null @@ -1,9 +0,0 @@ -package GRNOC::NetSage::Deidentifier; - -use strict; -use warnings; - -our $VERSION = "1.2.9"; - -1; - diff --git a/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm b/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm deleted file mode 100644 index 55825e11..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/FlowFilter.pm +++ /dev/null @@ -1,278 +0,0 @@ -package GRNOC::NetSage::Deidentifier::FlowFilter; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; -use GRNOC::RabbitMQ::Client; - -use AnyEvent; -use Data::Validate::IP; -use Net::IP; -use Digest::SHA; -use POSIX; -use utf8; - -use Data::Dumper; - - -### internal attributes ### - -has handler => ( is => 'rwp'); - -has simp_config => ( is => 'rwp' ); - -has simp_client => ( is => 'rwp'); - -has router => ( is => 'rwp'); - -has router_details => ( is => 'rwp', default => sub { {} } ); - -has snmp_cache_time => ( is => 'rwp', default => 3600 ); - -has stats => ( is => 'rwp', default => sub { { - dropped => 0, - imported => 0 -} } ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - my $router = $config->{'worker'}->{'router-address'}; - $self->_set_router( $router ); - $self->_set_simp_config( $config->{'simp'} ); - $self->_set_handler( sub { $self->_filter_messages(@_) } ); - $self->_connect_simp(); - 
$self->get_router_details(); - - my $snmp_cache_time = $config->{'worker'}->{'snmp-cache-time'}; - $self->_set_snmp_cache_time( $snmp_cache_time ) if defined $snmp_cache_time; - - return $self; -} - -### private methods ### - -# expects an array of data for it to filter -# returns the filtered array -sub _filter_messages { - my ( $self, $caller, $messages ) = @_; - - my $finished_messages = $messages; - - my $router_details = $self->router_details; - # drop all messages if we don't have router derailts from simp - if ( keys %$router_details < 1 ) { - $self->_add_dropped_count( @$messages ); - - return []; - } - - my $i = 0; - my @delete_indices = (); - foreach my $message ( @$messages ) { - my $sensor = $message->{'meta'}->{'sensor_id'}; - my $details = $router_details->{ $sensor }; - - my $import_flow = $self->_filter_flow( $message, $details ); - if ( $import_flow < 1 ) { - push @delete_indices, $i; - $self->_add_dropped_count( 1 ); - } - $i++; - } - - # remove all the deleted indices - splice @$finished_messages, $_, 1 for reverse @delete_indices; - - $self->_add_imported_count( scalar @$finished_messages ); - - $self->logger->debug( "stats " . Dumper $self->stats ); - - return $finished_messages; -} - -sub _filter_flow { - my ( $self, $message, $details ) = @_; - - return 0 if !defined ($details) || !defined( $details->{'results'} ) || keys %{ $details->{'results'} } == 0; - - my $src_ifindex = $message->{'meta'}->{'src_ifindex'}; - my $dst_ifindex = $message->{'meta'}->{'dst_ifindex'}; - - if (! defined $dst_ifindex or ! defined $src_ifindex ) { - $self->logger->warn("Missing an ifindex!? Skipping flow.". 
$message->{'meta'}->{'sensor_id'}); - return 0; - } - - my $num_results = keys ( %{ $details->{'results'} } ); - - return 0 if $num_results < 1; - - my $host = ( keys ( %{ $details->{'results'} } ) )[0]; - - my $mib_base = "1.3.6.1.2.1.31.1.1.1.18"; - my $src_key = "$mib_base.$src_ifindex"; - my $dst_key = "$mib_base.$dst_ifindex"; - - my $src_description = $details->{ 'results' }->{ $host }->{ $src_key }->{ 'value' } || ""; - my $dst_description = $details->{ 'results' }->{ $host }->{ $dst_key }->{ 'value' } || ""; - - - # see if src OR dst description contains [ns-exp] - - my $import = 0; - - if ( $src_description =~ /\[ns-exp\]/ ) { - $self->logger->debug( "IMPORTING src: $src_ifindex!" ); - $import = 1; - } else { - $self->logger->debug( "SKIPPING src: $src_ifindex!" ); - } - - if ( $dst_description =~ /\[ns-exp\]/ ) { - $self->logger->debug( "IMPORTING dst: $dst_ifindex!" ); - $import = 1; - } else { - $self->logger->debug( "SKIPPING dst: $dst_ifindex!" ); - - } - - return $import; - -} - -sub get_router_details { - my ( $self ) = @_; - - my $client = $self->simp_client; - - my $router_details = $self->router_details || {}; - - my $collections = $self->config->{'collection'}; - - if ( ref($collections) ne "ARRAY" ) { - $collections = [ $collections ]; - - } - - foreach my $collection (@$collections) { - - #my $router = $self->router; - my $sensor = $collection->{'sensor'}; - my $router = $collection->{'sensor'}; - # 3/22/21 - simp on netsage-simp is not returning anything by IP (ie router-address), so only use sensor name - # $router = $collection->{'router-address'} if $collection->{'router-address'}; - - my $row = {}; - - my $details = $router_details->{'router'}; - if ( defined $details->{'ts'} ) { - if ( time() - $details->{'ts'} <= $self->snmp_cache_time ) { - return; - } - } - - my %query = ( - node => [$router], - oidmatch => ["1.3.6.1.2.1.31.1.1.1.18.*"] - - ); - - my $results = $client->get( %query ); - -# as of simp 1.6.0, node results are wrapped in 
the port used to query the data on the node -# 161 is the port used for traditional SNMP - if ( exists( $results->{'results'}->{'161'} ) && %{ $results->{'results'}->{'161'} } ) { - $self->logger->debug( "router found: $router" ); - $row->{'results'} = $results->{'results'}->{'161'}; - $self->logger->debug( "router found in simp: "); ## . Dumper $results->{'results'} ); - } else { - $self->logger->warn( "router NOT found in simp: " . Dumper $router ); - $row->{'results'} = undef; - - } - - my $now = time(); - - $row->{'ts'} = $now; - - $router_details->{ $sensor } = $row; - } - - $self->_set_router_details( $router_details ); - - -} - -sub _add_dropped_count { - my ( $self, $num ) = @_; - $self->_update_stats( { - dropped => $num - }); - -} - -sub _add_imported_count { - my ( $self, $num ) = @_; - $self->_update_stats( { - imported => $num - }); - -} - -sub _update_stats { - my ( $self, $update ) = @_; - my $stats = $self->stats; - my $dropped = $stats->{'dropped'}; - my $imported = $stats->{'imported'}; - if ( $update->{'dropped'} ) { - $dropped += $update->{'dropped'}; - } - if ( $update->{'imported'} ) { - $imported += $update->{'imported'}; - } - - $stats->{'dropped'} = $dropped; - $stats->{'imported'} = $imported; - - $self->_set_stats( $stats ); -} - - -sub _connect_simp { - my ( $self ) = @_; - - my $simp = $self->simp_config; - - my $host = $simp->{'host'}; - my $port = $simp->{'port'} || 5672; - my $user = $simp->{'username'} || "guest"; - my $pass = $simp->{'password'} || "guest"; - my $exchange = $simp->{'exchange'} || "Simp"; - my $timeout = $simp->{'timeout'} || 60; - my $topic = $simp->{'topic'} || "Simp.Data"; - - my $client = GRNOC::RabbitMQ::Client->new( - host => $host, - port => $port, - user => $user, - pass => $pass, - exchange => $exchange, - timeout => $timeout, - topic => $topic); - $self->_set_simp_client( $client ); - return $client; -} - -1; - diff --git a/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm 
b/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm deleted file mode 100644 index 48dca956..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm +++ /dev/null @@ -1,755 +0,0 @@ -package GRNOC::NetSage::Deidentifier::NetflowImporter; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; - -use POSIX qw( floor ); -use Net::AMQP::RabbitMQ; -use JSON::XS; -use Math::Round qw( nlowmult nhimult ); -use List::MoreUtils qw( natatime ); -use Try::Tiny; -use Date::Parse; -use Date::Format; -use DateTime; -use File::stat; -use File::Find; -use Path::Class; -use Path::Tiny; -use Storable qw( store retrieve ); -use Sys::Hostname; -use Env; - -use Data::Dumper; - -### required attributes ### - -has config_file => ( is => 'ro', - required => 1 ); - -has logging_file => ( is => 'ro', - required => 1 ); - -### optional attributes ### - -has sensor_id => ( is => 'rwp', default => hostname() ); - -has instance_id => ( is => 'rwp', default => 0 ); - -### internal attributes ### - -has flow_path => ( is => 'rwp' ); - -has json => ( is => 'rwp' ); - -has json_data => ( is => 'rwp' ); - -has status => ( is => 'rwp' ); - -has min_bytes => ( is => 'rwp', - default => 500000000 ); # 500 MB - -has flow_batch_size => ( is => 'rwp' ); - -has status_cache => ( is => 'rwp', - default => sub { {} } ); - -has cache_file => ( is => 'rwp' ); - - -# min_file_age must be one of "older" or "newer". 
$age must match /^(\d+)([DWMYhms])$/ where D, W, M, Y, h, m and s are "day(s)", "week(s)", "month(s)", "year(s)", "hour(s)", "minute(s)" and "second(s)" -# see http://search.cpan.org/~pfig/File-Find-Rule-Age-0.2/lib/File/Find/Rule/Age.pm -has min_file_age => ( is => 'rwp', - default => '0' ); - -has cull_enable => ( is => 'rwp', - default => 0 ); - -# wait until files are this old to delete them after processing -# in days -has cull_ttl => ( is => 'rwp', - default => 3 ); - -# cull after reading $cull_count files -has cull_count => ( is => 'rwp', - default => 10 ); - -has nfdump_path => ( is => 'rwp' ); - -has flow_type => ( is => 'rwp', - default => 'netflow' ); - -my @files; - -### constructor builder ### -sub getSensorValue() -{ - my $sensor_id = $_[0]; - # check if sensorId value starts with a $ sign, if so get value from env - if (index($sensor_id, '$') == 0) { - my $env_var = substr $sensor_id, 1; ##chop off the $ sign - my $env_value = $ENV{$env_var} || ''; - ## IF the env is set use its value, otherwise fallback on hostname - if ($env_value ne '') { - $sensor_id = $env_value; - } else { - $sensor_id = hostname(); - } - # If the sensor is set to empty string use hostname - } elsif ($sensor_id eq ""){ - $sensor_id = hostname(); - } - - return $sensor_id; -} - - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - my $sensor_id = &getSensorValue($config->{ 'sensor' } || ''); - - if ( defined ( $sensor_id ) ) { - $self->_set_sensor_id( $sensor_id ); - } - my $instance_id = $config->{ 'instance' }; - - # for some reason if you leave blank, you get - # an empty hashref back. work around that. - if ( defined ( $instance_id ) && ! ( ref $instance_id eq ref {} ) ) { - $self->_set_instance_id( $instance_id ); - } - - $self->logger->debug("instance id: " . 
$self->instance_id); - - my $flow_batch_size = $config->{'worker'}->{'flow-batch-size'}; - my $cache_file = $config->{'worker'}->{'cache-file'} if not defined $self->cache_file; - $cache_file = '/var/cache/netsage/netflow_importer.cache' if not defined $cache_file; - $self->_set_cache_file( $cache_file ); - $self->logger->debug("cache file: " . $cache_file); - - my $flow_path = $self->flow_path; - - $flow_path = $config->{'worker'}->{'flow-path'} if not defined $flow_path; - - $self->_set_flow_path( $flow_path ); - $self->logger->debug("flow path: " . Dumper $flow_path); - - my $min_file_age = $self->min_file_age; - $min_file_age = $config->{'worker'}->{'min-file-age'} if defined $config->{'worker'}->{'min-file-age'}; - $self->_set_min_file_age( $min_file_age ); - - my $flow_type = $self->flow_type; - $flow_type = $config->{'worker'}->{'flow-type'} if defined $config->{'worker'}->{'flow-type'}; - $self->_set_flow_type( $flow_type ); - $self->logger->debug("flow type: $flow_type"); - - $self->_set_flow_batch_size( $flow_batch_size ); - $self->_set_handler( sub{ $self->_run_netflow_import(@_) } ); - - $self->_set_nfdump_path( $config->{'worker'}->{'nfdump-path'} ) - if defined $config->{'worker'}->{'nfdump-path'}; - - my $min_bytes = $self->min_bytes; - $min_bytes = $config->{'worker'}->{'min-bytes'} if defined $config->{'worker'}->{'min-bytes'}; - $self->_set_min_bytes( $min_bytes ); - - my $cull_enable = $self->cull_enable; - $cull_enable = $config->{'worker'}->{'cull-enable'} if defined $config->{'worker'}->{'cull-enable'}; - $self->_set_cull_enable( $cull_enable ); - - my $cull_ttl = $self->cull_ttl; - $cull_ttl = $config->{'worker'}->{'cull-ttl'} if defined $config->{'worker'}->{'cull-ttl'}; - $self->_set_cull_ttl( $cull_ttl ); - - # create JSON object - my $json = JSON::XS->new(); - - $self->_set_json( $json ); - - $self->_read_cache(); - - return $self; -} - -### public methods ### - -sub _run_netflow_import { - - my ( $self ) = @_; - - # get flow data - my 
$success = $self->_get_flow_data(); - - # publish flow data - return $self->_publish_flows(); - -} - -sub _get_params { - my ( $self, $collection ) = @_; - my %params = (); - my $config = $self->config; - - my $path = $collection->{'flow-path'} || $self->flow_path; - my $sensor = $collection->{'sensor'} || $self->sensor_id; - my $instance = $collection->{'instance'} || $self->instance_id || ''; - my $flow_type = $collection->{'flow-type'} || $self->flow_type || 'netflow'; - - - %params = ( - path => $path, - sensor => $sensor, - instance => $instance, - flow_type => $flow_type - ); - - - return \%params; -} - -sub _get_flow_data { - my ( $self ) = @_; - - my $flow_batch_size = $self->flow_batch_size; - my $status = $self->status_cache; - - my $collections = $self->config->{'collection'}; - - - if ( ref($collections) ne "ARRAY" ) { - $collections = [ $collections ]; - } - - foreach my $collection ( @$collections ) { - - my $path = $collection->{'flow-path'}; # || $self->flow_path; - # if path doesn't end with an /, add one. Required for finding @paths_to_check. - if ( $path !~ /.+\/$/) { - $path = $path."/"; - } - - my $sensor = &getSensorValue($collection->{'sensor'} || ''); - $self->logger->info( " Doing collection $sensor "); - - my %params = %{ $self->_get_params( $collection ) }; - $params{'flow-path'} = $path; - $params{'sensor'} = $sensor; - - my $min_bytes = $self->min_bytes; - - $self->logger->debug("path: $path"); - $self->logger->debug("min_file_age: " . $self->min_file_age ); - - $self->_cull_flow_files( $path ); - - # We need to compare files to the contents of the cache file to see if they have been imported already. - # --- If files are not being culled, we don't want to compare every file ever saved, so - # --- first, narrow down the list of dirs to look through to only those with dates more recent than N months ago. 
- my $collection_dir = $path; - my @paths_to_check; - if ( $self->cull_enable < 1 ) { - my $now = DateTime->today; # UTC (at 00:00:00) - my $now_yr = $now->year(); - my $now_mo = $now->month(); - my $now_day = $now->day(); - my $too_old_date = $now->subtract( months => 2 ); # HARDCODED THRESHOLD N (must be less than the cache file culling threshold!) - my $too_old_yr = $too_old_date->year(); - my $too_old_mo = $too_old_date->month(); - my $too_old_day = $too_old_date->day(); - - for (my $yr = $too_old_yr; $yr <= $now_yr ; $yr++) { - for (my $mo = 1; $mo <= 12; $mo++) { - # don't need to continue beyond current month - last if ( $yr == $now_yr and $mo == $now_mo + 1); - # If first and last day of month are not too old, we want to look at all files in that month - my $first_day = DateTime->new( { year=>$yr, month=>$mo, day=>"01" } ); - my $last_day = DateTime->last_day_of_month( { year=>$yr, month=>$mo } ); - if ( $first_day >= $too_old_date and $last_day > $too_old_date ) { - # add dir to list - my $subdir = sprintf("%02d/%02d/", $yr, $mo); - push (@paths_to_check, $collection_dir.$subdir); - $self->logger->debug("will check ".$collection_dir.$subdir); - } - elsif ( $first_day <= $too_old_date and $too_old_date <= $last_day ) { - # if $too_old_date is in the middle of the month, go through the day dirs. 
- for (my $day = 1; $day <= $last_day->day(); $day++) { - my $day_date = DateTime->new( { year=>$yr, month=>$mo, day=>$day } ); - if ( $day_date >= $too_old_date ) { - my $subdir = sprintf("%02d/%02d/%02d/", $yr, $mo, $day); - push (@paths_to_check, $collection_dir.$subdir); - $self->logger->debug("will check ".$collection_dir.$subdir); - } - } - } - } - } - } else { - # if culling is enabled, it's shouldn't be a big deal to just examine all existing files - @paths_to_check = ( $collection_dir ); - } - - - # Get list of files to compare to cache file contents, exclude files that are too new (< min_file_age) - try { - @files = (); - find({ wanted => sub { find_nfcapd($self, \%params) }, follow => 1 }, @paths_to_check ); - - } catch { - $self->logger->error( "Error retrieving nfcapd file listing: " . Dumper($_) ); - sleep(10); - return; - }; - - # Get list of files to actually import by comparing to cache file record of what's been done before - my @filepaths = (); - for(my $i=0; $i<@files; $i++) { - my $file = $files[$i]; - #$self->logger->debug("file: $file"); - my $file_path = dir( $path, $file ) . ""; - my $stats = stat($file_path); - my $abs = file( $file_path ); - # TODO: changed rel to abs; need a way to figure out a way to convert - # the old rel paths to abs - - - # skip empty files (header and/or footer only). They can cause problems. - if( ! $stats or ! $stats->size ) { - $self->logger->info("*** For $path $file, there are no stats!? skipping."); - next; - } elsif( $stats->size <= 420 ) { - $self->logger->debug("skipping $path $file because size is <= 420"); - next; - } - - my $rel = $abs->relative( $path ) . 
""; - if ( exists ( $status->{ $rel } ) ) { - $status->{ $abs } = $status->{ $rel }; - delete $status->{ $rel }; - #warn "$rel being changed to $abs in file cache ..."; - } - if ( exists ( $status->{ $abs } ) ) { - my $entry = $status->{ $abs }; - if ( (!defined $stats) or (!defined $entry) ) { - next; - } - my $mtime_cache = $entry->{'mtime'}; - my $size_cache = $entry->{'size'}; - - # If file size and last-modified time are unchanged, skip it - if ( $mtime_cache == $stats->mtime - && $size_cache == $stats->size ) { - next; - } - } - push @filepaths, dir( $path, $file ) . ""; - - } - @filepaths = sort @filepaths; - - # Read the nfcapd files to import - if ( @filepaths > 0 ) { - my $success = $self->_get_nfdump_data(\@filepaths, %params); - - # --- make cache file smaller. (sub will do nothing if nfcapd file culling is enabled) (if it is enabled, it will cull the cache file itself.) - if ($success) { - $self->logger->debug( "calling cull_cache_file for $sensor"); - $self->_cull_cache_file(); - $self->logger->debug( "done with cull_cache_file for $sensor"); - } - } - - - } # end loop over collections - - -} - -# Loop over files to import, using nfdump to read each. Write cache file after each file is read. 
-sub _get_nfdump_data { - my ( $self, $flowfiles, %params ) = @_; - - my $sensor = $params{'sensor'}; - my $instance = $params{'instance'}; - - my $path = $params{'path'}; # flow-path - - my $flow_type = $params{'flow_type'}; - - my $status = $self->status_cache; - - my $flow_batch_size = $self->flow_batch_size; - - my $min_bytes = $self->min_bytes; - - my $config_path = $self->nfdump_path; - my $nfdump = '/usr/bin/nfdump'; - # if configured nfdump path is a file and is executable, use it - if ( defined $config_path ) { - if ( -f $config_path && -x _ ) { - $nfdump = $config_path - } else { - $self->logger->error("Invalid nfdump path specified; quitting"); - $self->_set_is_running( 0 ); - return; - } - - } - - my $file_count = 0; - my $cull_count = $self->cull_count; - my @all_data = (); - foreach my $flowfile ( @$flowfiles ) { - - # quit if the process has been told to stop - if ( !$self->is_running ) { - $self->logger->debug("Quitting flowfile loop and returning from _get_nfdump_data()"); - return; - } - - $file_count++; - if ( $cull_count > 0 && $file_count > 0 && $file_count % $cull_count == 0 ) { - $self->_cull_flow_files( $path ); - } - - my $stats = stat($flowfile); - - # If file does not exist, skip this file - if ( !defined $stats ) { - next; - } - - $self->logger->info(" importing file: $flowfile"); - - my $command = "$nfdump -r '$flowfile'"; - $command .= " -a"; # perform aggregation based on 5 tuples - $command .= ' -o "fmt:%ts,%te,%td,%sa,%da,%sp,%dp,%pr,%flg,%fwd,%stos,%ipkt,%ibyt,%opkt,%obyt,%in,%out,%sas,%das,%smk,%dmk,%dtos,%dir,%nh,%nhb,%svln,%dvln,%ismc,%odmc,%idmc,%osmc,%mpls1,%mpls2,%mpls3,%mpls4,%mpls5,%mpls6,%mpls7,%mpls8,%mpls9,%mpls10,%ra,%eng,%bps,%pps,%bpp"'; - $command .= ' -6'; # to get full ipv6 addresses - $command .= ' -L +' . 
$min_bytes; - $command .= " -N -q"; - $command .= ' |'; - $self->logger->debug(" command:\n$command\n"); - - my $fh; - open($fh, $command); - - my $i = 0; - while ( my $line = <$fh> ) { - my ( $ts,$te,$td,$sa,$da,$sp,$dp,$pr,$flg,$fwd,$stos,$ipkt,$ibyt,$opkt,$obyt,$in,$out,$sas,$das,$smk,$dmk,$dtos,$dir,$nh,$nhb,$svln,$dvln,$ismc,$odmc,$idmc,$osmc,$mpls1,$mpls2,$mpls3,$mpls4,$mpls5,$mpls6,$mpls7,$mpls8,$mpls9,$mpls10,$ra,$eng,$bps,$pps,$bpp ) = split( /\s*,\s*/, $line); - - if ($ts =~ /^Byte/ ) { next; } - - my $start = str2time( $ts ); - my $end = str2time( $te ); - - if ( !defined $start || !defined $end ) { - $self->logger->error("Invalid line in $flowfile. $!. Start or End time is undefined."); - $self->logger->error("line: $line"); - $self->logger->error("ts: $ts start: $start"); - $self->logger->error("te: $te end: $end"); - next; - } - - my $sum_bytes = $ibyt + $obyt; - my $sum_packets = $ipkt + $opkt; - my $proto = ''; - if( $pr =~ /^\d+$/ ) { - $proto = getprotobynumber( $pr ); - } else { - $proto = lc($pr); - } - - my $row = {}; - $row->{'type'} = 'flow'; - $row->{'interval'} = 600; - $row->{'meta'} = {}; - $row->{'meta'}->{'flow_type'} = $flow_type || 'netflow'; - $row->{'meta'}->{'src_ip'} = $sa; - $row->{'meta'}->{'src_port'} = $sp; - $row->{'meta'}->{'dst_ip'} = $da; - $row->{'meta'}->{'dst_port'} = $dp; - $row->{'meta'}->{'protocol'} = $proto; - $row->{'meta'}->{'sensor_id'} = $sensor; - $row->{'meta'}->{'instance_id'} = $instance if $instance ne ''; - $row->{'meta'}->{'src_asn'} = $sas; - $row->{'meta'}->{'dst_asn'} = $das; - $row->{'meta'}->{'src_ifindex'} = $in if $in; - $row->{'meta'}->{'dst_ifindex'} = $out if $out; - $row->{'start'} = $start; - $row->{'end'} = $end; - - $row->{'values'} = {}; - $row->{'values'}->{'duration'} = $td; - $row->{'values'}->{'num_bits'} = $sum_bytes * 8; - $row->{'values'}->{'num_packets'} = $sum_packets; - $row->{'values'}->{'bits_per_second'} = $bps; - $row->{'values'}->{'packets_per_second'} = $pps; - - - push 
@all_data, $row; - if ( @all_data % $flow_batch_size == 0 ) { - $self->logger->debug("processed " . @all_data . " (up to $flow_batch_size) flows; publishing ... "); - $self->_set_json_data( \@all_data ); - $self->_publish_flows(); - @all_data = (); - } - } - # publish any remaining data - # TODO: improve performance here by waiting until we have full batches - $self->_set_json_data( \@all_data ); - $self->_publish_flows(); - @all_data = (); - - # TODO: changed rel to abs; need a way to figure out a way to convert - # the old rel paths to abs - my $abs = file( $flowfile ); - #my $rel = $abs->relative( $path ) . ""; - $status->{$abs} = { - mtime => $stats->mtime, - size => $stats->size - }; - $self->_set_status_cache( $status ); - $self->_write_cache(); - - } ## end loop over flow files - - - if ( $self->run_once ) { - $self->logger->debug("only running once, stopping"); - $self->_set_is_running( 0 ); - } - - if (!@all_data) { - # @all_data should be empty. success. - return 1; - } else { - # something went wrong - return; - } - - -}; - -### private methods ### - -sub _write_cache { - my ( $self ) = @_; - my $filename = $self->cache_file; - $self->logger->debug( "writing cache file $filename" ); - my $status = $self->status_cache; - store $status, $filename; - $self->logger->debug( "done writing cache file $filename" ); - -} - - -sub _read_cache { - my ( $self ) = @_; - my $filename = $self->cache_file; - $self->logger->debug( "reading cache file $filename" ); - my $status = $self->status_cache; - if ( not -f $filename ) { - open my $fh, '>', $filename - or die "Cache file $filename does not exist, and failed to created it: $!\n"; - close $fh; - store $status, $filename; - } - $status = retrieve $filename; - $self->_set_status_cache( $status ); - $self->logger->debug( "done reading cache file $filename" ); -} - -sub _publish_flows { - my $self = shift; - my $flows = $self->json_data; - if ( defined $flows ) { - $self->_publish_data( $flows ); - } - - 
$self->_set_json_data( [] ); -} - -sub _cull_flow_files { - my ( $self, $path ) = @_; - my $status = $self->status_cache; - #warn "status " . Dumper $status; - #$self->logger->debug( "cache status" . Dumper $status ); - - if ( $self->cull_enable < 1 ) { - $self->logger->debug("not culling files (disabled)"); - return; - } - - $self->logger->debug("CULLING files (enabled)"); - - - # see how old files should be (in days) - my $cull_ttl = $self->cull_ttl; - - my @cache_remove = (); - my %dirs_to_remove = (); - - - while( my ($filename, $attributes) = each %$status ) { - my $mtime = DateTime->from_epoch( epoch => $attributes->{'mtime'} ); - - my $dur = DateTime::Duration->new( - days => $cull_ttl - ); - - my $dt = DateTime->now; - - if ( DateTime->compare( $mtime, $dt->subtract_duration( $dur ) ) == -1 ) { - # Make sure that the file exists, AND that it is under our main - # flow directory. Just a sanity check to prevent deleting files - # outside the flow data directory tree. - - my $filepath = $filename; - my $realpath = ""; - - try { - $realpath = path( $filepath )->realpath; - - my $subsumes = path( $path )->subsumes( $realpath ); - - # if the flow path does not subsume the file we're asked to delete, - # refuse - if ( !$subsumes ) { - #$self->logger->debug("Tried to delete a file outside the flow path!: " . $realpath . "; path: " . $path); - #push @cache_remove, $filename; - #next; - } - } catch { - # an error here is not necessarily a problem, could just be the file - # doesn't exist - #push @cache_remove, $filename; - #next; - - }; - - #return; - # - - if ( -f $realpath ) { - my $parent = path( $realpath )->parent; - $self->logger->debug("deleting $filepath ..."); - unlink $filepath or $self->logger->error( "Could not unlink $realpath: $!" 
); - $dirs_to_remove{ $parent } = 1; - } else { - #warn "file does not exist; would delete from cache"; - - } - push @cache_remove, $filename; - } - - } - foreach my $file ( @cache_remove ) { - delete $status->{$file}; - - } - - foreach my $dir ( keys %dirs_to_remove ) { - rmdir $dir; - - } - $self->_write_cache(); - -} - -# If culling of nfcapd files is not enabled, the cache file can become huge. This cuts it down to last X months. -sub _cull_cache_file { - my ( $self ) = @_; - - # If file culling is enabled, that will also cull the cache file, so just return. - if ( $self->cull_enable == 1 ) { - $self->logger->debug("not running cull_cache_file"); - return; - } - - # delete files older than X months (by filename) from the cache file. - my $cull_to = DateTime->now->subtract( months => 3 ); # UTC datetime HARDCODED THRESHOLD = 3 mo. - # Make sure this is > hardcoded threshold in _get_flow_data. - my $status = $self->status_cache; - - foreach my $key ( keys %$status ) { - # Key = full path and filename in cache file. 
Parse filename for date and time - my ($file_yr, $file_mo, $file_day, $file_hr, $file_min) = $key =~ /.*(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})$/; - # Make it into a date object - my $file_date = DateTime->new( year => $file_yr, month => $file_mo, day => $file_day, - hour => $file_hr, minute => $file_min, time_zone => "UTC" ); - # Delete if $file_date < $cull_to - if ( DateTime->compare($file_date, $cull_to) == -1 ) { - delete $status->{ $key }; - } - } - - $self->_set_status_cache( $status ); - $self->_write_cache(); -} - - -sub find_nfcapd { - my ( $self, $params ) = @_; - my $path = $params->{'path'}; # flow-path, base dir - my $filepath = $File::Find::name; # full path+filename - return if not defined $filepath; - if ( not -f $filepath ) { - return; - - } - return if $filepath =~ /nfcapd\.current/; - return if $filepath =~ /\.nfstat$/; - - my $name = 'nfcapd.*'; - my $relative = path( $filepath )->relative( $path ); - - # if min_file_age is '0' then we don't care about file age (this is default). - # if not, ignore files younger than min_file_age. - if ( $self->min_file_age ne '0' ) { - if ( ! $self->get_age( "older", $self->min_file_age, $filepath ) ) { - return; - } - } - - push @files, "$relative"; - -} - -sub get_age { - my ( $self, $criterion, $age, $filename ) = @_; - - my ( $interval, $unit ) = ( $age =~ /^(\d+)([DWMYhms])$/ ); - if ( ! $interval or ! $unit ) { - return; - } else { - my %mapping = ( - "D" => "days", - "W" => "weeks", - "M" => "months", - "Y" => "years", - "h" => "hours", - "m" => "minutes", - "s" => "seconds", ); - #exec( sub { - my $dt = DateTime->now; - $dt->subtract( $mapping{$unit} => $interval ); - my $compare_to = $dt->epoch; - my $mtime = stat( $filename )->mtime; - return $criterion eq "older" ? 
- $mtime < $compare_to : - $mtime > $compare_to; - # } ); - } -} - - -1; diff --git a/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm b/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm deleted file mode 100644 index b3570ba3..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/Pipeline.pm +++ /dev/null @@ -1,641 +0,0 @@ -package GRNOC::NetSage::Deidentifier::Pipeline; - -use strict; -use warnings; - -use Moo; - -use GRNOC::Log; -use GRNOC::Config; - -use Net::AMQP::RabbitMQ; -use JSON::XS; -use Math::Round qw( nlowmult nhimult ); -use List::MoreUtils qw( natatime ); -use Try::Tiny; -use Data::Validate::IP; -use Net::IP; -use Hash::Merge qw( merge ); -use POSIX; - -use Data::Dumper; - -### constants ### - -use constant QUEUE_PREFETCH_COUNT => 20; -use constant QUEUE_PREFETCH_COUNT_NOACK => 0; -use constant QUEUE_FETCH_TIMEOUT => 10 * 1000; -use constant RECONNECT_TIMEOUT => 10; - -### required attributes ### - -has config_file => ( is => 'ro', - required => 1 ); - - -has logging_file => ( is => 'ro', - required => 1 ); - -has process_name => ( is => 'ro', - required => 1 ); - -# input queue, identified by name -#has input_queue_name => ( is => 'ro', -# required => 1 ); - -# output queue, identified by name -#has output_queue_name => ( is => 'ro', -# required => 1 ); - -has handler => ( is => 'rwp'); -# required => 1 ); - -### internal attributes ### - -has logger => ( is => 'rwp' ); - -has config => ( is => 'rwp' ); - -has config_obj => ( is => 'rwp' ); - -has is_running => ( is => 'rwp', - default => 0 ); - -has rabbit_config => ( is => 'rwp' ); - -has task_type => ( is => 'rwp' ); - -has shared_config_file => ( is => 'ro' ); - - -# ack_messages indicates whether to ack rabbit messages. normally, this should be 1 (enabled). -# if you disable this, we don't ack the rabbit messages and they go back in the queue. -# usually this is only desired for testing purposes. Don't touch this unless you -# know what you're doing. 
-has ack_messages => ( is => 'rwp', - default => 1 ); - -has run_once => ( is => 'rwp', - default => 0 ); - -has rabbit_input => ( is => 'rwp' ); - -has rabbit_output => ( is => 'rwp' ); - -has input_queue => ( is => 'rwp' ); - -has input_channel => ( is => 'rwp' ); - -has output_queue => ( is => 'rwp' ); - -has output_channel => ( is => 'rwp' ); - -has batch_size => ( is => 'rwp' ); - -has json => ( is => 'rwp' ); - -has num_published_messages => ( is => 'rwp', - default => 0 ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - # create and store logger object - my $grnoc_log = GRNOC::Log->new( config => $self->logging_file ); - my $logger = GRNOC::Log->get_logger(); - - $self->_set_logger( $logger ); - - # create and store config object - my $config_obj = GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); - - - # create and store shared config object - my $shared_config_obj; - my $shared_config = {}; - if ( defined ( $self->shared_config_file ) ) { - $shared_config_obj = GRNOC::Config->new( config_file => $self->shared_config_file, - force_array => 0 ); - my $new_shared_config = {}; - if ( !$shared_config_obj->{'error'} ) { - $new_shared_config = $shared_config_obj->get('/*'); - if ( $new_shared_config ) { - $shared_config = $new_shared_config; - } - } - } - - my $config_single = $config_obj->get('/*') or die "DEATH2!!"; - - # Merge the hashes; the "single" values should overrride those - # from the "shared" config. - my $config = merge( $config_single, $shared_config ); - - $self->_set_config( $config ); - - $self->_rabbit_config(); - - return $self; -} - -### public methods ### - -sub start { - - my ( $self, $task_type ) = @_; - $self->_set_task_type( $task_type ); - - $self->logger->info( "Starting." ); - - # flag that we're running - $self->_set_is_running( 1 ); - - # change our process name - $0 = $self->process_name . 
" [worker]"; - - # setup signal handlers - $SIG{'TERM'} = sub { - - $self->logger->info( "Received SIG TERM." ); - $self->stop(); - }; - - $SIG{'HUP'} = sub { - - $self->logger->info( "Received SIG HUP." ); - }; - - # create JSON object - my $json = JSON::XS->new(); - - $self->_set_json( $json ); - - # connect to rabbit queues - $self->_rabbit_connect(); - - if ( $self->task_type && $self->task_type eq "no_input_queue" ) { - $self->start_noinput(); - - } else { - # continually consume messages from rabbit queue, making sure we have to acknowledge them - return $self->_consume_loop(); - } - -} - -sub start_noinput { - my ( $self ) = @_; - - return $self->_consume_noinput(); -} - - -sub stop { - - my ( $self ) = @_; - - $self->logger->debug( 'Stopping.' ); - - # this will cause the consume loop to exit - $self->_set_is_running( 0 ); -} - -### private methods ### - -sub _consume_noinput { - # for no input queue - - my ( $self ) = @_; - - $self->logger->debug( 'Starting consume_noinput loop.' ); - while( 1 ) { - # have we been told to stop? - if ( !$self->is_running ) { - $self->logger->debug( 'Exiting consume_noinput loop.' ); - return 0; - } - my $handler = $self->handler; - $self->handler->( $self ); - sleep RECONNECT_TIMEOUT; - - } - -} - -sub _consume_loop { - # if there is an input queue - - my ( $self ) = @_; - - - my $input_queue = $self->rabbit_config->{'input'}->{'queue'}; - my $input_channel = $self->rabbit_config->{'input'}->{'channel'}; - my $rabbit = $self->rabbit_input; - - $self->logger->debug( 'Starting consume_loop.' ); - while ( 1 ) { - - # have we been told to stop? - if ( !$self->is_running ) { - - $self->logger->debug( 'Exiting consume loop.' 
); - return 0; - } - - # receive the next rabbit message - my $rabbit_message; - - my $delivery_tag; - - try { - - $rabbit_message = $rabbit->recv( QUEUE_FETCH_TIMEOUT ); - - - } - - - catch { - - $self->logger->error( "Error receiving rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - - # didn't get a message? (eg, no more to retrieve) - if ( !$rabbit_message ) { - - #$self->logger->debug( 'No message received.' ); - - # re-enter loop to retrieve the next message - next; - } - - # try to JSON decode the messages - my $messages; - - $delivery_tag = $rabbit_message->{'delivery_tag'}; - - try { - - $messages = $self->json->decode( $rabbit_message->{'body'} ); - } - - catch { - - $self->logger->error( "Unable to JSON decode message: $_" ); - }; - - if ( !$messages ) { - - try { - - # reject the message and do NOT requeue it since its malformed JSON - $rabbit->reject( $input_channel, $delivery_tag, 0 ); - } - - catch { - - $self->logger->error( "Unable to reject rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } - - # retrieve the next message from rabbit if we couldn't decode this one - next if ( !$messages ); - - # make sure its an array (ref) of messages - if ( ref( $messages ) ne 'ARRAY' ) { - - # make it into a one-element array (needed for rabbit msgs written by logstash) - $messages = [$messages] - - } - - my $num_messages = @$messages; - - my $t1 = time(); - - my $success = $self->_consume_messages( $messages ); - - my $t2 = time(); - my $delta = $t2 - $t1; - - $self->logger->debug( "Consumed $num_messages updates in $delta seconds." ); - - # didn't successfully consume the messages, so reject but requeue the entire message to try again - if ( !$success ) { - - $self->logger->debug( "Rejecting rabbit message, requeueing." 
); - - try { - - $rabbit->reject( $input_channel, $rabbit_message->{'delivery_tag'}, 1 ); - } - - catch { - - $self->logger->error( "Unable to reject rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } - - # successfully consumed message, acknowledge it to rabbit - else { - if ( $self->ack_messages ) { - - #$self->logger->debug( "Acknowledging successful message." ); - - try { - - $rabbit->ack( $input_channel, $rabbit_message->{'delivery_tag'} ); - } - - catch { - - $self->logger->error( "Unable to acknowledge rabbit message: $_" ); - - # reconnect to rabbit since we had a failure - $self->_rabbit_connect(); - }; - } else { - # do nothing - $self->logger->warn("Not acking message"); - } - } - } -} - -sub _consume_messages { - - my ( $self, $messages ) = @_; - - my $num_messages = @$messages; - #$self->logger->debug( "---consuming $num_messages messages" ); - - # gather all messages to process - my $flows_to_process = []; - - # handle every message that came within the rabbit message - foreach my $message ( @$messages ) { - - # make sure message is an object/hash (ref) - if ( ref( $message ) ne 'HASH' ) { - - $self->logger->error( "Messages must be an object/hash of data, skipping." 
); - next; - } - - # include this to our list of messages to process if it was valid - push( @$flows_to_process, $message ) if $message; - - } - - # process all of the data across all messages - my $success = 1; - - - try { - - $flows_to_process = $self->_process_messages( $flows_to_process ) if ( @$flows_to_process > 0 ); - } - - catch { - - $self->logger->error( "Error processing messages: $_" ); - $success = 0; - }; - # if there's no output queue, eg, we're caching in memory, we don't need to push to rabbit - just return success - if ( $self->task_type && $self->task_type eq "no_output_queue" ) { - return $success; - } - - try { - - $self->_publish_data( $flows_to_process ) if ( @$flows_to_process > 0 ); - } - - catch { - - $self->logger->error( "Error publishing messages: $_" ); - $success = 0; - }; - - return $success; -} - -sub _publish_data { - my ( $self, $messages ) = @_; - my $batch_size = $self->rabbit_config->{'output'}->{'batch_size'}; - if ( ! @$messages ) { - $self->logger->debug("No data found to publish"); - return; - } - - # send a max of $batch_size messages at a time to rabbit - my $it = natatime( $batch_size, @$messages ); - - my $num = $self->num_published_messages; - $num += @$messages; - $self->_set_num_published_messages( $num ); - $self->logger->debug("Publishing up to " . $batch_size . " messages per batch ( this batch " . @$messages . " ); total: " . 
$num ); - - my $queue = $self->rabbit_config->{'output'}->{'queue'}; - my $channel = $self->rabbit_config->{'output'}->{'channel'}; - my $exchange = $self->rabbit_config->{'output'}->{'exchange'} || ""; - - $self->_rabbit_connect(); - while ( my @finished_messages = $it->() ) { - - $self->rabbit_output->publish( $channel, $queue, $self->json->encode( \@finished_messages ), {'exchange' => $exchange} ); - } - return $messages; - -} - - - -# _process_messages takes an argument of an arrayref of data to process -# and then it calls the specified handler function on it -sub _process_messages { - my ( $self, $flows_to_process ) = @_; - - my $handler = $self->handler; - $flows_to_process = $self->handler->( $self, $flows_to_process ); - - return $flows_to_process; - -} - -sub _rabbit_config { - my ( $self ) = @_ ; - - my $rabbit_config = {}; - my @directions = ('input', 'output'); - - my $config = $self->config; - - foreach my $direction ( @directions ) { - $rabbit_config->{$direction} = {}; - - my $rabbit_host = $config->{ "rabbit_$direction" }->{ "host"}; - $rabbit_config->{$direction}->{'host'} = $rabbit_host; - - my $rabbit_port = $config->{ "rabbit_$direction" }->{ "port" }; - $rabbit_config->{$direction}->{'port'} = $rabbit_port; - - my $rabbit_username = $config->{ "rabbit_$direction" }->{ "username" }; - $rabbit_config->{$direction}->{'username'} = $rabbit_username; - - my $rabbit_password = $config->{ "rabbit_$direction" }->{ "password" }; - $rabbit_config->{$direction}->{'password'} = $rabbit_password; - - my $rabbit_vhost = $config->{ "rabbit_$direction" }->{ "vhost" }; - $rabbit_config->{$direction}->{'vhost'} = $rabbit_vhost if defined $rabbit_vhost; - - my $rabbit_ssl = $config->{ "rabbit_$direction" }->{ "ssl" } || 0; - $rabbit_config->{$direction}->{'ssl'} = $rabbit_ssl if defined $rabbit_ssl; - - my $rabbit_ca_cert = $config->{ "rabbit_$direction" }->{ "cacert" }; - $rabbit_config->{$direction}->{'ca_cert'} = $rabbit_ca_cert if defined $rabbit_ca_cert; - 
- my $batch_size = $config->{"rabbit_$direction" }->{ "batch_size"} || 100; - $rabbit_config->{$direction}->{'batch_size'} = $batch_size if defined $batch_size; - - my $queue = $config->{"rabbit_$direction" }->{ "queue" }; - $rabbit_config->{$direction}->{'queue'} = $queue; - - my $exchange = $config->{"rabbit_$direction" }->{ "exchange" }; - $rabbit_config->{$direction}->{'exchange'} = $exchange; - - my $channel = $config->{"rabbit_$direction" }->{ "channel" }; - $rabbit_config->{$direction}->{'channel'} = $channel; - - my $durable = $config->{"rabbit_$direction" }->{ "durable" }; - $rabbit_config->{$direction}->{'durable'} = $durable; - - - } - $self->_set_rabbit_config($rabbit_config); - -} - -sub _rabbit_connect { - my ( $self ) = @_; - - my $rabbit_config = $self->rabbit_config; - - my %connected = (); - $connected{'input'} = 0; - $connected{'output'} = 0; - - while ( 1 ) { - - my @directions = ('input', 'output'); - - foreach my $direction ( @directions ) { - - my $rabbit_host = $rabbit_config->{ $direction }->{'host'}; - my $rabbit_port = $rabbit_config->{ $direction }->{'port'}; - my $rabbit_username = $rabbit_config->{ $direction }->{'username'}; - my $rabbit_password = $rabbit_config->{ $direction }->{'password'}; - my $rabbit_ssl = $rabbit_config->{ $direction }->{'ssl'}; - my $rabbit_ca_cert = $rabbit_config->{ $direction }->{'ca_cert'}; - my $rabbit_vhost = $rabbit_config->{ $direction }->{'vhost'}; - my $rabbit_channel = $rabbit_config->{ $direction }->{'channel'}; - my $rabbit_queue = $rabbit_config->{ $direction }->{'queue'}; - my $rabbit_exchange = $rabbit_config->{ $direction }->{'exchange'}; - my $rabbit_durable = $rabbit_config->{ $direction }->{'durable'}; - if ( !defined $rabbit_durable ) { - $rabbit_durable = 1; #default to durable - } - - # $self->logger->debug( "Connecting to $direction RabbitMQ $rabbit_host:$rabbit_port." 
); - - $connected{ $direction } = 0; - - try { - - - my $rabbit = Net::AMQP::RabbitMQ->new(); - my $params = {}; - $params->{'port'} = $rabbit_port; - $params->{'user'} = $rabbit_username if $rabbit_username; - $params->{'password'} = $rabbit_password if $rabbit_password; - if ( $rabbit_ssl ) { - $params->{'ssl'} = $rabbit_ssl; - $params->{'ssl_verify_host'} = 0; - $params->{'ssl_cacert'} = $rabbit_ca_cert; - } - if ( $rabbit_vhost ) { - $params->{'vhost'} = $rabbit_vhost; - } - - if ( $rabbit_exchange ) { - $params->{'exchange'} = $rabbit_exchange; - } - - $rabbit->connect( $rabbit_host, $params ); - - if ( $direction eq 'input' ) { - # open channel to the pending queue we'll read from - $rabbit->channel_open( $rabbit_channel ); - $rabbit->queue_declare( $rabbit_channel, $rabbit_queue, {'auto_delete' => 0, durable => $rabbit_durable } ); - if ( $self->ack_messages ) { - $rabbit->basic_qos( $rabbit_channel, { prefetch_count => QUEUE_PREFETCH_COUNT } ); - } else { - #$rabbit->basic_qos( $rabbit_channel ); - $rabbit->basic_qos( $rabbit_channel, { prefetch_count => QUEUE_PREFETCH_COUNT_NOACK } ); - } - $rabbit->consume( $rabbit_channel, $rabbit_queue, {'no_ack' => 0} ); - - } else { - #open channel to the finished queue we'll send to - # - $rabbit->channel_open( $rabbit_channel ); - $rabbit->queue_declare( $rabbit_channel, $rabbit_queue, {'auto_delete' => 0, durable => $rabbit_durable} ); -# -# - - } - - my $setter = "_set_rabbit_$direction"; - $self->$setter( $rabbit ); - -# -# $self->_set_rabbit( $rabbit ); -# - $connected{ $direction } = 1; - } - - catch { - - $self->logger->error( "Error connecting to $direction RabbitMQ: $_" ); - }; - - if ( $connected{'input'} && $connected{'output'}) { - return; - }; - - next if $connected{ $direction }; - - - $self->logger->info( " Reconnecting $direction after " . RECONNECT_TIMEOUT . " seconds..." 
); - sleep( RECONNECT_TIMEOUT ); - - } # end foreach directoin - - }# end while 1 - -} - -1; diff --git a/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm b/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm deleted file mode 100644 index a734a5f3..00000000 --- a/lib/GRNOC/NetSage/Deidentifier/WorkerManager.pm +++ /dev/null @@ -1,260 +0,0 @@ -package GRNOC::NetSage::Deidentifier::WorkerManager; - -use Moo; -use Types::Standard qw( Str Bool ); - -# this one needs to change -#use GRNOC::NetSage::Deidentifier::WorkerManager::Worker; -#use GRNOC::NetSage::Deidentifier::Pipeline; -## got rid of FlowTagger use GRNOC::NetSage::Deidentifier::FlowTagger; - -use GRNOC::Config; -use GRNOC::Log; - -use Parallel::ForkManager; -use Proc::Daemon; - -use Data::Dumper; - -### required attributes ### - -has config_file => ( is => 'ro', - isa => Str, - required => 1 ); - -has logging_file => ( is => 'ro', - isa => Str, - required => 1 ); - -has worker => ( is => 'ro', - required => 1 ); - -has process_name => ( is => 'ro', - required => 1 ); - -### optional attributes ### - -has daemonize => ( is => 'ro', - isa => Bool, - default => 1 ); - -has task_type => ( is => 'rwp' ); - -### private attributes ### - -has config => ( is => 'rwp' ); - -has logger => ( is => 'rwp' ); - -has children => ( is => 'rwp', - default => sub { [] } ); - -has flow_cache => ( is => 'rwp', - default => sub { {} } ); - -has knot => ( is => 'rwp' ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - # create and store logger object - my $grnoc_log = GRNOC::Log->new( config => $self->logging_file ); - my $logger = GRNOC::Log->get_logger(); - - $self->_set_logger( $logger ); - - # create and store config object - my $config = GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); - - $self->_set_config( $config ); - - return $self; -} - -sub _init_cache { - my $self = shift; - - my %flow_cache = (); # $self->flow_cache; - #$flow_cache{'test'} = 'value'; - - my $glue = 
'flow'; - - #IPC::Shareable->clean_up_all; - my %options = ( - create => 0, - exclusive => 0, - mode => 0644, - destroy => 0 - ); - - #IPC::Shareable->clean_up; - #IPC::Shareable->clean_up_all; - - #my $knot = tie %flow_cache, 'IPC::Shareable', $glue, { %options } or die ("failed to tie cache"); - - #warn "getting cache ..." . Dumper %flow_cache; - #(tied %flow_cache)->shlock; - #$flow_cache{'locked_adding'} = 'w00t!'; - #%flow_cache = ( - # 'test2' => 'wow!' - #); - #(tied %flow_cache)->shunlock; - #warn "getting cache ..." . Dumper %flow_cache; - - #$self->_set_flow_cache( \%flow_cache ); - #$self->_set_knot( $knot ); - -} - -### public methods ### - -sub start { - - my ( $self, $task_type ) = @_; - - $self->_set_task_type( $task_type ); - - $self->logger->info( 'Starting.' ); - - $self->logger->debug( 'In WorkerManager->start()' ); - $self->logger->debug( 'Setting up signal handlers.' ); - - # setup signal handlers - $SIG{'TERM'} = sub { - - $self->logger->info( 'Received SIG TERM. Calling stop()' ); - $self->stop(); - }; - - $SIG{'HUP'} = sub { - - $self->logger->info( 'Received SIG HUP.' ); - }; - - # need to daemonize - if ( $self->daemonize ) { - - $self->logger->debug( 'Daemonizing.' ); - - my $daemon = Proc::Daemon->new( pid_file => $self->config->get( '/config/master/pid-file' ) ); - my $pid = $daemon->Init(); - - # Orig. process "splits into" orig. and child/daemon. Child/daemon has $pid=0, orig has $pid = pid of the child/daemon. - # Both continue from here. Original writes pid file then exits. Child/daemon keeps running. (???) - $self->logger->debug(" pid from daemon->init = $pid"); - - # if in child/daemon process - if ( !$pid ) { - - $self->logger->debug( 'Created daemon process.' ); - - # change process name of the child/daemon - $0 = $self->process_name."-pipeline-daemon"; - - $self->_create_workers(); - } - } - - # dont need to daemonize - else { - - $self->logger->debug( 'Running in foreground.' 
); - - $self->_create_workers(); - } - - return 1; -} - -sub stop { - - my ( $self ) = @_; - - $self->logger->info( 'Stopping.' ); - - my @pids = @{$self->children}; - - $self->logger->debug( 'Stopping child worker processes ' . join( ' ', @pids ) . '.' ); - - return kill( 'TERM', @pids ); -} - -### helper methods ### - -sub _build_config { - - my ( $self ) = @_; - - $self->logger->debug( 'Building GRNOC::Config with config file ' . $self->config_file . '.' ); - - return GRNOC::Config->new( config_file => $self->config_file, - force_array => 0 ); -} - -sub _create_workers { - - my ( $self ) = @_; - - my $num_processes = $self->config->get( '/config/worker/num-processes' ); - - $self->logger->info( "Creating $num_processes child worker processes." ); - - $self->_init_cache(); - - my %flow_cache = %{ $self->flow_cache }; - - my $forker = Parallel::ForkManager->new( $num_processes ); - - # keep track of children pids - $forker->run_on_start( sub { - - my ( $pid ) = @_; - - $self->logger->debug( "Child worker process $pid created." ); - - push( @{$self->children}, $pid ); - } ); - - $forker->run_on_finish( sub { - $self->logger->debug("child process has finished"); - } ); - - for ( 1 .. $num_processes ) { - - $forker->start() and next; - - - #die "done"; - - # create worker in this process - #my $worker = GRNOC::NetSage::Deidentifier::FlowTagger->new( config => $self->config, - # logger => $self->logger, - # config_file => $self->config_file, - # logging_file => $self->logging_file ); - my $worker = $self->worker; - - # this should only return if we tell it to stop via TERM signal etc. -$self->logger->debug(" doing worker->start"); - $worker->start( $self->task_type ); - - # exit child process - $forker->finish(); - } - - $self->logger->debug( 'Waiting for all child worker processes to exit.' 
); - - # wait for all children to return - $forker->wait_all_children(); - - $self->_set_children( [] ); - - #(tied %flow_cache)->remove; - - $self->logger->debug( 'All child workers have exited.' ); -} - -1; diff --git a/logstash-downloads/.gitignore b/logstash-downloads/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/logstash-downloads/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/logstash-temp/.gitignore b/logstash-temp/.gitignore new file mode 100644 index 00000000..5e7d2734 --- /dev/null +++ b/logstash-temp/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/old_stitcher/FlowStitcher.pm b/old_stitcher/FlowStitcher.pm deleted file mode 100644 index 07c08625..00000000 --- a/old_stitcher/FlowStitcher.pm +++ /dev/null @@ -1,311 +0,0 @@ -package GRNOC::NetSage::Deidentifier::FlowStitcher; - -use strict; -use warnings; - -use Moo; - -extends 'GRNOC::NetSage::Deidentifier::Pipeline'; - -use GRNOC::Log; -use GRNOC::Config; - -#use Data::Validate::IP; -#use Net::IP; -#use Digest::SHA; - -#use JSON::XS; -use Clone qw(clone); -use IPC::ShareLite qw( :lock ); -use Storable qw(freeze thaw); -use Try::Tiny; -use Number::Bytes::Human qw(format_bytes); -use Time::Duration; -use Time::HiRes; -use Data::Dumper; - -### internal attributes ### - -has handler => ( is => 'rwp'); - -#has input_data => ( is => 'rwp', default => [] ); - -has flow_cache => ( is => 'rwp' ); - -has ipc_key => ( is => 'rwp', default => 'flow' ); - -has stats => ( is => 'rw', default => sub { {} } ); - -has acceptable_offset => ( is => 'rwp', default => 5 ); - -has finished_flows => ( is => 'rwp', default => sub { [] } ); - -has latest_timestamp => ( is => 'rwp', default => 0 ); - -### constructor builder ### - -sub BUILD { - - my ( $self ) = @_; - - my $config = $self->config; - - my $ipc_key = $config->{'worker'}->{'ipc-key'}; - 
$self->_set_ipc_key( $ipc_key ) if defined $ipc_key; - #warn "BUILD ipc_key: $ipc_key"; - - $self->_set_handler( sub { $self->_run_flow_stitching(@_) } ); - - #$self->_run_flow_stitching(); - - return $self; -} - -### private methods ### -sub _init_cache { - my $self = shift; - my %options = ( - create => 0, - exclusive => 0, - mode => 0644, - destroy => 0, - ); - my %cache; - - $self->_set_flow_cache( \%cache ); - - $self->stats( { - stitched_flow_count => 0, - }); - -} - -# expects an array of data for it to stitch -# returns a stitched array? TODO: update this -sub _run_flow_stitching { - my ( $self, $caller, $messages ) = @_; - - $self->_stitch_flows( ); - - - # Flow stitching is a special case in the pipeline in that it doesn't simply - # return values to be stitched and then exit. It explicitly publishes them itself - # and returns an empty array when it's done. This is because it's a long-running process - # that looks at flows over time - $self->_publish_flows( ); - -} - -sub _publish_flows { - my $self = shift; - my $flows = $self->finished_flows; - - $self->_publish_data( $flows ); - $self->_set_finished_flows( [] ); -} - -sub _set_values_strings { - my $obj = shift; - foreach my $key ( keys %$obj ) { - my $val = $obj->{$key}; - next if not defined $val; - if ( ref($val) eq 'HASH' ) { - $val = _set_values_strings( $val ); - } else { - $obj->{$key} = "$val"; - } - } - - return $obj; -} - -sub _stitch_flows { - my ( $self ) = @_; - - my $ipc_key = $self->ipc_key; - #warn "_stitch_flow, ipc_key: $ipc_key"; - - my $cache_all; - my $share = IPC::ShareLite->new( - -key => $ipc_key, - -create => 0, - -destroy => 0, - ) or die $!; - - $share->lock( LOCK_SH ); - if ( not defined $share ) { - $cache_all = {}; - } else { - #warn "thawing cache ..."; - my $fetch = $share->fetch; - if ( $share->fetch ) { - $cache_all = thaw( $share->fetch ); - } else { - $cache_all = {}; - } - } - $self->_set_flow_cache( $cache_all ); - $share->unlock(); - - my $finished_flows = 
$self->finished_flows; - - my $overlaps = 0; - my $stitchable_flows = 0; - my $stitched_flow_count = 0; - - my $latest_timestamp = $self->latest_timestamp; - - while( my ( $sensor, $cache ) = each %$cache_all ) { - - - while( my ( $five_tuple, $flow_container ) = each %$cache ) { - my $flows = $flow_container->{'flows'}; - if ( @$flows > 0 ) { - my $previous_flow; - my $i = 0; - my %flows_to_remove = (); - foreach my $flow (@$flows ) { - $flow->{'stitching_finished'} = 0; - $flow->{'no_previous'} = 0 if not $flow->{'no_previous'}; - my $start = $flow->{'start'}; - my $end = $flow->{'end'}; - $flow->{'flow_num'} = $i; - $latest_timestamp = $end if $end > $latest_timestamp; - # If there is a previous flow - if ( $previous_flow ) { - # If this flow and the previous flow go together, merge them - # and remove previous flow - if ( $self->_can_stitch_flow( $previous_flow->{'end'}, $start ) ) { - $flow = $self->_stitch_flow( $previous_flow, $flow ); - $flows_to_remove{$i-1} = 1; - $stitched_flow_count++; - $stitchable_flows++; - } else { - # If can't stitch flows, that means that flow has ended and can be output and removed from the cache - $flow->{'stitching_finished'} = 1; - push @$finished_flows, \%{ clone ( $flow )}; - $flows_to_remove{$i} = 1; - } - - } else { - $flow->{'no_previous'}++; - if ( $flow->{'no_previous'} <= 1 ) { - #warn "no previous flow #1; caching"; - } else { - #warn "no previous flow #2; finished"; - $flow->{'stitching_finished'} = 1; - push @$finished_flows, \%{ clone ( $flow )}; - $flows_to_remove{$i} = 1; - } - - } - $previous_flow = $flow; - $i++; - } - - for (my $i=@$flows-1; $i>=0; $i--) { - if ( ( $self->acceptable_offset + $flows->[$i]->{'end'} < $latest_timestamp ) && ( not $flows_to_remove{$i} ) ) { - $flows_to_remove{$i} = 1; - push @$finished_flows, \%{ clone ( $flows->[$i] )}; - } - if ( $flows_to_remove{$i} ) { - splice @$flows, $i, 1; - } - - } - - if ( @$flows < 1 ) { - # no flows for this five tuple; remove it - delete 
$cache->{$five_tuple}; - - } - - } else { - # no flows for this five tuple; remove it - delete $cache->{$five_tuple}; - - } - - } - - if ( keys %{ $cache_all->{ $sensor } } < 1 ) { - delete $cache_all->{ $sensor }; - - } - - $self->_set_latest_timestamp( $latest_timestamp ); - - $self->_set_finished_flows( $finished_flows ); - - my $stats = $self->stats; - $stats->{'stitched_flow_count'} += $stitched_flow_count; - - # find stats on the final, stitched flows for this run - my $max_stitched_duration = 0; - my $max_stitched_bytes = 0; - my $min_stitched_duration; - while( my ( $five_tuple, $flow_container ) = each %$cache ) { - foreach my $row ( @{$flow_container->{'flows'}} ) { - my $bytes = $row->{'values'}->{'num_bits'} / 8; - my $duration = $row->{'values'}->{'duration'}; - if ( $duration > $max_stitched_duration ) { - $max_stitched_duration = $duration; - } - if ( $bytes > $max_stitched_bytes ) { - $max_stitched_bytes = $bytes; - } - } - - } - - $self->stats( $stats ); - - - } # end while sensors - - - # save updated cache - - $self->_set_flow_cache( $cache_all ); - $share->lock( LOCK_EX ); - $share->store( freeze( $cache_all ) ); - $share->unlock(); - -} - -# stitches an individual flow -sub _stitch_flow { - my ($self, $flowA, $flowB) = @_; - - my $flow1; - my $flow2; - - # make sure flow1 comes before flow2; - if ( $flowA->{'start'} < $flowB->{'start'} ) { - $flow1 = $flowA; - $flow2 = $flowB; - } else { - $flow1 = $flowB; - $flow2 = $flowA; - } - - $flow1->{'end'} = $flow2->{'end'}; - $flow1->{'values'}->{'duration'} += $flow2->{'values'}->{'duration'}; - $flow1->{'values'}->{'num_bits'} += $flow2->{'values'}->{'num_bits'}; - $flow1->{'values'}->{'num_packets'} += $flow2->{'values'}->{'num_packets'}; - $flow1->{'stitched'} = 1; - - return $flow1; - -} - -sub _can_stitch_flow { - my ($self, $time1, $time2) = @_; - if ( abs ( $time1 - $time2 ) < $self->acceptable_offset ) { - return 1; - } else { - return 0; - } -} - -1; diff --git 
a/old_stitcher/netsage-flow-stitcher-daemon b/old_stitcher/netsage-flow-stitcher-daemon deleted file mode 100644 index 236a3dbe..00000000 --- a/old_stitcher/netsage-flow-stitcher-daemon +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/sh -# -# netsage-flow-stitcher-daemon init file for starting up the NetSage Flow Stitcher daemon -# -# chkconfig: 2345 20 80 -# description: Starts and stops the NetSage Flow Stitcher daemon - -# Source function library. -. /etc/rc.d/init.d/functions - -name="netsage-flow-stitcher-daemon" -exec="/usr/bin/$name" -pidfile="/var/run/$name.pid" -CONFIG="/etc/grnoc/netsage/deidentifier/netsage_flow_stitcher.xml" - -start() { - [ -f $CONFIG ] || exit 6 - [ -x $exec ] || exit 5 - echo -n $"Starting $name: " - daemon "$exec --config $CONFIG" - retval=$? - echo - return $retval -} - -stop() { - echo -n $"Stopping $name: " - if [ -f $pidfile ] - then - # shutdown haven't work, try old way - killproc -p $pidfile $name - retval=$? - else - success "$name shutdown" - fi - echo - return $retval -} - -restart() { - stop - start -} - -rh_status() { - status -p $pidfile $name -} - -rh_status_q() { - rh_status >/dev/null 2>&1 -} - - -case "$1" in - start) - rh_status_q && exit 0 - $1 - ;; - stop) - rh_status_q || exit 0 - $1 - ;; - restart) - $1 - ;; - status) - rh_status - ;; - *) - echo $"Usage: $0 {start|stop|status|restart}" - exit 2 -esac -exit $? 
diff --git a/old_stitcher/netsage_flow_stitcher.xml b/old_stitcher/netsage_flow_stitcher.xml deleted file mode 100644 index e390ccc3..00000000 --- a/old_stitcher/netsage_flow_stitcher.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - 127.0.0.1 - 5672 - xxxxx - xxxxx - 100 - / - netsage_deidentifier_raw2 - 2 - 0 - /path/to/cert.crt - 1 - - - 127.0.0.1 - 5672 - xxxxx - xxxxx - 100 - / - netsage_deidentifier_stitched - 3 - 0 - 1 - /path/to/cert.crt - - - - - 1 - - - - /var/run/netsage-flow-stitcher-daemon.pid - - diff --git a/setup-cron.sh b/setup-cron.sh new file mode 100755 index 00000000..620317dc --- /dev/null +++ b/setup-cron.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# Copy and modify docker-netsage-downloads.cron.ORIG and .sh.ORIG +# and restart-logstash-container.cron.ORIG and .sh.ORIG +# to make non-example versions and fill in user and path info. +# USER NEEDS TO COPY FILES TO /etc/cron.d/ + +# $USER and $PWD env vars are assumed to be already set +cp cron.d/docker-netsage-downloads.cron.ORIG cron.d/docker-netsage-downloads.cron +sed -i "s|-USER-|$USER|" cron.d/docker-netsage-downloads.cron +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|" cron.d/docker-netsage-downloads.cron +cp bin/docker-netsage-downloads.sh.ORIG bin/docker-netsage-downloads.sh +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/docker-netsage-downloads.sh + +cp cron.d/restart-logstash-container.cron.ORIG cron.d/restart-logstash-container.cron +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" cron.d/restart-logstash-container.cron +cp bin/restart-logstash-container.sh.ORIG bin/restart-logstash-container.sh +sed -i "s|-PATH-TO-GIT-CHECKOUT-|$PWD|g" bin/restart-logstash-container.sh + +echo "" +echo " Cron and bin files have been set up." +echo " Please check cron.d/ docker-netsage-downloads.cron and restart-logstash-container.cron" +echo " files for correct user and path values and " +echo " !!!! >>>>> COPY THEM TO /etc/cron.d/ <<<<< !!!! 
" +echo " If you need to immediately download files, run bin/docker-netsage-downloads.sh manually." +echo "" + +# Also... When we restart logstash, the process needs to be able to write then read a file in logstash-temp/. +# Set the owner and group of logstash-temp/ to 1000, which is the default uid of the user that logstash runs as (see docker-compose.yml). +echo " If requested, enter the sudo password to allow the script to change the owner of logstash-temp/" +echo " (If you get an error, manually change the owner and group of logstash-temp/ to 1000. It doesn't matter what username this maps to.)" +echo "" +sudo chown 1000:1000 logstash-temp +echo "" + diff --git a/setup-pmacct-compose.sh b/setup-pmacct-compose.sh new file mode 100755 index 00000000..d83e74ed --- /dev/null +++ b/setup-pmacct-compose.sh @@ -0,0 +1,139 @@ +#!/bin/bash + +# This script reads pmacct env variables from the .env file, +# [re]creates pmacct config files from the examples, and copies the env variable +# values into them. (Needed because pmacct doesn't support using env vars) +# It also [re]creates the docker-compose.yml file based on .env file entries. 
+ +echo "" + +# Get env variables from .env file +input=".env" +# read line by line, splitting each line at "=" +while IFS='=' read -r name value +do + # save only netflow, sflow, and rabbitmq_input variables + if [[ $name == sflow* || $name == netflow* || $name == rabbitmq_input_* ]] + then + ##echo "Got $name == $value" >&2 + # if this is a sensor name, we need to encode it using #'s for spaces and prefixing with "sfacct--" or "nfacct--" + if [[ $name == sflowSensorName_* ]] + then + value="${value// /#}" + value="sfacct--${value}" + fi + if [[ $name == netflowSensorName_* ]] + then + value="${value// /#}" + value="nfacct--${value}" + fi + # export name-value pairs as env vars + export $name="$value" + fi +done < "$input" + +# Create the docker-compose.yml file by copying the example (will overwrite any existing) +echo "Creating docker-compose.yml." +# Delete all the pmacct services, ie, everything between "services:" and "rabbit:" +# -0777 = treat the whole file as one string; -e code-to-run; .../s = interpret . as any char or newline. 
+perl -0777 -pe "s/services:.*rabbit:/services:\n\nINSERT-HERE\n\n rabbit:/s" < docker-compose.example.yml > docker-compose.yml + +# Loop over sflow sensors / create config files (will overwrite any existing) +port=8000 +for (( n=1; n<=${sflowSensors}; n++ )) +do + # assign the port the container will use + export sflowContainerPort_$n=$port + # create temp config files + cp conf-pmacct/sfacctd.conf.ORIG conf-pmacct/sfacctd_$n.conf.temp + cp conf-pmacct/sfacctd-pretag.map.ORIG conf-pmacct/sfacctd-pretag_$n.map.temp + # change *_1 env var names to *_n + sed -i "s/_1/_$n/g" conf-pmacct/sfacctd_$n.conf.temp + sed -i "s/_1/_$n/g" conf-pmacct/sfacctd-pretag_$n.map.temp + # replace all environment variables with values and save to final filenames + envsubst < conf-pmacct/sfacctd_$n.conf.temp > conf-pmacct/sfacctd_$n.conf + envsubst < conf-pmacct/sfacctd-pretag_$n.map.temp > conf-pmacct/sfacctd-pretag_$n.map + # remove temp files + rm conf-pmacct/*.temp + + # service info for compose file; export so perl can see it. 
+ export section=' sfacctd_1: + container_name: sfacctd_1 + << : *pmacct-defaults + << : *sflow-defaults + command: + # parameters for the sfacctd command + - -f + - /etc/pmacct/sfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${sflowPort_1}:${sflowContainerPort_1}/udp" + +INSERT-HERE' + + # substitute _$n for _1 in $section + section=$(sed 's/_1/_'"$n"'/g' <<< "$section") + + # write it into the compose file + perl -i -pe 's/INSERT-HERE/$ENV{section}/' docker-compose.yml + + # next port number is 1 more + port=$(($port+1)) +done + +# Loop over netflow sensors / create config files (will overwrite any existing) +port=9000 +for (( n=1; n<=${netflowSensors}; n++ )) +do + # assign the port the container will use + export netflowContainerPort_$n=$port + # create temp config files + cp conf-pmacct/nfacctd.conf.ORIG conf-pmacct/nfacctd_$n.conf.temp + cp conf-pmacct/nfacctd-pretag.map.ORIG conf-pmacct/nfacctd-pretag_$n.map.temp + # change *_1 env var names to *_n + sed -i "s/_1/_$n/g" conf-pmacct/nfacctd_$n.conf.temp + sed -i "s/_1/_$n/g" conf-pmacct/nfacctd-pretag_$n.map.temp + # replace all environment variables with values and save to final filenames + envsubst < conf-pmacct/nfacctd_$n.conf.temp > conf-pmacct/nfacctd_$n.conf + envsubst < conf-pmacct/nfacctd-pretag_$n.map.temp > conf-pmacct/nfacctd-pretag_$n.map + # remove temp files + rm conf-pmacct/*.temp + + # service info for compose file; export so perl can see it. 
+ export section=' nfacctd_1: + container_name: nfacctd_1 + << : *pmacct-defaults + << : *netflow-defaults + command: + # parameters for the nfacctd command + - -f + - /etc/pmacct/nfacctd_1.conf + ports: + # port on host receiving flow data : port in the container + - "${netflowPort_1}:${netflowContainerPort_1}/udp" + +INSERT-HERE' + + # substitute _$n for _1 in $section + section=$(sed 's/_1/_'"$n"'/g' <<< "$section") + + # write it into the compose file + perl -i -pe 's/INSERT-HERE/$ENV{section}/' docker-compose.yml + + # next port number is 1 more + port=$(($port+1)) +done + +# Get rid of any remaining "INSERT-HERE" lines + perl -i -pe 's/INSERT-HERE//' docker-compose.yml + + +# Replace any env variables in the compose file. +envsubst < docker-compose.yml > docker-compose.yml.temp +mv docker-compose.yml.temp docker-compose.yml + +echo " Pmacct config and docker-compose.yml files have been created, based on the .env file." +echo " Please check to be sure Docker-compose.yml has the right number of sfacctd and pmacctd services with the right port numbers!" 
+echo "" + + diff --git a/systemd/netsage-flow-filter.service b/systemd/netsage-flow-filter.service deleted file mode 100644 index e034b30b..00000000 --- a/systemd/netsage-flow-filter.service +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=NetSage Pipeline Flow Filter -After=rabbitmq-server.service -Requires=rabbitmq-server.service - -[Service] -User=root -Group=root -Environment=CONFIG=/etc/grnoc/netsage/deidentifier/netsage_flow_filter A.xml -Environment=SHAREDCONFIG=/etc/grnoc/netsage/deidentifier/netsage_shared .xml -ExecStart=/usr/bin/netsage-flow-filter-daemon --config ${CONFIG} --sharedconfig ${SHAREDCONFIG} --nofork - -# We don't have HUP capability yet -# We might want to try restarting automatically, but not now -# Restart=on-failure -# RestartSec=30s - -[Install] -WantedBy=multi-user.target diff --git a/systemd/netsage-netflow-importer.service b/systemd/netsage-netflow-importer.service deleted file mode 100644 index f509039b..00000000 --- a/systemd/netsage-netflow-importer.service +++ /dev/null @@ -1,21 +0,0 @@ -[Unit] -Description=Netsage Pipeline Importer -After=rabbitmq-server.service -Requires=rabbitmq-server.service - -[Service] -User=root -Group=root -Environment=CONFIG=/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml -Environment=SHAREDCONFIG=/etc/grnoc/netsage/deidentifier/netsage_shared.xml -ExecStart=/usr/bin/netsage-netflow-importer-daemon --config ${CONFIG} --sharedconfig ${SHAREDCONFIG} --nofork -ExecStopPost=/bin/echo "Use ps to be sure the daemon and worker both stopped" - -# PID file location is set in importer config file. Probably /var/run/. 
-# We don't have HUP capability yet -# We might want to try restarting automatically, but not now -# Restart=on-failure -# RestartSec=30s - -[Install] -WantedBy=multi-user.target diff --git a/systemd/nfacctd.service b/systemd/nfacctd.service new file mode 100644 index 00000000..a2341c5c --- /dev/null +++ b/systemd/nfacctd.service @@ -0,0 +1,11 @@ +[Unit] +Description=nfacctd daemon providing Netflow collection service +Wants=network.target +After=network.target +ConditionPathExists=/etc/pmacct/nfacctd.conf + +[Service] +ExecStart=/usr/local/sbin/nfacctd -f /etc/pmacct/nfacctd.conf + +[Install] +WantedBy=multi-user.target diff --git a/systemd/sfacctd.service b/systemd/sfacctd.service new file mode 100644 index 00000000..bedc0426 --- /dev/null +++ b/systemd/sfacctd.service @@ -0,0 +1,11 @@ +[Unit] +Description=sfacctd daemon providing Sflow collection service +Wants=network.target +After=network.target +ConditionPathExists=/etc/pmacct/sfacctd.conf + +[Service] +ExecStart=/usr/local/sbin/sfacctd -f /etc/pmacct/sfacctd.conf + +[Install] +WantedBy=multi-user.target diff --git a/userConfig/README.md b/userConfig/README.md index 81c3e819..5f4b3317 100644 --- a/userConfig/README.md +++ b/userConfig/README.md @@ -2,6 +2,14 @@ This directory is git ignore so it ensures any changes here are preserved. Any user overrides should go in here and saved for the next release. -Example of user overrides would be special importer configuration, logstash settings that are not configured via env and so on. +Example of user overrides would be special logstash settings that are not configured via env and so on. + +Eg, you could add a custom jvm.options file here and add the following to the docker-compose.override.yml file under logstash: + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options + +NOTE - don't use both environment variables in the .env file and a custom config file/volume with those settings here. 
+ + diff --git a/util/hist-export.pl b/util/histogram-export.pl similarity index 100% rename from util/hist-export.pl rename to util/histogram-export.pl diff --git a/util/netsage-raw-data-importer b/util/netsage-raw-data-importer deleted file mode 100755 index 695003f0..00000000 --- a/util/netsage-raw-data-importer +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/perl - -##### I believe this was used for development and to read files created by the old netsage-flow-archive pipeline piece -##### Reads json files which are rabbitmq messages which are batches of 100 flows. -##### Used with RawDataImporter.pm - -use strict; -use warnings; - -use GRNOC::NetSage::Deidentifier::RawDataImporter; - -use Getopt::Long; -use Data::Dumper; - -### constants ### - -use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/deidentifier/netsage_raw_data_importer.xml'; -use constant DEFAULT_LOGGING_FILE => '/etc/grnoc/netsage/deidentifier/logging.conf'; - -### command line options ### - -my $config = DEFAULT_CONFIG_FILE; -my $logging = DEFAULT_LOGGING_FILE; -my $nofork; -my @files; -my $help; - -GetOptions( 'config=s' => \$config, - 'logging=s' => \$logging, - 'nofork' => \$nofork, - 'file=s{1,}' => \@files, - 'help|h|?' => \$help ); - -# did they ask for help? 
-usage() if $help; - -# start/daemonize writer -my $raw_importer = GRNOC::NetSage::Deidentifier::RawDataImporter->new( config_file => $config, - logging_file => $logging, - daemonize => !$nofork, - files => \@files, - process_name => "netsage_raw_data_importer", - task_type => "noinput" ); - -$raw_importer->start("no_input_queue"); - -### helpers ### - -sub usage { - - print "Usage: $0 [--config ] [--logging ] [--jsonfile ]\n"; - - exit( 1 ); -} diff --git a/util/netsage_raw_data_importer.xml.example b/util/netsage_raw_data_importer.xml.example deleted file mode 100644 index d2cf67a9..00000000 --- a/util/netsage_raw_data_importer.xml.example +++ /dev/null @@ -1,39 +0,0 @@ - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - /path/to/certificate.crt - netsage_deidentifier_netflow_raw2 - 2 - 1 - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - /path/to/certificate.crt - 3 - netsage_deidentified - netsage_deidentified - - - - - - 1 - - - /var/run/netsage-raw-importer.pid - - - diff --git a/util/process_caida_file.pl b/util/process_caida_file.pl new file mode 100644 index 00000000..6bc264be --- /dev/null +++ b/util/process_caida_file.pl @@ -0,0 +1,80 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Data::Dumper; +# +# This script is used to process as-org database files downloaded from caida.org +# into the format the netsage pipeline requires. +# +# First, get the txt file from caida (these are released quarterly) +# eg, $ wget https://publicdata.caida.org/datasets/as-organizations/20210401.as-org2info.txt.gz +# and $ gunzip 20210401.as-org2info.txt.gz +# +# Then run this script +# eg, $ process_caida_file.pl 20210401.as-org2info.txt +# caida-test.csv will be created. +# +# Save the final file with a name like CAIDA-2021-0401-lookup.csv +# Do a test run to be sure things are ok, as much as possible. 
+# +# Finally, +# Copy it to scienceregistry.grnoc.iu.edu - /usr/share/resourcedb/www/exported/CAIDA-org-lookup.csv +# so cron jobs on pipeline hosts will start downloading it. +# Note that it won't be used in the pipeline until logstash restarts. IU hosts have a cron job to restart logstash. +# (Docker instances will download it periodically but they don't currently restart logstash automatically.) +# +my $input_file = $ARGV[0]; +if (! -e $input_file) { die ("$input_file was not found\n"); } +if (! open( INFILE, '<', $input_file) ) { die ("Error opening $input_file\n"); }; +print ("Processing $input_file\n"); + +my $output_file = "caida-test.csv"; +if (! open( OUTFILE, '>', $output_file) ) { die ("Error opening $output_file\n"); }; +print ("Writing $output_file\n"); + +my $orgs; +my $asn_orgs; +my $section = "headers"; +while (my $line = ) { + chomp $line; + next if ($section eq "headers" and $line !~ /format:/); + if ($section eq "headers" and $line =~ /format:/) { + $section = "orgs"; + next; + } + if ($section eq "orgs" and $line =~ /format:/) { + $section = "asns"; + next; + } + + # have to escape the | separator! + my @parts = split('\|', $line); + + if ($section eq "orgs") { + # $orgs with key org-id = org-name + $orgs->{$parts[0]} = $parts[2]; + } + + if ($section eq "asns") { + # $asn_orgs with key asn = org-name + $asn_orgs->{$parts[0]} = $orgs->{$parts[3]}; + } +} + +# sort by ASN +my @sorted_asns = sort {$a <=> $b} keys $asn_orgs; + +foreach my $asn (@sorted_asns) { + my $org = $asn_orgs->{$asn}; + # handle missing orgs, quotes, backslashes, and commas in org names + if (! 
$org) { $org = "Unknown"; } + $org =~ s/\\/ /g; + $org =~ s/"/""/g; +# if ($org =~ /[,"]/) { $org = '"'.$org.'"'; } + $org = '"'.$org.'"'; + + # asn's are keys in the translate filter and they definitely need to be strings in quotes + $asn = '"'.$asn.'"'; + + print (OUTFILE $asn.','.$org."\n"); +} diff --git a/website/docs/deploy/bare_metal_install.md b/website/docs/deploy/bare_metal_install.md index c0c21510..25b44ab4 100644 --- a/website/docs/deploy/bare_metal_install.md +++ b/website/docs/deploy/bare_metal_install.md @@ -4,7 +4,7 @@ title: Manual Installation Guide sidebar_label: Manual Installation --- -This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. +This document covers installing and running the NetSage Flow Processing Pipeline manually (without using Docker). It assumes a RedHat Linux environment or one of its derivatives. ## Data sources @@ -15,72 +15,98 @@ The Processing pipeline needs data to ingest in order to do anything. There are At least one of these must be set up on a sensor to provide the incoming flow data. -Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. +See the Docker Installation instructions for more info. -Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. -## Installing the Prerequisites +## Install Pmacct -### Installing nfdump +The pmacct package provides nfacctd and sfacctd processes which receive flow data and write it to a rabbitmq queue. -The nfdump package provides nfcapd and sfcapd processes which recieve flow data and write nfcapd files. -The Importer also uses nfdump. 
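If you are only collecting tstat data, you do not need nfdump. +Since the pmacct devs have not released a tagged version (or docker containers) since 1.7.7, and we require some commits that fixed an issue for us on Oct 11, 2021, we need to build pmacct from master (or master from some time after Oct 11, 2021). - -Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. -Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the lastest nfdump. - -:::note -It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. -::: +``` + 1. Go to the host where you want to install or upgrade nfacctd and sfacctd + 2. Get dependencies if they were not previously installed on the host + $ sudo yum install libpcap-devel pkgconfig libtool autoconf automake make + $ sudo yum install libstdc++-devel gcc-c++ librabbitmq-devel jansson-devel.x86_64 + 3. Clone the repo + $ git clone https://github.com/pmacct/pmacct.git + 4. Rename the dir to, eg, pmacct-02Jun2022/, using today's date or the date + of the code you are going to check out. eg. + $ cd pmacct-02Jun2022 + 5. You should be in master at this point. + To build and install a specific release/tag/branch, just check out that + tag/branch and proceed. + We have done testing (and made docker images) with this version: + $ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 + 6. Get ready to build sfacctd and nfacctd + (the following options are needed for Netsage) + $ ./autogen.sh + $ ./configure --enable-rabbitmq --enable-jansson + 7. Build and install + $ make + $ sudo make install + $ make clean + $ make distclean + 8. Check the versions + $ sfacctd -V + $ nfacctd -V + These should give something like this where 20220602 is the date: + nfacctd 1.7.8-git [20220602-0 (5e4b0612)] +``` -If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. +## Install RabbitMQ -### Installing RabbitMQ +A local rabbitmq instance is used to hold flow data until logstash can retrieve and process it. -The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). +Typically, the rabbitmq server runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). ```sh -[root@host ~]# yum install rabbitmq-server +$ sudo yum install rabbitmq-server ``` Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: ```sh -[root@host ~]# /sbin/service rabbitmq-server start - or # systemctl start rabbitmq-server.service +$ sudo systemctl start rabbitmq-server.service ``` -### Installing Logstash - -See the logstash documentation. We are currently using Version 7.10. +Being able to view the user interface in a browser window is very useful. Look up how to enable it. -### Installing the EPEL repo +## Install Logstash -Some of our dependencies come from the EPEL repo. To install this: +See the logstash documentation. We are currently using Version 7.16.2. 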
Build and install + $ make + $ sudo make install + $ make clean + $ make distclean + 8. Check the versions + $ sfacctd -V + $ nfacctd -V + These should give something like this where 20220602 is the date: + nfacctd 1.7.8-git [20220602-0 (5e4b0612)] +``` -If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you.The nfsen.conf file has a section in which to configure all the sources. +## Install RabbitMQ -### Installing RabbitMQ +A local rabbitmq instance is used to hold flow data until logstash can retreive and process it. -The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). +Typically, the rabbitmq server runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). ```sh -[root@host ~]# yum install rabbitmq-server +$ sudo yum install rabbitmq-server ``` Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: ```sh -[root@host ~]# /sbin/service rabbitmq-server start - or # systemctl start rabbitmq-server.service +$ sudo systemctl start rabbitmq-server.service ``` -### Installing Logstash - -See the logstash documentation. We are currently using Version 7.10. +Being able to view the user interface in a browser window is very useful. Look up how to enable it. -### Installing the EPEL repo +## Install Logstash -Some of our dependencies come from the EPEL repo. To install this: +See the logstash documentation. We are currently using Version 7.16.2. 
``` -[root@host ~]# yum install epel-release -``` +Download and install the public signing key + $ sudo rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch -### Installing the GlobalNOC Open Source repo +Create or edit /etc/yum.repos.d/ELK.repo + [logstash-7.x] + name=Elastic repository for 7.x packages + baseurl=https://artifacts.elastic.co/packages/7.x/yum + gpgcheck=1 + gpgkey=https://artifacts.elastic.co/GPG-KEY-elasticsearch + enabled=1 + autorefresh=1 + type=rpm-md -The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. +Install + $ sudo yum install logstash +``` -For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. -``` -[grnoc6] -name=GlobalNOC Public el6 Packages - $basearch -baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch -enabled=1 -gpgcheck=1 -gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 -``` +## Install the Netsage Pipeline + +Installing the Pipeline just means copying config, cron, and systemd files to the correct locations. There are no longer any perl scripts to install. + +The last Pipeline package released by GlobalNOC (**a non-pmacct version**) is in the GlobalNOC Open Source Repo. You can use that, if the version you want is there, or you can just build the rpm from scratch, or manually copy files to the correct locations (the .spec file indicates where). -For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. +(At least formerly, some of our dependencies come from the EPEL repo. We probably don't need this repo anymore though.) + +a. To use the GlobalNOC Public repo, for Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. ``` [grnoc7] @@ -93,207 +119,106 @@ gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. 
-## Installing the Pipeline (Importer and Logstash configs) - -Install it like this: +Install the package using yum: ``` -[root@host ~]# yum install grnoc-netsage-deidentifier +[root@host ~]# yum install grnoc-netsage-pipeline ``` -Pipeline components: - -1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. -2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) -3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! -4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. - -Nothing will automatically start after installation as we need to move on to configuration. - -## Importer Configuration - -Configuration files of interest are - - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information - - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings - - /etc/grnoc/netsage/deidentifier/logging.conf - logging config - - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled - -### Setting up the shared config file - -`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` - -There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. - -The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. 
Here is one such stanza, a netflow example. Instance and router-address can be left commented out. +b. To build the rpm from a git checkout, ``` - - - /path/to/netflow-files/ - - - Netflow Sensor 1 - - - sflow - - - - - - - - -``` - -Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. - -There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) - -Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. - -If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. - -``` - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - - - - 127.0.0.1 - 5672 - guest - guest - 0 - 100 - / - 1 - +$ git clone https://github.com/netsage-project/netsage-pipeline.git +$ cd netsage-pipeline +$ git checkout master (or a branch) +$ perl Makefile.PL +$ make rpm +$ sudo yum install //rpmbuild/RPMS/noarch/grnoc-netsage-pipeline-2.0.0-1.el7.noarch.rpm + (use "reinstall" if the version number has not changed) ``` -### Setting up the Importer config file +c. You could also just move files manually to where they need to go. It should be fairly obvious. 
+- /etc/logstash/conf.d/ +- /etc/pmacct/ +- /etc/cron.d/ +- /usr/bin/ +- /etc/systemd/system/ +- /var/lib/grnoc/netsage/ and /etc/logstash/conf.d/support/ (cron downloads) -`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` +## Logstash Configuration Files -This file has a few more setting specific to the Importer component which you may like to adjust. +We normally use defaults in Logstash settings files, but for Netsage, which uses the Logstash Aggregation filter, it is **required to use only ONE logstash pipeline worker**. - - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. - - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) - - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. - - Min-file-age is used to be sure files are complete before being read. - - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. - - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. - - Keep num-processes set to 1. +IMPORTANT: Be sure to set `pipeline.workers: 1` in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use `-w 1`. -```xml - - +The Logstash config files containing the "filters" that comprise the Pipeline are installed in /etc/logstash/conf.d/. Most should be used as-is, but the input (01-) and output (99-) configs may be modified for your use. The aggregation filter (40-) also has settings that may be changed - check the two timeouts and the aggregation maps path. - - - netsage_deidentifier_netflow_fake - 2 - +> **When processing flows from multiple customers** +> +> - We use one logstash instance with multiple "logstash-pipelines". The logstash-pipelines are defined in /etc/logstash/pipelines.yml. 
+> - Each logstash-pipeline uses config files in a different directory under /etc/logstash/pipelines/. +> - Since most of the config files are the same for all logstash-pipelines, we use symlinks back to files in /etc/logstash/conf.d/. +> - The exceptions are the input, output, and aggregation files (01-, 99-, and 40-). These are customized so that each logstash-pipeline reads from a different rabbit queue, saves in-progress aggregations to a different file when logstash stops, and writes to a different rabbit queue after processing. +> - We normally use one input rabbit queue and logstash-pipeline per customer (where one customer may have multiple sensors), but if there are too many sensors, with too much data, we may split them up into 2 or 3 different input queues and pipelines. +> - The output rabbit queues for processed flows may be on a different host (for us, they are). There, additional independent logstash pipelines can grab the flows and stick them into elasticsearch. Various queues may connect to various ES indices. It's most convenient to put all flows from sensors that will show up in the same grafana portal together in one index (or set of dated indices). - - 3 - netsage_deidentifier_raw - +Check the 15-sensor-specific-changes.conf file. When running without Docker, especially with multiple customers, it's much easier to replace the contents of that file, which reference environment file values, with hard-coded "if" statements and clauses that do just what you need. - - - 100 +ENV FILE: Our standard processing for Netsage uses the default values for environment variables. These are set directly in the logstash configs. If any of these need to be changed, you can use an environment file: `/etc/logstash/logstash-env-vars`. The systemd unit file for logstash is set to read this file if it exists. You could copy in any or all the logstash-related settings from the env.example file. Note that the values will apply in all logstash-pipelines.
- - 1 - - - - /var/cache/netsage/netflow_importer.cache +## Pmacct Configuration and Unit Files - - - 100000000 +Each sensor is assumed to send to a different port on the pipeline host, and each port must have a different collector listening for incoming flow data. With pmacct, these collectors are nfacctd and sfacctd processes. Each requires its own config files and systemd unit file. - - 10m +The easiest way to make the config files is to use the .env file and the setup-pmacct-compose.sh script that were primarily written for use with docker installations. See the Docker Installation documentation for details. +Doing just a few sensors at a time, edit the .env file and run the script. After running the script, you will find files like nfacctd_1.conf and nfacctd_1-pretag.map in conf-pmacct/ (in the git checkout). - - - +You will have to then make the following changes: +- Rename the newly created .conf and .map files, replacing _1 with _sensorName (some string that makes sense to humans). Similarly for _2, etc. +- Edit each .conf file and change the name of the .map file within to match (the pre_tag_map value) +- Also, in each .conf file + - change the port number (nfacctd_port or sfacctd_port) to be the port to which the sensor is sending + - change the rabbit host (amqp_host) from "rabbit" to "localhost" + - change the name of the output rabbit queue (amqp_routing_key) to something unique (eg, netsage_deidentifier_raw_sensorName). This must match *queue* and *key* in the 01-input-rabbit.conf file of the logstash-pipeline that is handling the sensor! +- Finally, copy the files to /etc/pmacct/ - - - - - - - - /var/run/netsage-netflow-importer-daemon.pid - - - -``` +(You can have the script make some of these changes for you if you temporarily edit the conf-pmacct/*.ORIG files.) -## Logstash Setup Notes - -Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. 
- -The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. - -When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. - -FOR FLOW STITCHING/AGGREGATION - IMPORTANT! -Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! -Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". +You will also need to create systemd unit files to start and stop each process. Use systemd/sfacctd.service and nfacctd.service as examples. Each should be given a name like nfacctd-sensorName.service. Within the files, edit the config filename in two places. + ## Start Logstash ```sh -[root@host ~]# /sbin/service logstash start - or # systemctl start logstash.service +# systemctl start logstash.service ``` -It will take couple minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. - -When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. - -## Start the Importer +It will take a minute or two to start. Log files are normally written to /var/log/messages and/or /var/log/logstash/logstash-plain.log. `sudo systemctl status logstash` is also handy. -Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: +Be sure to check to see if it starts ok. If it does not, look for an error message. If all is ok, the last couple lines should be something about connecting to rabbit and how many pipelines are running. 
-`--config [file]` - specify which config file to read +NOTE: When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/filename set in 40-aggregation.conf). This file will be read in and deleted when logstash is started again. (In some situations, it is desirable to just delete those files by hand before restarting.) -`--sharedconfig [file]` - specify which shared config file to read - -`--logging [file]` - the logging config - -`--nofork` - run in foreground (do not daemonize) +## Start Pmacct Processes ```sh -[root@host ~]# /sbin/service netsage-netflow-importer start - or # systemctl start netsage-netflow-importer.service +# systemctl start nfacctd-sensor1 +# systemctl start nfacctd-sensor2 +etc. ``` -The Importer will create a deamon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. +After starting these processes, it's good to check the rabbit UI to watch for incoming flow data. Netflow data usually comes in every minute, depending on router settings, and sflow data should come in every 5 minutes since we have set sfacctd to do some pre-aggregation and send results every 5 minutes. You should also see that the messages are consumed by logstash and there is no long-term accumulation of messages in the queue. + +We have noted that in some cases, pmacct is providing so many flows that logstash cannot keep up and the number of messages in the queue just keeps increasing! This is an issue that has yet to be resolved. + +Flows should exit the pipeline (and appear in Elasticsearch) after about 15 minutes. The delay is due to aggregation. Long-lasting flows will take longer to exit. ## Cron jobs -Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash.
Logstash needs to be restarted in order for any updated files to be read in. +Inactive cron files are installed (and provided in the cron.d/ directory of the git checkout). Baremetal-netsage-downloads.cron and restart-logstash-service.cron should be in /etc/cron.d/. Please review and uncomment their contents. + +These periodically download MaxMind, CAIDA, Science Registry, and member-list files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be used. MaxMind GeoIP files change weekly. CAIDA updates their ASN-Organization mappings every quarter (we need to manually process those to create the file Netsage uses). Science Registry records can change at random times. diff --git a/website/docs/deploy/choosing.md b/website/docs/deploy/choosing.md index 43ae4429..d8531513 100644 --- a/website/docs/deploy/choosing.md +++ b/website/docs/deploy/choosing.md @@ -6,20 +6,15 @@ sidebar_label: Choose Install ## Manual or BareMetal Installation -The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. +The Manual (bare-metal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. -If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll like need to handle a large dataset. +If you are the ultimate consumer of the data then setting up a bare-metal version might be worth doing. ## Dockerized Version -The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. 
The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. +The Docker version makes it easier to bring up the pipeline. More of the work is done for you. -If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption (eg, sending Netsage data to IU), then using the docker pipeline would be preferred. -## Choose your adventure - -- [Manual/Server Installation](bare_metal_install) -- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor -- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/docs/deploy/docker_install_advanced.md b/website/docs/deploy/docker_install_advanced.md index b0c7fb12..04123221 100644 --- a/website/docs/deploy/docker_install_advanced.md +++ b/website/docs/deploy/docker_install_advanced.md @@ -4,200 +4,167 @@ title: Docker Advanced Options Guide sidebar_label: Docker Advanced Options --- -If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. +The following customizations will allow for more complex situations than described in the Docker Installation guide. Find the section(s) which apply to you. *Please first read the Docker Installation guide in detail. This guide will build on top of that.* -## To Add an Additional Sflow or Netflow Collector +## To Add Additional Sflow or Netflow Collectors -If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. 
+Any number of sensors can be accommodated, although if there are more than a few being processed by the same pipeline, you may run into scaling issues. -Any number of sensors can be accomodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensosr A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be be to run more than one Docker deployment.) +#### a. Edit environment file -### a. Edit docker-compose.override.yml +As an example, say we have three netflow sensors. In the .env file, first set `netflowSensors=3`. Then, in the next section, add the actual sensor names and ports for the additional sensors using variable names ending with _2 and _3. An example: -The pattern to add a flow collector is always the same. To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add - -```yaml - example-collector: - image: netsage/nfdump-collector:1.6.18 - restart: always - command: sfcapd -T all -l /data -S 1 -w -z -p 9997 - volumes: - - ./data/input_data/example:/data - ports: - - "9997:9997/udp" ``` +netflowSensorName_1=The 1st Netflow Sensor Name +netflowPort_1=9000 -- collector-name: should be updated to something that has some meaning, in our example "example-collector". -- command: choose between sfcapd for sflow and nfcapd for netflow, and at the end of the command, specify the port to watch for incoming flow data. (Unless your flow exporter is already set up to use a different port, you can use the default ports and configure the exporters on the routers to match.) -- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the -router should be configured to export data to the same port.
(If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) -- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change the last part of the path to something meaningful. - -You will also need to uncomment these lines: +netflowSensorName_2=The 2nd Netflow Sensor Name +netflowPort_2=9001 -```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +netflowSensorName_3=The 3rd Netflow Sensor Name +netflowPort_3=9002 ``` +#### b. Rerun setup-pmacct-compose.sh -### b. Edit netsage_override.xml - -To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. - -```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml ``` - -Edit netsage_override.xml and add a "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. $exampleSensorName is a new "variable"; it will be replaced with a value set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. - -```xml - - /data/input_data/example/ - $exampleSensorName - sflow - +./setup-pmacct-compose.sh ``` -### c. Edit environment file +Check the new docker-compose.yml and files in conf-pmacct/ for consistency. -Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. +#### c. Start new containers -```ini -exampleSensorName=Example New York sFlow -``` +To be safe, bring everything down first, then back up.
+``` +docker-compose down +docker-compose up -d +``` -### d. Running the new collector +## To Filter Flows by Interface +If your sensors are exporting all flows, but only those using particular interfaces are relevant, use this option in the .env file. All incoming flows will be read in, but the logstash pipeline will drop those that do not have src_ifindex OR dst_ifindex equal to one of those listed. (Processing a large number of unnecessary flows may overwhelm logstash, so if at all possible, try to limit the flows at the router level or using iptables.) -After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): +In the .env file, uncomment lines in the appropriate section and enter the information required. "ALL" can refer to all sensors or all interfaces of a sensor. If a sensor is not referenced at all, all of its flows will be kept. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Use semicolons to separate sensors. Some examples (use just one!): ```sh -docker-compose up -d example-collector +ifindex_filter_flag=True +## examples (include only 1 such line): +ifindex_filter_keep=ALL:123 +ifindex_filter_keep=Sensor 1: 123 +ifindex_filter_keep=Sensor 1: 456, 789 +ifindex_filter_keep=Sensor 1: ALL; Sensor 2: 800, 900 ``` -:::note -The default version of the collector is 1.6.18. There are other versions released and :latest should be point to the latest one, but there is no particular effort made to make sure we released the latest version.
You can get a listing of all the current tags listed [here](https://hub.docker.com/r/netsage/nfdump-collector/tags) and the source to generate the docker image can be found [here](https://github.com/netsage-project/docker-nfdump-collector) the code for the You may use a different version though there is no particular effort to have an image for every nfdump release. -::: +- In the first example, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. All other flows will be discarded. +- In the 2nd case, if src or dst ifindex is 123 and the sensor name is "Sensor 1", the flow will be kept. If there are flows from "Sensor 2", all of them will be kept. +- In the 3rd case, flows from Sensor 1 having ifindex 456 or 789 will be kept. +- In the last example, all Sensor 1 flows will be kept, and those from Sensor 2 having ifindex 800 or 900 will be kept. +Spaces don't matter except within the sensor names. Punctuation is required as shown. -## To Keep Only Flows From Certain Interfaces -If your sensors are exporting all flows, but you only want to keep some of them (eg, only send some of them to NetSage), use this option. The collectors and importer will process all flows, but in the logstash pipeline, those that do not have src_ifindex or dst_inindex equal to one of the listed interfaces will be dropped. +## To Filter Flows by Subnet -In the .env file, uncomment the apprpriate section and enter the information required. Be sure "True" is capitalized as shown and list all the ifindex values of flows that should be kept and passed on to NetSage. You may enter one or more ifindex values. For example, +With this option, flows from specified sensors will be dropped unless src or dst is in the list of subnets to keep. It works similarly to the option to filter by interface. "ALL" can refer to all sensors. +If a sensor is not referenced at all, all of its flows will be kept. 
-```sh -ifindex_filter_flag=True -ifindex_filter_keep=123,456 -``` - -In this case, only flows that have src_ifindex = 123 or src_ifindex = 456 or dst_ifindex = 123 or dst_ifindex = 456 will be kept. All others will be dropped. +For example, +``` +subnet_filter_flag=True +subnet_filter_keep=Sensor A Name: 123.45.6.0/16; Sensor B Name: 123.33.33.0/24, 456.66.66.0/24 +``` ## To Change a Sensor Name Depending on the Interface Used -In some cases, users want to differentiate between flows that enter or exit through specific sensor interfaces. This can be done by editing the env file. +In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through a specific interface by using a different sensor name. -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all four fields are set properly! For example, ```sh ifindex_sensor_rename_flag=True -ifindex_sensor_rename_old_name=IU Sflow -ifindex_sensor_rename_new_name=IU Bloomington Sflow ifindex_sensor_rename_ifindex=10032 +ifindex_sensor_rename_old_name=MyNet Sflow +ifindex_sensor_rename_new_name=MyNet Bloomington Sflow ``` -In this case, any flows from the "IU Sflow" sensor that come through interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name (sensor_id in ElasticSearch) changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker. +In this case, any flows from the "MyNet Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "MyNet Sflow" to "MyNet Bloomington Sflow". + +Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. 
:::note -Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +Please notify the devs in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. ::: ## To Do Sampling Rate Corrections in Logstash -When flow sampling is done, the number of bits needs to be corrected for the sampling rate. For example, if you are sampling 1 out of 100 flows and a sample has 55 MB, it is assumed that in reality there would be 100 flows of that size (with that src and dst), so the number of bits is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config or the nfcapd command. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. +When flow sampling is done, corrections have to be applied to the number of packets and bytes. For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfacctd or sfacctd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. -In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! 
The same correction can be applied to multiple sensors by using a comma-separed list. For example, +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a semicolon-separated list. The same correction applies to all listed sensors. For example, ```sh sampling_correction_flag=True -sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_sensors=MyNet Bloomington Sflow; MyNet Indy Sflow sampling_correction_factor=512 ``` -## To Change How Long Nfcapd Files Are Kept -The importer will automatically delete older nfcapd files for you, so that your disk don't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: +In this example, all flows from sensors "MyNet Bloomington Sflow" and "MyNet Indy Sflow" will have a correction factor of 512 applied by logstash. Any other sensors will not have a correction applied by logstash (presumably pmacct would apply the correction automatically). -```sh -cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +Only one correction factor is allowed for, so you can't, for example correct Sensor A with a factor of 512 and also Sensor B with a factor of 100. + +>Note that if pmacct has made a sampling correction already, no additional manual correction will be applied, even if these options are set, +>so this can be used *to be sure* a sampling correction is applied. + +## To NOT Deidentify Flows + +Normally all flows are deidentified before being saved to elasticsearch by truncating the src and dst IP addresses. If you do NOT want to do this, set full_IPs_flag to True. (You will most likely want to request access control on the grafana portal, as well.) + +``` +# To keep full IP addresses, set this parameter to True. 
+full_IPs_flag=True ``` -At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 7 days worth of data: -````xml - - 1 - 7 - -```` +## To Increase Memory Available for Logstash -You will also need to uncomment these lines in docker-compose.override.yml: +If cpu or memory usage seems to be a problem, try increasing the java JVM heap size for logstash from 4GB to 8GB. +To do this, edit LS_JAVA_OPTS in the .env file. E.g., ```yaml - volumes: - - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +LS_JAVA_OPTS=-Xmx8g -Xms8g ``` +Here are some tips for adjusting the JVM heap size (see https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. -## To Process Tstat Data -Tstat data is not collected by nfdump/sfcapd/nfcapd or read by an Importer. Instead, the flow data is sent directly from the router or switch to the logstash pipeline's ingest rabbit queue (named "netsage_deidentifier_raw"). So, when following the Docker Simple guide, the sections related to configuring and starting up the collectors and Importer will not pertain to the tstat sensors. The .env file still needs to be set up though.
- -Setting up Tstat is outside the scope of this document, but see the Netsage project Tstat-Transport which contains client programs that can send tstat data to a rabbit queue. See [https://github.com/netsage-project/tstat-transport.git](https://github.com/netsage-project/tstat-transport.git). Basically, you need to have Tstat send data directly to the same rabbit queue that the importers write sflow and netflow data to and that the logstash pipeline reads from. +## To Overwrite Organization Names When an ASN is Shared +Source and destination organization names come from lookups by ASN or IP in databases provided by CAIDA or MaxMind. (The former is preferred, the latter acts as a backup.) +Sometimes an organization that owns an AS and a large block of IPs will allow members or subentities to use certain IP ranges within the same AS. +In this case, all flows to and from the members will have src or dst organization set to the parent organization's name. If desired, the member organizations' names can be substituted. To do so requires the use of a "member list" which specifies the ASN(s) being shared and the IP ranges for each member. -## To Customize Java Settings / Increase Memory Available for Lostash +See **conf-logstash/support/networkA-members-list.rb.example** for an example. -If you need to modify the amount of memory logstash can use or any other java settings, -rename the provided example for JVM Options and tweak the settings as desired. +## To Tag Flows with Science Discipline Information -```sh -cp userConfig/jvm.options_example userConfig/jvm.options -``` +At https://scienceregistry.netsage.global, you can see a hand-curated list of resources (IP blocks) which are linked to the organizations, sciences, and projects that use them. This information is used by the Netsage pipeline to tag science-related flows. If you would like to see your resources or projects included, please contact us to have them added to the Registry. 
-Also update the docker-compose.override.xml file to uncomment lines in the logstash section. It should look something like this: +## To Use IPtables to Block Some Incoming Traffic -```yaml -logstash: - image: netsage/pipeline_logstash:latest - volumes: - - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options -``` +In certain situations, you may want to use a firewall to block some of the traffic coming to your pipeline host so that it does not enter the docker containers. For example, if multiple routers must send to the same port on the host, but you only want to process flows from one of them, you can use iptables to block traffic from those you don't want. -Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): +With Docker, the INPUT chain in iptables is skipped and instead the FORWARD chain is used. The first rule of the FORWARD chain is to read the DOCKER-USER chain. This chain holds user-defined rules, which Docker does not overwrite. Rules that Docker creates are added to the DOCKER chain; do not manipulate this chain manually. -- The recommended heap size for typical ingestion scenarios should be no less than 4GB and no more than 8GB. -- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. -- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. -- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process.
+To allow only a specific IP or network to access the containers, insert a negated rule at the top of the DOCKER-USER filter chain (or an accept then a drop all others). + ## To Bring up Kibana and Elasticsearch Containers The file docker-compose.develop.yaml can be used in conjunction with docker-compose.yaml to bring up the optional Kibana and Elastic Search components. This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) -## For Data Saved to an NFS Volume - -By default, data is saved to subdirectories in the ./data directory. If you would like to use an NFS mount instead you will need to either - -1. export the NFS volume as ${PROJECT_DIR}/data (which is the idea scenario and least intrusive) -2. update the path to the NFS export path in all locations in docker-compose.yml and docker-compose.override.yml - -Note: modifying all the paths in the two files should work, but may not. In one case, it worked to modify only the paths for the collector volumes (eg, - /mnt/nfs/netsagedata/netflow:/data), leaving all others with their default values. - -:::warning -If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict on upgrade. -You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. -::: diff --git a/website/docs/deploy/docker_install_simple.md b/website/docs/deploy/docker_install_simple.md index 264e5953..9efc32f3 100644 --- a/website/docs/deploy/docker_install_simple.md +++ b/website/docs/deploy/docker_install_simple.md @@ -3,105 +3,213 @@ id: docker_install_simple title: Docker Installation Guide sidebar_label: Docker Installation --- -In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. 
If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. +This deployment guide describes how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. The Docker containers included in the installation are - - rabbit (the local RabbitMQ server) - - sflow-collector (receives sflow data and writes nfcapd files) - - netflow-collector (receives netflow data and writes nfcapd files) - - importer (reads nfcapd files and puts flows into a local rabbit queue) - - logstash (logstash pipeline that processes flows and sends them to, by default, netsage-elk1.grnoc.iu.edu) - - ofelia (cron-like downloading of files used by the logstash pipeline) + - sfacctd_1 to _n - sflow collectors (one per sflow sensor) - each receives sflow data and writes it to a rabbit queue + - nfacctd_1 to _n - netflow collectors (one per netflow sensor) - each receives netflow data and writes it to a rabbit queue + - rabbit - the local RabbitMQ server + - logstash - logstash pipeline that pulls from the rabbit queue, processes flows, and sends to the final destination -The code and configs for the importer and logstash pipeline can be viewed in this github repo (netsage-project/netsage-pipeline). See netsage-project/docker-nfdump-collector for code related to the collectors. +### 1. Prepare a Pipeline Host +Decide where to run the Docker Pipeline, eg, create a VM. The default java heap size for logstash is 4GB so have at least 8GB of memory. Little disk space should be needed. -### 1. Set up Data Sources +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/).
+ +This page has a good list of post-installation steps you may want or need to do: [https://docker-docs.netlify.app/install/linux/linux-postinstall/](https://docker-docs.netlify.app/install/linux/linux-postinstall/). + +Start docker: +``` +sudo systemctl start docker +``` + +Docker Compose is not part of Docker Engine, so must be installed separately from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. + +Check which file permissions new files are created with on the host. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 2. Set up Data Sources The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. - sflow - netflow - tstat -At least one of these must be set up on a sensor to provide the incoming flow data. +At least one of these must be set up on a **sensor** (i.e., flow **exporter** / router), to provide the incoming flow data. You can do this step later, but it will be helpful to have it working first. -Sflow and netflow data should be exported to the pipeline host where there are collectors (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. +Configure sflow and netflow to send flow data to the pipeline host. Each sensor/router should send to a different port. +You will list the port numbers in the .env file (see below). +Usually default settings are ok. (Please share your settings with us.)
-Tstat data should be sent directly to the logstash input RabbitMQ queue on the pipeline host. No collector is needed for tstat data. From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) +On the pipeline host, configure the firewall to allow incoming traffic from the flow exporters, of course. -### 2. Clone the Netsage Pipeline Project +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. -If you haven't already, install [Docker](https://www.docker.com) and [Docker Compose](https://docs.docker.com/compose/install/) and clone this project +Check to see if data is arriving with tcpdump. + +### 3. Clone the Netsage Pipeline Project + +Clone the netsage-pipeline project from github. ```sh git clone https://github.com/netsage-project/netsage-pipeline.git ``` -(If you are upgrading to a new release, see the Upgrade section below!) -Then checkout the right version of the code. +When the pipeline runs, it uses some of the files that are in the git checkout, so it is important to checkout the correct version. +Move into the netsage-pipeline/ directory (**all git, docker, and other commands below must be run from inside this directory!**), then checkout the most recent version of the pipeline (normally the most recent tag). It will say you are in 'detached HEAD' state. ```sh +cd netsage-pipeline git checkout {tag} ``` -Replace "{tag}" with the release version you intend to use, e.g., "v1.2.8". ("Master" is the development version and is not intended for general use!) -`git status` will confirm which branch you are on, e.g., master or v1.2.8. +Replace "{tag}" with the release version you intend to use, e.g., "v2.0.0".
("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v2.0.0. -### 3. Create Docker-compose.override.yml +>Files located in the git checkout that are used by the docker containers and cron: +>- the .env file (created by setup script from example file) +>- docker-compose.yml (created by setup script from example file) and docker-compose.override.yml (optional) +>- logstash config files in conf-logstash/ +>- non-ORIG nfacctd and sfacctd config files in conf-pmacct/ (created by setup script) +>- cron jobs use non-ORIG files in bin/ (created by setup script) and save files to logstash-downloads/ +>- logstash may write to or read from logstash-temp/ when it stops or starts +>On upgrade, example and ORIG files and files in conf-logstash/ will be overwritten. -Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. -Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. +### 4. Create the Environment File -Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose_override.example.yml must always be copied to docker-compose_override.yml. +Next, copy `env.example` to `.env` then edit the .env file to set the number of sensors of each type, the sensor names and ports, and where to send processed flows. ```sh -cp docker-compose.override_example.yml docker-compose.override.yml +cp env.example .env ``` -By default docker will bring up a single netflow collector and a single sflow collector. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. 
If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. -:::note -If you only have one collector, you should remove or comment out the section for the collector that is not used. -::: +The .env file is used in multiple ways - by setup scripts as well as by docker-compose and hence logstash and rabbitmq. Everything you need to set is in this one location. + +By default, the number of sflowSensors and netflowSensors is set to 1 at the top. If you know from the start that you will have only 1 sensor, set either sflowSensors or netflowSensors to 0 and comment out the sensor name and port below. If you know that you will have more than 1 sensor of the same type, specify the number and add variables for the extra sensor names and ports. Note that the variable names need to have _1 replaced by _2, etc. For example, + +``` +sflowSensors=1 +netflowSensors=2 -This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. +# sflow sensors: +sflowSensorName_1=MyNetwork New York Sflow +sflowPort_1=8010 -Other lines in this file you can ignore for now. +# netflow sensors: +netflowSensorName_1=MyNetwork LA Netflow +netflowPort_1=9000 + +netflowSensorName_2=MyNetwork Seattle Netflow +netflowPort_2=9010 +``` :::note -If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose +Sensor names uniquely identify the source of the data and will be shown in the Grafana dashboards so they should be understandable by a general audience. 
For example, your sensor names might be "MyNet New York Sflow" or "MyNet New York to London". (Running your proposed names by a Netsage admin would be helpful.) + +Also, pmacct does not properly handle sensor names containing commas! ::: +You will also want to edit the **rabbit_output** variables. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container, but there is nothing provided to do anything further with it. + +To send processed flow data to us, you will need to obtain settings for this section from your contact. A new queue may need to be set up on our end, as well as allowing traffic from your pipeline host. (On our end, data from this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing in Netsage Portals.) -### 4. Create Environment File -{@import ../components/docker_env.md} +### 5. Run the Pmacct/Compose Setup Script + +```sh +./setup-pmacct-compose.sh +``` -### 5. Choose Pipeline Version +This script will use settings in the .env file to create pmacct (ie, nfacctd and sfacctd) config files in **conf-pmacct/** from the .ORIG files in the same directory. -Once you've created the docker-compose.override.xml file and finished adjusting it for any customizations, you're ready to select which version Docker should run. +It will also create **docker-compose.yml** from docker-compose.example.yml, filling in the correct number of nfacctd and sfacctd services and substituting ${var} values from the .env file. (This is needed since pmacct can't use environment variables directly, like logstash can.) + +Information in the docker-compose.yml file tells docker which containers to run (or stop). +If needed, you can create a docker-compose.override.yml file; settings in this file will overrule and add to those in docker-compose.yml.
All customizations should go in the override file, which will not be overwritten. + +Check the docker-compose file to be sure it looks ok and is consistent with the new config files in conf-pmacct/. All environment variables (${x}) should be filled in. Under ports, there should be two numbers separated by a colon, eg, "18001:8000/udp" + +### 6. Run the Cron Setup Script ```sh -./scripts/docker_select_version.sh +./setup-cron.sh ``` -When prompted, select the **same version** you checked out earlier. -This script will replace the version numbers of docker images in the docker-compose files with the correct values. -## Running the Collectors +This script will create docker-netsage-downloads.cron and restart-logstash-container.cron in the checkout's **cron.d/** directory, along with matching .sh files in **bin/**. These are based on .ORIG files in the same directories but have required information filled in. -After selecting the version to run, you could start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. +The docker-netsage-downloads cron job runs the downloads shell script, which will get various files required by the pipeline from scienceregistry.grnoc.iu.edu on a weekly basis. +The restart cron job runs the restart shell script, which restarts the logstash container once a day. Logstash must be restarted to pick up any changes in the downloaded files. -(Or see the next section for how to start all the containers, including the collectors.) 
+**Note that you need to manually check and then copy the .cron files to /etc/cron.d/.** ```sh -docker-compose up -d sflow-collector netflow-collector +sudo cp cron.d/docker-netsage-downloads.cron /etc/cron.d/ +sudo cp cron.d/restart-logstash-container.cron /etc/cron.d/ ``` -If the collector(s) are running properly, you should see nfcapd files in subdirectories of data/input_data/, and they should have sizes of more than a few hundred bytes. (See Troubleshooting if you have problems.) +Also, manually run the downloads script to immediately download the required external files. + +```sh +bin/docker-netsage-downloads.sh +``` + +Check to be sure files are in logstash-downloads/. + +### 8. Start up the Docker Containers + +Start up the pipeline (all containers) using + +```sh +docker-compose up -d +``` + +This command will pull down all required docker images and start all the services/containers as listed in the docker-compose.yml and docker-compose.override.yml files. +"-d" runs the containers in the background. + +You can see the status of the containers and whether any have died (exited) using these commands +```sh +docker-compose ps +docker container ls +``` + +To check the logs for each of the containers, run + +```sh +docker-compose logs logstash +docker-compose logs rabbit +docker-compose logs sfacctd_1 +docker-compose logs nfacctd_1 +etc. +``` + +`--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +When running properly, logstash logs should end with a line saying how many pipelines are running and another about connecting to rabbitmq. + +To shut down the pipeline (all containers) use + +```sh +# docker-compose down +``` + +**Run all commands from the netsage-pipeline/ directory.** + +>Note that if the pipeline host is rebooted, the containers will not restart automatically. 
+> +>If this will be a regular occurrence on your host, you can add `restart: always` to each service in the docker-compose.override file (you may need to add any missing services to that file). + +### 9. Check the RabbitMQ User Interface + +The rabbitMQ user interface can be used to see if there are incoming flows from pmacct processes and if those flows are being consumed by logstash. + +In your browser, go to ``` https://{pipeline-host}/rabbit ``` Login with username *guest*, password *guest*. Look at the small graph showing rates for incoming messages, acks, etc. You should see bursts of incoming messages (usually once a minute for netflow and once every 5 min for sflow) and no long-term buildup of messages in the other graph. + +### 10. Check for processed flows +- Ask your contact to check for flows and/or look at dashboards in your grafana portal if it's already been set up. Flows should appear after 10-15 minutes. +- Check to be sure the sensor name(s) are correct in the portal. +- Check flow sizes and rates to be sure they are reasonable. (If sampling rate corrections are not being done properly, you may have too few flows and flows which are too small.) Your contact can check to see whether flows have @sampling_corrected=yes (a handful from the startup of netflow collection may not) and check for unusual tags on the flows. -### Running the Collectors and Pipeline +If you are not seeing flows, see the Troubleshooting section of the documentation.
-{@import ../components/docker_pipeline.md} -## Upgrading -{@import ../components/docker_upgrade.md} diff --git a/website/docs/deploy/docker_troubleshooting.md b/website/docs/deploy/docker_troubleshooting.md index 1ad608ab..babed481 100644 --- a/website/docs/deploy/docker_troubleshooting.md +++ b/website/docs/deploy/docker_troubleshooting.md @@ -4,58 +4,52 @@ title: Docker Troubleshooting sidebar_label: Troubleshooting --- -## Troubleshooting - -### If you are not seeing flows after installation - -**Troubleshooting checklist:** - -- Make sure you configured your routers to point to the correct address/port where the collector is running.  -- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. -- Use `docker-compose ps` to be sure the collectors (and other containers) are running. -- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. -- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` -- Check the logs to see if logstash is starting successfully. -- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. - -To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. Look for accumulating messages and/or messages being acknowledged and published. 
- -### If flow collection stops - -**Logstash or Importer errors:** -- Make sure all containers are running. `docker ps` -- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` -- Check the logs to see if logstash is starting successfully. - -**Disk space:** -- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. -- Also check to see how much space the nfcapd files are comsuming. You need to add more disk space. You could try saving fewer days of nfcapd files (see Docker Advanced). - -**Memory:** -- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most -likely culprit is logstash which is usually only allocated 2GB of RAM. You'll need to update the jvm.options file to grant it more memory. - -Please see the [Docker Advanced guide](docker_install_advanced.md#customize-logstash-settings) for details on how to customize logstash. - -Applying this snippet to logstash may help. For example, to give logstash (java) 3GB, - -```yaml -environment: + LS_JAVA_OPTS=-Xmx3g -``` - -Alternatively you may also try doing this: - -```yaml -deploy: - resources: - limits: - cpus: "0.50" - memory: 50M - reservations: - cpus: "0.25" - memory: 20M -``` - -Reference: https://docs.docker.com/compose/compose-file/#resources +### If you are not seeing flows + +- Be sure allow time for the first flows to timeout in the logstash aggregation - wait at least 10-15 minutes after starting up containers. +- Use `docker-compose ps` to see if all the containers are (still) running. +- Check the logs of the various containers to see if anything jumps out as being a problem. 
+- If logstash logs say things like *OutOfMemoryError: Java heap space* or *An unexpected connection driver error occured (Exception message: Connection reset)* and the rabbit container is also down... We've seen this before, but are not sure why it occurs. Try stopping everything, restarting docker for good measure, and starting all the containers up again. (If problems are continuing, it might be a memory issue.) + ``` + docker-compose down + sudo systemctl restart docker + docker-compose up -d + ``` +- If there is only an *OutOfMemoryError* for java, perhaps you need to increase the java heap size. +- Check flow export on the network device to be sure it is (still) configured and running correctly. +- Make sure there really is traffic to be detected (with flows over 10 MB). A circuit outage or simple lack of large flows might be occurring. + + +## Problems most likely to occur at installation: + +- Be sure conf-logstash/ files and dirs are readable by the logstash user (uid 1000, regardless of whether there is a different username associated with uid 1000 on the host). A logstash error about not being able to find *.conf files could be caused by a permissions problem. +- Files in logstash-downloads/ and conf-pmacct/ also need to be readable by the logstash user. +- Logstash-temp/ needs to be readable and also writable by the logstash user. + +- Ensure routers are configured to send to the correct host and port and flow export is functioning. +- Check iptables on the pipeline host to be sure incoming traffic from the routers is allowed. +- Use tcpdump to be sure there are flows coming into the expected port. + +- If the final rabbit queue is on a remote host, eg, at IU, check the credentials you are using and iptables on the remote host. + +- Did you create and edit .env? + - Are the numbers of sensors, sensor names, and port numbers correct? + - Make sure you don't have sflows going to a nfacctd process or vice versa.
+ - Are there names and port numbers for each sensor? + - Are the environment variable names for sensors like *_1, *_2, *_3, etc. with one sequence for sflow and one for netflow? +- Did you run setup-pmacct-compose.sh? +- In docker-compose.override.yml, make sure the ports are set correctly. You will see *port on host : port in container*. (Docker uses its own port numbers internally.) *Port on host* should match what is in .env (the port the router is sending to on the pipeline host). *Port in container* should match what is in the corresponding pmacct config. +- In pmacct config files, make sure amqp_host is set to rabbit (for docker installs) or localhost (for bare metal). +- In 'docker-compose ps' output, be sure the command for the sfacctd_1 container is /usr/local/sbin/sfacctd, similarly for nfacctd. + (If there are 0 Xflow sensors, the command should be *echo No Xflow sensor* and the container state should be Exit 0.) +- In docker-compose.yml and docker-compose.override.yml, make sure *command:*s specify config files with the right _n's (these are actually just the parameters for the commands). + + +## Memory: +- If you are processing a lot of flows and encountering Out of Memory errors, docker may need to be allocated more memory. The most likely culprit is logstash (java) which is only allocated 4GB of RAM by default (in previous versions, only 2GB). Please see the Docker Advanced Options guide for how to change this. + +### If there are too few flows and flow sizes and rates are smaller than expected: + +The router may not be sending the sampling rate with the flow data. +You may need to apply sampling corrections - see the Docker Advanced Options guide.
diff --git a/website/docs/deploy/docker_upgrade.md b/website/docs/deploy/docker_upgrade.md new file mode 100644 index 00000000..b5456759 --- /dev/null +++ b/website/docs/deploy/docker_upgrade.md @@ -0,0 +1,78 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker - Upgrading +--- + +To upgrade a previous installment of the Dockerized pipeline, perform the following steps. + +### 1. Shut things down + +```sh +cd {netsage-pipeline directory} +docker-compose down +``` +This will stop and remove all the docker containers. Note that incoming flow data will be lost during the time the collector and rabbit containers are down. + +### 2. Update source code + +To upgrade to a new release, first pull updates from github. Your customized .env and override files will not be overwritten, nor will files created by startup scripts, cache files, or downloaded support files, though it's always good to make backup copies. + +```sh +git reset --hard +git pull origin master +``` + +:::warning +git reset --hard will obliterate any changes you have made to non-override files, eg, logstash conf files. If necessary, please make sure you commit and save to a feature branch before continuing. +::: + +Checkout the version of the pipeline you want to run (replace "{tag}" by the version number, eg, v1.2.11) and make sure it's up to date. +```sh +git checkout {tag} +git pull +``` + +### 3. Recreate and check custom files + +- Compare your .env to env.example to see if any changes have been made. + Copy in any updates, particularly any relevant ones, or just recreate the .env file as you did during installation. + +- Run the pmacct/compose setup script to recreate the pmacct config files and the docker-compose.yml file, in case there have been any changes. + + ```sh + ./setup-pmacct-compose.sh + ``` + +- If there is a docker-compose.override.yml file, check to see if it's still needed. 
(For the upgrade to v2.0, you will want to get rid of the override file since we are doing everything directly in docker-compose.yml now.) + +- Rerun the cron setup script to recreate the non-ORIG files in bin/ and cron.d/: + + ```sh + ./setup-cron.sh + ``` + +- Compare the resulting files in the cron.d/ directory to those in /etc/cron.d/. If any have changed, copy them to /etc/cron.d/. + +### 4. Restart all the Docker Containers + +``` +docker-compose up -d +``` + +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, pulling down any new docker images that are required. + +### 5. Delete old images and containers + +To keep things tidy, delete any old images and containers that are not being used. + +``` +docker image prune -a +docker container prune +``` + +To check which images you have +``` +docker image ls +``` + diff --git a/website/docs/devel/docker.md b/website/docs/devel/docker.md index 76735113..b124d463 100644 --- a/website/docs/devel/docker.md +++ b/website/docs/devel/docker.md @@ -3,38 +3,90 @@ id: docker_dev_guide title: Docker Dev Guide sidebar_label: Docker Dev Guide --- +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose down +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash # run bash shell in logstash container +``` -## Selecting a Version +### View Container Logs + +``` sh +docker-compose logs -f # view logs for all containers +docker-compose logs -f {service} # view logs for one container, eg logstash +``` -You can use the "master" version or a tagged version. -To select a released version use the docker_select_version.sh script (see the Deployment Guide). -If you wish to use the development version (master branch) simply scip the docker_select_version.sh step.
+## To Build Pmacct Docker Images -## Installing +We will normally use official images for rabbitMQ, logstash, nfacctd, and sfacctd, so no building of images is required. -See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. +However, in case there is not an official image of nfacctd or sfacctd that includes required commits, you may need to build images from master. -## Importer +Below are the steps used to build the pmacct Docker images for v2.0. In the future, you may not have to apply a patch. (Without the patch, when bringing up the nfacctd or sfacctd container, we got *error while loading shared libraries: libndpi.so.4: cannot open shared object file: No such file or directory*.) -The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** +You may need to first add dns servers from /etc/resolv.conf to /etc/docker/daemon.json and restart docker. -## Build Images +``` +$ git clone https://github.com/pmacct/pmacct.git +$ mv pmacct pmacct-30June2022+patch +$ cd pmacct-30June2022+patch/ +$ git checkout 865a81e1f6c444aab32110a87d72005145fd6f74 +$ git submodule update --init --recursive +$ git am -3 0001-ci-docker-fix-docker-multi-stage-build.patch +$ sudo docker build -f docker/base/Dockerfile -t pmacct:base . +$ sudo docker tag pmacct:base base:_build +$ sudo docker build -f docker/nfacctd/Dockerfile -t nfacctd:7Jun2022 . +$ sudo docker build -f docker/sfacctd/Dockerfile -t sfacctd:7Jun2022 . + +$ sudo docker-compose up -d +$ sudo docker-compose down +``` -The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below.
+These steps clone pmacct, change the name of the directory, checkout the code from the desired point in time, get files for submodules, apply the patch that was emailed and saved to ~/lensman/GIT/pmacct-30June2022+patch/0001-ci-docker-fix-docker-multi-stage-build.patch on netsage-pipeline-dev2.bldc, build the base image, rename the base image, build nfacctd and sfacctd images. After building, do a test run (of course, first make the .env file, etc.). When ready, push to the Github Container Registry. -### Build Using Source Code +The nfacctd and sfacctd images are just the base image plus specific commands to run. -If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: -```sh -docker-compose -f docker-compose.build.yml build +## To push images to the GitHub Container Registry +You need to have a personal access token and (presumably) be part of the Netsage Project. The personal access token needs at least the following scopes: repo, read/write/delete:packages. +As an example, here is how lisaens pushed the images for 2.0: +``` +$ sudo docker login ghcr.io -u lisaens +$ sudo docker images + REPOSITORY TAG IMAGE ID CREATED SIZE + sfacctd 7Jun2022 f62b1c6cddbd 5 weeks ago 346MB + nfacctd 7Jun2022 5833977f6dd0 5 weeks ago 346MB + ... +$ sudo docker tag f62b1c6cddbd ghcr.io/netsage-project/sfacctd:7Jun2022 +$ sudo docker push ghcr.io/netsage-project/sfacctd:7Jun2022 +$ sudo docker tag 5833977f6dd0 ghcr.io/netsage-project/nfacctd:7Jun2022 +$ sudo docker push ghcr.io/netsage-project/nfacctd:7Jun2022 + +Go to the Netsage Project in github (netsage-project), click on Packages, click on an image, +click on Connect to Repository and select Netsage Pipeline, then go to Package Settings +(lower right). In the Danger Zone, click on Change Visibility and choose Public. ``` -NOTE: The importer container includes the config files for the logstash pipeline. 
+NOTE that the docker-compose.yml file must refer to the images using the registry location, eg, for sfacctd `ghcr.io/netsage-project/sfacctd:7Jun2022`. -## Optional: ElasticSearch and Kibana +## Run ElasticSearch and Kibana Containers You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: @@ -51,33 +103,3 @@ elasticsearch { 3. Run the containers using the following line: ` ` ` docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d ` ` ` -## Handy Docker Commands - -### Start the Containers - -``` sh -docker-compose up -d -``` - -### Stop the Containers - -``` sh -docker-compose stop && docker-compose rm -``` - -### Enter a Container Shell - -``` sh -docker-compose exec logstash bash #bash shell in logstash container -docker-compose exec importer bash #bash shell in importer container -docker-compose exec rabbit bash #bash shell in rabbit container -``` - -### View Container Logs - -``` sh -docker-compose logs -f #view logs for all containers -docker-compose logs -f logstash #view logs for logstash container -docker-compose logs -f importer #view logs for importer container -docker-compose logs -f rabbit #view logs for rabbit container -``` diff --git a/website/docs/devel/documentation_guide.md b/website/docs/devel/documentation_guide.md index 42a37954..7287ed06 100644 --- a/website/docs/devel/documentation_guide.md +++ b/website/docs/devel/documentation_guide.md @@ -32,9 +32,9 @@ $ cd netsage-pipeline/website $ yarn install ``` -### Local Development +### If Local Development -If you are working on your local machine, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs.
Most changes you make will be reflected live without having to restart the server. +If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. ``` $ yarn build $ yarn start @@ -46,6 +46,7 @@ Whether on a local machine or a linux host, to make changes, edit the files in w When finished, git add, git commit, git push, as usual. Repeat as needed. +To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. ### Tagging a New release @@ -80,15 +81,20 @@ $ USE_SSH="true" GIT_USER="your-username" yarn deploy ``` replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + ### Removing a version -To remove version 1.2.6 for example. 
+To remove version 1.2.6 of the docs, for example, we need to: * update versions.json to remove the reference * remove the versioned_docs/version-1.2.6 * remove versioned_sidebars/version-1.2.6-sidebars.json + * change 1.2.6 in docusaurus.config.js back to 1.2.5 + +Then git add, commit, and push ## If Using Docker diff --git a/website/docs/devel/pipeline_dataset.md b/website/docs/devel/pipeline_dataset.md index a061957d..ce98a436 100644 --- a/website/docs/devel/pipeline_dataset.md +++ b/website/docs/devel/pipeline_dataset.md @@ -4,6 +4,8 @@ title: Pipeline Replay Dataset sidebar_label: Replay Dataset --- +(We haven't been using this for a long time, so it may be out of date.) + The Netsage Pipeline processes network data. Though there are some components and patterns we can use to test the behavior using things like the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash, and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) pligin, but the best test is to replay network data and inspect the output in the grafana dashboard. @@ -14,13 +16,9 @@ You can download the files from [here](https://drive.google.com/drive/folders/19 Please take note of which ports the collectors are listing on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). -Currently the default ports are: - - 9998/udp for sflow - - 9999/udp for netflow - Naturally the collectors have to be running in order for any of this to be usable. 
You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors) -In order to replay the data, use the following commands for netflow and sflow respectively: +In order to replay the data, use nfreplay which is part of the nfdump package. Eg, ### Netflow @@ -30,5 +28,5 @@ nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 ### Sflow -Coming soon. nfreplay will not work with sflow data type. +nfreplay will not work with sflow data type. diff --git a/website/docs/pipeline/elasticsearch.md b/website/docs/pipeline/elasticsearch.md new file mode 100644 index 00000000..0b816bc0 --- /dev/null +++ b/website/docs/pipeline/elasticsearch.md @@ -0,0 +1,125 @@ +--- +id: elastic +title: Elasticsearch +sidebar_label: Elasticsearch +--- + +Flow data is ultimately saved to Elasticsearch. Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. + +### Flow fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| +|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| +|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. 
May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| +|meta.protocol |tcp |Protocol used| +|meta.sensor_id |GEANT NY to Paris |Assigned sensor name | +|meta.sensor_group |GEANT |Sensor group, usually the network | +|meta.sensor_type |Circuit |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |International |'Domestic', 'International', or 'Mixed', depending on countries of src and dst (Domestic = src and dst in USA)| +|meta.is_network_testing |no |'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| + +### Source Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_ip |171.64.68.x |deidentified IP address| +|meta.src_port |80 |port used | +|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ASN of the IP from the MaxMind GeoIP ASN database| +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database| +|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| +|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| +|meta.src_country_name |United States | country of the IP from the MaxMind GeoIP City database| +|meta.src_continent |North America | continent of the IP from the MaxMind GeoIP City database| +|meta.src_ifindex |166 |the index of the interface the flow came into| + +### Source Science Registry Fields (Destination Fields similarly with "dst") +The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows.
+ +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that the src MAY not have the same discipline as the dst. | +|meta.scireg.src.role |Storage |Role that the host plays | +|meta.scireg.src.org_name |Boston University (BU) |The organization that manages and/or uses the resource, as listed in the Science Registry| +|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. May not be the official abbreviation.| +|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | +|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| +|meta.scireg.src.project_names |ATLAS |"Project(s)" that the resource is part of| +|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| +|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| + +### Source "Preferred" Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| +|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| +|meta.src_preferred_location.lon |-122.172000 | Science Registry value if available, otherwise the MaxMind City DB value | + +### Value Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| +|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flow| +|values.duration |3.891 |Calculated as end minus start.|
+|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | +|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| + +### Tstat Value Fields + +|name |example | +|-----------------------|-----------------------| +|values.tcp_cwin_max |1549681 | +|values.tcp_cwin_min |17| +|values.tcp_initial_cwin|313| +|values.tcp_max_seg_size|64313| +|values.tcp_min_seg_size|17| +|values.tcp_mss |8960| +|values.tcp_out_seq_pkts|0| +|values.tcp_pkts_dup |0| +|values.tcp_pkts_fc |0| +|values.tcp_pkts_fs |0| +|values.tcp_pkts_reor |0| +|values.tcp_pkts_rto |0| +|values.tcp_pkts_unfs |0| +|values.tcp_pkts_unk |2| +|values.tcp_pkts_unrto |0| +|values.tcp_rexmit_bytes |1678| +|values.tcp_rexmit_pkts |2| +|values.tcp_rtt_avg |0.044| +|values.tcp_rtt_max |39.527| +|values.tcp_rtt_min |0.001| +|values.tcp_rtt_std |0.276| +|values.tcp_sack_cnt | 1| +|values.tcp_win_max |1549681| +|values.tcp_win_min |17| +|values.tcp_window_scale |13| + +### Developer Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| +|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | +|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | +|@sampling_corrected |yes |'yes' if sampling corrections have been done; 'no' otherwise, eg, for netflows before a template has been seen that includes the sampling rate. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 
1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | +|tags |maxmind src asn |Various info and error messages| +|trial | 5 |Can be set in 40-aggregation.conf if desired| + +### Elasticsearch Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | +|_type |_doc | set by ES | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. | +|_score |1 |set by ES query | +|@version |1 | set by ES | + diff --git a/website/docs/pipeline/intro.md b/website/docs/pipeline/intro.md index f4cce287..c2c8163d 100644 --- a/website/docs/pipeline/intro.md +++ b/website/docs/pipeline/intro.md @@ -3,35 +3,39 @@ id: intro title: Intro sidebar_label: Intro --- -# The NetSage Pipeline +## Network Flows -## Description +As is well known, communication between two computers is accomplished by breaking up the information to be sent into packets which are forwarded through routers and switches from the source to the destination. A **flow** is defined as a series of packets with common characteristics. Normally these are the source IP and port, the destination IP and port, and the protocol (the **5-tuple**). These flows can be detected and analyzed to learn about the traffic going over a certain circuit, for example. -The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. -There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. +> Note that when there is a "conversation" between two hosts, there will be two flows, one in each direction. Note also that determining when the flow ends is somewhat problematic.
A flow ends when no more matching packets have been detected for some time, but exactly how much time? A router may declare a flow over after waiting just 15 seconds, but if one is interested in whole "conversations," a much longer time might make more sense. The source port of flows is normally ephemeral and a particular value is unlikely to be reused in a short time unless the packets are part of the same flow, but what if packets with the same 5-tuple show up after 5 or 10 or 30 minutes? Are they part of the same flow? -## Data Collection +## Flow Export -In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. +Network devices such as routers can function as **flow exporters** by simply configuring and enabling flow collection. All or nearly all come with this capability. -Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. +There are three main types of flow exporters: **[sflow](https://www.rfc-editor.org/info/rfc3176)**, **[netflow/IPFIX](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)** and **[tstat](http://tstat.polito.it/)**. Sflow data is composed of sampled packets, while netflow (the newest version of which is IPFIX) and tstat data consist of information about series of packets (ie whole flows, or what they consider whole flows). These are described further in the following sections.
-Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. +For Netsage, flow exporters, also referred to as **sensors**, are configured to send the flow data to a **Netsage Pipeline host** for processing. -## Pipeline Components +## The NetSage Pipeline + +The **Netsage Flow Processing Pipeline** processes network flow data. It is comprised of several components that collect the flows, add metadata, stitch them into longer flows, etc. + +### Pipeline Components The Netsage Flow Processing Pipeline is made of the following components - - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) - - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. - - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) - - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. + - **[Pmacct](https://github.com/pmacct/pmacct)**: The pmacct package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They are configured to send the flows to a rabbitMQ queue. + - **[RabbitMQ](https://www.rabbitmq.com/)**: RabbitMQ is used for message queueing and passing at a couple of points in the full pipeline. 
+ - **[Logstash](https://www.elastic.co/logstash)**: Our logstash pipeline pulls flow data from a rabbitMQ queue and performs a variety of operations to transform it and add additional information. + - **[Elasticsearch](https://www.elastic.co/what-is/elasticsearch)**: Elasticsearch is used for storing the final flow data. -## Visualization +### Pipeline Installation -[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "Bare Metal" or "Manual" Install). We still use this deployment method at IU. More recently, we've also added a Docker deployment option. For simple scenarios having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + +## Visualization -## Pipeline Installation +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) (with appropriate credentials) can be used to visualize the data stored in elasticsearch. Netsage grafana dashboards or **portals** are set up by the IU team. The dashboards are saved in github [HERE](https://github.com/netsage-project/netsage-grafana-configs). -Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice.
The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. diff --git a/website/docs/pipeline/logstash.md b/website/docs/pipeline/logstash.md index b27e2ee7..ee0deaf6 100644 --- a/website/docs/pipeline/logstash.md +++ b/website/docs/pipeline/logstash.md @@ -4,56 +4,67 @@ title: Logstash Pipeline sidebar_label: Logstash --- -The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. +The Logstash portion of the Netsage Pipeline reads flows from a RabbitMQ queue, performs various transformations and adds additional information, then sends them to a rabbitMQ queue on a different host. Eventually the data ends up in an Elasticsearch data store. -Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. +Logstash .conf files invoke various "filters" and actions. In the bare metal installation, these conf files are located in /etc/logstash/conf.d/. In a docker installation, they are located in the conf-logstash/ directory of the git checkout of the pipeline. See below for a brief description of what each does and check the files for comments. -Notes: - - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). - - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. 
- - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. - - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. +> - All \*.conf files in conf.d/ or conf-logstash/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"'). +> - If you are not running a standard Netsage pipeline and actions in a particular .conf file are not needed in your particular case, they or the whole .conf file can be removed, but check carefully for downstream effects. +> - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs on a weekly or daily basis. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also. Similarly for other support files, eg, those used in 90-additional-fields.conf. +> - "Member organization" lists that we have stored are available to download from sciencregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often. You will need to provide lists for other networks yourself or ask us. 
(See Docker Advanced Options.) ## Logstash Sequence +The main things done in each conf file are as follows. (Please double check the comments in the files themselves, as well, in case this documentation fails to keep up with changes.) + ### 01-input-rabbit.conf -Reads flows from a rabbitmq queue. (The ".disabled" extenstion can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) +Reads flows from a rabbitmq queue. (The ".disabled" extention can be removed from other 01-input configs available in conf.d/ to get flows from other sources, probably for testing.) + +### 05-translate-pmacct.conf + +Renames fields provided by pmacct processes to match what the pipeline uses (from before we used pmacct). ### 10-preliminaries.conf Drops flows to or from private IP addresses; converts any timestamps in milliseconds to seconds; -drops events with timestamps more than a year in the past or (10 sec) in the future; -does some data type conversions; -adds @ingest_time (this is mainly for developers). +drops strange events with timestamps more than a year in the past or (10 sec) in the future; +sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates) ### 15-sensor-specific-changes.conf -Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specfied list, 2) the ability to change the sensor name for flows from a specified sensor which go through a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specifiy the parameters. 
By default, this config will do nothing since the flags will be set to False. +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, 3) the ability to apply a sampling rate correction manually for named sensors, and 4) the ability to add subnet filtering for flows from specified sensors. + +You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False. ### 20-add_id.conf -Adds a unique id (evenutally called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. +Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. ### 40-aggregation.conf -Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. +Stitches incoming flows into longer flows. The inactive timeout is 6 minutes, by default. So, if the time from the start of the current flow to the start time of the last matching flow is over 6 minutes, declare the previous aggregated flow ended and start a new one with the current incoming flow. The default active timeout is 1 hour, meaning any flows over 1 hour in length will be split up into 1 hour chunks. This may require the start time to be adjusted, to cut off previous whole hours.
-Notes: - - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. - - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. - - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). +For sflow, aggregation uses the 5-tuple plus sensor name. +For netflow, aggregation uses the 5-tuple plus sensor name plus start time. This means that when there's a timeout at the router (default inactive timeout is usually 15 sec), the flows will stay separate. (In certain grafana dashboards, they will be added together.) Start times of incoming flows are adjusted. See comments in file. + +Notes - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. - Tstat flows come in already complete, so no aggregation is done on those flows. +### 41-thresholds.conf + +Drops flows that are too small - under 10 MB, by default. +For flows with small durations, sets rates to 0 because sampling makes them too inaccurate. + ### 45-geoip-tagging.conf Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". 
-*This product includes GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* +*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* ### 50-asn.conf @@ -86,6 +97,8 @@ Notes: Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. +Deidentification can be skipped by using an option in the environment file. + ### 80-privatize.org.conf Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). @@ -98,19 +111,19 @@ Copies Science Registry organization and location values, if they exist, to the ### 90-additional-fields.conf Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): - - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) - - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - sensor_group = TACC, NEAAR, I-Light, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, Regional Network, Facility Edge, Campus (based on matching sensor names to regexes) - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) - - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) - - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!)
+ - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or if port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) This id may or may not be used for the document id in Elasticsearch. It may be used for other purposes in grafana dashboards, as well. ### 95-cleanup.conf -Does small misc. tasks at the end like rename, remove, or convert fields +Does small miscellaneous tasks at the end like rename, remove, or convert fields ### 98-post-process.conf -Adds @exit_time and @processing_time (these are mainly for developers) +Adds @exit_time, @processing_time, and @pipeline_ver (these are mainly for developers) ### 99-output-rabbit.conf @@ -118,7 +131,7 @@ Sends results to a final RabbitMQ queue. (".disabled" can be removed from other ### Final Stage -In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue at Indiana University and the last stage is a separate logstash pipeline. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. 
## Field names diff --git a/website/docs/pipeline/pmacct.md b/website/docs/pipeline/pmacct.md new file mode 100644 index 00000000..9484bfea --- /dev/null +++ b/website/docs/pipeline/pmacct.md @@ -0,0 +1,17 @@ +--- +id: pmacct +title: Pmacct +sidebar_label: Pmacct +--- +The pmacct ("p-m-account") package includes sfacctd and nfacctd daemons which receive sflow and netflow/IPFIX flows, respectively. They can also do some processing and filtering, but we use these options very minimally. (Pmacct includes other daemons, as well, but we do not use them. Here, "pmacct" will refer to sfacctd and nfacctd in general.) + +As flow data comes into the pipeline host, it is received by nfacctd and sfacctd processes which are listening on the proper ports (one process per port). +These processes do sampling corrections, add sensor name information, and send the flows to a rabbitmq queue. +Netsage also uses sfacctd to do some preliminary aggregation for sflow, to cut down on the work that logstash needs to do. By default, all samples, with the same 5-tuple, within each 5 minute window are aggregated into one incoming raw flow. + +### Configuration +Each nfacctd and sfacctd process requires a main config file. In the bare-metal installation, these are in /etc/pmacct/. For the default docker deployment, they are in {pipeline checkout directory}/conf-pmacct/. There are two basic versions - sfacctd.conf.ORIG and nfacctd.conf.ORIG. See comments within the files. Sensor-specific copies are created from these via a setup script. + +For Netsage, pretag.map files are also required to assign a sensor name, one file for each nfacctd or sfacctd process. With the docker deployment, these files are also created by a setup script. By default, these are found in the same directory as the main config files.
+ + diff --git a/website/docs/pipeline/sensors.md b/website/docs/pipeline/sensors.md new file mode 100644 index 00000000..45088816 --- /dev/null +++ b/website/docs/pipeline/sensors.md @@ -0,0 +1,19 @@ +--- +id: sensors +title: Sflow/Netflow Data Export +sidebar_label: Sflow/Netflow Data +--- + +Export of sflow and netflow/IPFIX data can be configured on appropriate network devices. Routers and switches will have at least one of these capabilities built in, although implementations can sometimes be buggy. + +We have assumed that each exporter/sensor is configured to send flow data to a different port on the pipeline host. Certainly if different sensors use different sampling rates, this needs to be adhered to. The pipeline uses the port number to recognize which sensor the flows are coming from and tag them with the name of that sensor. + +Sflow exporters simply collect individual **samples** of packets passing through the device and send them to a collector (pmacct in our case). The netsage pipeline then looks for matching packets to aggregate into flows. The sampling rate can be configured, eg, 1 out of every 100 packets. + +>To approximately correct for the fact that most packets are not detected, one assumes that each sampled packet represents N others and multiplies the number of bytes in the sampled packet by the sampling rate N, eg, 100. The sampling rate compared to the number of packets per second flowing through the device determines how accurate this approximation is. Sampling is least accurate for shorter flows since their packets will be more likely to be missed and the correction applied may overestimate the number of bytes in the flow. Discussions of accuracy and sampling rates can be found online. + +Netflow also commonly samples packets, and the same sampling corrections must be applied, but it also keeps track of the flows and aggregates by the 5-tuple (source and destination IPs, ports, and protocol) *on the router*.
The **active timeout** determines how often netflow sends out an "update" on the flows it is aggregating. The **inactive timeout** determines how long to wait for another matching packet before declaring that a flow has ended. + +>Typically, the active timeout is set to 1 minute and the inactive timeout to 15 seconds. This means that for flows longer than 1 minute, a "netflow update" is sent out every minute. The tricky thing is that these update-flows all have the same start time (the time the first packet was observed). The end time (the time the last packet was observed) and duration change, but the number of bytes and packets reported corresponds only to the period since the last update. The netsage pipeline attempts to combine these updates to aggregate long flows correctly. +> +>Netflow exporters also periodically send "templates" which describe the contents of the flow data datagrams. Before the first template is sent, the flow collector won't know what the sampling rate is, so templates should be sent frequently, eg, every minute. diff --git a/website/docs/pipeline/tstat.md b/website/docs/pipeline/tstat.md index baab97c5..e6d47724 100644 --- a/website/docs/pipeline/tstat.md +++ b/website/docs/pipeline/tstat.md @@ -1,16 +1,18 @@ --- id: tstat -title: Tstat Data Collection +title: Tstat Data Export sidebar_label: Tstat Data --- -## Netsage GitHub Project +**[Tstat](http://tstat.polito.it/)** is a passive sniffer that provides insights into traffic patterns. -[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). 
+The **Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project** provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). -## Docker +Tstat is only appropriate for certain situations, eg, tracking traffic into and out of data archives. It does not do sampling and exports only complete flows. It also provides additional information beyond what sflow and netflow provide. -Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). +In the Netsage Pipeline, tstat data is treated the same as sflow and netflow data, but the logstash aggregation step is skipped since it is not needed. + +Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 123409c5..591bebfb 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -55,7 +55,7 @@ module.exports = { remarkPlugins: [require("remark-import-partial")], sidebarPath: require.resolve("./sidebars.js"), // Please change this to your repo.
- lastVersion: "1.2.9", + lastVersion: "1.2.11", versions: { current: { label: `master (unreleased)`, diff --git a/website/package.json b/website/package.json index 6091a5e6..826ac11d 100644 --- a/website/package.json +++ b/website/package.json @@ -12,8 +12,8 @@ "@docusaurus/core": "^2.0.0-alpha.72", "@docusaurus/preset-classic": "^2.0.0-alpha.72", "classnames": "^2.2.6", - "immer": "^8.0.1", - "node-fetch": "^2.6.1", + "immer": "^9.0.6", + "node-fetch": "^3.1.1", "react": "^16.8.4", "react-dom": "^16.8.4", "remark-images": "^2.0.0", diff --git a/website/sidebars.js b/website/sidebars.js index 9fc9f34b..539ab969 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -3,8 +3,8 @@ module.exports = { Pipeline: [ "pipeline/intro", "pipeline/tstat", - "pipeline/nfdump", - "pipeline/importer", + "pipeline/sensors", + "pipeline/pmacct", "pipeline/logstash", "pipeline/elastic", ], @@ -13,13 +13,13 @@ module.exports = { "deploy/bare_metal_install", "deploy/docker_install_simple", "deploy/docker_install_advanced", + "deploy/docker_upgrade", "deploy/docker_troubleshoot", ], Development: [ "devel/dev_dataset", "devel/docker_dev_guide", "devel/docusaurus", - "devel/docker_dev_tag", ] }, }; diff --git a/website/docs/components/docker_env.md b/website/versioned_docs/version-1.2.10/components/docker_env.md similarity index 84% rename from website/docs/components/docker_env.md rename to website/versioned_docs/version-1.2.10/components/docker_env.md index 0aae416c..845e433b 100644 --- a/website/docs/components/docker_env.md +++ b/website/versioned_docs/version-1.2.10/components/docker_env.md @@ -11,8 +11,8 @@ netflowSensorName=my netflow sensor name Simply change the names to unique identifiers (with spaces or not, no quotes) and you're good to go. :::note -These names uniquely identify the source of the data. In elasticsearch, they are saved in the `meta.sensor_id` field and will be shown in Grafana dashboards. Choose names that are meaningful and unique. 
-For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet NY-London 1" and "RNDNet NY-London 2". Whatever makes sense in your situation. +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Boston Netflow" or "RNDNet New York - London 1" and "RNDNet New York - London 2". Whatever makes sense in your situation. ::: - If you don't set a sensor name, the default docker hostname, which changes each time you run the pipeline, will be used. @@ -22,7 +22,7 @@ For example, your sensor names might be "RNDNet New York Sflow" and "RNDNet Bost Other settings of note in this file include the following. You will not necessarily need to change these, but be aware. -**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). +**rabbit_output_host**: this defines where the final data will land after going through the pipeline. By default, the last rabbit queue will be on `rabbit`, ie, the local rabbitMQ server running in its docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). (For NetSage, another logstash pipeline on a remote server moves flows from this final rabbit queue into Elasticsearch.) The following Logstash Aggregation Filter settings are exposed in case you wish to use different values. (See comments in the \*-aggregation.conf file.) 
The aggregation filter stitches together long-lasting flows that are seen in multiple nfcapd files, matching by the 5-tuple (source and destination IPs, ports, and protocol) plus sensor name. diff --git a/website/docs/components/docker_first_steps.md b/website/versioned_docs/version-1.2.10/components/docker_first_steps.md similarity index 100% rename from website/docs/components/docker_first_steps.md rename to website/versioned_docs/version-1.2.10/components/docker_first_steps.md diff --git a/website/docs/components/docker_pipeline.md b/website/versioned_docs/version-1.2.10/components/docker_pipeline.md similarity index 100% rename from website/docs/components/docker_pipeline.md rename to website/versioned_docs/version-1.2.10/components/docker_pipeline.md diff --git a/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md new file mode 100644 index 00000000..c0c21510 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/bare_metal_install.md @@ -0,0 +1,299 @@ +--- +id: bare_metal_install +title: Manual Installation Guide +sidebar_label: Manual Installation +--- + +This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. + +## Data sources + +The Processing pipeline needs data to ingest in order to do anything. There are two types of data that can be consumed. + +1. sflow or netflow +2. tstat + +At least one of these must be set up on a sensor to provide the incoming flow data. + +Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. + +Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). 
From there, the data will be processed the same as sflow/netflow data. + +## Installing the Prerequisites + +### Installing nfdump + +The nfdump package provides nfcapd and sfcapd processes which receive flow data and write nfcapd files. +The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. + + +Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot of cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. +Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the latest nfdump. + +:::note +It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. +::: + + +If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you. The nfsen.conf file has a section in which to configure all the sources. + +### Installing RabbitMQ + +The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). + +```sh +[root@host ~]# yum install rabbitmq-server + +``` + +Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: + +```sh +[root@host ~]# /sbin/service rabbitmq-server start + or # systemctl start rabbitmq-server.service +``` + +### Installing Logstash + +See the logstash documentation. We are currently using Version 7.10. + +### Installing the EPEL repo + +Some of our dependencies come from the EPEL repo.
To install this: + +``` +[root@host ~]# yum install epel-release +``` + +### Installing the GlobalNOC Open Source repo + +The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. + +For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. + +``` +[grnoc6] +name=GlobalNOC Public el6 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 +``` + +For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. + +``` +[grnoc7] +name=GlobalNOC Public el7 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 +``` + +The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. + +## Installing the Pipeline (Importer and Logstash configs) + +Install it like this: + +``` +[root@host ~]# yum install grnoc-netsage-deidentifier +``` + +Pipeline components: + +1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. +2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) +3. Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! +4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. + +Nothing will automatically start after installation as we need to move on to configuration. 
+ +## Importer Configuration + +Configuration files of interest are + - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information + - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings + - /etc/grnoc/netsage/deidentifier/logging.conf - logging config + - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled + +### Setting up the shared config file + +`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` + +There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. + +The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. + +``` + + + /path/to/netflow-files/ + + + Netflow Sensor 1 + + + sflow + + + + + + + + +``` + +Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. + +There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) + +Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. 
+ +If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. + +``` + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + +``` + +### Setting up the Importer config file + +`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` + +This file has a few more setting specific to the Importer component which you may like to adjust. + + - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. + - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) + - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. + - Min-file-age is used to be sure files are complete before being read. + - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. + - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. + - Keep num-processes set to 1. + +```xml + + + + + + netsage_deidentifier_netflow_fake + 2 + + + + 3 + netsage_deidentifier_raw + + + + + 100 + + + 1 + + + + + + /var/cache/netsage/netflow_importer.cache + + + + 100000000 + + + 10m + + + + + + + + + + + + + /var/run/netsage-netflow-importer-daemon.pid + + + +``` + +## Logstash Setup Notes + +Standard logstash filter config files are provided with this package. Most should be used as-is, but the input and output configs may be modified for your use. + +The aggregation filter also has settings that may be changed as well - check the two timeouts and the aggregation maps path. + +When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. + +FOR FLOW STITCHING/AGGREGATION - IMPORTANT! 
+Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! +Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". + +## Start Logstash + +```sh +[root@host ~]# /sbin/service logstash start + or # systemctl start logstash.service +``` +It will take a couple of minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. + +When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. + +## Start the Importer + +Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: + +`--config [file]` - specify which config file to read + +`--sharedconfig [file]` - specify which shared config file to read + +`--logging [file]` - the logging config + +`--nofork` - run in foreground (do not daemonize) + +```sh +[root@host ~]# /sbin/service netsage-netflow-importer start + or # systemctl start netsage-netflow-importer.service +``` +The Importer will create a daemon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. + + +## Cron jobs + +Sample cron files are provided. Please review and uncomment their contents. These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in.
+ + + diff --git a/website/versioned_docs/version-1.2.10/deploy/choosing.md b/website/versioned_docs/version-1.2.10/deploy/choosing.md new file mode 100644 index 00000000..43ae4429 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/choosing.md @@ -0,0 +1,25 @@ +--- +id: choose_install +title: Choosing an Installation Procedure +sidebar_label: Choose Install +--- + +## Manual or BareMetal Installation + +The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. + +It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. + +If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll likely need to handle a large dataset. + +## Dockerized Version + +The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. + +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred.
+ +## Choose your adventure + +- [Manual/Server Installation](bare_metal_install) +- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor +- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md new file mode 100644 index 00000000..259571d2 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_install_advanced.md @@ -0,0 +1,196 @@ +--- +id: docker_install_advanced +title: Docker Advanced Options Guide +sidebar_label: Docker Advanced Options +--- + +If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. + +*Please first read the Docker Installation guide in detail. This guide will build on top of that.* + + +## To Add an Additional Sflow or Netflow Collector + +If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. + +Any number of sensors can be accommodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensor A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be to run more than one Docker deployment.) + + +### a. Edit docker-compose.override.yml + +The pattern to add a flow collector is always the same.
To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add + +```yaml + example-collector: + image: netsage/nfdump-collector:1.6.18 + restart: always + command: sfcapd -T all -l /data -S 1 -w -z -p 9997 + volumes: + - ./data/input_data/example:/data + ports: + - "9997:9997/udp" +``` + +- collector-name: should be updated to something that has some meaning, in our example "example-collector". +- command: choose between sfcapd for sflow and nfcapd for netflow, and at the end of the command, specify the port to watch for incoming flow data. (Unless your flow exporter is already set up to use a different port, you can use the default ports and configure the exporters on the routers to match.) +- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the +router should be configured to export data to the same port. (If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) +- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change the last part of the path to something meaningful. + +You will also need to uncomment these lines: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +### b. Edit netsage_override.xml + +To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +Edit netsage_override.xml and add a "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. 
$exampleSensorName is a new "variable"; it will be replaced with a value set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. + +```xml + + /data/input_data/example/ + $exampleSensorName + sflow + +``` + +### c. Edit environment file + +Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. + +```ini +exampleSensorName=Example New York sFlow +``` + + +### d. Running the new collector + +After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): + +```sh +docker-compose up -d example-collector +``` + +:::note +The default version of the collector is 1.6.18. There are other versions released and :latest should point to the latest one, but there is no particular effort made to make sure we released the latest version. You can get a listing of all the current tags listed [here](https://hub.docker.com/r/netsage/nfdump-collector/tags) and the source to generate the docker image can be found [here](https://github.com/netsage-project/docker-nfdump-collector). You may use a different version, though there is no particular effort to have an image for every nfdump release. +::: + + +## To Keep Only Flows From Certain Interfaces +If your sensors are exporting all flows, but you only want to keep some of them (eg, only send some of them to NetSage), use this option. The collectors and importer will process all flows, but in the logstash pipeline, those that do not have src_ifindex or dst_ifindex equal to one of the listed interfaces will be dropped. + +In the .env file, uncomment the appropriate section and enter the information required.
Be sure "True" is capitalized as shown and list all the ifindex values of flows that should be kept and passed on to NetSage. You may enter one or more ifindex values. For example, + +```sh +ifindex_filter_flag=True +ifindex_filter_keep=123,456 +``` + +In this case, only flows that have src_ifindex = 123 or src_ifindex = 456 or dst_ifindex = 123 or dst_ifindex = 456 will be kept. All others will be dropped. + + +## To Change a Sensor Name Depending on the Interface Used +In some cases, users want to differentiate between flows that enter or exit through specific sensor interfaces. This can be done by editing the env file. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, + +```sh +ifindex_sensor_rename_flag=True +ifindex_sensor_rename_old_name=IU Sflow +ifindex_sensor_rename_new_name=IU Bloomington Sflow +ifindex_sensor_rename_ifindex=10032 +``` + +In this case, any flows from the "IU Sflow" sensor that come through interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name (sensor_id in ElasticSearch) changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker. + +:::note +Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +::: + +## To Do Sampling Rate Corrections in Logstash +When flow sampling is done, the number of bits needs to be corrected for the sampling rate. For example, if you are sampling 1 out of 100 flows and a sample has 55 MB, it is assumed that in reality there would be 100 flows of that size (with that src and dst), so the number of bits is multiplied by 100. 
Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config or the nfcapd command. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separated list. For example, + +```sh +sampling_correction_flag=True +sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_factor=512 +``` + +## To Change How Long Nfcapd Files Are Kept +The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. Eg, to save 7 days worth of data: +````xml + + 1 + 7 + +```` + +You will also need to uncomment these lines in docker-compose.override.yml: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +## To Customize Java Settings / Increase Memory Available for Logstash + +If you need to modify the amount of memory logstash can use or any other java settings, +rename the provided example for JVM Options and tweak the settings as desired.
+ +```sh +cp userConfig/jvm.options_example userConfig/jvm.options +``` + +Also update the docker-compose.override.yml file to uncomment lines in the logstash section. It should look something like this: + +```yaml +logstash: + image: netsage/pipeline_logstash:latest + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +``` + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- The recommended heap size for typical ingestion scenarios should be no less than 4GB and no more than 8GB. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. + +## To Bring up Kibana and Elasticsearch Containers + +The file docker-compose.develop.yml can be used in conjunction with docker-compose.yml to bring up the optional Kibana and Elasticsearch components. + +This isn't a production pattern but the tools can be useful at times. Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana) + +## For Data Saved to an NFS Volume + +By default, data is saved to subdirectories in the ./data directory. If you would like to use an NFS mount instead you will need to either + +1. export the NFS volume as ${PROJECT_DIR}/data (which is the ideal scenario and least intrusive) +2. 
update the path to the NFS export path in all locations in docker-compose.yml and docker-compose.override.yml + +Note: modifying all the paths in the two files should work, but may not. In one case, it worked to modify only the paths for the collector volumes (eg, - /mnt/nfs/netsagedata/netflow:/data), leaving all others with their default values. + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md new file mode 100644 index 00000000..bf3d6856 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_install_simple.md @@ -0,0 +1,103 @@ +--- +id: docker_install_simple +title: Docker Installation Guide +sidebar_label: Docker Installation +--- +In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. + +The Docker containers included in the installation are + - rabbit (the local RabbitMQ server) + - sflow-collector (receives sflow data and writes nfcapd files) + - netflow-collector (receives netflow data and writes nfcapd files) + - importer (reads nfcapd files and puts flows into a local rabbit queue) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) + - ofelia (cron-like downloading of files used by the logstash pipeline) + +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. 
+ + +### 1. Set up Data Sources +The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. + + - sflow + - netflow + - tstat + +At least one of these must be set up on a sensor (flow exporter/router), to provide the incoming flow data. +You can do this step later, but it will be helpful to have it working first. + +Sflow and netflow data should be exported to the pipeline host where there are collectors (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. + +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) + +### 2. Clone the Netsage Pipeline Project + +If you haven't already, install [Docker](https://www.docker.com) and [Docker Compose](https://docs.docker.com/compose/install/) and clone this project +```sh +git clone https://github.com/netsage-project/netsage-pipeline.git +``` +(If you are upgrading to a new release, see the Upgrade section below!) + +Then checkout the right version of the code. +```sh +git checkout {tag} +``` +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.8". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v1.2.8. + +### 3. Create Docker-compose.override.yml + +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. 
Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. + +Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose.override_example.yml must always be copied to docker-compose.override.yml. + +```sh +cp docker-compose.override_example.yml docker-compose.override.yml +``` + +By default docker will bring up a single netflow collector and a single sflow collector. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. +:::note +If you only have one collector, you should remove or comment out the section for the collector that is not used, so it doesn't run and just create empty files. +::: + +This file also specifies port numbers, and directories for nfcapd files. By default, the sflow collector will listen to udp traffic on localhost:9998, while the netflow collector will listen on port 9999, and data will be written to `/data/input_data/`. Each collector is namespaced by its type so the sflow collector will write data to `/data/input_data/sflow/` and the netflow collector will write data to `/data/input_data/netflow/`. Change these only if required. + +Other lines in this file you can ignore for now. + +:::note +If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose. +::: + + +### 4. Create Environment File + +{@import ../components/docker_env.md} + +### 5. Choose Pipeline Version + +Once you've created the docker-compose.override.yml file and finished adjusting it for any customizations, you're ready to select which version Docker should run. 
+ +```sh +./scripts/docker_select_version.sh +``` +When prompted, select the **same version** you checked out earlier. +This script will replace the version numbers of docker images in the docker-compose files with the correct values. + +## Running the Collectors + +After selecting the version to run, you could start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. + +(Or see the next section for how to start all the containers, including the collectors.) + +```sh +docker-compose up -d sflow-collector netflow-collector +``` + +If the collector(s) are running properly, you should see nfcapd files in subdirectories of data/input_data/, and they should have sizes of more than a few hundred bytes. (See Troubleshooting if you have problems.) + + +## Running the Collectors and Pipeline + +{@import ../components/docker_pipeline.md} + diff --git a/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md new file mode 100644 index 00000000..1ad608ab --- /dev/null +++ b/website/versioned_docs/version-1.2.10/deploy/docker_troubleshooting.md @@ -0,0 +1,61 @@ +--- +id: docker_troubleshoot +title: Docker Troubleshooting +sidebar_label: Troubleshooting +--- + +## Troubleshooting + +### If you are not seeing flows after installation + +**Troubleshooting checklist:** + +- Make sure you configured your routers to point to the correct address/port where the collector is running.  +- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. +- Use `docker-compose ps` to be sure the collectors (and other containers) are running. +- Check to see if nfcapd files are being written. There should be a directory for the year, month, day and files should be larger than a few hundred bytes. 
If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file. +- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` +- Check the logs to see if logstash is starting successfully. +- If the final rabbit queue is on an external host, check iptables on that host to be sure incoming traffic from your pipeline host is allowed. + +To see if flows are getting into and being read from the rabbit queue on the pipeline host, you can go to `http://localhost:15672` in your favorite web browser. Login as guest with password guest. Look for accumulating messages and/or messages being acknowledged and published. + +### If flow collection stops + +**Logstash or Importer errors:** +- Make sure all containers are running. `docker ps` +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs -f $service_label` +- Check the logs to see if logstash is starting successfully. + +**Disk space:** +- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. +- Also check to see how much space the nfcapd files are comsuming. You need to add more disk space. You could try saving fewer days of nfcapd files (see Docker Advanced). + +**Memory:** +- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most +likely culprit is logstash which is usually only allocated 2GB of RAM. You'll need to update the jvm.options file to grant it more memory. 
+ +Please see the [Docker Advanced guide](docker_install_advanced.md#customize-logstash-settings) for details on how to customize logstash. + +Applying this snippet to logstash may help. For example, to give logstash (java) 3GB, + +```yaml +environment: + LS_JAVA_OPTS=-Xmx3g +``` + +Alternatively you may also try doing this: + +```yaml +deploy: + resources: + limits: + cpus: "0.50" + memory: 50M + reservations: + cpus: "0.25" + memory: 20M +``` + +Reference: https://docs.docker.com/compose/compose-file/#resources + diff --git a/website/docs/components/docker_upgrade.md b/website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md similarity index 56% rename from website/docs/components/docker_upgrade.md rename to website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md index 708bed07..a598caac 100644 --- a/website/docs/components/docker_upgrade.md +++ b/website/versioned_docs/version-1.2.10/deploy/docker_upgrade.md @@ -1,3 +1,10 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker Upgrading +--- + +To upgrade a previous installment of the Dockerized pipeline, perform the following steps. ### Shut things down @@ -23,19 +30,21 @@ Example: ```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` ::: -### Check/Update Files -- Compare the new docker-compose.override_example.yml file to your docker-compose.override.yml to see if a new version of Docker is required. Look for, eg, version: "3.7" at the top. If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. +### Check/Update Override Files +Occasionally, the required version of Docker or nfdump may change, which will necessitate editing your override and/or env files. + +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml` to see if a new version of Docker is required. Look for, eg, `version: "3.7"` at the top. 
If the version number is different, change it in your docker-compose.override.yml file and upgrade Docker manually. -- In the same files, see if the version of nfdump has changed. Look for lines like "image: netsage/nfdump-collector:1.6.18". If there has been a change, update the version in the override file. (You do not need to actually perform any update yourself.) +- Also check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:1.6.18`. Make sure the version in your override file matches what is in the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) Note that you do not need to update the versions of the importer or logstash images. That will be done for you in the "select release version" stop coming up. -- Also compare your .env file with the new env.example file to see if any new lines or sections have been added. Copy new lines into your .env file, making any appropriate changes to example values. +- Also compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. -- If you used the Docker Advanced guide to make a netsage_override.xml file, compare it to netsage_shared.xml to see if there are any changes. This is unlikely. +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. ### Select Release Version -Run these two commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.8). When asked by the second, select the same version as the tag you checked out. +Run these two commands to select the new release you want to run. 
In the first, replace "{tag}" by the version to run (eg, v1.2.10). When asked by the second, select the same version as the tag you checked out. ```sh git checkout -b {tag} git pull diff --git a/website/versioned_docs/version-1.2.10/devel/docker.md b/website/versioned_docs/version-1.2.10/devel/docker.md new file mode 100644 index 00000000..76735113 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/docker.md @@ -0,0 +1,83 @@ +--- +id: docker_dev_guide +title: Docker Dev Guide +sidebar_label: Docker Dev Guide +--- + +## Selecting a Version + +You can use the "master" version or a tagged version. +To select a released version use the docker_select_version.sh script (see the Deployment Guide). +If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. + +## Installing + +See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. + +## Importer + +The importer "shared" config that Docker uses is defined in compose/importer/netsage_shared.xml. **NOTE: If you want to make changes to this file, you will need to rebuild the container.** + +## Build Images + +The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. + +### Build Using Source Code + +If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: + +```sh +docker-compose -f docker-compose.build.yml build + +``` + +NOTE: The importer container includes the config files for the logstash pipeline. + + +## Optional: ElasticSearch and Kibana + +You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: + +1. Uncomment the following lines in conf-logstash/99-outputs.conf: + +``` +elasticsearch { + hosts => ["elasticsearch"] + index => "netsage_flow-%{+YYYY.MM.dd}" +} +``` + +2. 
Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. + +3. Run the containers using the following line: `docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d` + +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash #bash shell in logstash container +docker-compose exec importer bash #bash shell in importer container +docker-compose exec rabbit bash #bash shell in rabbit container +``` + +### View Container Logs + +``` sh +docker-compose logs -f #view logs for all containers +docker-compose logs -f logstash #view logs for logstash container +docker-compose logs -f importer #view logs for importer container +docker-compose logs -f rabbit #view logs for rabbit container +``` diff --git a/website/versioned_docs/version-1.2.10/devel/documentation_guide.md b/website/versioned_docs/version-1.2.10/devel/documentation_guide.md new file mode 100644 index 00000000..06c1c4a8 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/documentation_guide.md @@ -0,0 +1,142 @@ +--- +id: docusaurus +title: Revising Documentation +sidebar_label: Docusaurus +--- + +This project's documentation uses Docusaurus. + +Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. + +Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. + +To extend the docs simply create a markdown file and reference the ID in the side bar config. Please see the related documentation +at the [docusaurus 2](https://v2.docusaurus.io/) project website. + +*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. 
PLEASE UPDATE WITH CORRECTIONS.* + +## If Not Using Docker +These are instructions for editing and releasing docs without using Docker. + +### Installation + +To get started the first time, install npm, then use that to install yarn +``` +$ sudo yum install npm +$ sudo npm install -g yarn +``` + +Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json +``` +$ cd netsage-pipeline/website +$ yarn install +``` + +### Local Development + +If you are working on your local machine, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. +``` +$ yarn build +$ yarn start +go to http://localhost:3000 +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't foresee any new changes, please do the following: + +``` +$ yarn run docusaurus docs:version a.b.c +``` + +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. 
+ +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c-sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. + +If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. + +If it is not, do it manually: +``` +$ USE_SSH="true" GIT_USER="your-username" yarn deploy +``` +replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) + +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + +### Removing a version + +To remove version 1.2.6, for example, + +we need to: + + * update versions.json to remove the reference + * remove the versioned_docs/version-1.2.6 + * remove versioned_sidebars/version-1.2.6-sidebars.json + +## If Using Docker + +You may also use a docs Docker container to simplify installation, making changes, and deployment. This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. + +### Build and Start the Container + +Git clone the netsage pipeline project then build and start the container. +The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. +``` +$ cd netsage-pipeline/website +$ docker-compose build build_docs +$ docker-compose up -d docs +go to http://localhost:8000/netsage-pipeline/ +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. 
+When finished, git add, git commit, git push, as usual. +Repeat as needed. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't foresee any new changes, please do the following: + +``` +$ docker-compose build build_docs +$ docker-compose run docs yarn run docusaurus docs:version a.b.c +``` +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c-sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +How to do this when using Docker ??? Get into the container ??? + +For now, go to a linux server that has yarn installed and +follow the instructions under If Not Using Docker. + diff --git a/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md new file mode 100644 index 00000000..a061957d --- /dev/null +++ b/website/versioned_docs/version-1.2.10/devel/pipeline_dataset.md @@ -0,0 +1,34 @@ +--- +id: dev_dataset +title: Pipeline Replay Dataset +sidebar_label: Replay Dataset +--- + +The Netsage Pipeline processes network data. Though there are some components and patterns we can use to test +the behavior using things like the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash, and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) plugin, the best +test is to replay network data and inspect the output in the grafana dashboard. 
+ +Two sample data sets are provided for the two types of collectors we have (Netflow and Sflow). The network data and ips have been anonymized and should have no identifying information. + +You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG). + +Please take note of which ports the collectors are listening on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). + +Currently the default ports are: + - 9998/udp for sflow + - 9999/udp for netflow + +Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors) + +In order to replay the data, use the following commands for netflow and sflow respectively: + +### Netflow + +``` +nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 +``` + +### Sflow + +Coming soon. nfreplay will not work with sflow data type. + diff --git a/website/docs/devel/tag.md b/website/versioned_docs/version-1.2.10/devel/tag.md similarity index 94% rename from website/docs/devel/tag.md rename to website/versioned_docs/version-1.2.10/devel/tag.md index 65611923..040de851 100644 --- a/website/docs/devel/tag.md +++ b/website/versioned_docs/version-1.2.10/devel/tag.md @@ -1,7 +1,7 @@ --- id: docker_dev_tag -title: Tagging a Release -sidebar_label: How to Tag a New Release +title: How to Tag a New Release +sidebar_label: Tagging a Release --- To tag a new release, first updated the version number and Changes file, build the rpm, etc. and upgrade on bare-metal hosts using yum. If all works fine, do the following steps to create new Docker images. 
diff --git a/website/docs/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.10/pipeline/elastic_search.md similarity index 100% rename from website/docs/pipeline/elastic_search.md rename to website/versioned_docs/version-1.2.10/pipeline/elastic_search.md diff --git a/website/docs/pipeline/importer.md b/website/versioned_docs/version-1.2.10/pipeline/importer.md similarity index 100% rename from website/docs/pipeline/importer.md rename to website/versioned_docs/version-1.2.10/pipeline/importer.md diff --git a/website/versioned_docs/version-1.2.10/pipeline/intro.md b/website/versioned_docs/version-1.2.10/pipeline/intro.md new file mode 100644 index 00000000..f4cce287 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/intro.md @@ -0,0 +1,37 @@ +--- +id: intro +title: Intro +sidebar_label: Intro +--- +# The NetSage Pipeline + +## Description + +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. +There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. + +## Data Collection + +In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. + +Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. 
+ +Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. + +## Pipeline Components + +The Netsage Flow Processing Pipeline is made of the following components + + - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. + - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. + +## Visualization + +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). + +## Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. 
+ diff --git a/website/versioned_docs/version-1.2.10/pipeline/logstash.md b/website/versioned_docs/version-1.2.10/pipeline/logstash.md new file mode 100644 index 00000000..b27e2ee7 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/logstash.md @@ -0,0 +1,127 @@ +--- +id: logstash +title: Logstash Pipeline +sidebar_label: Logstash +--- + +The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. + +Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. + +Notes: + - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). + - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. + - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. + - Lookup tables for 55-member-orgs.conf that we have compiled are available from sciencregistry.grnoc.iu.edu. See the cron files provided. 
These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. + +## Logstash Sequence + +### 01-input-rabbit.conf + +Reads flows from a rabbitmq queue. (The ".disabled" extension can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) + +### 10-preliminaries.conf + +Drops flows to or from private IP addresses; +converts any timestamps in milliseconds to seconds; +drops events with timestamps more than a year in the past or (10 sec) in the future; +does some data type conversions; +adds @ingest_time (this is mainly for developers). + +### 15-sensor-specific-changes.conf + +Makes any changes to fields needed for specific sensors. This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list, 2) the ability to change the sensor name for flows from a specified sensor which go through a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False. + +### 20-add_id.conf + +Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. + +### 40-aggregation.conf + +Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. + +Notes: + - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. 
If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. + - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. + - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). + - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. + - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. + - Tstat flows come in already complete, so no aggregation is done on those flows. + +### 45-geoip-tagging.conf + +Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; +if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". + +*This product includes GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* + +### 50-asn.conf + +Normally with sflow and netflow, flows come in with source and destination ASNs. If there is no ASN in the input event; or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, tries to get an ASN by IP from the MaxMind ASN database. +Sets ASN to -1 if it is unavailable for any reason. + +### 53-caida-org.conf + +Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file. 
+ +*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).* + +### 55-member-orgs.conf + +Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously. +This allows entities which don't own their own ASs to be listed as the src or dst Organization. + +Note: These lookup tables are not stored in github, but an example is provided to show the layout, and the tables we have can be downloaded via a cron job. + +### 60-scireg-tagging-fakegeoip.conf + +Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc; +removes Registry fields we don't need to save to elasticsearch. + +Notes: + - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation). + +### 70-deidentify.conf + +Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. + +### 80-privatize.org.conf + +Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). +If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Australia, sets all organizations to "AARNet", and removes all Projects. + +### 88-preferred-location-org.conf + +Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. 
If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields. + +### 90-additional-fields.conf + +Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): + - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. (NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + +### 95-cleanup.conf + +Does small misc. tasks at the end like rename, remove, or convert fields + +### 98-post-process.conf + +Adds @exit_time and @processing_time (these are mainly for developers) + +### 99-output-rabbit.conf + +Sends results to a final RabbitMQ queue. (".disabled" can be removed from other output configs to send flows to other places) + +### Final Stage + +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. 
+ +## Field names + +The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). + + diff --git a/website/docs/pipeline/nfdump.md b/website/versioned_docs/version-1.2.10/pipeline/nfdump.md similarity index 100% rename from website/docs/pipeline/nfdump.md rename to website/versioned_docs/version-1.2.10/pipeline/nfdump.md diff --git a/website/versioned_docs/version-1.2.10/pipeline/tstat.md b/website/versioned_docs/version-1.2.10/pipeline/tstat.md new file mode 100644 index 00000000..baab97c5 --- /dev/null +++ b/website/versioned_docs/version-1.2.10/pipeline/tstat.md @@ -0,0 +1,16 @@ +--- +id: tstat +title: Tstat Data Collection +sidebar_label: Tstat Data +--- + +## Netsage GitHub Project + +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +## Docker + +Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). 
+ + + diff --git a/website/versioned_docs/version-1.2.11/components/docker_env.md b/website/versioned_docs/version-1.2.11/components/docker_env.md new file mode 100644 index 00000000..0bfe77ac --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_env.md @@ -0,0 +1,40 @@ +Next, copy `env.example` to `.env` +```sh +cp env.example .env +``` + +then edit the .env file to set the sensor names to unique identifiers (with spaces or not, no quotes) +```sh +# Importer settings +sflowSensorName=My sflow sensor name +netflowSensorName=My netflow sensor name +``` + + - If you have only one collector, remove or comment out the line for the one you are not using. + - If you have more than one of the same type of collector, see the "Docker Advanced" documentation. + +:::note +These names uniquely identify the source of the data and will be shown in the Grafana dashboards. In elasticsearch, they are saved in the `meta.sensor_id` field. Choose names that are meaningful and unique. +For example, your sensor names might be "MyNet New York Sflow" and "MyNet Boston Netflow" or "MyNet New York - London" and "MyNet New York - Paris". Whatever makes sense in your situation. +::: + +You will also want to edit the **Logstash output rabbit queue** section. This section defines where the final data will land after going through the pipeline. By default, it will be written to a rabbitmq queue on `rabbit`, ie, the local rabbitMQ server running in the docker container. Enter a hostname to send to a remote rabbitMQ server (also the correct username, password, and queue key/name). + +```sh +rabbitmq_output_host=rabbit@mynet.edu +rabbitmq_output_username=guest +rabbitmq_output_pw=guest +rabbitmq_output_key=netsage_archive_input +``` +:::note +To send processed flow data to GlobalNOC at Indiana University, you will need to obtain settings for this section from your contact. A new queue may need to be set up at IU, as well as allowing traffic from your pipeline host. 
(At IU, data from this final rabbit queue will be moved into an Elasticsearch instance for storage and viewing.) +::: + +The following options are described in the Docker Advanced section: + +**To drop all flows except those using the specified interfaces**: Use if only some flows from a router are of interest and those can be identified by interface. + +**To change the sensor name for flows using a certain interface**: Use if you want to break out some flows coming into a port and give them a different sensor name. + +**To "manually" correct flow sizes and rates for sampling for specified sensors**: Use if sampling corrections are not being done automatically. Normally you do not need to use this, but check flows to be sure results are reasonable. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_first_steps.md b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md new file mode 100644 index 00000000..9a75fb05 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_first_steps.md @@ -0,0 +1,26 @@ +#### saving this for now in case I need to put it back ####### + +Then checkout the latest version of the code. If you are a developer you'll want the latest version from master, otherwise please make sure +you've checked out the latest tagged version. + +For example, +```sh +## Normal Deployment, eg, checkout version 1.2.8 +$ git fetch +$ git checkout v1.2.8 -b v1.2.8 + +## Developers +$ git fetch +$ git reset --hard origin/master +``` + +:::warning +git reset --hard will obliterate any changes. On initial installation, you should not have any, but if you do wish to save any state, please make sure you commit and backup to a feature branch before continuing + +Example: +```git commit -a -m "Saving local state"; git checkout -b feature/backup; git checkout master``` +::: + + +All instructions that follow assume these first steps were performed successfully. 
If not, you'll likely run into errors down the line if the code doesn't match up with the instructions provided. + diff --git a/website/versioned_docs/version-1.2.11/components/docker_pipeline.md b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md new file mode 100644 index 00000000..a0709f08 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/components/docker_pipeline.md @@ -0,0 +1,31 @@ +Start up the pipeline (all containers) using: + +```sh +# docker-compose up -d +``` + +This will also restart any containers/processes that have died. "-d" runs containers in the background. + +You can see the status of the containers and whether any have died (exited) using +```sh +# docker-compose ps +``` + +To check the logs for each of the containers, run + +```sh +# docker-compose logs +# docker-compose logs logstash +# docker-compose logs importer +etc. +``` + +Add `-f` or, e.g., `-f logstash` to see new log messages as they arrive. `--timestamps`, `--tail`, and `--since` are also useful -- look up details in Docker documentation. + +To shut down the pipeline (all containers) use + +```sh +# docker-compose down +``` + +Run all commands from the netsage-pipeline/ directory. diff --git a/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md new file mode 100644 index 00000000..c0c21510 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/bare_metal_install.md @@ -0,0 +1,299 @@ +--- +id: bare_metal_install +title: Manual Installation Guide +sidebar_label: Manual Installation +--- + +This document covers installing the NetSage Flow Processing Pipeline manually on a new machine (without using Docker). Steps should be followed below in order unless you know for sure what you are doing. This document assumes a RedHat Linux environment or one of its derivatives. + +## Data sources + +The Processing pipeline needs data to ingest in order to do anything. 
There are two types of data that can be consumed. + +1. sflow or netflow +2. tstat + +At least one of these must be set up on a sensor to provide the incoming flow data. + +Sflow and netflow data should be sent to ports on the pipeline host where nfcapd and/or sfcapd are ready to receive it. + +Tstat data should be sent directly to the logstash input RabbitMQ queue (the same one that the Importer writes to, if it is used). From there, the data will be processed the same as sflow/netflow data. + +## Installing the Prerequisites + +### Installing nfdump + +The nfdump package provides nfcapd and sfcapd processes which receive flow data and write nfcapd files. +The Importer also uses nfdump. If you are only collecting tstat data, you do not need nfdump. + + +Nfdump is _not_ listed as a dependency of the Pipeline RPM package, as in a lot of cases people are running special builds of nfdump -- but make sure you install it before you try running the Netflow Importer. If in doubt, `yum install nfdump` should work. +Flow data exported by some routers require a newer version of nfdump than the one in the CentOS repos; in these cases, it may be necessary to manually compile and install the latest nfdump. + +:::note +It is recommended to check the version of nfdump used in the Docker installation and use the same or newer in order to be sure that any fixes for impactful issues are included. +::: + + +If desired, you can also install nfsen, which has a UI for viewing flow data and can manage starting and stopping all the nfcapd/sfcapd processes for you. The nfsen.conf file has a section in which to configure all the sources. + +### Installing RabbitMQ + +The pipeline requires a RabbitMQ server. Typically, this runs on the same server as the pipeline itself, but if need be, you can separate them (for this reason, the Rabbit server is not automatically installed with the pipeline package). 
+ +```sh +[root@host ~]# yum install rabbitmq-server + +``` + +Typically, the default configuration will work. Perform any desired Rabbit configuration, then, start RabbitMQ: + +```sh +[root@host ~]# /sbin/service rabbitmq-server start + or # systemctl start rabbitmq-server.service +``` + +### Installing Logstash + +See the logstash documentation. We are currently using Version 7.10. + +### Installing the EPEL repo + +Some of our dependencies come from the EPEL repo. To install this: + +``` +[root@host ~]# yum install epel-release +``` + +### Installing the GlobalNOC Open Source repo + +The Pipeline package (and its dependencies that are not in EPEL) are in the GlobalNOC Open Source Repo. + +For Red Hat/CentOS 6, create `/etc/yum.repos.d/grnoc6.repo` with the following content. + +``` +[grnoc6] +name=GlobalNOC Public el6 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/6/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC6 +``` + +For Red Hat/CentOS 7, create `/etc/yum.repos.d/grnoc7.repo` with the following content. + +``` +[grnoc7] +name=GlobalNOC Public el7 Packages - $basearch +baseurl=https://repo-public.grnoc.iu.edu/repo/7/$basearch +enabled=1 +gpgcheck=1 +gpgkey=https://repo-public.grnoc.iu.edu/repo/RPM-GPG-KEY-GRNOC7 +``` + +The first time you install packages from the repo, you will have to accept the GlobalNOC repo key. + +## Installing the Pipeline (Importer and Logstash configs) + +Install it like this: + +``` +[root@host ~]# yum install grnoc-netsage-deidentifier +``` + +Pipeline components: + +1. Flow Filter - GlobalNOC uses this for Cenic data to filter out some flows. Not needed otherwise. +2. Netsage Netflow Importer - required to read nfcapd files from sflow and netflow importers. (If using tstat flow sensors only, this is not needed.) +3. 
Logstash - be sure the number of logstash pipeline workers in /etc/logstash/logstash.yml is set to 1 or flow stitching/aggregation will not work right! +4. Logstash configs - these are executed in alphabetical order. See the Logstash doc. At a minimum, the input, output, and aggregation configs have parameters that you will need to update or confirm. + +Nothing will automatically start after installation as we need to move on to configuration. + +## Importer Configuration + +Configuration files of interest are + - /etc/grnoc/netsage/deidentifier/netsage_shared.xml - Shared config file allowing configuration of collections, and Rabbit connection information + - /etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml - other settings + - /etc/grnoc/netsage/deidentifier/logging.conf - logging config + - /etc/grnoc/netsage/deidentifier/logging-debug.conf - logging config with debug enabled + +### Setting up the shared config file + +`/etc/grnoc/netsage/deidentifier/netsage_shared.xml` + +There used to be many perl-based pipeline components and daemons. At this point, only the importer is left, the rest having been replaced by logstash. The shared config file, which was formerly used by all the perl components, is read before reading the individual importer config file. + +The most important part of the shared configuration file is the definition of collections. Each sflow or netflow sensor will have its own collection stanza. Here is one such stanza, a netflow example. Instance and router-address can be left commented out. + +``` + + + /path/to/netflow-files/ + + + Netflow Sensor 1 + + + sflow + + + + + + + + +``` + +Having multiple collections in one importer can sometimes cause issues for aggregation, as looping through the collections one at a time adds to the time between the flows, affecting timeouts. You can also set up multiple Importers with differently named shared and importer config files and separate init.d files. 
+ +There is also RabbitMQ connection information in the shared config, though queue names are set in the Importer config. (The Importer does not read from a rabbit queue, but other old components did, so both input and output are set.) + +Ideally, flows should be deidentified before they leave the host on which the data is stored. If flows that have not be deidentified need to be pushed to another node for some reason, the Rabbit connection must be encrypted with SSL. + +If you're running a default RabbitMQ config, which is open only to 'localhost' as guest/guest, you won't need to change anything here. + +``` + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + + + + 127.0.0.1 + 5672 + guest + guest + 0 + 100 + / + 1 + +``` + +### Setting up the Importer config file + +`/etc/grnoc/netsage/deidentifier/netsage_netflow_importer.xml` + +This file has a few more setting specific to the Importer component which you may like to adjust. + + - Rabbit_output has the name of the output queue. This should be the same as that of the logstash input queue. + - (The Importer does not actually use an input rabbit queue, so we add a "fake" one here.) + - Min-bytes is a threshold applied to flows aggregated within one nfcapd file. Flows smaller than this will be discarded. + - Min-file-age is used to be sure files are complete before being read. + - Cull-enable and cull-ttl can be used to have nfcapd files older than some number of days automatically deleted. + - Pid-file is where the pid file should be written. Be sure this matches what is used in the init.d file. + - Keep num-processes set to 1. + +```xml + + + + + + netsage_deidentifier_netflow_fake + 2 + + + + 3 + netsage_deidentifier_raw + + + + + 100 + + + 1 + + + + + + /var/cache/netsage/netflow_importer.cache + + + + 100000000 + + + 10m + + + + + + + + + + + + + /var/run/netsage-netflow-importer-daemon.pid + + + +``` + +## Logstash Setup Notes + +Standard logstash filter config files are provided with this package. 
Most should be used as-is, but the input and output configs may be modified for your use. + +The aggregation filter also has settings that may be changed - check the two timeouts and the aggregation maps path. + +When upgrading, these logstash configs will not be overwritten. Be sure any changes get copied into the production configs. + +FOR FLOW STITCHING/AGGREGATION - IMPORTANT! +Flow stitching (ie, aggregation) will NOT work properly with more than ONE logstash pipeline worker! +Be sure to set "pipeline.workers: 1" in /etc/logstash/logstash.yml and/or /etc/logstash/pipelines.yml. When running logstash on the command line, use "-w 1". + +## Start Logstash + +```sh +[root@host ~]# /sbin/service logstash start + or # systemctl start logstash.service +``` +It will take a couple of minutes to start. Log files are normally /var/log/messages and /var/log/logstash/logstash-plain.log. + +When logstash is stopped, any flows currently "in the aggregator" will be written out to /tmp/logstash-aggregation-maps (or the path/file set in 40-aggregation.conf). These will be read in and deleted when logstash is started again. + +## Start the Importer + +Typically, the daemons are started and stopped via init script (CentOS 6) or systemd (CentOS 7). They can also be run manually. The daemons all support these flags: + +`--config [file]` - specify which config file to read + +`--sharedconfig [file]` - specify which shared config file to read + +`--logging [file]` - the logging config + +`--nofork` - run in foreground (do not daemonize) + +```sh +[root@host ~]# /sbin/service netsage-netflow-importer start + or # systemctl start netsage-netflow-importer.service +``` +The Importer will create a daemon process and a worker process. When stopping the service, the worker process might take a few minutes to quit. If it does not quit, kill it by hand. + + +## Cron jobs + +Sample cron files are provided. Please review and uncomment their contents. 
These periodically download MaxMind, CAIDA, and Science Registry files, and also restart logstash. Logstash needs to be restarted in order for any updated files to be read in. + + + diff --git a/website/versioned_docs/version-1.2.11/deploy/choosing.md b/website/versioned_docs/version-1.2.11/deploy/choosing.md new file mode 100644 index 00000000..43ae4429 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/choosing.md @@ -0,0 +1,25 @@ +--- +id: choose_install +title: Choosing an Installation Procedure +sidebar_label: Choose Install +--- + +## Manual or BareMetal Installation + +The Manual (baremetal) Installation Guide will walk you through installing the pipeline using your own server infrastructure and requires you to maintain all the components involved. + +It will likely be a bit better when it comes to performance, and have greater flexibility, but there is also more complexity involved in configuring and setting up. + +If you are the ultimate consumer of the data then setting up a baremetal version might be worth doing. Or at least the final rabbitMQ that will be holding the data since it'll likely need to handle a large dataset. + +## Dockerized Version + +The Docker version makes it trivial to bring up the pipeline for both a developer and consumer. The work is mostly already done for you. It should be a simple matter of configuring a few env settings and everything should 'just' work. + +If you are simply using the pipeline to deliver the anonymized network stats for someone else's consumption, then using the docker pipeline would be preferred. 
+ +## Choose your adventure + +- [Manual/Server Installation](bare_metal_install) +- [Simple Docker](docker_install_simple.md) - 1 netflow sensor and/or 1 sflow sensor +- [Advanced Docker](docker_install_advanced.md) - options that allow for more complex configurations diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md new file mode 100644 index 00000000..bc84b812 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_advanced.md @@ -0,0 +1,216 @@ +--- +id: docker_install_advanced +title: Docker Advanced Options Guide +sidebar_label: Docker Advanced Options +--- + +If the basic Docker Installation does not meet your needs, the following customizations will allow for more complex situations. Find the section(s) which apply to you. + +*Please first read the Docker Installation guide in detail. This guide will build on top of that.* + + +## To Add an Additional Sflow or Netflow Collector + +If you have more than 1 sflow and/or 1 netflow sensor, you will need to create more collectors and modify the importer config file. The following instructions describe the steps needed to add one additional sensor. + +Any number of sensors can be accommodated, although if there are more than a few being processed by the same Importer, you may run into issues where long-lasting flows from sensor A time out in the aggregation step while waiting for flows from sensors B to D to be processed. (Another option might be to run more than one Docker deployment.) + + +#### a. Edit docker-compose.override.yml + +The pattern to add a flow collector is always the same. 
To add an sflow collector called example-collector, edit the docker-compose.override.yml file and add something like + +```yaml + example-collector: + image: netsage/nfdump-collector:alpine-1.6.23 + restart: always + command: sfcapd -T all -l /data -S 1 -w -z -p 9997 + volumes: + - ./data/input_data/example:/data + ports: + - "9997:9997/udp" +``` + +- collector name: should be updated to something that has some meaning, in our example "example-collector". +- image: copy from the default collector sections already in the file. +- command: choose between "sfcapd" for sflow and "nfcapd" for netflow, and at the end of the command, specify the port to watch for incoming flow data. +- volumes: specify where to write the nfcapd files. Make sure the path is unique and in ./data/. In this case, we're writing to ./data/input_data/example. Change "example" to something meaningful. +- ports: make sure the port here matches the port you've set in the command. Naturally all ports have to be unique for this host and the router should be configured to export data to the same port. (?? If the port on your docker container is different than the port on your host/local machine, use container_port:host_port.) + +Make sure the indentation is right or you'll get an error about yaml parsing. + +You will also need to uncomment these lines: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +#### b. Edit netsage_override.xml + +To make the Pipeline Importer aware of the new data to process, you will need to create a custom Importer configuration: netsage_override.xml. This will replace the usual config file netsage_shared.xml. + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +Edit netsage_override.xml and add a new "collection" section for the new sensor as in the following example. The flow-path should match the path set above in docker-compose.override.yml. 
$exampleSensorName is a new "variable"; don't replace it here, it will be replaced with a value that you set in the .env file. For the flow-type, enter "sflow" or "netflow" as appropriate. (Enter "netflow" if you're running IPFIX.) + +```xml + + /data/input_data/example/ + $exampleSensorName + sflow + +``` + +#### c. Edit environment file + +Then, in the .env file, add a line that sets a value for the "variable" you referenced above, $exampleSensorName. The value is the name of the sensor which will be saved to elasticsearch and which appears in Netsage Dashboards. Set it to something meaningful and unique. E.g., + +```ini +exampleSensorName=MyNet Los Angeles sFlow +``` + + +#### d. Running the new collector + +After doing the setup above and selecting the docker version to run, you can start the new collector by running the following line, using the collector name (or by running `docker-compose up -d` to start up all containers): + +```sh +docker-compose up -d example-collector +``` + +## To Keep Only Flows From Certain Interfaces +If your sensors are exporting all flows, but only those using a particular interface are relevant, use this option in the .env file. The collectors and importer will save/read all incoming flows, but the logstash pipeline will drop those that do not have src_ifindex OR dst_ifindex equal to one of those listed. + +In the .env file, uncomment lines in the appropriate section and enter the information required. Be sure `ifindex_filter_flag=True` with "True" capitalized as shown, any sensor names are spelled exactly right, and list all the ifindex values of flows that should be kept and processed. Some examples (use just one!): + +```sh +ifindex_filter_keep=123 +ifindex_filter_keep=123,456 +ifindex_filter_keep=Sensor 1: 789 +ifindex_filter_keep=123; Sensor 1: 789; Sensor 2: 800, 900 +``` + +In the first case, all flows that have src_ifindex = 123 or dst_ifindex = 123 will be kept, regardless of sensor name. 
(Note that this may be a problem if you have more than 1 sensor with the same ifindex values!) +In the 2nd case, if src or dst ifindex is 123 or 456, the flow will be processed. +In the 3rd case, only flows from Sensor 1 will be filtered, with flows using ifindex 789 kept. +In the last example, any flow with ifindex 123 will be kept. Sensor 1 flows with ifindex 789 (or 123) will be kept, and those from Sensor 2 having ifindex 800 or 900 (or 123) will be kept. + +Spaces don't matter except within the sensor names. Punctuation is required as shown. + + +## To Change a Sensor Name Depending on the Interface Used +In some cases, users want to keep all flows from a certain sensor but differentiate between those that enter or exit through specific sensor interfaces. This can be done by using this option in the .env file. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 4 fields are set properly! For example, + +```sh +ifindex_sensor_rename_flag=True +ifindex_sensor_rename_old_name=IU Sflow +ifindex_sensor_rename_new_name=IU Bloomington Sflow +ifindex_sensor_rename_ifindex=10032 +``` + +In this case, any flows from the "IU Sflow" sensor that use interface 10032 (src_ifindex = 10032 OR dst_ifindex = 10032) will have the sensor name changed from "IU Sflow" to "IU Bloomington Sflow". Currently, only one such rename can be configured in Docker and only 1 ifindex is allowed. + +:::note +Please notify the devs at IU in advance, if you need to modify a sensor name, because the regexes used for determining sensor_group and sensor_type may have to be updated. +::: + +## To Do Sampling Rate Corrections in Logstash +When flow sampling is done, corrections have to be applied. 
For example, if you are sampling 1 out of 100 flows, for each flow measured, it is assumed that in reality there would be 100 flows of that size with that src and dst, so the number of bits (and the number of packets, bits/s and packets/s) is multiplied by 100. Usually the collector (nfcapd or sfcapd process) gets the sampling rate from the incoming data and applies the correction, but in some cases, the sensor may not send the sampling rate, or there may be a complex set-up that requires a manual correction. With netflow, a manual correction can be applied using the '-s' option in the nfsen config, if nfsen is being used, or the nfcapd command, but this is not convenient when using Docker. For sflow, there is no such option. In either case, the correction can be made in logstash as follows. + +In the .env file, uncomment the appropriate section and enter the information required. Be sure "True" is capitalized as shown and all 3 fields are set properly! The same correction can be applied to multiple sensors by using a comma-separated list. The same correction applies to all listed sensors. For example, + +```sh +sampling_correction_flag=True +sampling_correction_sensors=IU Bloomington Sflow, IU Sflow +sampling_correction_factor=512 +``` + +## To Change How Long Nfcapd Files Are Kept +The importer will automatically delete older nfcapd files for you, so that your disk doesn't fill up. By default, 3 days worth of files will be kept. This can be adjusted by making a netsage_override.xml file: + +```sh +cp compose/importer/netsage_shared.xml userConfig/netsage_override.xml +``` + +At the bottom of the file, edit this section to set the number of days worth of files to keep. Set cull-enable to 0 for no culling. 
Eg, to save 1 day's worth of data: +````xml + + 1 + 1 + +```` + +You will also need to uncomment these lines in docker-compose.override.yml: + +```yaml + volumes: + - ./userConfig/netsage_override.xml:/etc/grnoc/netsage/deidentifier/netsage_shared.xml +``` + + +## To Save Flow Data to a Different Location + +By default, data is saved to subdirectories in the ./data/ directory (ie, the data/ directory in the git checkout). If you would like to use a different location, there are two options. + +1. The best solution is to create a symlink between ./data/ and the preferred location, or, for an NFS volume, export it as ${PROJECT_DIR}/data. + +During installation, delete the data/ directory (it should only contain .placeholder), then create your symlink. Eg, to use /var/netsage/ instead of data/, +```sh +cd {netsage-pipeline dir} +mkdir /var/netsage +rm data/.placeholder +rmdir data +ln -s /var/netsage {netsage-pipeline dir}/data +``` +(Check the permissions of the directory.) + +2. Alternatively, update volumes in docker-compose.yml and docker-compose.override.yml. Eg, to save nfcapd files to subdirs in /mydir, set the collector volumes to `- /mydir/input_data/netflow:/data` (similarly for sflow) and set the importer and logstash volumes to `- /mydir:/data`. + +:::warning +If you choose to update the docker-compose file, keep in mind that those changes will cause a merge conflict or be wiped out on upgrade. +You'll have to manage the volumes exported and ensure all the paths are updated correctly for the next release manually. +::: + +## To Customize Java Settings / Increase Memory Available for Logstash + + +If cpu or memory seems to be a problem, try increasing the JVM heap size for logstash from 2GB to 3 or 4, no more than 8. + +To do this, edit LS_JAVA_OPTS in the .env file. 
+```yaml +LS_JAVA_OPTS=-Xmx4g -Xms4g +``` + +Here are some tips for adjusting the JVM heap size (https://www.elastic.co/guide/en/logstash/current/jvm-settings.html): + +- Set the minimum (Xms) and maximum (Xmx) heap allocation size to the same value to prevent the heap from resizing at runtime, which is a very costly process. +- CPU utilization can increase unnecessarily if the heap size is too low, resulting in the JVM constantly garbage collecting. You can check for this issue by doubling the heap size to see if performance improves. +- Do not increase the heap size past the amount of physical memory. Some memory must be left to run the OS and other processes. As a general guideline for most installations, don’t exceed 50-75% of physical memory. The more memory you have, the higher percentage you can use. + +To modify other logstash settings, copy the provided example file for JVM Options and tweak the settings as desired: + +```sh +cp userConfig/jvm.options_example userConfig/jvm.options +``` + +Also update the docker-compose.override.yml file to uncomment lines in the logstash section. It should look something like this: + +```yaml +logstash: + image: netsage/pipeline_logstash:latest + volumes: + - ./userConfig/jvm.options:/usr/share/logstash/config/jvm.options +``` + +## To Bring up Kibana and Elasticsearch Containers + +The file docker-compose.develop.yml can be used in conjunction with docker-compose.yml to bring up the optional Kibana and Elasticsearch components. + +This isn't a production pattern but the tools can be useful at times. 
Please refer to the [Docker Dev Guide](../devel/docker_dev_guide#optional-elasticsearch-and-kibana). + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md new file mode 100644 index 00000000..c4216138 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_install_simple.md @@ -0,0 +1,121 @@ +--- +id: docker_install_simple +title: Docker Installation Guide +sidebar_label: Docker Installation +--- +In this deployment guide, you will learn how to deploy a basic Netsage setup that includes one sflow and/or one netflow collector. If you have more than one collector of either type, or other special situations, see the Docker Advanced guide. + +The Docker containers included in the installation are + - rabbit (the local RabbitMQ server) + - sflow-collector (receives sflow data and writes nfcapd files) + - netflow-collector (receives netflow data and writes nfcapd files) + - importer (reads nfcapd files and puts flows into a local rabbit queue) + - logstash (logstash pipeline that processes flows and sends them to their final destination, by default a local rabbit queue) + - ofelia (cron-like downloading of files used by the logstash pipeline) + +The code and configs for the importer and logstash pipeline can be viewed in the netsage-project/netsage-pipeline github repo. See netsage-project/docker-nfdump-collector for code related to the collectors. + + +### 1. Set up Data Sources +The data processing pipeline needs data to ingest in order to do anything, of course. There are three types of data that can be consumed. + + - sflow + - netflow + - tstat + +At least one of these must be set up on a *sensor* (i.e., flow *exporter* / router), to provide the incoming flow data. +You can do this step later, but it will be helpful to have it working first. 
+ +Sflow and netflow data should be exported to the pipeline host where there will be *collectors* (nfcapd and/or sfcapd processes) ready to receive it (see below). To use the default settings, send sflow to port 9998 and netflow/IPFIX to port 9999. On the pipeline host, allow incoming traffic from the flow exporters, of course. + +Tstat data should be sent directly to the logstash input rabbit queue "netsage_deidentifier_raw" on the pipeline host. No collector is needed for tstat data. See the netsage-project/tstat-transport repo. From there, logstash will grab the data and process it the same way as it processes sflow/netflow data. (See the Docker Advanced guide.) + +### 2. Set up a Pipeline Host +Decide where to run the Docker Pipeline and get it set up. Adjust iptables to allow the flow exporters (routers) to send flow data to the host. + +Install Docker Engine (docker-ce, docker-ce-cli, containerd.io) - see instructions at [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/). + +Install Docker Compose from Docker's GitHub repository - see [https://docs.docker.com/compose/install/](https://docs.docker.com/compose/install/). You need to **specify version 1.29.2** (or newer) in the curl command. + +Check default file permissions. If the *logstash* user is not able to access the logstash config files in the git checkout, you'll get an error from logstash saying there are no .conf files found even though they are there. Various components also need to be able to read and write to the data/ directory in the checkout. Defaults of 775 (u=rwx, g=rwx, o=rx) should work. + +### 3. Clone the Netsage Pipeline Project + +Clone the netsage-pipeline project from github. 
+```sh +git clone https://github.com/netsage-project/netsage-pipeline.git +``` + +When the pipeline runs, it uses the logstash conf files that are in the git checkout (in conf-logstash/), as well as a couple other files like docker-compose.yml, so it is important to check out the correct version. + +Move into the netsage-pipeline/ directory (**all git and docker commands must be run from inside this directory!**), then check out the most recent version of the code. It will say you are in 'detached HEAD' state if you don't include -b. +```sh +git checkout {tag} +``` +Replace "{tag}" with the release version you intend to use, e.g., "v1.2.11". ("Master" is the development version and is not intended for general use!) +`git status` will confirm which branch you are on, e.g., master or v1.2.11. + +### 4. Create Docker-compose.override.yml + +Information in the `docker-compose.yml` file tells docker which containers (processes) to run and sets various parameters for them. +Settings in the `docker-compose.override.yml` file will overrule and add to those. Note that docker-compose.yml should not be edited since upgrades will replace it. Put all customizations in the override file, since override files will not be overwritten. + +Collector settings may need to be edited by the user, so the information that docker uses to run the collectors is specified (only) in the override file. Therefore, docker-compose.override_example.yml must always be copied to docker-compose.override.yml. + +```sh +cp docker-compose.override_example.yml docker-compose.override.yml +``` + +By default docker will bring up a single sflow collector and a single netflow collector that listen to udp traffic on ports localhost:9998 and 9999. If this matches your case, you don't need to make any changes to the docker-compose.override_example.yml. + +- If you have only one collector, remove or comment out the section for the one not needed so the collector doesn't run and simply create empty nfcapd files. 
+- If the collectors need to listen to different ports, make the appropriate changes here in both the "command:" and "ports:" lines. +- By default, the collectors will save flows to nfcapd files in sflow/ and netflow/ subdirectories in `./data/input_data/` (i.e., the data/ directory in the git checkout). If you need to save the data files to a different location, see the Docker Advanced section. + +Other lines in this file you can ignore for now. + +:::note +If you run into issues, try removing all the comments in the override file as they may conflict with the parsing done by docker-compose, though we have not found this to be a problem. +::: + +### 5. Choose Pipeline Version + +Once you've created the docker-compose.override.yml file and finished adjusting it for any customizations, you're ready to select which image versions Docker should run. + +```sh +./scripts/docker_select_version.sh +``` +When prompted, select the **same version** you checked out earlier. + +This script will replace the version numbers of docker images in docker-compose.override.yml and docker-compose.yml with the correct values. + +### 6. Create Environment File + +{@import ../components/docker_env.md} + +## Testing the Collectors + +At this point, you can start the two flow collectors by themselves by running the following line. If you only need one of the collectors, remove the other from this command. + +(See the next section for how to start all the containers, including the collectors.) + +```sh +docker-compose up -d sflow-collector netflow-collector +``` + +Subdirectories for sflow/netflow, year, month, and day are created automatically under `data/input_data/`. File names contain dates and times. +These are not text files; to view the contents, use an [nfdump command](http://www.linuxcertif.com/man/1/nfdump/) (you will need to install nfdump). +Files will be deleted automatically by the importer as they age out (the default is to keep 3 days). 
+ +If the collector(s) are running properly, you should see nfcapd files being written every 5 minutes and they should have sizes of more than a few hundred bytes. (Empty files still have header and footer lines.) +See Troubleshooting if you have problems. + +To stop the collectors +```sh +docker-compose down +``` + +## Running the Collectors and Pipeline + +{@import ../components/docker_pipeline.md} + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md new file mode 100644 index 00000000..7cfc2690 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_troubleshooting.md @@ -0,0 +1,35 @@ +--- +id: docker_troubleshoot +title: Docker Troubleshooting +sidebar_label: Troubleshooting +--- + +## Troubleshooting + +### If you are not seeing flows after installation + +**Troubleshooting checklist:** + +- Use `docker-compose ps` to be sure the collectors (and other containers) are running. +- Make sure you configured your routers to point to the correct address/port where the collector is running.  +- Check iptables on your pipeline host to be sure incoming traffic from the routers is allowed. +- Check to see if nfcapd files are being written. There should be a directory for the year, month, and day in netsage-pipeline/data/input_data/netflow/ or sflow/, and files should be larger than a few hundred bytes. If the files exist but are too small, the collector is running but there are no incoming flows. "nfdump -r filename" will show the flows in a file (you may need to install nfdump). +- Make sure you created .env and docker-compose.override.yml files and updated the settings accordingly, sensorName especially since that identifies the source of the data. +- Check the logs of the various containers to see if anything jumps out as being invalid.  `docker-compose logs $service`, where $service is logstash, importer, rabbit, etc. 
+- If the final rabbit queue is on an external host, check the credentials you are using and whether iptables on that host allows incoming traffic from your pipeline host. + +### If flow collection stops + +**Errors:** +- See if any of the containers has died using `docker ps` +- Check the logs of the various containers to see if anything jumps out as being invalid. Eg, `docker-compose logs logstash`. +- If logstash dies with an error about not finding \*.conf files, make sure conf-logstash/ and directories and files within are readable by everyone (and directories are executable by everyone). The data/ directory and subdirectories need to be readable and writable by everyone, as well. + +**Disk space:** +- If the pipeline suddenly fails, check to see if the disk is full. If it is, first try getting rid of old docker images and containers to free up space: `docker image prune -a` and `docker container prune`. +- Also check to see how much space the nfcapd files are consuming. You may need to add more disk space. You could also try automatically deleting nfcapd files after a fewer number of days (see Docker Advanced). + +**Memory:** +- If you are running a lot of data, sometimes docker may need to be allocated more memory. The most +likely culprit is logstash (java) which is only allocated 2GB of RAM by default. Please see the Docker Advanced guide for how to change. + diff --git a/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md new file mode 100644 index 00000000..d640d12a --- /dev/null +++ b/website/versioned_docs/version-1.2.11/deploy/docker_upgrade.md @@ -0,0 +1,80 @@ +--- +id: docker_upgrade +title: Upgrading +sidebar_label: Docker - Upgrading +--- + +To upgrade a previous installment of the Dockerized pipeline, perform the following steps. 
+ +### Shut things down + +```sh +cd {netsage-pipeline directory} +docker-compose down +``` +This will stop and remove all the docker containers, including the importer, logstash, and any collectors. Note that incoming flow data will not be saved during the time the collectors are down. + +### Update Source Code + +To upgrade to a new release, pull new tags/code from github and docker images from dockerhub. Your customized .env and override files will not be overwritten, nor will data files, cache files, or downloaded support files. + +```sh +git reset --hard +git pull origin master +``` + +:::warning +git reset --hard will obliterate any changes you have made to non-override files, eg, logstash conf files. If necessary, please make sure you commit and save to a feature branch before continuing. +::: + +Run these three commands to select the new release you want to run. In the first, replace "{tag}" by the version to run (eg, v1.2.11). When asked by the third, select the same version as the tag you checked out. +```sh +git checkout -b {tag} +git pull +./scripts/docker_select_version.sh +``` +The docker-compose.yml and docker-compose.override.yml should both now have the version number you selected for pipeline_importer and pipeline_logstash. + +### Check/Update Customization Files +Occasionally, something may change which will necessitate editing your override and/or env file. + +- Compare the new `docker-compose.override_example.yml` file to your `docker-compose.override.yml`. Be sure to check to see if the version of nfdump has changed. Look for lines like `image: netsage/nfdump-collector:`. Make sure the version in your override file matches what is in the example file. (You do not need to actually perform any upgrade yourself. This will ensure the correct version is pulled from Docker Hub.) + +- Also, look for `version: "x.x"` at the top. If the version number is different, change it in your docker-compose.override.yml file. 
(This is the Compose file format version.) + + +- Compare your `.env` file with the new `env.example` file to see if any new lines or sections have been added. If there have been any changes relevant to your deployment, eg, new options you want to use, copy the changes into your .env file. + +- If you used the Docker Advanced guide to make a `netsage_override.xml` file, compare it to `netsage_shared.xml` to see if there are any changes. This is unlikely. + + +### Update Docker Containers + +This should be done automatically when you start up the containers, but you can also pull new images from Docker Hub now. + +``` +docker-compose pull +``` + +### Restart all the Docker Containers + +``` +docker-compose up -d +``` + +This will start all the services/containers listed in the docker-compose.yml and docker-compose.override.yml files, including the importer, logstash pipeline, and collectors. + +### Delete old images and containers + +To keep things tidy, delete any old images and containers that are not being used. + +``` +docker image prune -a +docker container prune +``` + +To check which images you have +``` +docker image ls +``` + diff --git a/website/versioned_docs/version-1.2.11/devel/docker.md b/website/versioned_docs/version-1.2.11/devel/docker.md new file mode 100644 index 00000000..21cb7d5c --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/docker.md @@ -0,0 +1,83 @@ +--- +id: docker_dev_guide +title: Docker Dev Guide +sidebar_label: Docker Dev Guide +--- + +## Selecting a Version + +You can use the "master" version or a tagged version. +To select a released version use the docker_select_version.sh script (see the Deployment Guide). +If you wish to use the development version (master branch) simply skip the docker_select_version.sh step. + +## Installing + +See the Deployment Guide to learn how to set up collectors, your environment and override files, etc. 
+ +## Importer + +The importer "shared" config that Docker uses is defined in compose/netsage_shared.xml. ** NOTE: If you want to make changes to this file, you will need to rebuild the container** + +## Build Images + +The images are published on Docker Hub, but if you'd like to incorporate local changes please follow the process below. + +### Build Using Source Code + +If you would like to build the *importer* container using the version of the pipeline scripts found in the GitHub repo then run the following: + +```sh +docker-compose -f docker-compose.build.yml build + +``` + +NOTE: The importer container includes the config files for the logstash pipeline. + + +## Optional: ElasticSearch and Kibana + +You can optionally store flow data locally in an ElasticSearch container and view the data with Kibana. Local storage can be enabled with the following steps: + +1. Uncomment the following lines in conf-logstash/99-outputs.conf: + +``` +elasticsearch { + hosts => ["elasticsearch"] + index => "netsage_flow-%{+YYYY.MM.dd}" +} +``` + +2. Comment out the `rabbitmq {...}` block in conf-logstash/99-outputs.conf if you do not want to also send logstash output to RabbitMQ. + +3. 
Run the containers using the following line: `docker-compose -f docker-compose.yml -f docker-compose.develop.yml up -d` + +## Handy Docker Commands + +### Start the Containers + +``` sh +docker-compose up -d +``` + +### Stop the Containers + +``` sh +docker-compose stop && docker-compose rm +``` + +### Enter a Container Shell + +``` sh +docker-compose exec logstash bash #bash shell in logstash container +docker-compose exec importer bash #bash shell in importer container +docker-compose exec rabbit bash #bash shell in rabbit container +``` + +### View Container Logs + +``` sh +docker-compose logs -f #view logs for all containers +docker-compose logs -f logstash #view logs for logstash container +docker-compose logs -f importer #view logs for importer container +docker-compose logs -f rabbit #view logs for rabbit container +``` diff --git a/website/versioned_docs/version-1.2.11/devel/documentation_guide.md b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md new file mode 100644 index 00000000..076628b2 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/documentation_guide.md @@ -0,0 +1,143 @@ +--- +id: docusaurus +title: Revising Documentation +sidebar_label: Docusaurus +--- + +This project's documentation uses Docusaurus. + +Docusaurus converts markdown into html and builds a static website using React UI components, which can be exported to a webserver. + +Yarn is a package manager for JavaScript and replaces the npm client. It is not strictly necessary but highly encouraged. + +To extend the docs simply create a markdown file and reference the ID in the side bar config. Please see the related documentation +at the [docusaurus 2](https://v2.docusaurus.io/) project website. + +*THE FOLLOWING INSTRUCTIONS ARE NOT CONFIRMED TO WORK. PLEASE UPDATE WITH CORRECTIONS.* + +## If Not Using Docker +These are instructions for editing and releasing docs without using Docker. 
+ +### Installation + +To get started the first time, install npm, then use that to install yarn +``` +$ sudo yum install npm +$ sudo npm install -g yarn +``` + +Git clone the netsage pipeline project, then run yarn install to get all the dependencies listed within package.json +``` +$ cd netsage-pipeline/website +$ yarn install +``` + +### If Local Development + +If you are working on your local machine, rather than sshing into a host, you can view changes to the docs in a browser as you work. Use the following commands to generate the static website content (gets written into the build directory), then start a local development server and open up a browser window in which to view the docs. Most changes you make will be reflected live without having to restart the server. +``` +$ yarn build +$ yarn start +go to http://localhost:3000 +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +To view the changes you've made with some formatting, just go to the file on github in a browser. To see all of the formatting, read the "Deploying Docs to github.io" section below. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't foresee any new changes, please do the following: + +``` +$ yarn run docusaurus docs:version a.b.c +``` + +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. 
+ +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +Whether you have created a new set of versioned tags or just want to update the docs in "master", to make changes appear at https://netsage-project.github.io/netsage-pipeline, do the following. + +If Travis or some other CI is working, it will run yarn install and yarn deploy to do this automatically. + +If it is not, do it manually: +``` +$ USE_SSH="true" GIT_USER="your-username" yarn deploy +``` +replacing your-username. This sets a couple env vars then runs 'yarn deploy' which runs 'docusaurus deploy' (see package.json) which pushes the static website created to url: "https://netsage-project.github.io" (see docusaurus.config.js) + +NOTE: You need to have created ssh keys on the host you are running this on and added them to your github account. + +### Removing a version + +To remove version 1.2.6 of the docs, for example, + +we need to: + + * update versions.json to remove the reference + * remove the versioned_docs/version-1.2.6 + * remove versioned_sidebars/version-1.2.6-sidebars.json + +## If Using Docker + +You may also use a docs Docker container to simplify installation, making changes, and deployment. This method starts a local web server that allows you to see changes to the docs in a browser on your local machine, as they are made. + +### Build and Start the Container + +Git clone the netsage pipeline project then build and start the container. +The Dockerfile in website/ tells how to build an image that runs yarn. Docker-compose.yml brings up a docs container. 
+``` +$ cd netsage-pipeline/website +$ docker-compose build build_docs +$ docker-compose up -d docs +go to http://localhost:8000/netsage-pipeline/ +``` + +### To Make Changes +Whether on a local machine or a linux host, to make changes, edit the files in website/docs/. +When finished, git add, git commit, git push, as usual. +Repeat as needed. + +### Tagging a New release + +When it's time to release a new version of the Pipeline, you need to create a new version of the docs as well. + +Once the documentation is stable and you don't foresee any new changes, please do the following: + +``` +$ docker-compose build build_docs +$ docker-compose run docs yarn run docusaurus docs:version a.b.c +``` +replacing a.b.c with the next release version number. +This will create new versioned docs in website/versioned_docs/. + +Then edit docusaurus.config.js and change `lastVersion:` to refer to the new version number. + +Finally, commit and push the following to github: + * website/versioned_docs/version-a.b.c/ + * website/versioned_sidebars/version-a.b.c.sidebars.json + * versions.json + * docusaurus.config.js + + +### Deploying Docs to github.io +How to do this when using Docker ??? Get into the container ??? + +For now, go to a linux server that has yarn installed and +follow the instructions under If Not Using Docker. + diff --git a/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md new file mode 100644 index 00000000..a061957d --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/pipeline_dataset.md @@ -0,0 +1,34 @@ +--- +id: dev_dataset +title: Pipeline Replay Dataset +sidebar_label: Replay Dataset +--- + +The Netsage Pipeline processes network data. 
There are some components and patterns we can use to test +the behavior, such as the Ruby unit [tests](https://github.com/netsage-project/netsage-pipeline/tree/master/conf-logstash/ruby/spec) in logstash and the [generator](https://www.elastic.co/guide/en/logstash/current/plugins-inputs-generator.html) plugin, but the best +test is to replay network data and inspect the output in the grafana dashboard. + +Two sample data sets are provided for the two types of collectors we have (Netflow and Sflow). The network data and ips have been anonymized and should have no identifying information. + +You can download the files from [here](https://drive.google.com/drive/folders/19fzY5EVoKwtYUaiBJq5OxAR82yDY0taG). + +Please take note of which ports the collectors are listening on. Check your docker-compose.override.yml file. If you are using default ports, they should match this [example](https://github.com/netsage-project/netsage-pipeline/blob/master/docker-compose.override_example.yml). + +Currently the default ports are: + - 9998/udp for sflow + - 9999/udp for netflow + +Naturally the collectors have to be running in order for any of this to be usable. You can read more on how to get them running in the [Docker Simple Deployment Guide](../deploy/docker_install_simple.md#running-the-collectors-and-pipeline). + +In order to replay the data, use the following commands for netflow and sflow respectively: + +### Netflow + +``` +nfreplay -H 127.0.0.1 -p 9999 -r nfcapd-ilight-anon-20200114 -v 9 -d 1000 +``` + +### Sflow + +Coming soon. nfreplay will not work with the sflow data type. 
+ diff --git a/website/versioned_docs/version-1.2.11/devel/tag.md b/website/versioned_docs/version-1.2.11/devel/tag.md new file mode 100644 index 00000000..18819a89 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/devel/tag.md @@ -0,0 +1,113 @@ +--- +id: docker_dev_tag +title: How to Release a New Version of the Pipeline +sidebar_label: Making Releases +--- + +If a new version of nfdump needs to be used, make the new nfdump-collector image(s) first (see below) and update the docker-compose files with the new version number, then make new pipeline_importer and pipeline_logstash images.. + +## Make an RPM Release + +Use standard procedures to create an rpm of the new version of the pipeline. Update the version number and the CHANGES file, build the rpm, repoify, etc., then upgrade grnoc-netsage-deidentifier on bare-metal hosts using yum. If all works well, do the following steps to create new Docker images with which to upgrade Docker deployments. + +## In Github, Create a Release Tag + +Create a new Tag or Release in Github, eg, v1.2.11. +Be sure to copy info from the CHANGES file into the Release description. + +## To Build and Push Images Manually + +Below is the procedure to build pipeline_importer and pipeline_logstash images manually. + +Install docker-compose if not done already. See the Docker Installation instructions. + +Git clone (or git pull) the pipeline project and check out the tag you want to build, then set the version number in docker-compose.build.yml using the script. 
Eg, for v1.2.11, +``` +git clone https://github.com/netsage-project/netsage-pipeline.git +cd netsage-pipeline +git checkout -b v1.2.11 +./scripts/docker_select_version.sh 1.2.11 +``` + +Then build the pipeline_importer and pipeline_logstash images and push them to Docker Hub: +``` +$ sudo systemctl start docker +$ sudo docker-compose -f docker-compose.build.yml build +$ sudo docker login + provide your DockerHub login credentials +$ sudo docker-compose -f docker-compose.build.yml push (will push images mentioned in docker-compose.yml ??) + or $ docker push $image:$tag (will push a specific image version) +$ sudo systemctl stop docker +``` +If you run into an error about retrieving a mirrorlist and could not find a valid baseurl for repo, restart docker and try again. +If that doesn't work, try adding this to /etc/hosts: `67.219.148.138 mirrorlist.centos.org`, and/or try `yum install net-tools bridge-utils`, and/or restart network.service then docker. + +The person pushing to Docker Hub must have a Docker Hub account and belong to the Netsage team (3 users are allowed, for the free level). + +It might be a good idea to test the images before pushing them. See "Test Docker Images" below. + + +## Building With Automation + +??? + +## Test Docker Images + +See the Docker installation instructions for details... + +In the git checkout of the correct version, make an .env file and a docker-compose.override.yml file. You probably want to send the processed data to a dev Elasticsearch instance. Use samplicate or some other method to have data sent to the dev host. + +Run docker_select_version.sh if you haven't already, then start it up `$ sudo docker-compose up -d`. If there are local images, they'll be used, otherwise they'll be pulled from Docker Hub. + +After about 30 minutes, you should see flows in elasticsearch. + +## Make Versioned Docs + +A new set of versioned docs also has to be tagged once you are done making changes for the latest pipeline version. 
See the **Docusaurus guide**. + +## To Make New Nfdump-Collector Images + +If a new version of nfdump has been released that we need, new nfdump-collector images need to be made. + +``` +$ git clone https://github.com/netsage-project/docker-nfdump-collector.git +$ cd docker-nfdump-collector +$ sudo systemctl start docker +``` + +To use squash: create a file at /etc/docker/daemon.json and put into it +``` + "experimental": true + "debug: false" +``` + +To build version $VER, eg, 1.6.23 (both regular and alpine linux versions ?): +``` +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:$VER --squash collector +$ sudo docker build --build-arg NFDUMP_VERSION=$VER --tag netsage/nfdump-collector:alpine-$VER -f collector/Dockerfile-alpine --squash . +``` + +To push to Docker Hub and quit docker +``` +$ sudo docker login + provide your DockerHub login credentials +$ sudo docker push netsage/nfdump-collector:$VER +$ sudo systemctl stop docker +``` + +To use the new collector image in the pipeline, change the version number in docker-compose.override_example.yml. For example, to use the alpine-1.6.23 image: +``` +sflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +... +netflow-collector: + image: netsage/nfdump-collector:alpine-1.6.23 +``` + +Remind users to make the same change in their docker-compose.override.yml file when they do the next pipeline upgrade. + + +### New Version of Logstash + +If a new version of logstash has been released that we want everyone to use, +??? diff --git a/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md new file mode 100644 index 00000000..c82a8dbd --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/elastic_search.md @@ -0,0 +1,124 @@ +--- +id: elastic +title: Elasticsearch +sidebar_label: Elasticsearch +--- + +Flow data is ultimately saved to Elasticsearch. 
Following are the fields that are used/created in Logstash and that you may see returned by an elasticsearch query. + +### Flow fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|start |Jun 9, 2020 @ 17:39:53.808 | Start time of the flow (first packet seen)| +|end |Jun 9, 2020 @ 17:39:57.699 |End time of the flow (last packet seen)| +|meta.id |a17c4f0542... |Id of the flow (hash of 5-tuple + Sensor name)| +|es_doc_id |4f46bef884... |Hash of meta.id and start time. May be used as doc id in ES to prevent duplicates, but see Notes elsewhere.| +|meta.flow_type |sflow |'sflow', 'netflow', or 'tstat'| +|meta.protocol |tcp |Protocol used| +|meta.sensor_id | snvl2-pw-sw-1-mgmt-2.cenic.net|Sensor name (set in importer config, may not always be a hostname) | +|meta.sensor_group |CENIC |Sensor group, usually the network | +|meta.sensor_type |Regional Network |Sensor type ('Circuit', 'Regional Network', etc) | +|meta.country_scope |Domestic |'Domestic', 'International', or 'Mixed', depending on countries of src and dst| +|meta.is_network_testing | no | 'yes' if discipline is 'CS.Network Testing and Monitoring' or port is one used for PerfSonar: 5001, 5101, or 5201| + +### Source Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_ip |171.64.68.x | deidentified IP address| +|meta.src_port |80 |port used | +|meta.src_asn |32 |Source ASN from the flow header or, in some cases, the ASN of the IP from the MaxMind GeoIP ASN database| +|meta.src_organization |Stanford University | organization that owns the AS from the CAIDA ASN-Organization database| +|meta.src_location.lat | 37.423 | latitude of the IP from the MaxMind GeoIP City database| +|meta.src_location.lon |-122.164 | longitude of the IP from the MaxMind GeoIP City database| +|meta.src_country_name |United States | country of the IP 
from the MaxMind GeoIP City database| +|meta.src_continent |North America | continent of the IP from the MaxMind GeoIP City database| +|meta.src_ifindex |166 |the index of the interface the flow came into| + +### Source Science Registry Fields (Destination Fields similarly with "dst") +The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". Resources are sources and destinations of flows. + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.scireg.src.discipline |MPS.Physics.High Energy |The science discipline that uses the resource (ie IP). Note that the src MAY not have the same discipline as the dst. | +|meta.scireg.src.role |Storage |Role that the host plays | +|meta.scireg.src.org_name |Boston University (BU) |The organization that manages and/or uses the resource, as listed in the Science Registry| +|meta.scireg.src.org_abbr |Boston U |A shorter name for the organization. 
May not be the official abbreviation.| +|meta.scireg.src.resource |BU - ATLAS |Descriptive resource name from SciReg | +|meta.scireg.src.resource_abbr | |Resource abbreviation (if any)| +|meta.scireg.src.project_names |ATLAS |"Projects" that the resource is part of| +|meta.scireg.src.latitude |37.4178 |Resource's latitude, as listed in the Science Registry| +|meta.scireg.src.longitude |-122.178 |Resource's longitude, as listed in the Science Registry| + +### Source "Preferred" Fields (Destination Fields similarly with "dst") + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|meta.src_preferred_org |Stanford University |If the IP was found in the Science Registry, this is the SciReg organization, otherwise it is the CAIDA organization| +|meta.src_preferred_location.lat |37.417800 | Science Registry value if available, otherwise the MaxMind City DB value| +|meta.src_preferred_location.lon |-122.172000 | Science Registry value if available, otherwise the MaxMind City DB value | + +### Value Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|values.num_bits |939, 458, 560 |Sum of the number of bits in the (stitched) flow| +|values.num_packets |77, 824 |Sum of the number of packets in the (stitched) flows| +|values.duration |3.891 |Calculated as end minus start.| +|values.bits_per_second |241, 443, 988 |Calculated as num_bits divided by duration | +|values.packets_per_second |20, 001 |Calculated as num_packets divided by duration| + +### Tstat Value Fields + +|name |example | +|-----------------------|-----------------------| +|values.tcp_cwin_max |1549681 | +|values.tcp_cwin_min |17| +|values.tcp_initial_cwin|313| +|values.tcp_max_seg_size|64313| +|values.tcp_min_seg_size|17| +|values.tcp_mss |8960| +|values.tcp_out_seq_pkts|0| +|values.tcp_pkts_dup |0| +|values.tcp_pkts_fc |0| +|values.tcp_pkts_fs |0| +|values.tcp_pkts_reor |0| 
+|values.tcp_pkts_rto |0| +|values.tcp_pkts_unfs |0| +|values.tcp_pkts_unk |2| +|values.tcp_pkts_unrto |0| +|values.tcp_rexmit_bytes |1678| +|values.tcp_rexmit_pkts |2| +|values.tcp_rtt_avg |0.044| +|values.tcp_rtt_max |39.527| +|values.tcp_rtt_min |0.001| +|values.tcp_rtt_std |0.276| +|values.tcp_sack_cnt | 1| +|values.tcp_win_max |1549681| +|values.tcp_win_min |17| +|values.tcp_window_scale |13| + +### Developer Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|@pipeline_ver |1.2.11 | Version number of the pipeline used to process this flow | +|@ingest_time |Jun 9, 2020 @ 10:03:20.700 | The time the flow entered the logstash pipeline | +|@timestamp |Jun 9, 2020 @ 18:03:21.703 |The time the flow entered the logstash pipeline for tstat flows, or the time stitching finished and the event exited the aggregation filter for other flows.| +|@exit_time |Jun 9, 2020 @ 18:03:25.369 |The time the flow exited the pipeline | +|@processing_time |688.31 |@exit_time minus @ingest_time. Useful for seeing how long stitching took. | +|stitched_flows |13 |Number of flows that came into logstash that were stitched together to make this final one. 1 if no flows were stitched together. 0 for tstat flows, which are never stitched. | +|tags |maxmind src asn |Various info and error messages| +|trial | 5 |Can be set in 40-aggregation.conf if desired| + +### Elasticsearch Fields + +|name |example |description | +|-----------------------|-----------------------|-----------------------------| +|_index | om-ns-netsage-2020.06.14 | name of the index ("database table") | +|_type |_doc | set by ES | +|_id |HRkcm3IByJ9fEnbnCpaY | elasticsearch document id. 
| +|_score |1 |set by ES query | +|@version |1 | set by ES | + diff --git a/website/versioned_docs/version-1.2.11/pipeline/importer.md b/website/versioned_docs/version-1.2.11/pipeline/importer.md new file mode 100644 index 00000000..24b05c4b --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/importer.md @@ -0,0 +1,14 @@ +--- +id: importer +title: Importer +sidebar_label: Importer +--- +A netsage-netflow-importer script reads any new nfcapd files that have come in after a configurable delay and writes the results to the "netsage_deidentifier_raw" RabbitMQ queue. +All flow data waits in the queue until it is read in and processed by the logstash pipeline. + +To read nfcapd files, the importer uses an nfdump command with the "-a" option to aggregate raw flows within the file by the "5-tuple," i.e., the source and destination IPs, ports, and protocol. The "-L" option is used to throw out any aggregated flows below a threshold number of bytes. This threshold is specified in the importer config file. + +### Configuration +Configuration files for the importer are netsage_netflow_importer.xml and netsage_shared.xml in /etc/grnoc/netsage/deidentfier/. Comments in the files briefly describe the options. See also the Deployment pages in these docs. + +To avoid re-reading nfcapd files, the importer stores the names of files that have already been read in /var/cache/netsage/netflow_importer.cache. diff --git a/website/versioned_docs/version-1.2.11/pipeline/intro.md b/website/versioned_docs/version-1.2.11/pipeline/intro.md new file mode 100644 index 00000000..f4cce287 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/intro.md @@ -0,0 +1,37 @@ +--- +id: intro +title: Intro +sidebar_label: Intro +--- +# The NetSage Pipeline + +## Description + +The Netsage Flow Processing Pipeline is composed of several components for processing network flow data, including importing, deidentification, metadata tagging, flow stitching, etc. 
+There are many ways the components can be combined, configured, and run. These documents will describe the standard "simple" set up and provide information for more complex configurations. + +## Data Collection + +In Netsage, sensor(s) are network devices configured to collect flow data ([tstat](http://tstat.polito.it/), [sflow](https://www.rfc-editor.org/info/rfc3176), or [netflow](https://www.cisco.com/c/en/us/products/collateral/ios-nx-os-software/ios-netflow/prod_white_paper0900aecd80406232.html)) and send it to a "pipeline host" for processing. + +Tstat flow data can be sent directly to the pipeline ingest RabbitMQ queue on the pipeline host using the Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) tool. This can be installed as usual or via Docker. + +Sflow and netflow data from configured routers should be sent to the pipeline host where it is collected and stored into nfcapd files using [nfdump tools](https://github.com/phaag/nfdump). The Netsage project has packaged the nfdump tools into a [Docker container](https://github.com/netsage-project/docker-nfdump-collector) for ease of use. + +## Pipeline Components + +The Netsage Flow Processing Pipeline is made of the following components + + - Importer: Perl scripts on the pipeline host that read nfcapd flow files and send the flow data to a RabbitMQ queue. ([Doc](importer.md), [in github](https://github.com/netsage-project/netsage-pipeline/blob/master/lib/GRNOC/NetSage/Deidentifier/NetflowImporter.pm)) + - [RabbitMQ](https://www.rabbitmq.com/): Used for message passing and queuing of tasks. + - [Logstash](https://www.elastic.co/logstash) pipeline: Performs a variety of operations on the flow data to transform it and add additional information. ([Doc](logstash.md)) + - [Elasticsearch](https://www.elastic.co/what-is/elasticsearch): Used for storing the final flow data. 
+ +## Visualization + +[Grafana](https://grafana.com/oss/grafana/) or [Kibana](https://www.elastic.co/kibana) can be used to visualize the data stored in elasticsearch. Netsage Grafana Dashboards are available [in github](https://github.com/netsage-project/netsage-grafana-configs). + +## Pipeline Installation + +Originally, the pipeline was deployed by installing all of the components individually on one or more servers (the "BareMetal" or "Manual" Install). More recently, we've also added a Docker deployment option. With simple pipelines having just one sflow and/or one netflow sensor (and any number of tstat sensors), the basic "Docker Installation" should suffice. The "Docker Advanced Options" guide will help when there are more sensors and/or other customizations required. + diff --git a/website/versioned_docs/version-1.2.11/pipeline/logstash.md b/website/versioned_docs/version-1.2.11/pipeline/logstash.md new file mode 100644 index 00000000..658b240a --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/logstash.md @@ -0,0 +1,128 @@ +--- +id: logstash +title: Logstash Pipeline +sidebar_label: Logstash +--- + +The Logstash portion of the Netsage Pipeline reads in flows from a RabbitMQ queue, performs various transformations and adds additional information to them, then sends them to a location specified in the output logstash config, eventually ending up in an Elasticsearch instance. + +Logstash config files invoke various logstash "filters" and actions. These conf files are located in /etc/logstash/conf.d/. See below for a brief description of what each does and check the files for comments. + +Notes: + - All \*.conf files in conf.d/ are executed in alphabetical order, as if they were one huge file. Those ending in .disabled will not be executed (assuming 'path.config: "/etc/logstash/conf.d/*.conf"' in /etc/logstash/pipelines.yml). 
+ - If actions in a particular .conf file are not needed in your particular case, they can be removed or the file disabled, but check carefully for effects on downstream configs. + - MaxMind, CAIDA, and Science Registry database files required by the geoip and aggregate filters are downloaded from scienceregistry.netsage.global via cron jobs weekly or daily. (MaxMind data can change weekly, CAIDA quarterly, Science Registry information randomly.) **NOTE that new versions won't be used in the pipeline until logstash is restarted.** There is a cron file to do this also, though it's not running in Docker deployments. Similarly for other support files, eg, those used in 90-additional-fields.conf. + - Lookup tables for 55-member-orgs.conf that we have compiled are available from scienceregistry.grnoc.iu.edu. See the cron files provided. These will not be updated often, so you may run the cron jobs or not. You will need to provide lists for other networks yourself or ask us. + +## Logstash Sequence + +The main things done in each conf file are as follows. + +### 01-input-rabbit.conf + +Reads flows from a rabbitmq queue. (The ".disabled" extension can be removed from other 01-input configs available in conf.d/ to get flows from other sources.) + +### 10-preliminaries.conf + +Drops flows to or from private IP addresses; +converts any timestamps in milliseconds to seconds; +drops events with timestamps more than a year in the past or (10 sec) in the future; +sets duration and rates to 0 if duration is <= 0.002 sec (because tiny durations/few samples lead to inaccurate rates) + +### 15-sensor-specific-changes.conf + +Makes any changes to fields needed for specific sensors. 
This config currently provides 1) the ability to drop all flows that do not use interfaces (ifindexes) in a specified list; lists can be sensor-specific, 2) the ability to change the sensor name for flows from a specified sensor which use a certain interface, and 3) the ability to apply a sampling rate correction manually for named sensors. You may edit the file in a bare-metal installation and specify everything explicitly (upgrades will not overwrite this config) or you may use the environment file specified in the systemd unit file. For Docker installations, use the .env file to specify the parameters. By default, this config will do nothing since the flags will be set to False. + +### 20-add_id.conf + +Adds a unique id (eventually called meta.id) which is a hash of the 5-tuple of the flow (src and dst ips and ports, and protocol) plus the sensor name. This id is used for aggregating (stitching) in the next step. + +### 40-aggregation.conf + +Stitches together flows from different nfcapd files into longer flows, matching them up by meta.id and using a specified inactivity_timeout to decide when to start a new flow. + +Notes: + - By default, 5-minute nfcapd files are assumed and the inactivity_timeout is set to 10.5 minutes. If more than 10.5 min have passed between the start of the current flow and the start of the last matching one, do not stitch them together. + - If your nfcapd files are written every 15 minutes, change the inactivity_timeout to at least 16 minutes. + - There is another "timeout" setting which is basically the maximum duration of a stitched flow (default: 24 hr). + - When logstash shuts down, any flows "in the aggregator" will be written out to aggregate_maps_path (default: /tmp/logstash-aggregation-maps). The file is then read back in when logstash is restarted so aggregation can continue. + - Your logstash pipeline can have only 1 worker or aggregation is not going to work! This is set in the logstash config file. 
+ - Tstat flows come in already complete, so no aggregation is done on those flows. + +### 45-geoip-tagging.conf + +Queries the MaxMind GeoLite2-City database by IP to get src and dst Countries, Continents, Latitudes, and Longitudes; +if the destination IP is in the multicast range, sets the destination Organization, Country, and Continent to "Multicast". + +*This product uses GeoLite2 data created by MaxMind, available from [www.maxmind.com](http://www.maxmind.com).* + +### 50-asn.conf + +Normally with sflow and netflow, flows come in with source and destination ASNs. If there is no ASN in the input event; or the input ASN is 0, 4294967295, or 23456, or it is a private ASN, tries to get an ASN by IP from the MaxMind ASN database. +Sets ASN to -1 if it is unavailable for any reason. + +### 53-caida-org.conf + +Uses the current source and destination ASNs to get organization names from the prepared CAIDA ASN-to-Organization lookup file. + +*This product uses a lookup table constructed from the CAIDA AS Organizations Dataset - see [www.caida.org](http://www.caida.org/data/as-organizations).* + +### 55-member-orgs.conf + +Searches any provided lookup tables by IP to obtain member or customer organization names and overwrite the Organization determined previously. +This allows entities which don't own their own ASs to be listed as the src or dst Organization. + +Note: These lookup tables are not stored in github, but an example is provided to show the layout and tables we have can be downloaded via a cron job. + +### 60-scireg-tagging-fakegeoip.conf + +Uses a fake geoip database containing [Science Registry](http://scienceregistry.grnoc.iu.edu) information to tag the flows with source and destination science disciplines and roles, organizations and locations, etc; +removes Registry fields we don't need to save to elasticsearch. + +Notes: + - The [Science Registry](https://scienceregistry.netsage.global/rdb/) stores human-curated information about various "resources". 
Resources are sources and destinations of flows. + - The Science Registry "fake geoip database" is updated weekly and can be downloaded via wget in a cron job (provided in the installation). + +### 70-deidentify.conf + +Replaces the last octet of IPv4 addresses and the last 4 hextets of IPv6 addresses with x's in order to deidentify them. + +### 80-privatize.org.conf + +Removes information about Australian organizations (or, with modification, any country that has privacy rules that require us not to identify organizations). +If the ASN is one of those listed, completely replaces the IP with x's, sets the location to central Australia, sets all organizations to "AARNet", removes all Projects. + +### 88-preferred-location-org.conf + +Copies Science Registry organization and location values, if they exist, to the meta.preferred_organization and meta.preferred_location fields. If there are no Science Registry values, the organizations and locations from the CAIDA and MaxMind lookups, respectively, are saved to those fields. + +### 90-additional-fields.conf + +Sets additional quick and easy fields. Supporting mapping or ruby files are used - see support/ and ruby/ in conf.d/. Currently we have (for Netsage's use): + - sensor_group = TACC, AMPATH, etc. (based on matching sensor names to regexes) + - sensor_type = Circuit, Archive, Exchange Point, or Regional Network (based on matching sensor names to regexes) + - country_scope = Domestic, International, or Mixed (based on src and dst countries and possibly continents, where Domestic = US, Puerto Rico, or Guam) + - is_network_testing = yes, no (yes if discipline from the science registry is 'CS.Network Testing and Monitoring' or port = 5001, 5101, or 5201) + - es_doc_id = hash of meta.id and the start time of the flow. If this id is used as the document id in elasticsearch, flows that are mistakenly input more than once will update existing documents rather than be added as duplicates. 
(NOTE: due to how netflow works, use es_doc_id as the ES document id only for sflow!) + +### 95-cleanup.conf + +Does small misc. tasks at the end like rename, remove, or convert fields + +### 98-post-process.conf + +Adds @exit_time and @processing_time (these are mainly for developers) + +### 99-output-rabbit.conf + +Sends results to a final RabbitMQ queue. (".disabled" can be removed from other output configs to send flows to other places) + +### Final Stage + +In the GlobalNOC-Netsage case, the output filter writes the flows to a network-specific RabbitMQ queue on another host and the last stage is a separate logstash pipeline on a 3rd host. The latter reads flows from the final queue using a rabbitmq input filter and sends it into elasticsearch using an elasticsearch output filter with a mapping template which sets data types for the fields. + +## Field names + +The fields used/created in Logstash (and saved to Elasticsearch) are listed in the [Elasticsearch doc](elastic). + + diff --git a/website/versioned_docs/version-1.2.11/pipeline/nfdump.md b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md new file mode 100644 index 00000000..b9519282 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/nfdump.md @@ -0,0 +1,17 @@ +--- +id: nfdump +title: Sflow/Netflow Data Collection +sidebar_label: Sflow/Netflow Data +--- + +Sflow and Netflow export can be configured on appropriate network devices. Netsage uses tools in the Nfdump package to collect and process the resulting flow data. The toolset supports netflow v1, v5/v7, v9, IPFIX and SFLOW, IPv4 as well as IPv6. + +## Netsage Usage + +Nfcapd and/or sfcapd processes (from the nfdump package) are used to collect incoming netflow and/or sflow data and save it to disk in nfcapd files. The files are then read by the [importer](importer), which uses an nfdump command, and sent to RabbitMQ. 
From there, the [logstash](logstash) pipeline ingests the flows and processes them in exactly the same way as it processes tstat flows. The data is eventually saved in elasticsearch and visualized by [grafana dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +One may also use the nfdump command interactively to view the flows in a nfcapd file in a terminal window. + +## Docker Deployment + +The nfdump/nfcapd/sfcapd processes can be invoked locally or using a Docker container. The Docker deployment of the Pipeline uses an nfdump Docker container. (See the Docker Deployment Guide.) The Docker image definitions can be found [HERE](https://github.com/netsage-project/docker-nfdump-collector) diff --git a/website/versioned_docs/version-1.2.11/pipeline/tstat.md b/website/versioned_docs/version-1.2.11/pipeline/tstat.md new file mode 100644 index 00000000..baab97c5 --- /dev/null +++ b/website/versioned_docs/version-1.2.11/pipeline/tstat.md @@ -0,0 +1,16 @@ +--- +id: tstat +title: Tstat Data Collection +sidebar_label: Tstat Data +--- + +## Netsage GitHub Project + +[Tstat](http://tstat.polito.it/) is a passive sniffer that provides insights into traffic patterns. The Netsage [tstat-transport](https://github.com/netsage-project/tstat-transport) project provides client programs to parse the captured data and send it to a rabbitmq host where it can then be processed by the [logstash pipeline](logstash), stored in elasticsearch, and finally displayed in our Grafana [dashboards](https://github.com/netsage-project/netsage-grafana-configs). + +## Docker + +Netsage Docker images exist on Docker Hub for tstat and tstat_transport. This is still in a beta state and is in development. The initial documentation is available [here](https://github.com/netsage-project/tstat-transport/blob/master/docs/docker.md). 
+ + + diff --git a/website/versioned_sidebars/version-1.2.10-sidebars.json b/website/versioned_sidebars/version-1.2.10-sidebars.json new file mode 100644 index 00000000..a82d1786 --- /dev/null +++ b/website/versioned_sidebars/version-1.2.10-sidebars.json @@ -0,0 +1,89 @@ +{ + "version-1.2.10/Pipeline": [ + { + "collapsed": true, + "type": "category", + "label": "Pipeline", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/pipeline/intro" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/tstat" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/nfdump" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/importer" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/logstash" + }, + { + "type": "doc", + "id": "version-1.2.10/pipeline/elastic" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/deploy/choose_install" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/bare_metal_install" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_install_simple" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_install_advanced" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_upgrade" + }, + { + "type": "doc", + "id": "version-1.2.10/deploy/docker_troubleshoot" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Development", + "items": [ + { + "type": "doc", + "id": "version-1.2.10/devel/dev_dataset" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docker_dev_guide" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docusaurus" + }, + { + "type": "doc", + "id": "version-1.2.10/devel/docker_dev_tag" + } + ] + } + ] +} diff --git a/website/versioned_sidebars/version-1.2.11-sidebars.json b/website/versioned_sidebars/version-1.2.11-sidebars.json new file mode 100644 index 00000000..40a8c9ac --- /dev/null +++ b/website/versioned_sidebars/version-1.2.11-sidebars.json @@ -0,0 
+1,89 @@ +{ + "version-1.2.11/Pipeline": [ + { + "collapsed": true, + "type": "category", + "label": "Pipeline", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/pipeline/intro" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/tstat" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/nfdump" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/importer" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/logstash" + }, + { + "type": "doc", + "id": "version-1.2.11/pipeline/elastic" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Deployment", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/deploy/choose_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/bare_metal_install" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_simple" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_install_advanced" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_upgrade" + }, + { + "type": "doc", + "id": "version-1.2.11/deploy/docker_troubleshoot" + } + ] + }, + { + "collapsed": true, + "type": "category", + "label": "Development", + "items": [ + { + "type": "doc", + "id": "version-1.2.11/devel/dev_dataset" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_guide" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docusaurus" + }, + { + "type": "doc", + "id": "version-1.2.11/devel/docker_dev_tag" + } + ] + } + ] +} diff --git a/website/versions.json b/website/versions.json index 17ed3fdd..5b4b29b1 100644 --- a/website/versions.json +++ b/website/versions.json @@ -1,4 +1,6 @@ [ + "1.2.11", + "1.2.10", "1.2.9", "1.2.8", "1.2.7", diff --git a/website/yarn.lock b/website/yarn.lock index bb962ebe..1c6b1dac 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -2047,9 +2047,9 @@ ajv@^6.1.0, ajv@^6.10.2, ajv@^6.12.4, ajv@^6.12.5: uri-js "^4.2.2" algoliasearch-helper@^3.3.4: - version "3.4.4" - resolved 
"https://registry.yarnpkg.com/algoliasearch-helper/-/algoliasearch-helper-3.4.4.tgz#f2eb46bc4d2f6fed82c7201b8ac4ce0a1988ae67" - integrity sha512-OjyVLjykaYKCMxxRMZNiwLp8CS310E0qAeIY2NaublcmLAh8/SL19+zYHp7XCLtMem2ZXwl3ywMiA32O9jszuw== + version "3.6.2" + resolved "https://registry.yarnpkg.com/algoliasearch-helper/-/algoliasearch-helper-3.6.2.tgz#45e19b12589cfa0c611b573287f65266ea2cc14a" + integrity sha512-Xx0NOA6k4ySn+R2l3UMSONAaMkyfmrZ3AP1geEMo32MxDJQJesZABZYsldO9fa6FKQxH91afhi4hO1G0Zc2opg== dependencies: events "^1.1.1" @@ -2250,9 +2250,9 @@ async-limiter@~1.0.0: integrity sha512-csOlWGAcRFJaI6m+F2WKdnMKr4HhdhFVBk0H/QbJFMCr+uO2kwohwXQPxw/9OCxp05r5ghVBFSyioixx3gfkNQ== async@^2.6.2: - version "2.6.3" - resolved "https://registry.yarnpkg.com/async/-/async-2.6.3.tgz#d72625e2344a3656e3a3ad4fa749fa83299d82ff" - integrity sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg== + version "2.6.4" + resolved "https://registry.yarnpkg.com/async/-/async-2.6.4.tgz#706b7ff6084664cd7eae713f6f965433b5504221" + integrity sha512-mzo5dfJYwAn29PeiJ0zvwTo04zj8HDJj0Mn8TD7sno7q12prdbnasKJHhkm2c1LgrhlJ0teaea8860oxi51mGA== dependencies: lodash "^4.17.14" @@ -2292,11 +2292,11 @@ autoprefixer@^9.4.7, autoprefixer@^9.6.1: postcss-value-parser "^4.1.0" axios@^0.21.1: - version "0.21.1" - resolved "https://registry.yarnpkg.com/axios/-/axios-0.21.1.tgz#22563481962f4d6bde9a76d516ef0e5d3c09b2b8" - integrity sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA== + version "0.21.4" + resolved "https://registry.yarnpkg.com/axios/-/axios-0.21.4.tgz#c67b90dc0568e5c1cf2b0b858c43ba28e2eda575" + integrity sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg== dependencies: - follow-redirects "^1.10.0" + follow-redirects "^1.14.0" babel-loader@^8.2.2: version "8.2.2" @@ -2367,9 +2367,9 @@ bail@^1.0.0: integrity 
sha512-xFbRxM1tahm08yHBP16MMjVUAvDaBMD38zsM9EMAUN61omwLmKlOpB/Zku5QkjZ8TZ4vn53pj+t518cH0S03RQ== balanced-match@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767" - integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c= + version "1.0.2" + resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.2.tgz#e83e3a7e3f300b34cb9d87f615fa0cbf357690ee" + integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== base16@^1.0.0: version "1.0.0" @@ -2602,9 +2602,9 @@ browserslist@^4.0.0, browserslist@^4.12.0, browserslist@^4.14.5, browserslist@^4 node-releases "^1.1.70" buffer-from@^1.0.0: - version "1.1.1" - resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef" - integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A== + version "1.1.2" + resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.2.tgz#2b146a6fd72e80b4f55d255f35ed59a3a9a41bd5" + integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== buffer-indexof@^1.0.0: version "1.1.1" @@ -2795,9 +2795,9 @@ caniuse-api@^3.0.0: lodash.uniq "^4.5.0" caniuse-lite@^1.0.0, caniuse-lite@^1.0.30000981, caniuse-lite@^1.0.30001109, caniuse-lite@^1.0.30001125, caniuse-lite@^1.0.30001181, caniuse-lite@^1.0.30001196: - version "1.0.30001205" - resolved "https://registry.yarnpkg.com/caniuse-lite/-/caniuse-lite-1.0.30001205.tgz#d79bf6a6fb13196b4bb46e5143a22ca0242e0ef8" - integrity sha512-TL1GrS5V6LElbitPazidkBMD9sa448bQDDLrumDqaggmKFcuU2JW1wTOHJPukAcOMtEmLcmDJEzfRrf+GjM0Og== + version "1.0.30001350" + resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001350.tgz" + integrity sha512-NZBql38Pzd+rAu5SPXv+qmTWGQuFsRiemHCJCAPvkoDxWV19/xqL2YHF32fDJ9SDLdLqfax8+S0CO3ncDCp9Iw== ccount@^1.0.0, ccount@^1.0.3: 
version "1.1.0" @@ -2966,15 +2966,6 @@ cli-boxes@^2.2.1: resolved "https://registry.yarnpkg.com/cli-boxes/-/cli-boxes-2.2.1.tgz#ddd5035d25094fce220e9cab40a45840a440318f" integrity sha512-y4coMcylgSCdVinjiDBuR8PCC2bLjyGTwEmPb9NHR/QaNU6EUOXcTY/s6VjGMD6ENSEaeQYHCY0GNGS5jfMwPw== -clipboard@^2.0.0: - version "2.0.8" - resolved "https://registry.yarnpkg.com/clipboard/-/clipboard-2.0.8.tgz#ffc6c103dd2967a83005f3f61976aa4655a4cdba" - integrity sha512-Y6WO0unAIQp5bLmk1zdThRhgJt/x3ks6f30s3oE3H1mgIEU33XyQjEf8gsf6DxC7NPX8Y1SsNWjUjL/ywLnnbQ== - dependencies: - good-listener "^1.2.2" - select "^1.1.2" - tiny-emitter "^2.0.0" - cliui@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/cliui/-/cliui-5.0.0.tgz#deefcfdb2e800784aa34f46fa08e06851c7bbbc5" @@ -3314,11 +3305,11 @@ create-hmac@^1.1.0, create-hmac@^1.1.4, create-hmac@^1.1.7: sha.js "^2.4.8" cross-fetch@^3.0.4: - version "3.1.3" - resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.3.tgz#b8e7d5f19161c4a0ca916f707978848786043afb" - integrity sha512-2i6v88DTqVBNODyjD9U6Ycn/uSZNvyHe25cIbo2fFnAACAsaLTJsd23miRWiR5NuiGXR9wpJ9d40/9WAhjDIrw== + version "3.1.5" + resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" + integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw== dependencies: - node-fetch "2.6.1" + node-fetch "2.6.7" cross-spawn@7.0.3, cross-spawn@^7.0.3: version "7.0.3" @@ -3578,6 +3569,11 @@ cyclist@^1.0.1: resolved "https://registry.yarnpkg.com/cyclist/-/cyclist-1.0.1.tgz#596e9698fd0c80e12038c2b82d6eb1b35b6224d9" integrity sha1-WW6WmP0MgOEgOMK4LW6xs1tiJNk= +data-uri-to-buffer@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.0.tgz#b5db46aea50f6176428ac05b73be39a57701a64b" + integrity sha512-Vr3mLBA8qWmcuschSLAOogKgQ/Jwxulv3RNE4FXnYWRGujzrRWQI4m12fQqRkwX06C0KanhLr4hK+GydchZsaA== + debug@2.6.9, debug@^2.2.0, debug@^2.3.3, 
debug@^2.6.0: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -3707,11 +3703,6 @@ del@^6.0.0: rimraf "^3.0.2" slash "^3.0.0" -delegate@^3.1.2: - version "3.2.0" - resolved "https://registry.yarnpkg.com/delegate/-/delegate-3.2.0.tgz#b66b71c3158522e8ab5744f720d8ca0c2af59166" - integrity sha512-IofjkYBZaZivn0V8nnsMJGBr4jVLxHDheKSW88PyxS5QC4Vo9ZbZVvhzlSxY87fVq3STR6r+4cGepyHkcWOQSw== - depd@~1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/depd/-/depd-1.1.2.tgz#9bcd52e14c097763e749b274c4346ed2e560b5a9" @@ -3780,9 +3771,9 @@ dns-equal@^1.0.0: integrity sha1-s55/HabrCnW6nBcySzR1PEfgZU0= dns-packet@^1.3.1: - version "1.3.1" - resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.1.tgz#12aa426981075be500b910eedcd0b47dd7deda5a" - integrity sha512-0UxfQkMhYAUaZI+xrNZOz/as5KgDU0M/fQ9b6SpkyLbk3GEswDi6PADJVaYJradtRVsRIlF1zLyOodbcTCDzUg== + version "1.3.4" + resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-1.3.4.tgz#e3455065824a2507ba886c55a89963bb107dec6f" + integrity sha512-BQ6F4vycLXBvdrJZ6S3gZewt6rcrks9KBgM9vrhW+knGRqc8uEdT7fuCwloc7nny5xNoMJ17HGH0R/6fpo8ECA== dependencies: ip "^1.1.0" safe-buffer "^5.0.1" @@ -4112,9 +4103,9 @@ events@^3.0.0: integrity sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q== eventsource@^1.0.7: - version "1.1.0" - resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.1.0.tgz#00e8ca7c92109e94b0ddf32dac677d841028cfaf" - integrity sha512-VSJjT5oCNrFvCS6igjzPAt5hBzQ2qPBFIbJ03zLI9SE0mxwZpMw6BfJrbFHm1a141AavMEB8JHmBhWAd66PfCg== + version "1.1.1" + resolved "https://registry.yarnpkg.com/eventsource/-/eventsource-1.1.1.tgz#4544a35a57d7120fba4fa4c86cb4023b2c09df2f" + integrity sha512-qV5ZC0h7jYIAOhArFJgSfdyz6rALJyb270714o7ZtNnw2WSJ+eexhKtE0O8LYPRsHZHf2osHKZBxGPvm3kPkCA== dependencies: original "^1.0.0" @@ -4312,6 +4303,14 @@ feed@^4.2.2: dependencies: xml-js "^1.6.11" +fetch-blob@^3.1.2, 
fetch-blob@^3.1.3: + version "3.1.4" + resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.1.4.tgz#e8c6567f80ad7fc22fd302e7dcb72bafde9c1717" + integrity sha512-Eq5Xv5+VlSrYWEqKrusxY1C3Hm/hjeAsCGVG3ft7pZahlUAChpGZT/Ms1WmSLnEAisEXszjzu/s+ce6HZB2VHA== + dependencies: + node-domexception "^1.0.0" + web-streams-polyfill "^3.0.3" + figgy-pudding@^3.5.1: version "3.5.2" resolved "https://registry.yarnpkg.com/figgy-pudding/-/figgy-pudding-3.5.2.tgz#b4eee8148abb01dcf1d1ac34367d59e12fa61d6e" @@ -4426,10 +4425,10 @@ flux@^4.0.1: fbemitter "^3.0.0" fbjs "^3.0.0" -follow-redirects@^1.0.0, follow-redirects@^1.10.0: - version "1.13.3" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.13.3.tgz#e5598ad50174c1bc4e872301e82ac2cd97f90267" - integrity sha512-DUgl6+HDzB0iEptNQEXLx/KhTmDb8tZUHSeLqpnjpknR70H0nC2t9N73BK6fN4hOvJ84pKlIQVQ4k5FFlBedKA== +follow-redirects@^1.0.0, follow-redirects@^1.14.0: + version "1.14.8" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.8.tgz#016996fb9a11a100566398b1c6839337d7bfa8fc" + integrity sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA== for-in@^1.0.2: version "1.0.2" @@ -4449,6 +4448,13 @@ fork-ts-checker-webpack-plugin@4.1.6: tapable "^1.0.0" worker-rpc "^0.1.0" +formdata-polyfill@^4.0.10: + version "4.0.10" + resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423" + integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g== + dependencies: + fetch-blob "^3.1.2" + forwarded@~0.1.2: version "0.1.2" resolved "https://registry.yarnpkg.com/forwarded/-/forwarded-0.1.2.tgz#98c23dab1175657b8c0573e8ceccd91b0ff18c84" @@ -4600,9 +4606,9 @@ glob-parent@^5.1.0, glob-parent@^5.1.1, glob-parent@~5.1.0: is-glob "^4.0.1" glob@^7.0.0, glob@^7.0.3, glob@^7.1.3, glob@^7.1.4: - version "7.1.6" - resolved 
"https://registry.yarnpkg.com/glob/-/glob-7.1.6.tgz#141f33b81a7c2492e125594307480c46679278a6" - integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== + version "7.2.0" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.0.tgz#d15535af7732e02e948f4c41628bd910293f6023" + integrity sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q== dependencies: fs.realpath "^1.0.0" inflight "^1.0.4" @@ -4674,13 +4680,6 @@ globby@^6.1.0: pify "^2.0.0" pinkie-promise "^2.0.0" -good-listener@^1.2.2: - version "1.2.2" - resolved "https://registry.yarnpkg.com/good-listener/-/good-listener-1.2.2.tgz#d53b30cdf9313dffb7dc9a0d477096aa6d145c50" - integrity sha1-1TswzfkxPf+33JoNR3CWqm0UXFA= - dependencies: - delegate "^3.1.2" - got@^9.6.0: version "9.6.0" resolved "https://registry.yarnpkg.com/got/-/got-9.6.0.tgz#edf45e7d67f99545705de1f7bbeeeb121765ed85" @@ -5128,10 +5127,10 @@ immer@8.0.1: resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.1.tgz#9c73db683e2b3975c424fb0572af5889877ae656" integrity sha512-aqXhGP7//Gui2+UrEtvxZxSquQVXTpZ7KDxfCcKAF3Vysvw0CViVaW9RZ1j1xlIYqaaaipBoqdqeibkc18PNvA== -immer@^8.0.1: - version "8.0.4" - resolved "https://registry.yarnpkg.com/immer/-/immer-8.0.4.tgz#3a21605a4e2dded852fb2afd208ad50969737b7a" - integrity sha512-jMfL18P+/6P6epANRvRk6q8t+3gGhqsJ9EuJ25AXE+9bNTYtssvzeYbEd0mXRYWCmmXSIbnlpz6vd6iJlmGGGQ== +immer@^9.0.6: + version "9.0.6" + resolved "https://registry.yarnpkg.com/immer/-/immer-9.0.6.tgz#7a96bf2674d06c8143e327cbf73539388ddf1a73" + integrity sha512-G95ivKpy+EvVAnAab4fVa4YGYn24J1SpEktnJX7JJ45Bd7xqME/SCplFzYFmTbrkwZbQ4xJK1xMTUYBkN6pWsQ== import-fresh@^2.0.0: version "2.0.0" @@ -5374,6 +5373,13 @@ is-core-module@^2.2.0: dependencies: has "^1.0.3" +is-core-module@^2.8.0: + version "2.8.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.8.1.tgz#f59fdfca701d5879d0a6b100a40aa1560ce27211" + integrity 
sha512-SdNCUs284hr40hFTFP6l0IfZ/RSrMXF3qgoRHd3/79unUTvrFO/JoXwkGm+5J/Oe3E/b5GsnG330uUNgRpu1PA== + dependencies: + has "^1.0.3" + is-data-descriptor@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/is-data-descriptor/-/is-data-descriptor-0.1.4.tgz#0b5ee648388e2c860282e793f1856fec3f301b56" @@ -5856,9 +5862,9 @@ loader-utils@2.0.0, loader-utils@^2.0.0: json5 "^2.1.2" loader-utils@^1.1.0, loader-utils@^1.2.3, loader-utils@^1.4.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.0.tgz#c579b5e34cb34b1a74edc6c1fb36bfa371d5a613" - integrity sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA== + version "1.4.2" + resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.2.tgz#29a957f3a63973883eb684f10ffd3d151fec01a3" + integrity sha512-I5d00Pd/jwMD2QCduo657+YM/6L3KZu++pmX9VFncxaxvHcru9jx1lBaFft+r4Mt2jK0Yhp41XlRAihzPxHNCg== dependencies: big.js "^5.2.2" emojis-list "^3.0.0" @@ -6286,10 +6292,15 @@ minimatch@3.0.4, minimatch@^3.0.4: dependencies: brace-expansion "^1.1.7" -minimist@^1.2.0, minimist@^1.2.5: - version "1.2.5" - resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602" - integrity sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw== +minimist@^1.2.0: + version "1.2.7" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.7.tgz#daa1c4d91f507390437c6a8bc01078e7000c4d18" + integrity sha512-bzfL1YUZsP41gmu/qjrEk0Q6i2ix/cVeAhbCbqH9u3zYutS1cLg00qhrD0M2MVdCcx4Sc0UpP2eBWo9rotpq6g== + +minimist@^1.2.5: + version "1.2.6" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" + integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== minipass-collect@^1.0.2: version "1.0.2" @@ -6419,9 +6430,9 @@ nan@^2.12.1: integrity 
sha512-M2ufzIiINKCuDfBSAUr1vWQ+vuVcA9kqx8JJUsbQi6yf1uGRyb7HfpdfUr5qLXf3B/t8dPvcjhKMmlfnP47EzQ== nanoid@^3.1.22: - version "3.1.22" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.22.tgz#b35f8fb7d151990a8aebd5aa5015c03cf726f844" - integrity sha512-/2ZUaJX2ANuLtTvqTlgqBQNJoQO398KyJgZloL0PZkC0dpysjncRUPsFe3DUPzz/y3h+u7C46np8RMuvF3jsSQ== + version "3.2.0" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.2.0.tgz#62667522da6673971cca916a6d3eff3f415ff80c" + integrity sha512-fmsZYa9lpn69Ad5eDn7FMcnnSR+8R34W9qJEijxYhTbfOWzr22n1QxCMzXLK+ODyW2973V3Fux959iQoUxzUIA== nanomatch@^1.2.9: version "1.2.13" @@ -6463,6 +6474,11 @@ no-case@^3.0.4: lower-case "^2.0.2" tslib "^2.0.3" +node-domexception@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + node-emoji@^1.10.0: version "1.10.0" resolved "https://registry.yarnpkg.com/node-emoji/-/node-emoji-1.10.0.tgz#8886abd25d9c7bb61802a658523d1f8d2a89b2da" @@ -6470,10 +6486,21 @@ node-emoji@^1.10.0: dependencies: lodash.toarray "^4.4.0" -node-fetch@2.6.1, node-fetch@^2.6.1: - version "2.6.1" - resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052" - integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== +node-fetch@2.6.7: + version "2.6.7" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" + integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ== + dependencies: + whatwg-url "^5.0.0" + +node-fetch@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.1.1.tgz#d0d9607e455b3087e3092b821b5b1f1ebf4c2147" + integrity 
sha512-SMk+vKgU77PYotRdWzqZGTZeuFKlsJ0hu4KPviQKkfY+N3vn2MIzr0rvpnYpR8MtB3IEuhlEcuOLbGvLRlA+yg== + dependencies: + data-uri-to-buffer "^4.0.0" + fetch-blob "^3.1.3" + formdata-polyfill "^4.0.10" node-forge@^0.10.0: version "0.10.0" @@ -6951,10 +6978,10 @@ path-key@^3.0.0, path-key@^3.1.0: resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== -path-parse@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" - integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== +path-parse@^1.0.6, path-parse@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== path-to-regexp@0.1.7: version "0.1.7" @@ -7758,11 +7785,9 @@ prism-react-renderer@^1.1.1: integrity sha512-GHqzxLYImx1iKN1jJURcuRoA/0ygCcNhfGw1IT8nPIMzarmKQ3Nc+JcG0gi8JXQzuh0C5ShE4npMIoqNin40hg== prismjs@^1.23.0: - version "1.23.0" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.23.0.tgz#d3b3967f7d72440690497652a9d40ff046067f33" - integrity sha512-c29LVsqOaLbBHuIbsTxaKENh1N2EQBOHaWv7gkHN4dgRbxSREqDnDbtFJYdpPauS4YCplMSNCABQ6Eeor69bAA== - optionalDependencies: - clipboard "^2.0.0" + version "1.27.0" + resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.27.0.tgz#bb6ee3138a0b438a3653dd4d6ce0cc6510a45057" + integrity sha512-t13BGPUlFDR7wRB5kQDG4jjl7XeuH6jbJGt11JHPL96qwsEHNX2+68tFXqc1/k+/jALsbSWJKUOT/hcYAZ5LkA== process-nextick-args@~2.0.0: version "2.0.1" @@ -8444,7 +8469,16 @@ resolve-url@^0.2.1: resolved "https://registry.yarnpkg.com/resolve-url/-/resolve-url-0.2.1.tgz#2c637fe77c893afd2a663fe21aa9080068e2052a" 
integrity sha1-LGN/53yJOv0qZj/iGqkIAGjiBSo= -resolve@^1.1.6, resolve@^1.14.2, resolve@^1.3.2: +resolve@^1.1.6: + version "1.21.0" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.21.0.tgz#b51adc97f3472e6a5cf4444d34bc9d6b9037591f" + integrity sha512-3wCbTpk5WJlyE4mSOtDLhqQmGFi0/TD9VPwmiolnk8U0wRgMEktqCXd3vy5buTO3tljvalNvKrjHEfrd2WpEKA== + dependencies: + is-core-module "^2.8.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + +resolve@^1.14.2, resolve@^1.3.2: version "1.20.0" resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.20.0.tgz#629a013fb3f70755d6f0b7935cc1c2c5378b1975" integrity sha512-wENBPt4ySzg4ybFQW2TT1zMQucPK95HSh/nq2CFTZVOGut2+pQvSsgtda4d26YrYcr067wjbmzOG8byDPBX63A== @@ -8613,11 +8647,6 @@ select-hose@^2.0.0: resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca" integrity sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo= -select@^1.1.2: - version "1.1.2" - resolved "https://registry.yarnpkg.com/select/-/select-1.1.2.tgz#0e7350acdec80b1108528786ec1d4418d11b396d" - integrity sha1-DnNQrN7ICxEIUoeG7B1EGNEbOW0= - selfsigned@^1.10.8: version "1.10.8" resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-1.10.8.tgz#0d17208b7d12c33f8eac85c41835f27fc3d81a30" @@ -8792,9 +8821,9 @@ shell-quote@1.7.2: integrity sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg== shelljs@^0.8.4: - version "0.8.4" - resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.4.tgz#de7684feeb767f8716b326078a8a00875890e3c2" - integrity sha512-7gk3UZ9kOfPLIAbslLzyWeGiEqx9e3rxwZM0KE6EL8GlGwjym9Mrlx5/p33bWTu9YG6vcS4MBxYZDHYr5lr8BQ== + version "0.8.5" + resolved "https://registry.yarnpkg.com/shelljs/-/shelljs-0.8.5.tgz#de055408d8361bed66c669d2f000538ced8ee20c" + integrity sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow== dependencies: glob "^7.0.0" interpret "^1.0.0" @@ -8921,9 +8950,9 @@ 
source-map-resolve@^0.5.0: urix "^0.1.0" source-map-support@~0.5.12, source-map-support@~0.5.19: - version "0.5.19" - resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.19.tgz#a98b62f86dcaf4f67399648c085291ab9e8fed61" - integrity sha512-Wonm7zOCIJzBGQdB+thsPar0kYuCIzYvxZwlBa87yi/Mdjv7Tip2cyVbLj5o0cFPN4EVkuTwb3GDDyUx2DGnGw== + version "0.5.21" + resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.21.tgz#04fe7c7f9e1ed2d662233c28cb2b35b9f63f6e4f" + integrity sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w== dependencies: buffer-from "^1.0.0" source-map "^0.6.0" @@ -8989,9 +9018,9 @@ sprintf-js@~1.0.2: integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= ssri@^6.0.1: - version "6.0.1" - resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.1.tgz#2a3c41b28dd45b62b63676ecb74001265ae9edd8" - integrity sha512-3Wge10hNcT1Kur4PDFwEieXSCMCJs/7WvSACcrMYrNp+b8kDL1/0wJch5Ni2WrtwEa2IO8OsVfeKIciKCDx/QA== + version "6.0.2" + resolved "https://registry.yarnpkg.com/ssri/-/ssri-6.0.2.tgz#157939134f20464e7301ddba3e90ffa8f7728ac5" + integrity sha512-cepbSq/neFK7xB6A50KHN0xHDotYzq58wWCa5LeWqnPrHG8GzfEjO/4O8kpmcGW+oaxkvhEJCWgbgNk4/ZV93Q== dependencies: figgy-pudding "^3.5.1" @@ -9204,6 +9233,11 @@ supports-color@^7.0.0, supports-color@^7.1.0: dependencies: has-flag "^4.0.0" +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + svg-parser@^2.0.2: version "2.0.4" resolved "https://registry.yarnpkg.com/svg-parser/-/svg-parser-2.0.4.tgz#fdc2e29e13951736140b76cb122c8ee6630eb6b5" @@ -9234,9 +9268,9 @@ tapable@^1.0.0, tapable@^1.1.3: integrity 
sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA== tar@^6.0.2: - version "6.1.0" - resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.0.tgz#d1724e9bcc04b977b18d5c573b333a2207229a83" - integrity sha512-DUCttfhsnLCjwoDoFcI+B2iJgYa93vBnDUATYEeRx6sntCTdN01VnqsIuTlALXla/LWooNg0yEGeB+Y8WdFxGA== + version "6.1.11" + resolved "https://registry.yarnpkg.com/tar/-/tar-6.1.11.tgz#6760a38f003afa1b2ffd0ffe9e9abbd0eab3d621" + integrity sha512-an/KZQzQUkZCkuoAA64hM92X0Urb6VpRhAFllDzz44U2mcD5scmT3zBc4VgVpkugF580+DQn8eAFSyoQt0tznA== dependencies: chownr "^2.0.0" fs-minipass "^2.0.0" @@ -9276,9 +9310,9 @@ terser-webpack-plugin@^4.1.0: webpack-sources "^1.4.3" terser@^4.1.2, terser@^4.6.3: - version "4.8.0" - resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.0.tgz#63056343d7c70bb29f3af665865a46fe03a0df17" - integrity sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw== + version "4.8.1" + resolved "https://registry.yarnpkg.com/terser/-/terser-4.8.1.tgz#a00e5634562de2239fd404c649051bf6fc21144f" + integrity sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw== dependencies: commander "^2.20.0" source-map "~0.6.1" @@ -9323,11 +9357,6 @@ timsort@^0.3.0: resolved "https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4" integrity sha1-QFQRqOfmM5/mTbmiNN4R3DHgK9Q= -tiny-emitter@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.1.0.tgz#1d1a56edfc51c43e863cbb5382a72330e3555423" - integrity sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q== - tiny-invariant@^1.0.2: version "1.1.0" resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.1.0.tgz#634c5f8efdc27714b7f386c35e6760991d230875" @@ -9395,6 +9424,11 @@ totalist@^1.0.0: resolved 
"https://registry.yarnpkg.com/totalist/-/totalist-1.1.0.tgz#a4d65a3e546517701e3e5c37a47a70ac97fe56df" integrity sha512-gduQwd1rOdDMGxFG1gEvhV88Oirdo2p+KjoYFU7k2g+i7n6AFFbDQ5kMPUsW0pNbfQsB/cwXvT1i4Bue0s9g5g== +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + integrity sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o= + trim-trailing-lines@^1.0.0: version "1.1.4" resolved "https://registry.yarnpkg.com/trim-trailing-lines/-/trim-trailing-lines-1.1.4.tgz#bd4abbec7cc880462f10b2c8b5ce1d8d1ec7c2c0" @@ -9719,9 +9753,9 @@ url-parse-lax@^3.0.0: prepend-http "^2.0.0" url-parse@^1.4.3, url-parse@^1.5.1: - version "1.5.1" - resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.1.tgz#d5fa9890af8a5e1f274a2c98376510f6425f6e3b" - integrity sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q== + version "1.5.10" + resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1" + integrity sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ== dependencies: querystringify "^2.1.1" requires-port "^1.0.0" @@ -9899,6 +9933,16 @@ web-namespaces@^1.0.0, web-namespaces@^1.1.2: resolved "https://registry.yarnpkg.com/web-namespaces/-/web-namespaces-1.1.4.tgz#bc98a3de60dadd7faefc403d1076d529f5e030ec" integrity sha512-wYxSGajtmoP4WxfejAPIr4l0fVh+jeMXZb08wNc0tMg6xsfZXj3cECqIK0G7ZAqUq0PP8WlMDtaOGVBTAWztNw== +web-streams-polyfill@^3.0.3: + version "3.2.0" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.0.tgz#a6b74026b38e4885869fb5c589e90b95ccfc7965" + integrity sha512-EqPmREeOzttaLRm5HS7io98goBgZ7IVz79aDvqjD0kYXLtFZTc0T/U6wHTPKyIjb+MdN7DFIIX6hgdBEpWmfPA== + +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + 
integrity sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE= + webpack-bundle-analyzer@^4.4.0: version "4.4.0" resolved "https://registry.yarnpkg.com/webpack-bundle-analyzer/-/webpack-bundle-analyzer-4.4.0.tgz#74013106e7e2b07cbd64f3a5ae847f7e814802c7" @@ -10044,6 +10088,14 @@ websocket-extensions@>=0.1.1: resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.4.tgz#7f8473bc839dfd87608adb95d7eb075211578a42" integrity sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg== +whatwg-url@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-5.0.0.tgz#966454e8765462e37644d3626f6742ce8b70965d" + integrity sha1-lmRU6HZUYuN2RNNib2dCzotwll0= + dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" @@ -10129,9 +10181,9 @@ write-file-atomic@^3.0.0: typedarray-to-buffer "^3.1.5" ws@^6.2.1: - version "6.2.1" - resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.1.tgz#442fdf0a47ed64f59b6a5d8ff130f4748ed524fb" - integrity sha512-GIyAXC2cB7LjvpgMt9EKS2ldqr0MTrORaleiOno6TweZ6r3TKtoFQWay/2PceJ3RuBasOHzXNn5Lrw1X0bEjqA== + version "6.2.2" + resolved "https://registry.yarnpkg.com/ws/-/ws-6.2.2.tgz#dd5cdbd57a9979916097652d78f1cc5faea0c32e" + integrity sha512-zmhltoSR8u1cnDsD43TX59mzoMZsLKqUweyYBAIvTngR3shc0W6aOZylZmq/7hqyVxPdi+5Ud2QInblgyE72fw== dependencies: async-limiter "~1.0.0"