diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..65e1b18 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,43 @@ +## Description + + + +Fixes # (issue) + +**Does this PR introduce a breaking change?** + +## Checklist + +- [ ] PR title follows the `<type>: <description>` convention. +- [ ] I use [conventional commits](https://www.conventionalcommits.org/) in my commit messages. +- [ ] I have updated the [documentation](../docs) accordingly. +- [ ] I Keep It Small and Simple: The smaller the PR is, the easier it is to review and have it merged. +- [ ] I have performed a self-review of my code. +- [ ] I have added tests that prove my fix is effective or that my feature works. +- [ ] New and existing unit tests pass locally with my changes. + +## Additional information for reviewer + +#### Mention if this PR is part of any design or a continuation of previous PRs + + + + diff --git a/.github/workflows/ci-test-py.yml b/.github/workflows/ci-test-py.yml deleted file mode 100644 index 5ab4ed9..0000000 --- a/.github/workflows/ci-test-py.yml +++ /dev/null @@ -1,78 +0,0 @@ -#name: ci-test-py -#on: -# push: -# branches: [main] -# pull_request: -# branches: [main] -# -#jobs: -# py-pip-ai-sentryflow: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - uses: actions/setup-python@v4 -# with: -# python-version: '3.11' -# cache: 'pip' -# -# - name: check Python pip3 requirements -# run: | -# pip install -r requirements.txt -# working-directory: ai-engine -# -# py-ruff-ai-sentryflow: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - uses: actions/setup-python@v4 -# with: -# python-version: '3.11' -# cache: 'pip' -# -# - name: Install dependencies -# run: | -# python -m pip install --upgrade pip -# pip install -r requirements.txt -# working-directory: ai-engine -# -# - name: Create pyproject.toml -# run: | -# echo "[tool.ruff.lint.per-file-ignores]" > pyproject.toml -# echo '"stringlifier/*" = ["E402", "F811", "F401"]' >> pyproject.toml -# working-directory: ai-engine -# -# - name: Lint with Ruff -# run: | -# pip install ruff -# ruff --output-format=github .
-# working-directory: ai-engine -# -# py-lint-ai-sentryflow: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - uses: actions/setup-python@v4 -# with: -# python-version: '3.11' -# cache: 'pip' -# -# - name: Install dependencies -# run: | -# python -m pip install --upgrade pip -# pip install -r requirements.txt -# pip install pylint -# working-directory: ai-engine -# -# - name: Lint with Pylint -# run: | -# pylint classifier.py -# working-directory: ai-engine -# -# py-pep8-ai-sentryflow: -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v3 -# - name: 'Run PEP8' -# uses: quentinguidee/pep8-action@v1 -# with: -# arguments: '--max-line-length=120 --exclude=*stringlifier/*,*protobuf/*' diff --git a/README.md b/README.md index 8e722b1..9d1dcff 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # SentryFlow -[![SentryFlow Docker Build](https://github.com/5gsec/sentryflow/actions/workflows/release.yml/badge.svg)](https://github.com/5gsec/sentryflow/actions/workflows/release.yml) [![CI for SentryFlow](https://github.com/5gsec/sentryflow/actions/workflows/ci-test-go.yml/badge.svg)](https://github.com/5gsec/sentryflow/actions/workflows/ci-test-go.yml) [![CI for AI Engine](https://github.com/5gsec/sentryflow/actions/workflows/ci-test-py.yml/badge.svg)](https://github.com/5gsec/sentryflow/actions/workflows/ci-test-py.yml) - -SentryFlow is a cloud-native system for API observability and security, specializing in log collection, metric production, and data exportation. +SentryFlow is a cloud-native system for API observability, specializing in log collection and data +exportation. ## Architecture Overview @@ -10,17 +9,13 @@ SentryFlow is a cloud-native system for API observability and security, speciali ### Features -- Generation of API Access Logs -- Production of API Metrics -- AI-driven API Classification (Inference) - -## Documentation +- API access observability -### Basic Information +[//]: # (- Production of API Metrics) -- [Getting Started](docs/getting_started.md) -- [Use Cases](examples/README.md) +[//]: # (- AI-driven API Classification (Inference)) -### Contribution +## Documentation -- [Contribution Guide](contribution/README.md) +- [Getting Started](docs/getting_started.md) +- [Contribution Guide](docs/CONTRIBUTING.md) diff --git a/ai-engine/.dockerignore b/ai-engine/.dockerignore deleted file mode 100644 index 9767db2..0000000 --- a/ai-engine/.dockerignore +++ /dev/null @@ -1,6 +0,0 @@ -.git -.gitignore -__pycache__/ -Dockerfile -protobuf/ -.idea/ \ No newline at end of file diff --git a/ai-engine/.gitignore b/ai-engine/.gitignore deleted file mode 100644 index 8ae8ce5..0000000 --- a/ai-engine/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -__pycache__/ -.idea/ diff --git a/ai-engine/.pylintrc b/ai-engine/.pylintrc deleted file mode 100644 index 1ffed1a..0000000 --- a/ai-engine/.pylintrc +++ /dev/null @@ -1,639 +0,0 @@ -[MAIN] - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Clear in-memory caches upon conclusion of linting. Useful if running pylint -# in a server-like mode. -clear-cache-post-run=no - -# Load and enable all available extensions. Use --list-extensions to see a list -# all available extensions.
-#enable-all-extensions= - -# In error mode, messages with a category besides ERROR or FATAL are -# suppressed, and no reports are done by default. Error mode is compatible with -# disabling specific errors. -#errors-only= - -# Always return a 0 (non-error) status code, even if lint errors are found. -# This is primarily useful in continuous integration scripts. -#exit-zero= - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-allow-list= - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. (This is an alternative name to extension-pkg-allow-list -# for backward compatibility.) -extension-pkg-whitelist= - -# Return non-zero exit code if any of these messages/categories are detected, -# even if score is above --fail-under value. Syntax same as enable. Messages -# specified are enabled, while categories only check already-enabled messages. -fail-on= - -# Specify a score threshold under which the program will exit with error. -fail-under=10 - -# Interpret the stdin as a python script, whose filename needs to be passed as -# the module_or_package argument. -#from-stdin= - -# Files or directories to be skipped. They should be base names, not paths. -ignore=CVS - -# Add files or directories matching the regular expressions patterns to the -# ignore-list. The regex matches against paths and can be in Posix or Windows -# format. Because '\\' represents the directory delimiter on Windows systems, -# it can't be used as an escape character. -ignore-paths= - -# Files or directories matching the regular expression patterns are skipped. -# The regex matches against base names, not paths. The default value ignores -# Emacs file locks -ignore-patterns=^\.# - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules=protobuf,stringlifier - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use, and will cap the count on Windows to -# avoid hangs. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Minimum Python version to use for version dependent checks. Will default to -# the version used to run pylint. -py-version=3.10 - -# Discover python modules and packages in the file system subtree. -recursive=no - -# Add paths to the list of the source roots. Supports globbing patterns. The -# source root is an absolute path or a path relative to the current working -# directory used to determine a package namespace for modules located under the -# source root. 
-source-roots= - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - -# In verbose mode, extra non-checker-related info will be displayed. -#verbose= - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. If left empty, argument names will be checked with the set -# naming style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. If left empty, attribute names will be checked with the set naming -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. If left empty, class attribute names will be checked -# with the set naming style. -#class-attribute-rgx= - -# Naming style matching correct class constant names. -class-const-naming-style=UPPER_CASE - -# Regular expression matching correct class constant names. Overrides class- -# const-naming-style. If left empty, class constant names will be checked with -# the set naming style. -#class-const-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. If left empty, class names will be checked with the set naming style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. If left empty, constant names will be checked with the set naming -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. If left empty, function names will be checked with the set -# naming style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - ex, - Run, - _ - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. If left empty, inline iteration names will be checked -# with the set naming style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. 
Overrides method-naming- -# style. If left empty, method names will be checked with the set naming style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. If left empty, module names will be checked with the set naming style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Regular expression matching correct type alias names. If left empty, type -# alias names will be checked with the set naming style. -#typealias-rgx= - -# Regular expression matching correct type variable names. If left empty, type -# variable names will be checked with the set naming style. -#typevar-rgx= - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. If left empty, variable names will be checked with the set -# naming style. -#variable-rgx= - - -[CLASSES] - -# Warn about protected attribute access inside special methods -check-protected-access-in-special-methods=no - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp, - asyncSetUp, - __post_init__ - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=mcs - - -[DESIGN] - -# List of regular expressions of class ancestor names to ignore when counting -# public methods (see R0903) -exclude-too-few-public-methods= - -# List of qualified class names to ignore when counting class parents (see -# R0901) -ignored-parents= - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when caught. -overgeneral-exceptions=builtins.BaseException,builtins.Exception - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. 
-ignore-long-lines=^\s*(# )?<?https?://\S+>?$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. -allow-any-import-level= - -# Allow explicit reexports by alias from a package __init__. -allow-reexport-from-package=no - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules= - -# Output a graph (.gv or any supported image format) of external dependencies -# to the given file (report RP0402 must not be disabled). -ext-import-graph= - -# Output a graph (.gv or any supported image format) of all (i.e. internal and -# external) dependencies to the given file (report RP0402 must not be -# disabled). -import-graph= - -# Output a graph (.gv or any supported image format) of internal dependencies -# to the given file (report RP0402 must not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Couples of modules and preferred modules, separated by a comma. -preferred-modules= - - -[LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, -# UNDEFINED. -confidence=HIGH, - CONTROL_FLOW, - INFERENCE, - INFERENCE_FAILURE, - UNDEFINED - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then re-enable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - use-implicit-booleaness-not-comparison-to-string, - use-implicit-booleaness-not-comparison-to-zero - -# Enable the message, report, category or checker with the given id(s).
You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable= - - -[METHOD_ARGS] - -# List of qualified names (i.e., library.method) which require a timeout -# parameter e.g. 'requests.api.get,requests.api.post' -timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - -# Regular expression of note tags to take in consideration. -notes-rgx= - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit,argparse.parse_error - -# Let 'consider-using-join' be raised when the separator to join on would be -# non-empty (resulting in expected fixes of the type: ``"- " + " - -# ".join(items)``) -suggest-join-with-non-empty-separator=yes - - -[REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'fatal', 'error', 'warning', 'refactor', -# 'convention', and 'info' which contain the number of messages in each -# category, as well as 'statement' which is the total number of statements -# analyzed. This score is used by the global evaluation report (RP0004). -evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -msg-template= - -# Set the output format. Available formats are: text, parseable, colorized, -# json2 (improved json format), json (old json format) and msvs (visual -# studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -#output-format= - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[SIMILARITIES] - -# Comments are removed from the similarity computation -ignore-comments=yes - -# Docstrings are removed from the similarity computation -ignore-docstrings=yes - -# Imports are removed from the similarity computation -ignore-imports=yes - -# Signatures are removed from the similarity computation -ignore-signatures=yes - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. No available dictionaries : You need to install -# both the python package and the system dependency for enchant to work. -spelling-dict= - -# List of comma separated words that should be considered directives if they -# appear at the beginning of a comment and should not be checked. -spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains the private dictionary; one word per line. 
-spelling-private-dict-file= - -# Tells whether to store unknown words to the private dictionary (see the -# --spelling-private-dict-file option) instead of raising a message. -spelling-store-unknown-words=no - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=no - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=no - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of symbolic message names to ignore for Mixin members. -ignored-checks-for-mixins=no-member, - not-async-context-manager, - not-context-manager, - attribute-defined-outside-init - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# Regex pattern to define which classes are considered mixins. -mixin-class-rgx=.*[Mm]ixin - -# List of decorators that change the signature of a decorated function. -signature-mutators= - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of names allowed to shadow builtins -allowed-redefined-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. 
-ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/ai-engine/Dockerfile b/ai-engine/Dockerfile deleted file mode 100644 index a71491f..0000000 --- a/ai-engine/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.10-bookworm - -RUN mkdir -p /ai-engine/protobuf - -RUN pip3 --no-cache-dir install grpcio grpcio-tools - -WORKDIR /ai-engine - -COPY /ai-engine . -COPY /protobuf ./protobuf - -WORKDIR /ai-engine/stringlifier - -RUN pip3 --no-cache-dir install . - -WORKDIR /ai-engine - -RUN pip3 --no-cache-dir install -r requirements.txt -RUN python3 -m grpc_tools.protoc --python_out=. --pyi_out=. --grpc_python_out=. -I=. protobuf/sentryflow_metrics.proto - -CMD ["python3", "/ai-engine/classifier.py"] diff --git a/ai-engine/Makefile b/ai-engine/Makefile deleted file mode 100644 index f054509..0000000 --- a/ai-engine/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -ENGINE_NAME = sentryflow-ai-engine -IMAGE_NAME = 5gsec/$(ENGINE_NAME) -TAG = v0.1 - -.PHONY: build-image -build-image: - docker build -t $(IMAGE_NAME):$(TAG) -f ./Dockerfile ../ - -.PHONY: clean-image -clean-image: - docker rmi $(IMAGE_NAME):$(TAG) - -.PHONY: run-image -run-image: - docker run -it --rm $(IMAGE_NAME):$(TAG) diff --git a/ai-engine/classifier.py b/ai-engine/classifier.py deleted file mode 100644 index 5ea731d..0000000 --- a/ai-engine/classifier.py +++ /dev/null @@ -1,105 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -"""SentryFlow AI Engine for API Classification""" - -from concurrent import futures -from collections import Counter - -import os -import grpc - -from protobuf import sentryflow_metrics_pb2 -from protobuf import sentryflow_metrics_pb2_grpc - -from stringlifier.api import Stringlifier - - -class HandlerServer: - """ - Class for gRPC Servers - """ - def __init__(self): - self.server = None - self.grpc_servers = [] - - try: - self.listen_addr = os.environ["AI_ENGINE"] - except KeyError: - self.listen_addr = "0.0.0.0:5000" - - def init_grpc_servers(self): - """ - init_grpc_servers method that initializes and registers gRPC servers - :return: None - """ - self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) - self.grpc_servers.append(APIClassifierServer()) - - grpc_server: GRPCServer - for grpc_server in self.grpc_servers: - grpc_server.register(self.server) - - def serve(self): - """ - serve method that starts serving the gRPC servers (blocking function) - :return: None - """ - self.server.add_insecure_port(self.listen_addr) - - print(f"[INFO] Starting to serve on {self.listen_addr}") - - self.server.start() - self.server.wait_for_termination() - - -class GRPCServer: - """ - Abstract class for an individual gRPC Server - """ - def register(self, server): - """ - register method that registers gRPC service to target server - :param server: The server - :return: None - """ - - def unregister(self, server): - """ - unregister method that unregisters gRPC service from target server - :param server: The server - :return: None - """ - - -class APIClassifierServer(sentryflow_metrics_pb2_grpc.APIClassifierServicer, GRPCServer): - """ - Class for API Classification Server using Stringlifier - """ - def __init__(self): - self.stringlifier = Stringlifier() - 
print("[Init] Successfully initialized APIClassificationServer") - - def register(self, server): - sentryflow_metrics_pb2_grpc.add_APIClassifierServicer_to_server(self, server) - - def ClassifyAPIs(self, request_iterator, _): # pylint: disable=C0103 - """ - ClassifyAPIs method that runs multiple MLs for API Classification at once - :param request_iterator: The requests - :param context: The context - :return: The results - """ - for req in request_iterator: - all_paths = req.API - ml_results = self.stringlifier(all_paths) - - ml_counts = Counter(ml_results) - print(f"{all_paths} -> {ml_counts}") - - yield sentryflow_metrics_pb2.APIClassifierResponse(APIs=ml_counts) - - -if __name__ == '__main__': - hs = HandlerServer() - hs.init_grpc_servers() - hs.serve() diff --git a/ai-engine/requirements.txt b/ai-engine/requirements.txt deleted file mode 100644 index b46818b..0000000 Binary files a/ai-engine/requirements.txt and /dev/null differ diff --git a/ai-engine/stringlifier/CODE_OF_CONDUCT.md b/ai-engine/stringlifier/CODE_OF_CONDUCT.md deleted file mode 100644 index 5405eda..0000000 --- a/ai-engine/stringlifier/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,74 +0,0 @@ -# Adobe Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, gender identity and expression, level of experience, -nationality, personal appearance, race, religion, or sexual identity and -orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic - address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project e-mail -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at Grp-opensourceoffice@adobe.com. All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/ai-engine/stringlifier/CONTRIBUTING.md b/ai-engine/stringlifier/CONTRIBUTING.md deleted file mode 100644 index 4ef5c84..0000000 --- a/ai-engine/stringlifier/CONTRIBUTING.md +++ /dev/null @@ -1,19 +0,0 @@ -# Contributing - -Thanks for choosing to contribute! - -The following are a set of guidelines to follow when contributing to this project. - -## Code Of Conduct - -This project adheres to the Adobe [code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to Grp-opensourceoffice@adobe.com. - -## Contributor License Agreement - -All third-party contributions to this project must be accompanied by a signed contributor license agreement. This gives Adobe permission to redistribute your contributions as part of the project. [Sign our CLA](http://opensource.adobe.com/cla.html). You only need to submit an Adobe CLA one time, so if you have submitted one previously, you are good to go! - -## Code Reviews - -All submissions should come in the form of pull requests and need to be reviewed by project committers. Read [GitHub's pull request documentation](https://help.github.com/articles/about-pull-requests/) for more information on sending pull requests. - -Lastly, please follow the [pull request template](.github/PULL_REQUEST_TEMPLATE.md) when submitting a pull request! diff --git a/ai-engine/stringlifier/COPYRIGHT b/ai-engine/stringlifier/COPYRIGHT deleted file mode 100644 index daa48aa..0000000 --- a/ai-engine/stringlifier/COPYRIGHT +++ /dev/null @@ -1,16 +0,0 @@ -The following copyright message should appear at the top of all -source files. This file can be removed from your repository. - -Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/ai-engine/stringlifier/LICENSE b/ai-engine/stringlifier/LICENSE deleted file mode 100644 index 8dada3e..0000000 --- a/ai-engine/stringlifier/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/ai-engine/stringlifier/MANIFEST.in b/ai-engine/stringlifier/MANIFEST.in deleted file mode 100644 index 6ba4036..0000000 --- a/ai-engine/stringlifier/MANIFEST.in +++ /dev/null @@ -1,6 +0,0 @@ -include data/string-c.bestType -include data/string-c.conf -include data/string-c.encodings -include data/enhanced-c.bestType -include data/enhanced-c.conf -include data/enhanced-c.encodings \ No newline at end of file diff --git a/ai-engine/stringlifier/README.md b/ai-engine/stringlifier/README.md deleted file mode 100644 index 259537e..0000000 --- a/ai-engine/stringlifier/README.md +++ /dev/null @@ -1,89 +0,0 @@ -[![Downloads](https://pepy.tech/badge/stringlifier)](https://pepy.tech/project/stringlifier) [![Downloads](https://pepy.tech/badge/stringlifier/month)](https://pepy.tech/project/stringlifier/month) ![Weekly](https://img.shields.io/pypi/dw/stringlifier.svg) ![daily](https://img.shields.io/pypi/dd/stringlifier.svg) -![Version](https://badge.fury.io/py/stringlifier.svg) [![Python 3](https://img.shields.io/badge/python-3-blue.svg)](https://www.python.org/downloads/release/python-360/) [![GitHub stars](https://img.shields.io/github/stars/adobe/stringlifier.svg?style=social&label=Star&maxAge=2592000)](https://github.com/adobe/stringlifier/stargazers/) - -# stringlifier -String-classifier - is a python module for detecting random string and hashes text/code. - -Typical usage scenarios include: - -* Sanitizing application or security logs -* Detecting accidentally exposed credentials (complex passwords or api keys) - -# Interactive notebook - -You can see Stringlifier in action by checking out this [interactive notebook hosted on Colaboratory](https://colab.research.google.com/drive/1bgZQSKhVAYU4r46wqb0v8Sfvuo_yMOLA?usp=sharing). - -# Quick start guide - -You can quickly use stringlifier via pip-installation: -```bash -$ pip install stringlifier -``` -In case you are using the pip3 installation that comes with Python3, use pip3 instead of pip in the above command. -```bash -$ pip3 install . 
# in the root directory -``` - -API example: -```python -from stringlifier.api import Stringlifier - -stringlifier=Stringlifier() - -s = stringlifier("com.docker.hyperkit -A -u -F vms/0/hyperkit.pid -c 8 -m 8192M -b 127.0.0.1 --pass=\"NlcXVpYWRvcg\" -s 0:0,hostbridge -s 31,lpc -s 1:0,virtio-vpnkit,path=vpnkit.eth.sock,uuid=45172425-08d1-41ec-9d13-437481803412 -U c6fb5010-a83e-4f74-9a5a-50d9086b9") -``` - -After this, `s` should be: - -```'com.docker.hyperkit -A -u -F vms/0/hyperkit.pid -c 8 -m 8192M -b <IP_ADDR> --pass="<RANDOM_STRING>" -s 0:0,hostbridge -s 31,lpc -s 1:0,virtio-vpnkit,path=vpnkit.eth.sock,uuid=<UUID> -U <UUID>'``` - -You can also choose to see the full tokenization and classification output: - -```python -s, tokens = stringlifier("com.docker.hyperkit -A -u -F vms/0/hyperkit.pid -c 8 -m 8192M -b 127.0.0.1 --pass=\"NlcXVpYWRvcg\" -s 0:0,hostbridge -s 31,lpc -s 1:0,virtio-vpnkit,path=vpnkit.eth.sock,uuid=45172425-08d1-41ec-9d13-437481803412 -U c6fb5010-a83e-4f74-9a5a-50d9086b9", return_tokens=True) -``` - -`s` will be the same as before and `tokens` will contain the following data: -```python -[[('0', 33, 34, '<NUMERIC>'), - ('8', 51, 52, '<NUMERIC>'), - ('8192', 56, 60, '<NUMERIC>'), - ('127.0.0.1', 65, 74, '<IP_ADDR>'), - ('NlcXVpYWRvcg', 83, 95, '<RANDOM_STRING>'), - ('0', 100, 101, '<NUMERIC>'), - ('0', 102, 103, '<NUMERIC>'), - ('31', 118, 120, '<NUMERIC>'), - ('1', 128, 129, '<NUMERIC>'), - ('0', 130, 131, '<NUMERIC>'), - ('45172425-08d1-41ec-9d13-437481803412', 172, 208, '<UUID>'), - ('c6fb5010-a83e-4f74-9a5a-50d9086b9', 212, 244, '<UUID>')]] -``` - - -# Building your own classifier - -You can also train your own model if you want to detect different types of strings. For this you can use the Command Line Interface for the string classifier: - -```bash -$ python3 stringlifier/modules/stringc.py --help - -Usage: stringc.py [options] - -Options: - -h, --help show this help message and exit - --interactive - --train - --resume - --train-file=TRAIN_FILE - --dev-file=DEV_FILE - --store=OUTPUT_BASE - --patience=PATIENCE (default=20) - --batch-size=BATCH_SIZE - (default=32) - --device=DEVICE -``` - -For instructions on how to generate your training data, use [this link](corpus/README.md). - -**Important note:** This model might not scale if detecting a type of string depends on the surrounding tokens. In this case, you can look at a more advanced tool for sequence processing such as [NLP-Cube](https://github.com/adobe/NLP-Cube) diff --git a/ai-engine/stringlifier/corpus/README.md b/ai-engine/stringlifier/corpus/README.md deleted file mode 100644 index 0576f2a..0000000 --- a/ai-engine/stringlifier/corpus/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# Standard training data - -The training data was generated by running `scripts/01-generate-synthetic-training-data.py` and `scripts/02-split-generated-data.py` on a list of common english words, available [here](https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt). - -# Generating your own training data - -If you want to generate your own dataset, you simply need to create a training and a validation file. They follow a simple format: - -```text -<string>\t<type>\t<subtype> -``` - -**Example** - -```text -ngnix STRING PROGRAM -Y29tbWl4dHVyZQ== HASH PASSWORD -b3d2cf2ec3894374b37d1b79edd57ad4 HASH API_KEY -9c795829-75bc-4596-87d3-3508372bbf5f HASH API_KEY -licenser STRING WORD -``` - -**NOTE:** There are no predefined values for `type` and `subtype`.
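[Editor's note] To make the row format above concrete, here is a minimal sketch of how one might emit a few labelled training rows. It assumes the tab-separated `<string>`/`<type>`/`<subtype>` layout shown in the deleted corpus README; the output filename `my-train` and the exact label strings are illustrative assumptions, not values defined by stringlifier.

```python
# Hypothetical sketch: write training rows in the tab-separated
# <string>\t<type>\t<subtype> layout described above. The labels mirror
# the README example; the output filename is an arbitrary illustration.
import base64
import uuid

rows = [
    ("ngnix", "STRING", "PROGRAM"),
    ("licenser", "STRING", "WORD"),
    (uuid.uuid4().hex, "HASH", "API_KEY"),  # random 32-char hex token
    (base64.b64encode(b"commixture").decode("ascii"), "HASH", "PASSWORD"),  # base64 blob
]

with open("my-train", "w", encoding="utf-8") as f:
    for string, type_, subtype in rows:
        f.write(f"{string}\t{type_}\t{subtype}\n")
```

A matching validation file would be produced the same way, mirroring the train/dev split performed by `scripts/02-split-generated-data.py`.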
\ No newline at end of file diff --git a/ai-engine/stringlifier/data/enhanced-c.bestType b/ai-engine/stringlifier/data/enhanced-c.bestType deleted file mode 100644 index ac09942..0000000 Binary files a/ai-engine/stringlifier/data/enhanced-c.bestType and /dev/null differ diff --git a/ai-engine/stringlifier/data/enhanced-c.conf b/ai-engine/stringlifier/data/enhanced-c.conf deleted file mode 100644 index 8c6a2e5..0000000 --- a/ai-engine/stringlifier/data/enhanced-c.conf +++ /dev/null @@ -1 +0,0 @@ -{"char_emb_size": 100, "rnn_layers": 2, "rnn_size": 100, "hidden": 500} \ No newline at end of file diff --git a/ai-engine/stringlifier/data/enhanced-c.encodings b/ai-engine/stringlifier/data/enhanced-c.encodings deleted file mode 100644 index bcc309d..0000000 --- a/ai-engine/stringlifier/data/enhanced-c.encodings +++ /dev/null @@ -1 +0,0 @@ -{"char2int": {"": 0, "": 1, "{": 2, "+": 3, "c": 4, "r": 5, "e": 6, "a": 7, "m": 8, "i": 9, "l": 10, "y": 11, "}": 12, " ": 13, "$": 14, "5": 15, "f": 16, "9": 17, "1": 18, "3": 19, "8": 20, "2": 21, "-": 22, "7": 23, "0": 24, "4": 25, "d": 26, "6": 27, "b": 28, "x": 29, "t": 30, "w": 31, "u": 32, "v": 33, "n": 34, "h": 35, "o": 36, "%": 37, "q": 38, "<": 39, "s": 40, "g": 41, "/": 42, "p": 43, "#": 44, "j": 45, "k": 46, "z": 47, ".": 48, "_": 49, ":": 50, "*": 51, "=": 52, ",": 53, "&": 54, "'": 55, "?": 56, "\"": 57, ">": 58, "!": 59, "(": 60, ")": 61, "\\": 62, "[": 63, "]": 64, "|": 65, "`": 66, "~": 67, ";": 68, "@": 69}, "label2int": {"": 0, "C": 1, "U": 2, "H": 3, "J": 4, "N": 5, "I": 6}} \ No newline at end of file diff --git a/ai-engine/stringlifier/data/enhanced-c.last b/ai-engine/stringlifier/data/enhanced-c.last deleted file mode 100644 index 0a9cfef..0000000 Binary files a/ai-engine/stringlifier/data/enhanced-c.last and /dev/null differ diff --git a/ai-engine/stringlifier/requirements.txt b/ai-engine/stringlifier/requirements.txt deleted file mode 100644 index 9f64b9a..0000000 --- a/ai-engine/stringlifier/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -ipdb==0.13.4 -nptyping==1.3.0 -numpy==1.22.0 -PyJWT==1.7.1 -torch==1.13.1 -tqdm==4.50.2 diff --git a/ai-engine/stringlifier/scripts/01-01-generate-synthetic-training-data.py b/ai-engine/stringlifier/scripts/01-01-generate-synthetic-training-data.py deleted file mode 100644 index d3fc473..0000000 --- a/ai-engine/stringlifier/scripts/01-01-generate-synthetic-training-data.py +++ /dev/null @@ -1,51 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -known_words = [] - - -def generate_words(count, known_words): - import uuid - import datetime - import base64 - generated = [] - for ii in range(count): - if ii % 4 == 0: - generated.append(str(uuid.uuid4())) - elif ii % 4 == 1: - generated.append(str(uuid.uuid4().hex)) - elif ii % 4 == 2: - generated.append(str(datetime.datetime.now().timestamp())) - elif ii % 4 == 3: - message = known_words[ii] - message_bytes = message.encode('ascii') - base64_bytes = base64.b64encode(message_bytes) - base64_message = base64_bytes.decode('ascii') - generated.append(base64_message) - return generated - - -lines = open('corpus/words_alpha.txt').readlines() -for line in lines: - known_words.append(line.strip()) - -generated_words = generate_words(len(known_words), known_words) - -f = open('corpus/generated', 'w') -for ii in range(len(known_words)): - f.write(known_words[ii] + '\tSTRING\n') - f.write(generated_words[ii] + '\tHASH\n') -f.close() diff --git a/ai-engine/stringlifier/scripts/01-02-generate-enhanced-synthetic-training-data.py b/ai-engine/stringlifier/scripts/01-02-generate-enhanced-synthetic-training-data.py deleted file mode 100644 index f51835a..0000000 --- a/ai-engine/stringlifier/scripts/01-02-generate-enhanced-synthetic-training-data.py +++ /dev/null @@ -1,141 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -known_words = [] - - -def generate_words(count, known_words): - import uuid - import datetime - import base64 - generated = [] - for ii in range(count): - if ii % 4 == 0: - generated.append(str(uuid.uuid4())) - elif ii % 4 == 1: - generated.append(str(uuid.uuid4().hex)) - elif ii % 4 == 2: - generated.append(str(datetime.datetime.now().timestamp())) - elif ii % 4 == 3: - message = known_words[ii] - message_bytes = message.encode('ascii') - base64_bytes = base64.b64encode(message_bytes) - base64_message = base64_bytes.decode('ascii') - generated.append(base64_message) - return generated - - -lines = open('corpus/words_alpha.txt').readlines() -for line in lines: - known_words.append(line.strip()) - -generated_words = generate_words(len(known_words), known_words) - -f = open('corpus/generated-enhanced', 'w') - -total_clis = (len(generated_words) + len(known_words)) - -known_index = 0 -gen_index = 0 - -import random - - -def _get_next_known(): - global known_index - s = known_words[known_index] - known_index += 1 - if known_index == len(known_words): - known_index = 0 - return s - - -def _get_next_gen(): - global gen_index - s = generated_words[gen_index] - gen_index += 1 - if gen_index == len(generated_words): - gen_index = 0 - return s - - -import random - - -def _generate_next_cmd(): - delimiters = ' /.,?!~|<>-=_~:;\\+-&*%$#@!' 
- enclosers = '[]{}``""\'\'()' - mask = '' - cmd = '' - num_words = random.randint(3, 15) - use_space = False - use_delimiter = False - use_encloser = False - append_number = False - for ii in range(num_words): - - use_delimiter = random.random() > 0.5 - use_encloser = random.random() > 0.8 - use_gen_word = random.random() > 0.7 - case_style = random.randint(0, 2) - use_gen_word = random.random() > 0.7 - - del_index = random.randint(0, len(delimiters) - 1) - enc_index = random.randint(0, len(enclosers) // 2 - 1) * 2 - if use_space: - mask += 'C' - cmd += ' ' - if use_gen_word: - wrd = _get_next_gen() - if case_style == 1: - wrd = wrd[0].upper() + wrd[1:] - elif case_style == 2: - wrd = wrd.upper() - msk = '' - for _ in range(len(wrd)): - msk += 'H' - else: - wrd = _get_next_known() - append_number = random.random() > 0.97 - if append_number: - wrd = wrd + str(random.randint(0, 9999)) - if case_style == 1: - wrd = wrd[0].upper() + wrd[1:] - elif case_style == 2: - wrd = wrd.upper() - msk = '' - for _ in range(len(wrd)): - msk += 'C' - - if use_delimiter: - wrd = delimiters[del_index] + wrd - msk = 'C' + msk - if use_encloser: - wrd = enclosers[enc_index] + wrd + enclosers[enc_index + 1] - msk = 'C' + msk + 'C' - - cmd += wrd - mask += msk - use_space = random.random() > 0.7 - - return cmd, mask - - -for ii in range(total_clis): - command, mask = _generate_next_cmd() - f.write(command + '\n' + mask + '\n') - -f.close() diff --git a/ai-engine/stringlifier/scripts/02-02-split-generated-enhanced-data.py b/ai-engine/stringlifier/scripts/02-02-split-generated-enhanced-data.py deleted file mode 100644 index 1c6df76..0000000 --- a/ai-engine/stringlifier/scripts/02-02-split-generated-enhanced-data.py +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -lines = open('corpus/generated-enhanced').readlines() -f_train = open('corpus/enhanced-train', 'w') -f_dev = open('corpus/enhanced-dev', 'w') - -for ii in range(len(lines) // 2): - word = lines[ii * 2] - mask = lines[ii * 2 + 1] - f = f_train - if ii % 10 == 5: - f = f_dev - f.write(word + mask) - -f_train.close() -f_dev.close() diff --git a/ai-engine/stringlifier/scripts/02-split-generated-data.py b/ai-engine/stringlifier/scripts/02-split-generated-data.py deleted file mode 100644 index f91a6b4..0000000 --- a/ai-engine/stringlifier/scripts/02-split-generated-data.py +++ /dev/null @@ -1,78 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - - -string_list = [] -hash_list = [] - -lines = open('corpus/generated').readlines() - -for line in lines: - parts = line.strip().split('\t') - if parts[1] == 'STRING': - string_list.append(parts[0]) - else: - hash_list.append(parts[0]) - -train_data = [ - ('usr', 'STRING'), - ('var', 'STRING'), - ('lib', 'STRING'), - ('etc', 'STRING'), - ('tmp', 'STRING'), - ('dev', 'STRING'), - ('libexec', 'STRING'), - ('lib32', 'STRING'), - ('lib64', 'STRING'), - ('bin', 'STRING') -] -dev_data = [] - - -def add_data(train, dev, list, label): - for ii in range(len(list)): - if ii % 10 == 0: - dev.append((list[ii], label)) - else: - train.append((list[ii], label)) - - -add_data(train_data, dev_data, string_list, "STRING") -add_data(train_data, dev_data, hash_list, "HASH") - -import random - -random.shuffle(train_data) -random.shuffle(dev_data) - -f_train = open('corpus/string-train', 'w') -f_dev = open('corpus/string-dev', 'w') - -for ii in range(len(train_data)): - if train_data[ii][1] == 'HASH': - stype = 'HASH' - else: - stype = 'WORD' - f_train.write(train_data[ii][0] + '\t' + train_data[ii][1] + '\t' + stype + '\n') -for ii in range(len(dev_data)): - if dev_data[ii][1] == 'HASH': - stype = 'HASH' - else: - stype = 'WORD' - f_dev.write(dev_data[ii][0] + '\t' + dev_data[ii][1] + '\t' + stype + '\n') - -f_train.close() -f_dev.close() diff --git a/ai-engine/stringlifier/setup.py b/ai-engine/stringlifier/setup.py deleted file mode 100644 index f9bbdc5..0000000 --- a/ai-engine/stringlifier/setup.py +++ /dev/null @@ -1,38 +0,0 @@ -import setuptools - - -def parse_requirements(filename, session=None): - """ load requirements from a pip requirements file """ - lineiter = (line.strip() for line in open(filename)) - return [line for line in lineiter if line and not line.startswith("#")] - - -with open("README.md", "r") as fh: - long_description = fh.read() - -setuptools.setup( - name="stringlifier", - version="0.2", - author="Multiple authors", - description="Python module for detecting password, api keys hashes and any other string that resembles a " - "randomly generated character sequence. Originated from https://github.com/adobe/stringlifier, " - "this package updated dependencies for up to date python versions", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/adobe/stringlifier", - packages=setuptools.find_packages(), - install_requires=parse_requirements('requirements.txt', session=False), - classifiers=( - "Programming Language :: Python :: 3.0", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ), - include_package_data=True, - package_data={ - '': ['data/string-c.encodings', 'data/string-c.conf', 'data/string-c.bestType', 'data/enhanced-c.encodings', - 'data/enhanced-c.conf', 'data/enhanced-c.bestType'] - - }, - # data_files=['data/string-c.encodings', 'data/string-c.conf', 'data/string-c.bestType'], - zip_safe=False -) diff --git a/ai-engine/stringlifier/stringlifier/__init__.py b/ai-engine/stringlifier/stringlifier/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ai-engine/stringlifier/stringlifier/api.py b/ai-engine/stringlifier/stringlifier/api.py deleted file mode 100644 index 0d93b09..0000000 --- a/ai-engine/stringlifier/stringlifier/api.py +++ /dev/null @@ -1,194 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from nptyping import NDArray, Int64 -from stringlifier.modules.stringc import AwDoC, AwDoCConfig, Encodings -from stringlifier.modules.stringc2 import CTagger, CTaggerConfig -from stringlifier.modules.stringc2 import Encodings as CEncodings -import torch -from typing import List, Optional, Tuple, Union -import pkg_resources - - -class Stringlifier: - def __init__(self, model_base: Optional[str] = None): - encodings = CEncodings() - if model_base is None: - enc_file = pkg_resources.resource_filename(__name__, 'data/enhanced-c.encodings') - conf_file = pkg_resources.resource_filename(__name__, 'data/enhanced-c.conf') - model_file = pkg_resources.resource_filename(__name__, 'data/enhanced-c.bestType') - else: - enc_file = '{0}.encodings'.format(model_base) - conf_file = '{0}.conf'.format(model_base) - model_file = '{0}.bestType'.format(model_base) - encodings.load(enc_file) - config = CTaggerConfig() - config.load(conf_file) - self.classifier = CTagger(config, encodings) - self.classifier.load(model_file) - self.classifier.eval() - self.encodings = encodings - self._c_index: int = encodings._label2int['C'] - - def __call__(self, string_or_list: Union[str, List[str]], return_tokens: bool = False, cutoff: int = 5) -> Union[ - Tuple[List[str], List[List[Tuple[str, int, int, str]]]], List[str]]: - if isinstance(string_or_list, str): - tokens = [string_or_list] - else: - tokens = string_or_list - - max_len = max([len(s) for s in tokens]) - if max_len == 0: - if return_tokens: - return [''], [] - else: - return [''] - - with torch.no_grad(): - p_ts = self.classifier(tokens) - - p_ts = torch.argmax(p_ts, dim=-1).detach().cpu().numpy() - ext_tokens: List[List[Tuple[str, int, int, str]]] = [] - new_strings: List[str] = [] - - for iBatch in range(p_ts.shape[0]): - new_str, toks = self._extract_tokens(tokens[iBatch], p_ts[iBatch], cutoff=cutoff) - new_strings.append(new_str) - ext_tokens.append(toks) - - if return_tokens: - return new_strings, ext_tokens - else: - return new_strings - - def _extract_tokens_2class(self, string: str, pred: NDArray[Int64]) -> Tuple[str, List[Tuple[str, int, int]]]: - CUTOFF = 5 - mask = '' - for p in pred: - mask += self.encodings._label_list[p] - start = 0 - tokens: List[Tuple[str, int, int]] = [] - c_tok = '' - for ii in range(len(string)): - if mask[ii] == 'C': - # check if we have a token - - if c_tok != '': - stop = ii - tokens.append((c_tok, start, stop)) - c_tok = '' - else: - if c_tok == '': - start = ii - c_tok += string[ii] - if c_tok != '': - stop = len(string) - tokens.append((c_tok, start, stop)) - - # filter small tokens - final_toks: List[Tuple[str, int, int]] = [] - for token in tokens: - if token[2] - token[1] > CUTOFF: - final_toks.append(token) - # compose new string - new_str: str = '' - last_pos = 0 - for token in final_toks: - if token[1] > last_pos: - new_str += string[last_pos:token[1]] - new_str += token[0] - last_pos = token[2] + 1 - if last_pos < len(string): - new_str += string[last_pos:] - 
return new_str, final_toks - - def _extract_tokens(self, string: str, pred: NDArray[Int64], cutoff: int = 5) -> Tuple[ - str, List[Tuple[str, int, int, str]]]: - mask = '' - numbers = {str(ii): 1 for ii in range(10)} - - for ii in range(len(pred)): - p = pred[ii] - cls = self.encodings._label_list[p] - if ii < len(string) and cls == 'C' and string[ii] in numbers: - mask += 'N' - else: - mask += cls - start = 0 - tokens = [] - c_tok = '' - last_label = mask[0] - type_: Optional[str] = None - for ii in range(len(string)): - # check if the label-type has changed - if last_label != mask[ii]: - if c_tok != '': - if last_label == 'C': - pass - elif last_label == 'H': - type_ = '<RANDOM_STRING>' - elif last_label == 'N': - type_ = '<NUMERIC>' - elif last_label == 'I': - type_ = '<IP_ADDR>' - elif last_label == 'U': - type_ = '<UUID>' - elif last_label == 'J': - type_ = '<JWT>' - - if last_label != 'C' and type_ is not None: - tokens.append((c_tok, start, ii, type_)) - c_tok = '' - start = ii - - last_label = mask[ii] - c_tok += string[ii] - - if c_tok != '': - if last_label == 'C': - pass - elif last_label == 'H': - type_ = '<RANDOM_STRING>' - elif last_label == 'N': - type_ = '<NUMERIC>' - elif last_label == 'I': - type_ = '<IP_ADDR>' - elif last_label == 'U': - type_ = '<UUID>' - elif last_label == 'J': - type_ = '<JWT>' - if last_label != 'C' and type_ is not None: - tokens.append((c_tok, start, ii, type_)) - - # filter small tokens - final_toks: List[Tuple[str, int, int, str]] = [] - for token in tokens: - if token[2] - token[1] > cutoff: - final_toks.append(token) - # compose new string - new_str: str = '' - last_pos = 0 - - # from ipdb import set_trace - # set_trace() - for token in final_toks: - if token[1] > last_pos: - new_str += string[last_pos:token[1]] - new_str += token[3] - last_pos = token[2] - if last_pos < len(string) - 1: - new_str += string[last_pos:] - return new_str, final_toks diff --git a/ai-engine/stringlifier/stringlifier/data/enhanced-c.bestType b/ai-engine/stringlifier/stringlifier/data/enhanced-c.bestType deleted file mode 100644 index ac09942..0000000 Binary files a/ai-engine/stringlifier/stringlifier/data/enhanced-c.bestType and /dev/null differ diff --git a/ai-engine/stringlifier/stringlifier/data/enhanced-c.conf b/ai-engine/stringlifier/stringlifier/data/enhanced-c.conf deleted file mode 100644 index 8c6a2e5..0000000 --- a/ai-engine/stringlifier/stringlifier/data/enhanced-c.conf +++ /dev/null @@ -1 +0,0 @@ -{"char_emb_size": 100, "rnn_layers": 2, "rnn_size": 100, "hidden": 500} \ No newline at end of file diff --git a/ai-engine/stringlifier/stringlifier/data/enhanced-c.encodings b/ai-engine/stringlifier/stringlifier/data/enhanced-c.encodings deleted file mode 100644 index bcc309d..0000000 --- a/ai-engine/stringlifier/stringlifier/data/enhanced-c.encodings +++ /dev/null @@ -1 +0,0 @@ -{"char2int": {"": 0, "": 1, "{": 2, "+": 3, "c": 4, "r": 5, "e": 6, "a": 7, "m": 8, "i": 9, "l": 10, "y": 11, "}": 12, " ": 13, "$": 14, "5": 15, "f": 16, "9": 17, "1": 18, "3": 19, "8": 20, "2": 21, "-": 22, "7": 23, "0": 24, "4": 25, "d": 26, "6": 27, "b": 28, "x": 29, "t": 30, "w": 31, "u": 32, "v": 33, "n": 34, "h": 35, "o": 36, "%": 37, "q": 38, "<": 39, "s": 40, "g": 41, "/": 42, "p": 43, "#": 44, "j": 45, "k": 46, "z": 47, ".": 48, "_": 49, ":": 50, "*": 51, "=": 52, ",": 53, "&": 54, "'": 55, "?": 56, "\"": 57, ">": 58, "!": 59, "(": 60, ")": 61, "\\": 62, "[": 63, "]": 64, "|": 65, "`": 66, "~": 67, ";": 68, "@": 69}, "label2int": {"": 0, "C": 1, "U": 2, "H": 3, "J": 4, "N": 5, "I": 6}} \ No newline at end of file diff --git
a/ai-engine/stringlifier/stringlifier/data/enhanced-c.last b/ai-engine/stringlifier/stringlifier/data/enhanced-c.last deleted file mode 100644 index 0a9cfef..0000000 Binary files a/ai-engine/stringlifier/stringlifier/data/enhanced-c.last and /dev/null differ diff --git a/ai-engine/stringlifier/stringlifier/modules/__init__.py b/ai-engine/stringlifier/stringlifier/modules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ai-engine/stringlifier/stringlifier/modules/stringc.py b/ai-engine/stringlifier/stringlifier/modules/stringc.py deleted file mode 100644 index 7270cc1..0000000 --- a/ai-engine/stringlifier/stringlifier/modules/stringc.py +++ /dev/null @@ -1,394 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import torch -import torch.nn as nn -import optparse -import sys -import json -import numpy as np -import random -import tqdm - -sys.path.append('') - - -class Encodings: - def __init__(self, filename=None): - self._char2int = {'': 0, '': 1} - self._type2int = {} - self._subtype2int = {'': 0} # this will not get backpropagated - self._type_list = [] - self._subtype_list = [] - if filename is not None: - self.load(filename) - - def save(self, filename): - json.dump({'char2int': self._char2int, 'type2int': self._type2int, 'subtype2int': self._subtype2int}, - open(filename, 'w')) - - def load(self, file): - if isinstance(file, str): - stream = open(file, 'r') - else: - stream = file - obj = json.load(stream) - self._char2int = obj['char2int'] - self._type2int = obj['type2int'] - self._subtype2int = obj['subtype2int'] - self._type_list = [None for _ in range(len(self._type2int))] - self._subtype_list = [None for _ in range(len(self._subtype2int))] - for t in self._type2int: - self._type_list[self._type2int[t]] = t - - for t in self._subtype2int: - self._subtype_list[self._subtype2int[t]] = t - - def update_encodings(self, dataset, cutoff=2): - char2count = {} - for entry in dataset: - domain = entry[0] - ttype = entry[1] - tsubtype = entry[2] - for char in domain: - char = char.lower() - if char in char2count: - char2count[char] += 1 - else: - char2count[char] = 1 - if ttype not in self._type2int: - self._type2int[ttype] = len(self._type2int) - self._type_list.append(ttype) - if tsubtype not in self._subtype2int: - self._subtype2int[tsubtype] = len(self._subtype2int) - self._subtype_list.append(tsubtype) - - for char in char2count: - if char not in self._char2int: - self._char2int[char] = len(self._char2int) - - -class AwDoCConfig: - def __init__(self): - self.char_emb_size = 100 - self.rnn_layers = 2 - self.rnn_size = 100 - self.hidden = 500 - - def save(self, filename): - json.dump({'char_emb_size': self.char_emb_size, 'rnn_layers': self.rnn_layers, 'rnn_size': self.rnn_size, - 'hidden': self.hidden}, - open(filename, 'w')) - - def load(self, file): - if isinstance(file, str): - stream = open(file, 'r') - else: - stream = file - obj = json.load(stream) - self.char_emb_size = 
obj['char_emb_size'] - self.rnn_size = obj['rnn_size'] - self.rnn_layers = obj['rnn_layers'] - self.hidden = obj['hidden'] - - -class AwDoC(nn.Module): - def __init__(self, config, encodings): - super(AwDoC, self).__init__() - self._config = config - self._encodings = encodings - self._char_emb = nn.Embedding(len(encodings._char2int), config.char_emb_size) - - self._rnn = nn.LSTM(config.char_emb_size, config.rnn_size, config.rnn_layers, batch_first=True) - self._hidden = nn.Sequential(nn.Linear(config.rnn_size, config.hidden), nn.Tanh(), nn.Dropout(0.5)) - self._softmax_type = nn.Linear(config.hidden, len(encodings._type2int)) - self._softmax_subtype = nn.Linear(config.hidden, len(encodings._subtype2int)) - - def _make_input(self, domain_list): - # we pad domain names and feed them in reversed character order to the LSTM - max_seq_len = max([len(domain) for domain in domain_list]) - - x = np.zeros((len(domain_list), max_seq_len)) - for iBatch in range(x.shape[0]): - domain = domain_list[iBatch] - n = len(domain) - ofs_x = max_seq_len - n - for iSeq in range(x.shape[1]): - if iSeq < n: - char = domain[-iSeq - 1].lower() - if char in self._encodings._char2int: - iChar = self._encodings._char2int[char] - else: - iChar = self._encodings._char2int[''] - x[iBatch, iSeq + ofs_x] = iChar - return x - - def forward(self, domain_list): - - x = torch.tensor(self._make_input(domain_list), dtype=torch.long, device=self._get_device()) - hidden = self._char_emb(x) - hidden = torch.dropout(hidden, 0.5, self.training) - output, _ = self._rnn(hidden) - output = output[:, -1, :] - - hidden = self._hidden(output) - - return self._softmax_type(hidden), self._softmax_subtype(hidden) - - def save(self, path): - torch.save(self.state_dict(), path) - - def load(self, path): - self.load_state_dict(torch.load(path, map_location='cpu')) - - def _get_device(self): - if self._char_emb.weight.device.type == 'cpu': - return 'cpu' - return '{0}:{1}'.format(self._char_emb.weight.device.type, str(self._char_emb.weight.device.index)) - - -def _load_dataset(filename): - lines = open(filename, encoding='utf-8').readlines() - dataset = [] - for line in lines: - line = line.strip() - if line != '': - parts = line.split('\t') - if len(parts) == 3: - dataset.append(parts) - return dataset - - -def _eval(model, dataset, encodings): - model.eval() - test_x, test_y = _make_batches(dataset, batch_size=128) - total_t = 0 - total_st = 0 - ok_t = 0 - ok_st = 0 - with torch.no_grad(): - pgb = tqdm.tqdm(zip(test_x, test_y), total=len(test_x), ncols=80, desc='\t\t\t\t') - for x, y in pgb: - y_pred_t, y_pred_st = model(x) - y_tar_t, y_tar_st = _get_targets(y, encodings) - y_pred_t = torch.argmax(y_pred_t, dim=1).detach().cpu().numpy() - y_pred_st = torch.argmax(y_pred_st, dim=1).detach().cpu().numpy() - for y_t_t, y_t_st, y_p_t, y_p_st in zip(y_tar_t, y_tar_st, y_pred_t, y_pred_st): - total_t += 1 - if y_t_st != 0: - total_st += 1 - if y_t_st == y_p_st: - ok_st += 1 - if y_t_t == y_p_t: - ok_t += 1 - - return ok_t / total_t, ok_st / total_st - - -def _make_batches(dataset, batch_size=32): - batches_x = [] - batches_y = [] - - batch_x = [] - batch_y = [] - - for entry in dataset: - domain = entry[0] - t = entry[1] - st = entry[2] - batch_x.append(domain) - batch_y.append((t, st)) - if len(batch_x) == batch_size: - batches_x.append(batch_x) - batches_y.append(batch_y) - batch_x = [] - batch_y = [] - - if len(batch_x) != 0: - batches_x.append(batch_x) - batches_y.append(batch_y) - - return batches_x, batches_y - - -def _get_targets(y, 
encodings): - y_t = np.zeros((len(y))) - y_st = np.zeros((len(y))) - for i in range(len(y)): - y_t[i] = encodings._type2int[y[i][0]] - y_st[i] = encodings._subtype2int[y[i][1]] - - return y_t, y_st - - - def _drop_tld(domain_list, p): - new_list = [] - for domain in domain_list: - parts = domain.split('.') - dp = random.random() - if dp < p: - if dp < p / 2: - parts[-1] = ' ' - else: - parts[-1] = ' ' - dom = '.'.join(parts) - new_list.append(dom) - return new_list - - - def _start_train(params): - trainset = _load_dataset(params.train_file) - devset = _load_dataset(params.dev_file) - if params.resume: - encodings = Encodings('{0}.encodings'.format(params.output_base)) - else: - encodings = Encodings() - encodings.update_encodings(trainset) - print('chars={0}, types={1}, subtypes={2}'.format(len(encodings._char2int), len(encodings._type2int), - len(encodings._subtype2int))) - - config = AwDoCConfig() - if params.resume: - config.load('{0}.conf'.format(params.output_base)) - model = AwDoC(config, encodings) - model.to(params.device) - if params.resume: - model.load('{0}.last'.format(params.output_base)) - optimizer = torch.optim.Adam(model.parameters()) - criterion_t = torch.nn.CrossEntropyLoss() - criterion_st = torch.nn.CrossEntropyLoss(ignore_index=0) # we ignore unknown types - - patience_left = params.patience - best_type, best_subtype = _eval(model, devset, encodings) - encodings.save('{0}.encodings'.format(params.output_base)) - config.save('{0}.conf'.format(params.output_base)) - model.save('{0}.last'.format(params.output_base)) - print("Devset evaluation type_acc={0} subtype_acc={1}".format(best_type, best_subtype)) - epoch = 0 - eval_at = 5000 - while patience_left > 0: - epoch += 1 - random.shuffle(trainset) - train_x, train_y = _make_batches(trainset, batch_size=params.batch_size) - sys.stdout.write('Starting epoch {0}\n'.format(epoch)) - - pgb = tqdm.tqdm(zip(train_x, train_y), total=len(train_x), ncols=80, desc='\tloss=N/A') - model.train() - total_loss = 0 - cnt = 0 - for x, y in pgb: - cnt += 1 - if cnt % eval_at == 0: - patience_left -= 1 - sys.stderr.flush() - sys.stderr.flush() - sys.stderr.write('\n\tEvaluating...') - sys.stderr.flush() - acc_t, acc_st = _eval(model, devset, encodings) - sys.stderr.write(' type_acc={0}, subtype_acc={1}\n'.format(acc_t, acc_st)) - sys.stderr.flush() - filename = '{0}.last'.format(params.output_base) - sys.stderr.write('\t\tStoring {0}\n'.format(filename)) - sys.stderr.flush() - model.save(filename) - if acc_t > best_type: - patience_left = params.patience - best_type = acc_t - filename = '{0}.bestType'.format(params.output_base) - sys.stderr.write('\t\tStoring {0}\n'.format(filename)) - sys.stderr.flush() - model.save(filename) - if acc_st > best_subtype: - patience_left = params.patience - best_subtype = acc_st - filename = '{0}.bestSubtype'.format(params.output_base) - sys.stderr.write('\t\tStoring {0}\n'.format(filename)) - sys.stderr.flush() - model.save(filename) - sys.stderr.write('\n') - sys.stderr.flush() - model.train() - if patience_left <= 0: - print("Stopping with maximum patience reached") - sys.exit(0) - - x = _drop_tld(x, 0.5) - y_pred_t, y_pred_st = model(x) - - y_tar_t, y_tar_st = _get_targets(y, encodings) - y_tar_t = torch.tensor(y_tar_t, dtype=torch.long, device=params.device) - y_tar_st = torch.tensor(y_tar_st, dtype=torch.long, device=params.device) - - loss = criterion_t(y_pred_t, y_tar_t) + \ - criterion_st(y_pred_st, y_tar_st) - - optimizer.zero_grad() - total_loss += loss.item() -
pgb.set_description('\tloss={0:.4f}'.format(total_loss / cnt)) - loss.backward() - optimizer.step() - - sys.stdout.write('AVG train loss={0}\n'.format(total_loss / len(train_x))) - - -def _start_interactive(params): - encodings = Encodings('{0}.encodings'.format(params.output_base)) - config = AwDoCConfig() - config.load('{0}.conf'.format(params.output_base)) - model = AwDoC(config, encodings) - model.load('{0}.bestType'.format(params.output_base)) - model.to(params.device) - model.eval() - sys.stdout.write('>>> ') - sys.stdout.flush() - domain = input() - while domain != '/exit': - p_t, p_st = model([domain]) - print(p_t) - print(p_st) - p_d_t = torch.argmax(p_t, dim=1).detach().cpu().item() - p_d_st = torch.argmax(p_st, dim=1).detach().cpu().item() - print("Results for '{0}'".format(domain)) - print(encodings._type_list[p_d_t]) - - print(encodings._subtype_list[p_d_st]) - - print("") - sys.stdout.write('>>> ') - sys.stdout.flush() - domain = input() - - -if __name__ == '__main__': - parser = optparse.OptionParser() - parser.add_option('--interactive', action='store_true', dest='interactive') - parser.add_option('--train', action='store_true', dest='train') - parser.add_option('--resume', action='store_true', dest='resume') - parser.add_option('--train-file', action='store', dest='train_file') - parser.add_option('--dev-file', action='store', dest='dev_file') - parser.add_option('--store', action='store', dest='output_base') - parser.add_option('--patience', action='store', dest='patience', type='int', default=20, help='(default=20)') - parser.add_option('--batch-size', action='store', dest='batch_size', default=32, type='int', help='(default=32)') - parser.add_option('--device', action='store', dest='device', default='cpu') - - (params, _) = parser.parse_args(sys.argv) - - if params.train: - _start_train(params) - elif params.interactive: - _start_interactive(params) - else: - parser.print_help() diff --git a/ai-engine/stringlifier/stringlifier/modules/stringc2.py b/ai-engine/stringlifier/stringlifier/modules/stringc2.py deleted file mode 100644 index 09250aa..0000000 --- a/ai-engine/stringlifier/stringlifier/modules/stringc2.py +++ /dev/null @@ -1,383 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
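A detail of the AwDoC model that just ended above and is easy to miss: _make_input left-pads every string and writes its characters in reverse, so the final LSTM timestep (the only one the forward pass reads, via output[:, -1, :]) always lands on a real character rather than padding. A standalone restatement of that trick, with an illustrative char2int dict standing in for the model's encodings:

import numpy as np

def make_reversed_padded_batch(strings, char2int, unk_id=1):
    # Rows are left-padded with 0 (the padding id). Characters go in reversed
    # order, so column max_len - 1 always holds the first character of the
    # string and the last timestep is never padding.
    max_len = max(len(s) for s in strings)
    x = np.zeros((len(strings), max_len), dtype=np.int64)
    for row, s in enumerate(strings):
        offset = max_len - len(s)
        for i in range(len(s)):
            x[row, offset + i] = char2int.get(s[-i - 1].lower(), unk_id)
    return x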
-# - -import torch -import torch.nn as nn -import optparse -import sys -import json -import numpy as np -import random -import tqdm - - -class Encodings: - def __init__(self, filename=None): - self._char2int = {'': 0, '': 1} - self._label2int = {'': 0} - self._label_list = [''] - if filename is not None: - self.load(filename) - - def save(self, filename): - json.dump({'char2int': self._char2int, 'label2int': self._label2int}, - open(filename, 'w')) - - def load(self, file): - if isinstance(file, str): - stream = open(file, 'r') - else: - stream = file - obj = json.load(stream) - self._char2int = obj['char2int'] - self._label2int = obj['label2int'] - self._label_list = [None for _ in range(len(self._label2int))] - for t in self._label2int: - self._label_list[self._label2int[t]] = t - - def update_encodings(self, dataset, cutoff=2): - char2count = {} - for entry in tqdm.tqdm(dataset): - text = entry[0] - label = entry[1] - for char in text: - char = char.lower() - if char in char2count: - char2count[char] += 1 - else: - char2count[char] = 1 - for ttype in label: - if ttype not in self._label2int: - self._label2int[ttype] = len(self._label2int) - self._label_list.append(ttype) - - for char in char2count: - if char not in self._char2int and char2count[char] > cutoff: - self._char2int[char] = len(self._char2int) - - -class CTaggerConfig: - def __init__(self): - self.char_emb_size = 100 - self.rnn_layers = 2 - self.rnn_size = 100 - self.hidden = 500 - - def save(self, filename): - json.dump({'char_emb_size': self.char_emb_size, 'rnn_layers': self.rnn_layers, 'rnn_size': self.rnn_size, - 'hidden': self.hidden}, - open(filename, 'w')) - - def load(self, file): - if isinstance(file, str): - stream = open(file, 'r') - else: - stream = file - obj = json.load(stream) - self.char_emb_size = obj['char_emb_size'] - self.rnn_size = obj['rnn_size'] - self.rnn_layers = obj['rnn_layers'] - self.hidden = obj['hidden'] - - -class CTagger(nn.Module): - def __init__(self, config, encodings): - super(CTagger, self).__init__() - self._config = config - self._encodings = encodings - self._char_emb = nn.Embedding(len(encodings._char2int), config.char_emb_size, padding_idx=0) - self._case_emb = nn.Embedding(4, 16, padding_idx=0) - - self._rnn = nn.LSTM(config.char_emb_size + 16, config.rnn_size, config.rnn_layers, batch_first=True, - bidirectional=True) - self._hidden = nn.Sequential(nn.Linear(config.rnn_size * 2, config.hidden), nn.Tanh(), nn.Dropout(0.5)) - self._softmax_type = nn.Linear(config.hidden, len(encodings._label2int)) - - def _make_input(self, word_list): - # we pad domain names and feed them in reversed character order to the LSTM - max_seq_len = max([len(word) for word in word_list]) - - x_char = np.zeros((len(word_list), max_seq_len)) - x_case = np.zeros((len(word_list), max_seq_len)) - for iBatch in range(x_char.shape[0]): - word = word_list[iBatch] - for index in range(len(word)): - char = word[index] - case_idx = 0 - if char.lower() == char.upper(): - case_idx = 1 # symbol - elif char.lower() != char: - case_idx = 2 # uppercase - else: - case_idx = 3 # lowercase - char = char.lower() - if char in self._encodings._char2int: - char_idx = self._encodings._char2int[char] - else: - char_idx = 1 # UNK - x_char[iBatch, index] = char_idx - x_case[iBatch, index] = case_idx - - return x_char, x_case - - def forward(self, string_list): - x_char, x_case = self._make_input(string_list) - x_char = torch.tensor(x_char, dtype=torch.long, device=self._get_device()) - x_case = torch.tensor(x_case, dtype=torch.long, 
device=self._get_device()) - hidden = torch.cat([self._char_emb(x_char), self._case_emb(x_case)], dim=-1) - hidden = torch.dropout(hidden, 0.5, self.training) - output, _ = self._rnn(hidden) - - hidden = self._hidden(output) - - return self._softmax_type(hidden) - - def save(self, path): - torch.save(self.state_dict(), path) - - def load(self, path): - self.load_state_dict(torch.load(path, map_location='cpu')) - - def _get_device(self): - if self._char_emb.weight.device.type == 'cpu': - return 'cpu' - return '{0}:{1}'.format(self._char_emb.weight.device.type, str(self._char_emb.weight.device.index)) - - - def _load_dataset(filename): - lines = open(filename, encoding='utf-8').readlines() - dataset = [] - for ii in range(len(lines) // 2): - string = lines[ii * 2][:-1] - mask = lines[ii * 2 + 1][:-1] - dataset.append((string, mask)) - return dataset - - - def _eval(model, dataset, encodings): - model.eval() - test_x, test_y = _make_batches(dataset, batch_size=128) - total_t = 0 - ok_t = 0 - with torch.no_grad(): - pgb = tqdm.tqdm(zip(test_x, test_y), total=len(test_x), ncols=80, desc='\t\t\t\t') - for x, y in pgb: - y_pred_t = model(x) - y_tar_t = _get_targets(y, encodings).reshape(-1) - y_pred_t = torch.argmax(y_pred_t, dim=-1).detach().cpu().numpy().reshape(-1) - for y_t_t, y_p_t in zip(y_tar_t, y_pred_t): - if y_t_t != 0: - total_t += 1 - - if y_t_t == y_p_t: - ok_t += 1 - - return ok_t / total_t - - - def _make_batches(dataset, batch_size=32): - batches_x = [] - batches_y = [] - - batch_x = [] - batch_y = [] - - for entry in dataset: - domain = entry[0] - t = entry[1] - batch_x.append(domain) - batch_y.append(t) - if len(batch_x) == batch_size: - batches_x.append(batch_x) - batches_y.append(batch_y) - batch_x = [] - batch_y = [] - - if len(batch_x) != 0: - batches_x.append(batch_x) - batches_y.append(batch_y) - - return batches_x, batches_y - - - def _get_targets(y, encodings): - max_len = max([len(yy) for yy in y]) - y_t = np.zeros((len(y), max_len), dtype=np.long) - for i in range(len(y)): - for j in range(max_len): - if j < len(y[i]): - y_t[i, j] = encodings._label2int[y[i][j]] - - return y_t - - - def _generate_dataset(count): - from training import generate_next_cmd - dataset = [] - for ii in range(count): - cmd, mask = generate_next_cmd() - dataset.append((cmd, mask)) - return dataset - - - def _start_train(params): - eval_at = 5000 - - if params.resume: - encodings = Encodings('{0}.encodings'.format(params.output_base)) - else: - sys.stdout.write('Generating new random data...') - sys.stdout.flush() - trainset = _generate_dataset(int(eval_at * 4 * params.batch_size)) - sys.stdout.write('done\n') - encodings = Encodings() - encodings.update_encodings(trainset) - - print('chars={0}, types={1}'.format(len(encodings._char2int), len(encodings._label2int))) - print(encodings._label2int) - - config = CTaggerConfig() - if params.resume: - config.load('{0}.conf'.format(params.output_base)) - model = CTagger(config, encodings) - model.to(params.device) - if params.resume: - model.load('{0}.last'.format(params.output_base)) - optimizer = torch.optim.Adam(model.parameters()) - criterion_t = torch.nn.CrossEntropyLoss(ignore_index=0) - - patience_left = params.patience - best_type = 0 # _eval(model, devset, encodings) - encodings.save('{0}.encodings'.format(params.output_base)) - config.save('{0}.conf'.format(params.output_base)) - model.save('{0}.last'.format(params.output_base)) - print("Devset evaluation acc={0}".format(best_type)) - epoch = 0 - eval_at = 5000 - - while patience_left > 0: -
sys.stdout.write('Generating new random data...') - sys.stdout.flush() - trainset = _generate_dataset(int(eval_at * params.batch_size)) - devset = _generate_dataset(int(eval_at / 10 * params.batch_size)) - sys.stdout.write('done\n') - sys.stdout.flush() - sys.stderr.flush() - epoch += 1 - random.shuffle(trainset) - train_x, train_y = _make_batches(trainset, batch_size=params.batch_size) - sys.stdout.write('Starting epoch {0}\n'.format(epoch)) - - pgb = tqdm.tqdm(zip(train_x, train_y), total=len(train_x), ncols=80, desc='\tloss=N/A') - model.train() - total_loss = 0 - cnt = 0 - for x, y in pgb: - cnt += 1 - if cnt % eval_at == 0: - - patience_left -= 1 - sys.stderr.flush() - sys.stderr.flush() - sys.stderr.write('\n\tEvaluating...') - sys.stderr.flush() - acc_t = _eval(model, devset, encodings) - sys.stderr.write(' acc={0}\n'.format(acc_t)) - sys.stderr.flush() - filename = '{0}.last'.format(params.output_base) - sys.stderr.write('\t\tStoring {0}\n'.format(filename)) - sys.stderr.flush() - model.save(filename) - if acc_t > best_type: - patience_left = params.patience - best_type = acc_t - filename = '{0}.bestType'.format(params.output_base) - sys.stderr.write('\t\tStoring {0}\n'.format(filename)) - sys.stderr.flush() - model.save(filename) - - sys.stderr.write('\n') - sys.stderr.flush() - model.train() - - if patience_left <= 0: - print("Stopping with maximum patience reached") - sys.exit(0) - - y_pred_t = model(x) - - y_tar_t = _get_targets(y, encodings) - y_tar_t = torch.tensor(y_tar_t, dtype=torch.long, device=params.device) - y_pred = y_pred_t.view(-1, y_pred_t.shape[-1]) - y_target = y_tar_t.view(-1) - if y_pred.shape[0] != y_target.shape[0]: - from ipdb import set_trace - set_trace() - loss = criterion_t(y_pred, y_target) - - optimizer.zero_grad() - total_loss += loss.item() - pgb.set_description('\tloss={0:.4f}'.format(total_loss / cnt)) - loss.backward() - optimizer.step() - - sys.stdout.write('AVG train loss={0} \n'.format(total_loss / len(train_x))) - - -def _start_interactive(params): - encodings = Encodings('{0}.encodings'.format(params.output_base)) - config = CTaggerConfig() - config.load('{0}.conf'.format(params.output_base)) - model = CTagger(config, encodings) - model.load('{0}.bestType'.format(params.output_base)) - model.to(params.device) - model.eval() - sys.stdout.write('>>> ') - sys.stdout.flush() - string = input() - while string != '/exit': - p_t = model([string]) - p_d_t = torch.argmax(p_t, dim=-1).detach().cpu().numpy() - print("Results for \n{0}".format(string)) - for ii in range(p_d_t.shape[-1]): - sys.stdout.write(encodings._label_list[p_d_t[0, ii]]) - sys.stdout.write('\n') - - print("") - sys.stdout.write('>>> ') - sys.stdout.flush() - string = input() - - -if __name__ == '__main__': - parser = optparse.OptionParser() - parser.add_option('--interactive', action='store_true', dest='interactive') - parser.add_option('--train', action='store_true', dest='train') - parser.add_option('--resume', action='store_true', dest='resume') - - parser.add_option('--store', action='store', dest='output_base') - parser.add_option('--patience', action='store', dest='patience', type='int', default=20, help='(default=20)') - parser.add_option('--batch-size', action='store', dest='batch_size', default=32, type='int', help='(default=32)') - parser.add_option('--device', action='store', dest='device', default='cpu') - - (params, _) = parser.parse_args(sys.argv) - - if params.train: - _start_train(params) - elif params.interactive: - _start_interactive(params) - else: - 
parser.print_help() diff --git a/ai-engine/stringlifier/stringlifier/modules/training.py b/ai-engine/stringlifier/stringlifier/modules/training.py deleted file mode 100644 index d51bd69..0000000 --- a/ai-engine/stringlifier/stringlifier/modules/training.py +++ /dev/null @@ -1,170 +0,0 @@ -# -# Copyright (c) 2020 Adobe Systems Incorporated. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -known_words = [] - - -def _generate_word(known_words): - import uuid - import datetime - import base64 - generated = None - ii = random.randint(0, 5) - mask = 'H' - if ii == 0: - generated = str(uuid.uuid4()) - mask = 'U' - elif ii == 1: - generated = str(uuid.uuid4().hex) - mask = 'H' - elif ii == 2: - c = random.randint(0, 3) - if c == 0: - generated = str(datetime.datetime.now().timestamp()) - elif c == 1: - generated = str(random.randint(0, 100000000000)) - elif c == 2: - generated = str(random.randint(0, 999)) + '.' + str(random.randint(0, 999)) - else: - generated = str(random.randint(0, 999)) + '.' + str(random.randint(0, 9999)) + '.' + str( - random.randint(0, 9999)) - mask = 'N' - elif ii == 3: - import string - N = random.randint(5, 20) - message = [random.choice(string.ascii_uppercase + - string.digits + - string.ascii_lowercase) for _ in range(N)] - message = ''.join(message) - i = random.randint(0, 2) - if i == 0: - message = message.lower() - elif i == 1: - message = message.upper() - generated = message - elif ii == 4: - toks = [] - for _ in range(4): - toks.append(str(random.randint(0, 255))) - generated = '.'.join(toks) - mask = 'I' - elif ii == 5: - generated = _generate_JWT_token(known_words) - mask = 'J' - return str(generated), mask[0] - - -lines = open('corpus/words_alpha.txt').readlines() -for line in lines: - known_words.append(line.strip()) - - -def _generate_JWT_token(known_words): - import jwt - - payload = {"id": str(random.random()), "client_id": str(random.random()), "user_id": str(random.random()), - "type": "access_token", - "expires_in": str(random.randint(10, 3600000)), "scope": "read, write", - "created_at": str(random.randint(1900000, 9000000))} - encoded_jwt = jwt.encode(payload, 'secret', algorithm='HS256') - - return str(encoded_jwt)[2:-1] - - -# generated_words = generate_words(len(known_words), known_words) - -known_index = 0 - -import random - -random.shuffle(known_words) - - -def _get_next_known(): - global known_index - s = known_words[known_index] - known_index += 1 - if known_index == len(known_words): - known_index = 0 - random.shuffle(known_words) - return s - - -def _get_next_gen(): - global known_words - s, m = _generate_word(known_words) - return s, m - - -import random - - -def generate_next_cmd(): - delimiters = ' /.,?!~|<>-=_~:;\\+-&*%$#@!' 
- enclosers = '[]{}``""\'\'()' - mask = '' - cmd = '' - num_words = random.randint(3, 15) - use_space = False - use_delimiter = False - use_encloser = False - append_number = False - for ii in range(num_words): - - use_delimiter = random.random() > 0.5 - use_encloser = random.random() > 0.8 - case_style = random.randint(0, 2) - use_gen_word = random.random() > 0.7 - - del_index = random.randint(0, len(delimiters) - 1) - enc_index = random.randint(0, len(enclosers) // 2 - 1) * 2 - if use_space: - mask += 'C' - cmd += ' ' - if use_gen_word: - wrd, label = _get_next_gen() - if case_style == 1: - wrd = wrd[0].upper() + wrd[1:] - elif case_style == 2: - wrd = wrd.upper() - msk = '' - for _ in range(len(wrd)): - msk += label - else: - wrd = _get_next_known() - append_number = random.random() > 0.97 - if append_number: - wrd = wrd + str(random.randint(0, 99)) - if case_style == 1: - wrd = wrd[0].upper() + wrd[1:] - elif case_style == 2: - wrd = wrd.upper() - msk = '' - for _ in range(len(wrd)): - msk += 'C' - - if use_delimiter: - wrd = delimiters[del_index] + wrd - msk = 'C' + msk - if use_encloser: - wrd = enclosers[enc_index] + wrd + enclosers[enc_index + 1] - msk = 'C' + msk + 'C' - - cmd += wrd - mask += msk - use_space = random.random() > 0.7 - - return cmd, mask diff --git a/clients/log-client/Dockerfile b/clients/log-client/Dockerfile deleted file mode 100644 index 8d89d21..0000000 --- a/clients/log-client/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -### Builder - -FROM golang:1.21-alpine3.17 as builder - -RUN apk --no-cache update -RUN apk add --no-cache git clang llvm make gcc protobuf musl-dev - -RUN mkdir /client -RUN mkdir /protobuf - -WORKDIR /protobuf -COPY /protobuf . - -WORKDIR /client -COPY /clients/log-client . - -RUN go build -o log-client - -### Make executable image - -FROM alpine:3.17 as client - -# RUN echo "@community http://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories -# RUN apk --no-cache update -# RUN apk add bash - -COPY --from=builder /client/log-client / - -CMD ["/log-client"] diff --git a/clients/log-client/Makefile b/clients/log-client/Makefile deleted file mode 100644 index 5556bdf..0000000 --- a/clients/log-client/Makefile +++ /dev/null @@ -1,60 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -CLIENT_NAME = sentryflow-log-client -IMAGE_NAME = 5gsec/$(CLIENT_NAME) -TAG = v0.1 - -.PHONY: build -build: gofmt golint gosec - go mod tidy - go build -o $(CLIENT_NAME) - -.PHONY: clean -clean: - rm -f $(CLIENT_NAME) - -.PHONY: gofmt -gofmt: - cd $(CURDIR); gofmt -w -s -d $(shell find . -type f -name '*.go' -print) - -.PHONY: golint -golint: -ifeq (, $(shell which golint)) - @{ \ - set -e ;\ - GOLINT_TEMP_DIR=$$(mktemp -d) ;\ - cd $$GOLINT_TEMP_DIR ;\ - go mod init tmp ;\ - go get golang.org/x/lint/golint ;\ - go install golang.org/x/lint/golint ;\ - rm -rf $$GOLINT_TEMP_DIR ;\ - } -endif - cd $(CURDIR); golint ./... - -.PHONY: gosec -gosec: -ifeq (, $(shell which gosec)) - @{ \ - set -e ;\ - GOSEC_TEMP_DIR=$$(mktemp -d) ;\ - cd $$GOSEC_TEMP_DIR ;\ - go mod init tmp ;\ - go get github.com/securego/gosec/v2/cmd/gosec ;\ - go install github.com/securego/gosec/v2/cmd/gosec ;\ - rm -rf $$GOSEC_TEMP_DIR ;\ - } -endif - cd $(CURDIR); gosec -exclude=G402 ./... 
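The client.go that follows wires four gRPC streams into one Feeder. Reduced to its core, the subscription pattern is a plain server-streaming read loop; a condensed, illustrative sketch (the address and host name are placeholders, and error handling plus the metric streams are elided):

package main

import (
	pb "SentryFlow/protobuf"
	"context"
	"fmt"
	"log"

	"google.golang.org/grpc"
)

func main() {
	// Placeholder address; the real client assembles it from SERVER_ADDR/SERVER_PORT.
	conn, err := grpc.Dial("sentryflow:8080", grpc.WithInsecure())
	if err != nil {
		log.Fatalf("[gRPC] Failed to connect: %v", err)
	}
	defer conn.Close()

	client := pb.NewSentryFlowClient(conn)
	stream, err := client.GetAPILog(context.Background(), &pb.ClientInfo{HostName: "example-host"})
	if err != nil {
		log.Fatalf("[Client] Could not get API log: %v", err)
	}
	for {
		apiLog, err := stream.Recv()
		if err != nil {
			log.Fatalf("[Client] Failed to receive an API log: %v", err)
		}
		fmt.Printf("== API Log ==\n%v\n", apiLog)
	}
}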
- - .PHONY: build-image - build-image: - docker build -t $(IMAGE_NAME):$(TAG) -f ./Dockerfile ../../ - - .PHONY: clean-image - clean-image: - docker rmi $(IMAGE_NAME):$(TAG) - - .PHONY: run-image - run-image: - docker run -it --rm $(IMAGE_NAME):$(TAG) diff --git a/clients/log-client/client/client.go b/clients/log-client/client/client.go deleted file mode 100644 index 021acbd..0000000 --- a/clients/log-client/client/client.go +++ /dev/null @@ -1,182 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package client - -import ( - pb "SentryFlow/protobuf" - "context" - "fmt" - "log" - "os" - "path/filepath" -) - -// Feeder Structure -type Feeder struct { - Running bool - - client pb.SentryFlowClient - logStream pb.SentryFlow_GetAPILogClient - envoyMetricStream pb.SentryFlow_GetEnvoyMetricsClient - apiMetricStream pb.SentryFlow_GetAPIMetricsClient - - Done chan struct{} - } - -// StrToFile Function -func StrToFile(str, targetFile string) { - _, err := os.Stat(targetFile) - if err != nil { - newFile, err := os.Create(filepath.Clean(targetFile)) - if err != nil { - fmt.Printf("[Client] Failed to create a file (%s, %s)\n", targetFile, err.Error()) - return - } - err = newFile.Close() - if err != nil { - fmt.Printf("[Client] Failed to close the file (%s, %s)\n", targetFile, err.Error()) - } - } - - file, err := os.OpenFile(targetFile, os.O_WRONLY|os.O_APPEND, 0600) - if err != nil { - fmt.Printf("[Client] Failed to open a file (%s, %s)\n", targetFile, err.Error()) - } - defer func() { - if err := file.Close(); err != nil { - fmt.Printf("[Client] Failed to close the file (%s, %s)\n", targetFile, err.Error()) - } - }() - - _, err = file.WriteString(str) - if err != nil { - fmt.Printf("[Client] Failed to write a string into the file (%s, %s)\n", targetFile, err.Error()) - } -} - -// NewClient Function -func NewClient(client pb.SentryFlowClient, clientInfo *pb.ClientInfo, logCfg string, metricCfg string, metricFilter string) *Feeder { - fd := &Feeder{} - - fd.Running = true - - fd.client = client - - fd.Done = make(chan struct{}) - - if logCfg != "none" { - // Subscribe to the API log stream of the SentryFlow server - logStream, err := client.GetAPILog(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get API log: %v", err) - } - - fd.logStream = logStream - } - - if metricCfg != "none" && (metricFilter == "all" || metricFilter == "api") { - amStream, err := client.GetAPIMetrics(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get API metrics: %v", err) - } - - fd.apiMetricStream = amStream - } - - if metricCfg != "none" && (metricFilter == "all" || metricFilter == "envoy") { - emStream, err := client.GetEnvoyMetrics(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get Envoy metrics: %v", err) - } - - fd.envoyMetricStream = emStream - } - - return fd -} - -// APILogRoutine Function -func (fd *Feeder) APILogRoutine(logCfg string) { - for fd.Running { - select { - default: - data, err := fd.logStream.Recv() - if err != nil { - log.Fatalf("[Client] Failed to receive an API log: %v", err) - break - } - - str := "" - str = str + "== API Log ==\n" - str = str + fmt.Sprintf("%v\n", data) - - if logCfg == "stdout" { - fmt.Printf("%s", str) - } else { - StrToFile(str, logCfg) - } - case <-fd.Done: - return - } - } -} - -// APIMetricsRoutine Function -func (fd *Feeder) APIMetricsRoutine(metricCfg string) { - for fd.Running { - select { - default: - data, err := fd.apiMetricStream.Recv() - if err != nil {
log.Fatalf("[Client] Failed to receive API metrics: %v", err) - break - } - - str := "" - str = str + "== API Metrics ==\n" - str = str + fmt.Sprintf("%v\n", data) - - if metricCfg == "stdout" { - fmt.Printf("%s", str) - } else { - StrToFile(str, metricCfg) - } - case <-fd.Done: - return - } - } -} - -// EnvoyMetricsRoutine Function -func (fd *Feeder) EnvoyMetricsRoutine(metricCfg string) { - metricKeys := []string{"GAUGE", "COUNTER", "HISTOGRAM", "SUMMARY"} - for fd.Running { - select { - default: - data, err := fd.envoyMetricStream.Recv() - if err != nil { - log.Fatalf("[Client] Failed to receive Envoy metrics: %v", err) - break - } - - str := "" - str = fmt.Sprintf("== Envoy Metrics / %s ==\n", data.TimeStamp) - str = str + fmt.Sprintf("Namespace: %s\n", data.Namespace) - str = str + fmt.Sprintf("Name: %s\n", data.Name) - str = str + fmt.Sprintf("IPAddress: %s\n", data.IPAddress) - str = str + fmt.Sprintf("Labels: %s\n", data.Labels) - - for _, key := range metricKeys { - str = str + fmt.Sprintf("%s: {%v}\n", key, data.Metrics[key]) - } - - if metricCfg == "stdout" { - fmt.Printf("%s", str) - } else { - StrToFile(str, metricCfg) - } - case <-fd.Done: - return - } - } -} diff --git a/clients/log-client/config/config.go b/clients/log-client/config/config.go deleted file mode 100644 index a14c153..0000000 --- a/clients/log-client/config/config.go +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package config - -import ( - "errors" - "fmt" - "os" - "strconv" -) - -// Config Structure -type Config struct { - Hostname string - - ServerAddr string - ServerPort int - - LogCfg string - MetricCfg string - MetricFilter string -} - -// Cfg for Global Reference -var Cfg Config - -// LoadEnvVars loads environment variables and stores them in Cfg (global variables) -func LoadEnvVars() (Config, error) { - var err error - - // get hostname - Cfg.Hostname, err = os.Hostname() - if err != nil { - msg := fmt.Sprintf("[Config] Could not find hostname: %v", err) - return Cfg, errors.New(msg) - } - - // load listen address and check if valid - Cfg.ServerAddr = os.Getenv("SERVER_ADDR") - - // load listen port and check if valid - Cfg.ServerPort, err = strconv.Atoi(os.Getenv("SERVER_PORT")) - if err != nil { - msg := fmt.Sprintf("[Config] Invalid server port %s: %v", os.Getenv("SERVER_PORT"), err) - return Cfg, errors.New(msg) - } - - Cfg.LogCfg = os.Getenv("LOG_CFG") - Cfg.MetricCfg = os.Getenv("METRIC_CFG") - Cfg.MetricFilter = os.Getenv("METRIC_FILTER") - - return Cfg, nil -} diff --git a/clients/log-client/go.mod b/clients/log-client/go.mod deleted file mode 100644 index b9e1bec..0000000 --- a/clients/log-client/go.mod +++ /dev/null @@ -1,18 +0,0 @@ -module log-client - -go 1.21 - -replace SentryFlow/protobuf => ../../protobuf - -require ( - SentryFlow/protobuf v0.0.0-00010101000000-000000000000 - google.golang.org/grpc v1.63.2 -) - -require ( - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect - google.golang.org/protobuf v1.34.1 // indirect -) diff --git a/clients/log-client/go.sum b/clients/log-client/go.sum deleted file mode 100644 index 6e760e0..0000000 --- a/clients/log-client/go.sum +++ /dev/null @@ -1,14 +0,0 @@ -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -golang.org/x/net v0.23.0 
h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= -google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= -google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= -google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= diff --git a/clients/log-client/main.go b/clients/log-client/main.go deleted file mode 100644 index a177cd9..0000000 --- a/clients/log-client/main.go +++ /dev/null @@ -1,106 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package main - -import ( - "SentryFlow/protobuf" - "flag" - "fmt" - "log" - "log-client/client" - "log-client/config" - "os" - "os/signal" - "syscall" - - "google.golang.org/grpc" -) - -// ========== // -// == Main == // -// ========== // - -func main() { - // Load environment variables - cfg, err := config.LoadEnvVars() - if err != nil { - log.Fatalf("[Config] Could not load environment variables: %v", err) - } - - // Get arguments - logCfgPtr := flag.String("logCfg", "stdout", "Output location for API logs, {stdout|file|none}") - metricCfgPtr := flag.String("metricCfg", "stdout", "Output location for API and Envoy metrics, {stdout|file|none}") - metricFilterPtr := flag.String("metricFilter", "envoy", "Filter to select specific API or Envoy metrics to receive, {api|envoy}") - flag.Parse() - - if *logCfgPtr == "none" && *metricCfgPtr == "none" { - flag.PrintDefaults() - return - } - - if cfg.LogCfg != "" { - *logCfgPtr = cfg.LogCfg - } - if cfg.MetricCfg != "" { - *metricCfgPtr = cfg.MetricCfg - } - if cfg.MetricFilter != "" { - *metricFilterPtr = cfg.MetricFilter - } - - if *metricFilterPtr != "all" && *metricFilterPtr != "api" && *metricFilterPtr != "envoy" { - flag.PrintDefaults() - return - } - - // == // - - // Construct a string "ServerAddr:ServerPort" - addr := fmt.Sprintf("%s:%d", cfg.ServerAddr, cfg.ServerPort) - - // Connect to the gRPC server of SentryFlow - conn, err := grpc.Dial(addr, grpc.WithInsecure()) - if err != nil { - log.Fatalf("[gRPC] Failed to connect: %v", err) - return - } - defer conn.Close() - - // Connected to the gRPC server - log.Printf("[gRPC] Started to collect Logs from %s", addr) - - // Define clientInfo - clientInfo := &protobuf.ClientInfo{ - HostName: cfg.Hostname, - } - - // Create a gRPC client for the SentryFlow service - sfClient := protobuf.NewSentryFlowClient(conn) - - // Create a log client with the gRPC client - logClient := client.NewClient(sfClient, clientInfo, *logCfgPtr, *metricCfgPtr, *metricFilterPtr) - - if *logCfgPtr != "none" { - go logClient.APILogRoutine(*logCfgPtr) - fmt.Printf("[APILog] Started to watch API logs\n") - } - - if *metricCfgPtr != "none" { - if *metricFilterPtr == "all" || *metricFilterPtr == "api" { - go logClient.APIMetricsRoutine(*metricCfgPtr) - fmt.Printf("[Metric] Started 
to watch API Metrics\n") - } - - if *metricFilterPtr == "all" || *metricFilterPtr == "envoy" { - go logClient.EnvoyMetricsRoutine(*metricCfgPtr) - fmt.Printf("[Metric] Started to watch Envoy Metrics\n") - } - } - - signalChan := make(chan os.Signal, 1) - signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) - - <-signalChan - - close(logClient.Done) -} diff --git a/clients/mongo-client/Dockerfile b/clients/mongo-client/Dockerfile deleted file mode 100644 index d71eecc..0000000 --- a/clients/mongo-client/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -### Builder - -FROM golang:1.21-alpine3.17 as builder - -RUN apk --no-cache update -RUN apk add --no-cache git clang llvm make gcc protobuf musl-dev - -RUN mkdir /client -RUN mkdir /protobuf - -WORKDIR /protobuf -COPY /protobuf . - -WORKDIR /client -COPY /clients/mongo-client . - -RUN go build -o mongo-client - -### Make executable image - -FROM alpine:3.17 as client - -# RUN echo "@community http://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories -# RUN apk --no-cache update -# RUN apk add bash - -COPY --from=builder /client/mongo-client / - -CMD ["/mongo-client"] diff --git a/clients/mongo-client/Makefile b/clients/mongo-client/Makefile deleted file mode 100644 index daab1d1..0000000 --- a/clients/mongo-client/Makefile +++ /dev/null @@ -1,60 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -CLIENT_NAME = sentryflow-mongo-client -IMAGE_NAME = 5gsec/$(CLIENT_NAME) -TAG = v0.1 - -.PHONY: build -build: gofmt golint gosec - go mod tidy - go build -o $(CLIENT_NAME) - -.PHONY: clean -clean: - rm -f $(CLIENT_NAME) - -.PHONY: gofmt -gofmt: - cd $(CURDIR); gofmt -w -s -d $(shell find . -type f -name '*.go' -print) - -.PHONY: golint -golint: -ifeq (, $(shell which golint)) - @{ \ - set -e ;\ - GOLINT_TEMP_DIR=$$(mktemp -d) ;\ - cd $$GOLINT_TEMP_DIR ;\ - go mod init tmp ;\ - go get golang.org/x/lint/golint ;\ - go install golang.org/x/lint/golint ;\ - rm -rf $$GOLINT_TEMP_DIR ;\ - } -endif - cd $(CURDIR); golint ./... - -.PHONY: gosec -gosec: -ifeq (, $(shell which gosec)) - @{ \ - set -e ;\ - GOSEC_TEMP_DIR=$$(mktemp -d) ;\ - cd $$GOSEC_TEMP_DIR ;\ - go mod init tmp ;\ - go get github.com/securego/gosec/v2/cmd/gosec ;\ - go install github.com/securego/gosec/v2/cmd/gosec ;\ - rm -rf $$GOSEC_TEMP_DIR ;\ - } -endif - cd $(CURDIR); gosec -exclude=G402 ./... 
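The mongo-client below hands every received message to a small mongo-client/mongodb package whose deletion is recorded elsewhere in this diff, so its handler is not shown here. The following is only a sketch of what that handler plausibly looks like with the pinned go.mongodb.org/mongo-driver; the database and collection names are assumptions, and only the driver calls themselves are real API:

package mongodb

import (
	"context"
	"time"

	"go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)

// DBHandler wraps one Mongo connection plus the collection used for API logs.
type DBHandler struct {
	client  *mongo.Client
	apiLogs *mongo.Collection
}

func NewMongoDBHandler(addr string) (*DBHandler, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	client, err := mongo.Connect(ctx, options.Client().ApplyURI(addr))
	if err != nil {
		return nil, err
	}
	// "sentryflow" / "api-logs" are illustrative names, not taken from this PR.
	return &DBHandler{
		client:  client,
		apiLogs: client.Database("sentryflow").Collection("api-logs"),
	}, nil
}

func (h *DBHandler) InsertAPILog(doc interface{}) error {
	_, err := h.apiLogs.InsertOne(context.Background(), doc)
	return err
}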
- -.PHONY: build-image -build-image: - docker build -t $(IMAGE_NAME):$(TAG) -f ./Dockerfile ../../ - -.PHONY: clean-image -clean-image: - docker rmi $(IMAGE_NAME):$(TAG) - -.PHONY: run-image -run-image: - docker run -it --rm $(IMAGE_NAME):$(TAG) diff --git a/clients/mongo-client/client/client.go b/clients/mongo-client/client/client.go deleted file mode 100644 index bbef3a5..0000000 --- a/clients/mongo-client/client/client.go +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package client - -import ( - pb "SentryFlow/protobuf" - "context" - "log" - "mongo-client/mongodb" -) - -// Feeder Structure -type Feeder struct { - Running bool - - client pb.SentryFlowClient - logStream pb.SentryFlow_GetAPILogClient - envoyMetricStream pb.SentryFlow_GetEnvoyMetricsClient - apiMetricStream pb.SentryFlow_GetAPIMetricsClient - - dbHandler mongodb.DBHandler - - Done chan struct{} -} - -// NewClient Function -func NewClient(client pb.SentryFlowClient, clientInfo *pb.ClientInfo, logCfg string, metricCfg string, metricFilter string, mongoDBAddr string) *Feeder { - fd := &Feeder{} - - fd.Running = true - fd.client = client - fd.Done = make(chan struct{}) - - if logCfg != "none" { - // Contact the server and print out its response - logStream, err := client.GetAPILog(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get API log: %v", err) - } - - fd.logStream = logStream - } - - if metricCfg != "none" && (metricFilter == "all" || metricFilter == "api") { - amStream, err := client.GetAPIMetrics(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get API metrics: %v", err) - } - - fd.apiMetricStream = amStream - } - - if metricCfg != "none" && (metricFilter == "all" || metricFilter == "envoy") { - emStream, err := client.GetEnvoyMetrics(context.Background(), clientInfo) - if err != nil { - log.Fatalf("[Client] Could not get Envoy metrics: %v", err) - } - - fd.envoyMetricStream = emStream - } - - // Initialize DB - dbHandler, err := mongodb.NewMongoDBHandler(mongoDBAddr) - if err != nil { - log.Fatalf("[MongoDB] Unable to intialize DB: %v", err) - } - fd.dbHandler = *dbHandler - - return fd -} - -// APILogRoutine Function -func (fd *Feeder) APILogRoutine(logCfg string) { - for fd.Running { - select { - default: - data, err := fd.logStream.Recv() - if err != nil { - log.Fatalf("[Client] Failed to receive an API log: %v", err) - break - } - err = fd.dbHandler.InsertAPILog(data) - if err != nil { - log.Fatalf("[MongoDB] Failed to insert an API log: %v", err) - } - case <-fd.Done: - return - } - } -} - -// APIMetricsRoutine Function -func (fd *Feeder) APIMetricsRoutine(metricCfg string) { - for fd.Running { - select { - default: - data, err := fd.apiMetricStream.Recv() - if err != nil { - log.Fatalf("[Client] Failed to receive API metrics: %v", err) - break - } - err = fd.dbHandler.InsertAPIMetrics(data) - if err != nil { - log.Fatalf("[MongoDB] Failed to insert API metrics: %v", err) - } - case <-fd.Done: - return - } - } -} - -// EnvoyMetricsRoutine Function -func (fd *Feeder) EnvoyMetricsRoutine(metricCfg string) { - for fd.Running { - select { - default: - data, err := fd.envoyMetricStream.Recv() - if err != nil { - log.Fatalf("[Client] Failed to receive Envoy metrics: %v", err) - break - } - err = fd.dbHandler.InsertEnvoyMetrics(data) - if err != nil { - log.Fatalf("[MongoDB] Failed to insert Envoy metrics: %v", err) - } - case <-fd.Done: - return - } - } -} diff --git a/clients/mongo-client/config/config.go 
b/clients/mongo-client/config/config.go deleted file mode 100644 index 3fd3d1b..0000000 --- a/clients/mongo-client/config/config.go +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package config - -import ( - "errors" - "fmt" - "os" - "strconv" -) - -// Config Structure -type Config struct { - Hostname string - - ServerAddr string - ServerPort int - - LogCfg string - MetricCfg string - MetricFilter string - - MongoDBAddr string -} - -// Cfg for Global Reference -var Cfg Config - -// LoadEnvVars loads environment variables and stores them in Cfg (global variables) -func LoadEnvVars() (Config, error) { - var err error - - // get hostname - Cfg.Hostname, err = os.Hostname() - if err != nil { - msg := fmt.Sprintf("[Config] Could not find hostname: %v", err) - return Cfg, errors.New(msg) - } - - // load listen address and check if valid - Cfg.ServerAddr = os.Getenv("SERVER_ADDR") - - // load listen port and check if valid - Cfg.ServerPort, err = strconv.Atoi(os.Getenv("SERVER_PORT")) - if err != nil { - msg := fmt.Sprintf("[Config] Invalid server port %s: %v", os.Getenv("SERVER_PORT"), err) - return Cfg, errors.New(msg) - } - - Cfg.LogCfg = os.Getenv("LOG_CFG") - Cfg.MetricCfg = os.Getenv("METRIC_CFG") - Cfg.MetricFilter = os.Getenv("METRIC_FILTER") - - // load MongoDB address - Cfg.MongoDBAddr = os.Getenv("MONGODB_ADDR") - - return Cfg, nil -} diff --git a/clients/mongo-client/go.mod b/clients/mongo-client/go.mod deleted file mode 100644 index a8e4091..0000000 --- a/clients/mongo-client/go.mod +++ /dev/null @@ -1,28 +0,0 @@ -module mongo-client - -go 1.21 - -replace SentryFlow/protobuf => ../../protobuf - -require ( - SentryFlow/protobuf v0.0.0-00010101000000-000000000000 - go.mongodb.org/mongo-driver v1.13.1 - google.golang.org/grpc v1.63.2 -) - -require ( - github.com/golang/snappy v0.0.1 // indirect - github.com/klauspost/compress v1.13.6 // indirect - github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect - github.com/xdg-go/pbkdf2 v1.0.0 // indirect - github.com/xdg-go/scram v1.1.2 // indirect - github.com/xdg-go/stringprep v1.0.4 // indirect - github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect - golang.org/x/crypto v0.21.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect - google.golang.org/protobuf v1.34.1 // indirect -) diff --git a/clients/mongo-client/go.sum b/clients/mongo-client/go.sum deleted file mode 100644 index f1e8df1..0000000 --- a/clients/mongo-client/go.sum +++ /dev/null @@ -1,67 +0,0 @@ -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= -github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/montanaflynn/stats 
v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0= -github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= -github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= -github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= -github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= -github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= -github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= -github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= -github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA= -github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.mongodb.org/mongo-driver v1.13.1 h1:YIc7HTYsKndGK4RFzJ3covLz1byri52x0IoMB0Pt/vk= -go.mongodb.org/mongo-driver v1.13.1/go.mod h1:wcDf1JBCXy2mOW0bWHwO/IOYqdca1MPCwDtFu/Z9+eo= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= -golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= -google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= -google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= -google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= diff --git a/clients/mongo-client/main.go b/clients/mongo-client/main.go deleted file mode 100644 index 95dea38..0000000 --- a/clients/mongo-client/main.go +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package main - -import ( - protobuf "SentryFlow/protobuf" - - "flag" - "fmt" - "log" - "mongo-client/client" - "mongo-client/config" - "os" - "os/signal" - "syscall" - - "google.golang.org/grpc" -) - -// ========== // -// == Main == // -// ========== // - -func main() { - // Load environment variables - cfg, err := config.LoadEnvVars() - if err != nil { - log.Fatalf("[Config] Could not load environment variables: %v", err) - } - - // Get arguments - logCfgPtr := flag.String("logCfg", "mongodb", "Location for storing API logs, {mongodb|none}") - metricCfgPtr := flag.String("metricCfg", "mongodb", "Location for storing API and Envoy metrics, {mongodb|none}") - metricFilterPtr := flag.String("metricFilter", "envoy", "Filter to select specific API or Envoy metrics to receive, {api|envoy}") - mongoDBAddrPtr := flag.String("mongodb", "", "MongoDB Server Address") - flag.Parse() - - if *logCfgPtr == "none" && *metricCfgPtr == "none" { - flag.PrintDefaults() - return - } - - if cfg.LogCfg != "" { - *logCfgPtr = cfg.LogCfg - } - if cfg.MetricCfg != "" { - *metricCfgPtr = cfg.MetricCfg - } - if cfg.MetricFilter != "" { - *metricFilterPtr = cfg.MetricFilter - } - if cfg.MongoDBAddr != "" { - *mongoDBAddrPtr = cfg.MongoDBAddr - } - - if *metricFilterPtr != "all" && 
*metricFilterPtr != "api" && *metricFilterPtr != "envoy" { - flag.PrintDefaults() - return - } - - // == // - - // Construct a string "ServerAddr:ServerPort" - addr := fmt.Sprintf("%s:%d", cfg.ServerAddr, cfg.ServerPort) - - // Connect to the gRPC server of SentryFlow - conn, err := grpc.Dial(addr, grpc.WithInsecure()) - if err != nil { - log.Fatalf("[gRPC] Failed to connect: %v", err) - return - } - defer conn.Close() - - // Connected to the gRPC server - log.Printf("[gRPC] Started to collect Logs from %s", addr) - - // Define clientInfo - clientInfo := &protobuf.ClientInfo{ - HostName: cfg.Hostname, - } - - // Create a gRPC client for the SentryFlow service - sfClient := protobuf.NewSentryFlowClient(conn) - - // Create a log client with the gRPC client - logClient := client.NewClient(sfClient, clientInfo, *logCfgPtr, *metricCfgPtr, *metricFilterPtr, *mongoDBAddrPtr) - - if *logCfgPtr != "none" { - go logClient.APILogRoutine(*logCfgPtr) - fmt.Printf("[APILog] Started to watch API logs\n") - } - - if *metricCfgPtr != "none" { - if *metricFilterPtr == "all" || *metricFilterPtr == "api" { - go logClient.APIMetricsRoutine(*metricCfgPtr) - fmt.Printf("[Metric] Started to watch API metrics\n") - } - - if *metricFilterPtr == "all" || *metricFilterPtr == "envoy" { - go logClient.EnvoyMetricsRoutine(*metricCfgPtr) - fmt.Printf("[Metric] Started to watch Envoy metrics\n") - } - } - - signalChan := make(chan os.Signal, 1) - signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) - - <-signalChan - - close(logClient.Done) -} diff --git a/clients/mongo-client/mongodb/mongoHandler.go b/clients/mongo-client/mongodb/mongoHandler.go deleted file mode 100644 index eaedb76..0000000 --- a/clients/mongo-client/mongodb/mongoHandler.go +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -package mongodb - -import ( - protobuf "SentryFlow/protobuf" - "context" - "errors" - "fmt" - "log" - "time" - - "go.mongodb.org/mongo-driver/mongo" - "go.mongodb.org/mongo-driver/mongo/options" -) - -// DBHandler Structure -type DBHandler struct { - client *mongo.Client - cancel context.CancelFunc - - database *mongo.Database - apiLogCol *mongo.Collection - apiMetricsCol *mongo.Collection - evyMetricsCol *mongo.Collection -} - -// dbHandler for Global Reference -var dbHandler DBHandler - -// NewMongoDBHandler Function -func NewMongoDBHandler(mongoDBAddr string) (*DBHandler, error) { - var err error - - // Create a MongoDB client - dbHandler.client, err = mongo.NewClient(options.Client().ApplyURI(mongoDBAddr)) - if err != nil { - msg := fmt.Sprintf("[MongoDB] Unable to initialize a monogoDB client (%s): %v", mongoDBAddr, err) - return nil, errors.New(msg) - } - - // Set timeout (10 sec) - var ctx context.Context - ctx, dbHandler.cancel = context.WithTimeout(context.Background(), 10*time.Second) - - // Connect to the MongoDB server - err = dbHandler.client.Connect(ctx) - if err != nil { - msg := fmt.Sprintf("[MongoDB] Unable to connect the mongoDB server (%s): %v", mongoDBAddr, err) - return nil, errors.New(msg) - } - - // Create 'SentryFlow' database - dbHandler.database = dbHandler.client.Database("SentryFlow") - - // Create APILogs and Metrics collections - dbHandler.apiLogCol = dbHandler.database.Collection("APILogs") - dbHandler.apiMetricsCol = dbHandler.database.Collection("APIMetrics") - dbHandler.evyMetricsCol = dbHandler.database.Collection("EnvoyMetrics") - - return &dbHandler, nil -} - -// Disconnect Function -func (handler *DBHandler) Disconnect() { - err := 
handler.client.Disconnect(context.Background()) - if err != nil { - log.Printf("[MongoDB] Unable to properly disconnect: %v", err) - } -} - -// InsertAPILog Function -func (handler *DBHandler) InsertAPILog(data *protobuf.APILog) error { - _, err := handler.apiLogCol.InsertOne(context.Background(), data) - if err != nil { - return err - } - - return nil -} - -// InsertAPIMetrics Function -func (handler *DBHandler) InsertAPIMetrics(data *protobuf.APIMetrics) error { - _, err := handler.apiMetricsCol.InsertOne(context.Background(), data) - if err != nil { - return err - } - - return nil -} - -// InsertEnvoyMetrics Function -func (handler *DBHandler) InsertEnvoyMetrics(data *protobuf.EnvoyMetrics) error { - _, err := handler.evyMetricsCol.InsertOne(context.Background(), data) - if err != nil { - return err - } - - return nil -} diff --git a/contribution/README.md b/contribution/README.md deleted file mode 100644 index 44c01f7..0000000 --- a/contribution/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# Contribution Guide - -SentryFlow operates within Istio on Kubernetes. This means project participants will need an Istio environment. - -To minimize the hassle of installing (uninstalling) Kubernetes and configuring Istio, we have prepared a Vagrantfile that initializes an Ubuntu VM with fully functional Kubernetes and Istio. - -## 1. Prerequisites - -The provided Vagrantfile is tested on the following environment (i.e., Vagrant with VirtualBox). - -- **[Vagrant](https://www.vagrantup.com/)** - v2.2.9 -- **[VirtualBox](https://www.virtualbox.org/)** - v6.1 - -## 2. Starting up a VM - -To proceed, execute the following command within the `contribution/` directory: - -```bash -$ vagrant up -Bringing machine 'sentryflow' up with 'virtualbox' provider... -==> sentryflow: Importing base box 'generic/ubuntu2204'... -==> sentryflow: Matching MAC address for NAT networking... -==> sentryflow: Checking if box 'generic/ubuntu2204' version '4.3.10' is up to date... -... - sentryflow: clusterrolebinding.rbac.authorization.k8s.io/calico-node created - sentryflow: clusterrolebinding.rbac.authorization.k8s.io/calico-cni-plugin created - sentryflow: daemonset.apps/calico-node created - sentryflow: deployment.apps/calico-kube-controllers created -``` - -This command will initiate the installation of the necessary development environment. The duration of this process may vary, primarily depending on the speed of your network connection, and could take several minutes to complete. - -## 3. Development and Code Quality - -### Development - -Once Vagrant has been initialized successfully, you can access the Kubernetes environment by following these steps: - -``` -$ vagrant ssh -``` - -The source code for SentryFlow will be located at `/home/vagrant/sentryflow` within the virtual environment, and this directory will also be synchronized with the current working directory on the host machine. - -After making modifications to the source code of SentryFlow, you can build the changes by navigating to the `sentryflow` directory and running the Makefile. - -``` -make build -``` - -Executing the Makefile will result in the construction of container images, each tagged as specified. - -### Code Quality - -To maintain a clean and secure code base for SentryFlow, we conduct several checks, including `gofmt` for code formatting, `golint` for code style and linting, and `gosec` for security scanning. 
- -To evaluate the quality of your code, navigate to the `sentryflow` directory and execute the following commands: - -``` -make golint # run golint checks -make gofmt # run gofmt checks -make gosec # run gosec checks -``` - -### Pull Request - -Once everything is correctly set up, you are ready to create a pull request. Please refer to our guidelines for submitting PRs. - -## 4. Cleaning Up - -If you have successfully made changes to SentryFlow and wish to clean up the created workspace, you can simply use the following command: - -``` -$ vagrant destroy - sentryflow: Are you sure you want to destroy the 'sentryflow' VM? [y/N] y -==> sentryflow: Forcing shutdown of VM... -==> sentryflow: Destroying VM and associated drives... -``` - -Executing the command will terminate and remove the VM that you were working on. diff --git a/contribution/vagrant/.gitignore b/contribution/vagrant/.gitignore deleted file mode 100644 index a977916..0000000 --- a/contribution/vagrant/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.vagrant/ diff --git a/contribution/vagrant/Vagrantfile b/contribution/vagrant/Vagrantfile deleted file mode 100644 index 08f0eca..0000000 --- a/contribution/vagrant/Vagrantfile +++ /dev/null @@ -1,51 +0,0 @@ -Vagrant.require_version ">= 2.0.0" - -VM_NAME = "sentryflow" -IMG_NAME = "generic/ubuntu2204" - -NUM_OF_VCPUS = 4 -SIZE_OF_VMEM = 4096 - -## == ## - -# create ssh keys if needed -system(" - if [ #{ARGV[0]} = 'up' ]; then - if [ ! -f ~/.ssh/id_rsa ]; then - echo '~/.ssh/id_rsa keys does not exist.' - ssh-keygen -t rsa -b 2048 -f ~/.ssh/id_rsa - fi - fi -") - -## == ## - -Vagrant.configure("2") do |config| - # vagrant@VM_NAME - config.vm.hostname = VM_NAME - - config.vm.define VM_NAME do |cfg| - cfg.vm.box = IMG_NAME - - cfg.vm.provider "virtualbox" do |vb| - vb.memory = SIZE_OF_VMEM - vb.cpus = NUM_OF_VCPUS - end - end - - # sync directories - config.vm.synced_folder "../../", "/home/vagrant/sentryflow", owner:"vagrant", group:"vagrant" - - # configure SSH - config.ssh.insert_key = false - - # copy ssh keys - config.vm.provision "file", source: "~/.ssh/id_rsa.pub", destination: "/home/vagrant/.ssh/id_rsa.pub" - config.vm.provision :shell, :inline => "cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys", run: "always" - - # copy git config - config.vm.provision :file, source: "~/.gitconfig", destination: "$HOME/.gitconfig" - - # setup env - config.vm.provision "shell", path: "env-setup.sh" -end diff --git a/contribution/vagrant/env-setup.sh b/contribution/vagrant/env-setup.sh deleted file mode 100755 index 4c9e348..0000000 --- a/contribution/vagrant/env-setup.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash - -# == Build Essential == # - -# update repo -sudo apt-get update - -# install build-essential -sudo apt-get install -y build-essential - -# == Containerd == # - -# add GPG key -sudo apt-get install -y curl ca-certificates gnupg -sudo install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg -sudo chmod a+r /etc/apt/keyrings/docker.gpg - -# add docker repository -echo \ - "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - "$(. 
/etc/os-release && echo "$VERSION_CODENAME")" stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - -# update the docker repo -sudo apt-get update - -# install containerd -sudo apt-get install -y containerd.io - -# set up the default config file -sudo mkdir -p /etc/containerd -sudo containerd config default | sudo tee /etc/containerd/config.toml -sudo sed -i "s/SystemdCgroup = false/SystemdCgroup = true/g" /etc/containerd/config.toml -sudo systemctl restart containerd - -# # == Kubernetes == # - -# install k3s -curl -sfL https://get.k3s.io | K3S_KUBECONFIG_MODE="644" INSTALL_K3S_EXEC="--disable=traefik" sh - - -echo "wait for initialization" -sleep 15 - -runtime="15 minute" -endtime=$(date -ud "$runtime" +%s) - -while [[ $(date -u +%s) -le $endtime ]] -do - status=$(kubectl get pods -A -o jsonpath={.items[*].status.phase}) - [[ $(echo $status | grep -v Running | wc -l) -eq 0 ]] && break - echo "wait for initialization" - sleep 1 -done - -# make kubectl accessable for vagrant user -mkdir -p /home/vagrant/.kube -sudo cp /etc/rancher/k3s/k3s.yaml /home/vagrant/.kube/config -sudo chown -R vagrant:vagrant /home/vagrant/.kube -echo "export KUBECONFIG=/home/vagrant/.kube/config" | tee -a /home/vagrant/.bashrc -PATH=$PATH:/bin:/usr/bin:/usr/local/bin - -# == Istio == # - -# move to home -cd /home/vagrant - -# download istio -curl -L https://istio.io/downloadIstio | sh - - -# copy istioctl to /usr/local/bin -sudo cp /home/vagrant/istio-*/bin/istioctl /usr/local/bin - -# change permissions -sudo chown -R vagrant:vagrant /home/vagrant/istio-* - -# install istio -su - vagrant -c "istioctl install --set profile=default -y" - -# == Docker == # - -# install Docker -sudo apt-get install -y docker-ce && sleep 5 - -# configure daemon.json -sudo mkdir -p /etc/docker -cat <> /home/vagrant/.bashrc -echo "export GOPATH=\$HOME/go" >> /home/vagrant/.bashrc -echo "export GOROOT=/usr/local/go" >> /home/vagrant/.bashrc -echo "export PATH=\$PATH:/usr/local/go/bin:\$HOME/go/bin" >> /home/vagrant/.bashrc -echo >> /home/vagrant/.bashrc - -# create a directory for Go -mkdir -p /home/vagrant/go -chown -R vagrant:vagrant /home/vagrant/go diff --git a/contribution/vagrant/install-scripts/install-vagrant.sh b/contribution/vagrant/install-scripts/install-vagrant.sh deleted file mode 100755 index 899bf0b..0000000 --- a/contribution/vagrant/install-scripts/install-vagrant.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -. /etc/os-release - -if [ "$NAME" != "Ubuntu" ]; then - echo "This script is for Ubuntu." - exit -fi - -if [ ! -x "$(command -v vagrant)" ]; then - VAGRANT_VERSION=2.3.0 - - # install wget - sudo apt-get -y install wget - - # download vagrant package - wget https://releases.hashicorp.com/vagrant/$VAGRANT_VERSION/vagrant_$VAGRANT_VERSION-1_amd64.deb - - # install vagrant - sudo apt-get -y install ./vagrant_$VAGRANT_VERSION-1_amd64.deb - - # rm the vagrant package - rm vagrant_$VAGRANT_VERSION-1_amd64.deb - - # install vagrant plugins - vagrant plugin install vagrant-vbguest - vagrant plugin install vagrant-reload -else - echo "Found Vagrant, skipping the installation of Vagrant." -fi diff --git a/contribution/vagrant/install-scripts/install-virtualbox.sh b/contribution/vagrant/install-scripts/install-virtualbox.sh deleted file mode 100755 index 8e84ae4..0000000 --- a/contribution/vagrant/install-scripts/install-virtualbox.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -. /etc/os-release - -if [ "$NAME" != "Ubuntu" ]; then - echo "This script is for Ubuntu." - exit -fi - -if [ ! 
-x "$(command -v vboxmanage)" ]; then - # install wget - sudo apt-get -y install wget - - # download oracle_vbox_2016.asc and register it to the system - wget -O- https://www.virtualbox.org/download/oracle_vbox_2016.asc | sudo gpg --dearmor --yes --output /usr/share/keyrings/oracle-virtualbox-2016.gpg - - # install vbox - sudo apt-get update - sudo apt-get install virtualbox - - echo "Please reboot the machine." -else - echo "Found VirtualBox, skipping the installation of Virtualbox." -fi diff --git a/deployments/ai-engine.yaml b/deployments/ai-engine.yaml deleted file mode 100644 index cce9e45..0000000 --- a/deployments/ai-engine.yaml +++ /dev/null @@ -1,36 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - namespace: sentryflow - name: ai-engine -spec: - replicas: 1 - selector: - matchLabels: - app: ai-engine - template: - metadata: - labels: - app: ai-engine - spec: - containers: - - name: sentryflow - image: 5gsec/sentryflow-ai-engine:v0.1 - ports: - - name: ai-engine - protocol: TCP - containerPort: 5000 ---- -apiVersion: v1 -kind: Service -metadata: - name: ai-engine - namespace: sentryflow -spec: - selector: - app: ai-engine - ports: - - name: sentryflow-ai-engine - protocol: TCP - port: 5000 - targetPort: 5000 diff --git a/deployments/log-client.yaml b/deployments/log-client.yaml deleted file mode 100644 index 35b311f..0000000 --- a/deployments/log-client.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - namespace: sentryflow - name: log-client -spec: - replicas: 1 - selector: - matchLabels: - app: log-client - template: - metadata: - labels: - app: log-client - spec: - containers: - - name: log-client - image: 5gsec/sentryflow-log-client:v0.1 - env: - - name: SERVER_ADDR - value: "sentryflow.sentryflow.svc.cluster.local" - - name: SERVER_PORT - value: "8080" - - name: LOG_CFG - value: "stdout" - - name: METRIC_CFG - value: "stdout" - - name: METRIC_FILTER - value: "api" diff --git a/deployments/mongo-client.yaml b/deployments/mongo-client.yaml deleted file mode 100644 index 698ed46..0000000 --- a/deployments/mongo-client.yaml +++ /dev/null @@ -1,61 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - namespace: sentryflow - name: mongodb -spec: - replicas: 1 - selector: - matchLabels: - app: mongodb - template: - metadata: - labels: - app: mongodb - spec: - containers: - - name: mongodb - image: mongo:latest - ports: - - containerPort: 27017 ---- -apiVersion: v1 -kind: Service -metadata: - namespace: sentryflow - name: mongodb -spec: - selector: - app: mongodb - ports: - - protocol: TCP - port: 27017 - targetPort: 27017 ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - namespace: sentryflow - name: mongo-client -spec: - replicas: 1 - selector: - matchLabels: - app: mongo-client - template: - metadata: - labels: - app: mongo-client - spec: - imagePullSecrets: - - name: regcred - containers: - - name: mongo-client - image: 5gsec/sentryflow-mongo-client:v0.1 - env: - - name: SERVER_ADDR - value: "sentryflow.sentryflow.svc.cluster.local" - - name: SERVER_PORT - value: "8080" - - name: MONGODB_ADDR - value: "mongodb://mongodb:27017" diff --git a/deployments/sentryflow.yaml b/deployments/sentryflow.yaml index 13ed851..14d50ec 100644 --- a/deployments/sentryflow.yaml +++ b/deployments/sentryflow.yaml @@ -55,7 +55,7 @@ subjects: apiVersion: v1 kind: ConfigMap metadata: - name: config + name: sentryflow namespace: sentryflow labels: app.kubernetes.io/part-of: sentryflow @@ -103,7 +103,7 @@ spec: - /var/lib/sentryflow/config.yaml 
volumeMounts: - mountPath: /var/lib/sentryflow/ - name: config + name: sentryflow ports: - containerPort: 8080 name: exporter @@ -126,9 +126,9 @@ spec: initialDelaySeconds: 5 terminationGracePeriodSeconds: 30 volumes: - - name: config + - name: sentryflow configMap: - name: config + name: sentryflow defaultMode: 420 --- apiVersion: v1 diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..fad1036 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,169 @@ +# Want to contribute? + +Great! We welcome contributions of all kinds, big or small! This includes bug reports, code fixes, documentation +improvements, and code examples. + +Before you dive in, please take a moment to read through this guide. + +# Reporting issues + +We use [GitHub](https://github.com/5GSEC/SentryFlow) to manage issues. Please open +a [new issue](https://github.com/5GSEC/SentryFlow/issues/new/choose) directly there. + +# Getting Started + +## Setting Up Your Environment + +- Head over to [GitHub](https://github.com/5GSEC/SentryFlow) and fork the 5GSec SentryFlow repository. +- Clone your forked repository onto your local machine. + ```shell + git clone git@github.com:<your-username>/SentryFlow.git + ``` + +## Install development tools + +You'll need these tools for a smooth development experience: + +- [Make](https://www.gnu.org/software/make/#download) +- [Go](https://go.dev/doc/install) SDK, version 1.23 or later +- Go IDE ([Goland](https://www.jetbrains.com/go/) / [VS Code](https://code.visualstudio.com/download)) +- Container tools ([Docker](https://www.docker.com/) / [Podman](https://podman.io/)) +- [Kubernetes cluster](https://kubernetes.io/docs/setup/) running version 1.28 or later. +- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) version 1.28 or later. + +# Contributing Code + +## Building Locally + +- Install development tools as [mentioned above](#setting-up-your-environment). + +- Build SentryFlow using: + ```shell + cd sentryflow + make build + ``` + +## Understanding the Project + +Before contributing to any Open Source project, it's important to have a basic understanding of what the project is about. +It is advised to try out the project as an end user. + +## Project Structure + +These are general guidelines on how to organize source code in this repository. + +``` +github.com/5GSEC/SentryFlow + +├── client -> Log client code. +├── deployments -> Manifests or Helm charts for deployment on Kubernetes. +├── docs -> All Documentation. +│   └── receivers -> Receiver specific integration documentation. +│   ├── other +│   │   ├── ingress-controller +│   │   │   └── nginx-inc +│   │   └── web-server +│   │   └── nginx +│   └── service-mesh +│   └── istio +├── filter -> Receivers specific filters/modules to observe API calls from receivers. +├── protobuf +│   ├── golang -> Generated protobuf Go code. +│   ├── python -> Generated protobuf Python code. +├── scripts +├── sentryflow +│   ├── cmd -> Code for the actual binary. +│   ├── config +│   │   └── default.yaml -> Default configuration file. +│   ├── go.mod -> Go module file to track dependencies. +│   └── pkg -> pkg is a collection of utility packages used by the components without being specific to its internals. +│   ├── config -> Configuration initialization code. +│   ├── core -> SentryFlow core initialization code. +│   ├── exporter -> Exporter code. +│   ├── k8s -> Kubernetes client code. +│   ├── receiver -> Receiver code. +│   │   ├── receiver.go -> All receivers initialization code. +│   │   └── svcmesh -> ServiceMesh receivers code. +│   │   └── other -> Other receivers code. +│   └── util -> Utilities. +``` + +## Imports grouping + +This project uses the following pattern for grouping imports in Go files: + +* imports from the standard library. +* imports from other projects. +* imports from the `sentryflow` project. + +For example: + +```go +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/5GSEC/SentryFlow/pkg/config" + "github.com/5GSEC/SentryFlow/pkg/receiver" + "github.com/5GSEC/SentryFlow/pkg/util" +) +``` + +## Pull Requests and Code Reviews + +We use GitHub [pull requests](https://github.com/5GSEC/SentryFlow/pulls) for code contributions. All submissions, +including those from project members, require review before merging. +We typically aim for two approvals per pull request, with reviews happening within a week or two. +Feel free to ping reviewers if you haven't received feedback within that timeframe. + +### Commit Messages + +We follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for clear and +consistent commit messages. + +Please make sure you have added the **Signed-off-by:** footer in your git commit. In order to do it automatically, use +the **--signoff** flag: + +```shell +git commit --signoff +``` + +With this command, `git` will automatically add a footer by reading your name and email from your `.gitconfig` file.
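+For instance, a commit message following this convention might look like the hypothetical example below (the type, scope, and description are made up for illustration): + +``` +feat(receiver): handle empty namespace in istio receiver + +Signed-off-by: Your Name <you@example.com> +``` +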
+### Merging PRs + +**For maintainers:** Before merging a PR, make sure the title is descriptive and follows +a [good commit message](https://www.conventionalcommits.org/en/v1.0.0/). + +Merge the PR by using the `Squash and merge` option on GitHub. Avoid creating merge commits. After the merge, make sure +referenced issues are closed. + +# Testing and Documentation + +Tests and documentation are not optional; make sure your pull requests include: + +- Tests that verify your changes and don't break existing functionality. +- Updated [documentation](../docs) reflecting your code changes. +- Reference information and any other relevant details. + +## Commands to run tests + +- Unit tests: + ```shell + make tests + ``` + +- Integration tests: + ```shell + make integration-test + ``` + +- End-to-end tests: + ```shell + make e2e-test + ``` + diff --git a/docs/getting_started.md b/docs/getting_started.md index 4945f37..22d393f 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,94 +1,52 @@ # Getting Started -This guide provides a step-by-step process for deploying SentryFlow on Istio, aimed at enhancing API observability and security. It includes detailed commands for each step along with their explanations. +This guide provides a step-by-step process for deploying SentryFlow in a Kubernetes environment, aimed at enhancing API +observability. It includes detailed commands for each step along with their explanations. -> **Note**: SentryFlow is currently in the early stages of development. Please be aware that the information provided here may become outdated or change without notice. +> **Note**: SentryFlow is currently in the early stages of development. Please be aware that the information provided +> here may become outdated or change without notice. ## 1. Prerequisites -SentryFlow functions within the Istio framework. Below is a table detailing the environments where SentryFlow has been successfully deployed and verified to be operational.
- -|System Name|Version| -|--|--| -|Ubuntu|22.04, 20.04| -|[Istio](https://istio.io/latest/)|1.20.2| -|[Kubernetes](https://kubernetes.io/)|v1.27.1| - -> **Note**: For information on Kubernetes configurations, including Container Network Interface (CNI), Container Runtime Interface (CRI), and their respective runtime settings, please consult the [compatability matrix](k8s_compatibility.md). +- A Kubernetes cluster running version 1.28 or later. +- [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) version 1.28 or later. ## 2. Deploying SentryFlow -SentryFlow can be deployed using `kubectl` command. The deployment can be accomplished with the following commands: +Configure the SentryFlow receiver by following [this](receivers.md). Then deploy SentryFlow with the following `kubectl` command: -``` -$ git clone https://github.com/5GSEC/sentryflow -$ cd sentryflow/ -$ kubectl create -f deployments/sentryflow.yaml -namespace/sentryflow created -serviceaccount/sa-sentryflow created -clusterrole.rbac.authorization.k8s.io/cr-sentryflow created -clusterrolebinding.rbac.authorization.k8s.io/rb-sentyflow created -deployment.apps/sentryflow created -service/sentryflow created +```shell +kubectl apply -f https://raw.githubusercontent.com/5GSEC/SentryFlow/refs/heads/main/deployments/sentryflow.yaml ``` -This process will create a namespace named `sentryflow` and will establish the necessary Kubernetes resources. - -> **Note**: SentryFlow will automatically modify Istio's `meshConfig` to configure `extensionProviders`, facilitating SentryFlow's API log collection. +This will create a namespace named `sentryflow` and will deploy the necessary Kubernetes resources. Then, check if SentryFlow is up and running by: -``` -$ kubectl get pods -n sentryflow -NAME READY STATUS RESTARTS AGE -sentryflow-cd95d79b4-9q7d7 1/1 Running 0 4m41s +```shell +$ kubectl -n sentryflow get pods +NAME READY STATUS RESTARTS AGE +sentryflow-cff887bbd-rljm7 1/1 Running 0 73s ``` ## 3. Deploying SentryFlow Clients -SentryFlow has now been established within the cluster. In addition, SentryFlow exports API logs and metrics through gRPC. For further details on how this data is transmitted, please consult the [SentryFlow Client Guide](sentryflow_client_guide.md). - -For testing purposes, two simple clients have been developed. +SentryFlow has now been deployed in the cluster. In addition, SentryFlow exports API access logs through `gRPC`. -- `log-client`: Simply log everything coming from SentryFlow service -- `mongo-client`: Stores every logs coming from SentryFlow service to a MongoDB service. +For testing purposes, a client has been developed. -These clients can be deployed into the cluster under namespace `sentryflow` by following the command: +- `log-client`: Simply logs everything on `STDOUT` coming from SentryFlow. -- `log-client` - ``` - $ kubectl create -f deployments/log-client.yaml - deployment.apps/log-client created - ``` +It can be deployed into the cluster under namespace `sentryflow` by running the following command: -- `mongo-client` - ``` - $ kubectl create -f deployments/mongo-client.yaml - deployment.apps/mongodb created - service/mongodb created - deployment.apps/mongo-client created - ``` - -Then, check if those clients and MongoDB are properly up and running by: - -``` -$ kubectl get pods -n sentryflow -NAME READY STATUS RESTARTS AGE -log-client-6c8864655f-h2sdv 1/1 Running 0 5m28s -mongo-client-7cbf6b888f-vd69g 1/1 Running 0 5m28s -mongodb-6f5d9fc599-zwnxj 1/1 Running 0 5m28s -... +```shell +kubectl apply -f https://raw.githubusercontent.com/5GSEC/SentryFlow/refs/heads/main/deployments/sentryflow-client.yaml ``` +Then, check if it is up and running by: +```shell +kubectl get pods -n sentryflow ``` -If you observe `log-client`, `mongo-client`, and `mongodb` running within the namespace, the setup has been completed successfully. -## 3. Use Cases and Examples - -Up to this point, SentryFlow has been successfully integrated into the Istio service mesh and Kubernetes cluster. For additional details on use cases and examples, please consult the accompanying documentation. - -The links below are organized by their level of complexity, starting from basic and progressing to more complex. -- [Single HTTP Requests](../examples/httpbin/README.md) -- [RobotShop Demo Microservice](../examples/robotshop/README.md) -- [Nephio Free5gc Workload](../examples/nephio/free5gc/README.md) -- [Nephio OAI Workload](../examples/nephio/oai/README.md) +If you observe that `log-client` is running, the setup has been completed successfully.
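+As a rough illustration of what the client prints, an API access log is a structured event; a sample entry (abridged from the project's earlier `httpbin` example, so exact field names may differ in the current protobuf definitions) looks like: + +``` +timeStamp:"..." id:... srcNamespace:"default" srcName:"sleep-..." srcType:"Pod" dstNamespace:"default" dstName:"httpbin" dstType:"Service" protocol:"HTTP/1.1" method:"GET" path:"/status/418" responseCode:418 +``` +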
diff --git a/docs/k8s_compatibility.md b/docs/k8s_compatibility.md deleted file mode 100644 index 1830762..0000000 --- a/docs/k8s_compatibility.md +++ /dev/null @@ -1,8 +0,0 @@ -# Kubernetes Compatability Matrix - -This document outlines various Kubernetes configurations and their compatibility with SentryFlow. - -|OS|Kubernetes Version|CRI|CNI| -|--|--|--|--| -|Ubuntu 20.04|1.27.1|containerd=1.6.19|kindnet=0.4.0| -|Ubuntu 22.04|1.23.0|docker=25.0.3|calico=0.3.1| diff --git a/docs/receivers.md b/docs/receivers.md new file mode 100644 index 0000000..7b99638 --- /dev/null +++ b/docs/receivers.md @@ -0,0 +1,8 @@ +# SentryFlow Receivers + +SentryFlow supports the following receivers: + +## Kubernetes + +- [Istio sidecar](https://istio.io/latest/docs/setup/) service mesh. To integrate SentryFlow with it, refer + to [this](receivers/service-mesh/istio/istio.md). diff --git a/docs/receivers/service-mesh/istio/istio.md b/docs/receivers/service-mesh/istio/istio.md new file mode 100644 index 0000000..4792d3c --- /dev/null +++ b/docs/receivers/service-mesh/istio/istio.md @@ -0,0 +1,62 @@ +# Istio Sidecar Service Mesh + +## Description + +This guide provides a step-by-step process to integrate SentryFlow with Istio, aimed at enhancing API observability. It +includes detailed commands for each step along with their explanations. + +SentryFlow makes use of the following to provide visibility into API calls: + +- [Envoy Wasm Filter](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/wasm_filter) +- [Istio Wasm Plugin](https://istio.io/latest/docs/reference/config/proxy_extensions/wasm-plugin/) +- [Istio EnvoyFilter](https://istio.io/latest/docs/reference/config/networking/envoy-filter/) + +## Prerequisites + +- Deploy the Istio service mesh. Follow [this](https://istio.io/latest/docs/setup/install/) to deploy it if you've not + already deployed it. +- Enable Envoy proxy injection by labeling the namespace in which you'll deploy your workloads: + ```shell + kubectl label ns <namespace> istio-injection=enabled + ``` + +## How to + +To observe API calls of your workloads running on top of the Istio service mesh in a Kubernetes environment, follow the +steps below: + +1. Download the SentryFlow manifest file + + ```shell + curl -sO https://raw.githubusercontent.com/5GSEC/SentryFlow/refs/heads/main/deployments/sentryflow.yaml + ``` + +2. Update the `.receivers` configuration in the `sentryflow` [configmap](../../../../deployments/sentryflow.yaml) as + follows: + + ```yaml + filters: + server: + port: 8081 + + # Envoy filter is required for `istio-sidecar` service-mesh receiver. + # Leave it as it is unless you want to use your own filter. + envoy: + uri: 5gsec/sentryflow-httpfilter:v0.1 + + receivers: + serviceMeshes: + - name: istio-sidecar # SentryFlow makes use of `name` to configure receivers. DON'T CHANGE IT. + namespace: istio-system # Kubernetes namespace in which you've deployed Istio. + ... + ``` + +3. Apply the updated manifest file: + + ```shell + kubectl apply -f sentryflow.yaml + ``` + +4. Trigger API calls to generate traffic. + +5. Use the SentryFlow [log client](../../../../client) to see the API events.
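+For reference, a minimal log client could look like the sketch below. It is modeled on the project's earlier v0.1 Go client (`NewSentryFlowClient`, `GetAPILog`, and `ClientInfo` from the generated protobuf package) and assumes the exporter is reachable at `sentryflow.sentryflow.svc.cluster.local:8080`; treat it as a sketch, since the current generated API may differ: + +```go +// A sketch of a SentryFlow API log consumer, modeled on the old v0.1 client. +package main + +import ( + pb "SentryFlow/protobuf" + "context" + "log" + + "google.golang.org/grpc" +) + +func main() { + // Connect to SentryFlow's gRPC exporter (insecure transport, as in the old v0.1 client). + conn, err := grpc.Dial("sentryflow.sentryflow.svc.cluster.local:8080", grpc.WithInsecure()) + if err != nil { + log.Fatalf("[gRPC] Failed to connect: %v", err) + } + defer conn.Close() + + // Subscribe to the API log stream. + client := pb.NewSentryFlowClient(conn) + stream, err := client.GetAPILog(context.Background(), &pb.ClientInfo{HostName: "minimal-log-client"}) + if err != nil { + log.Fatalf("[Client] Could not get API log stream: %v", err) + } + + // Print every received API event. + for { + apiLog, err := stream.Recv() + if err != nil { + log.Fatalf("[Client] Failed to receive an API log: %v", err) + } + log.Printf("[Client] Received log: %v", apiLog) + } +} +```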
diff --git a/docs/sentryflow_client_guide.md b/docs/sentryflow_client_guide.md deleted file mode 100644 index 890636e..0000000 --- a/docs/sentryflow_client_guide.md +++ /dev/null @@ -1,3 +0,0 @@ -# SentryFlow Client Guide - -- TBF diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index f162656..0000000 --- a/examples/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Examples - -- [Single HTTP Requests](httpbin/README.md) -- [RobotShop Demo Microservice](robotshop/README.md) diff --git a/examples/bookinfo/README.md b/examples/bookinfo/README.md deleted file mode 100644 index 298fd13..0000000 --- a/examples/bookinfo/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Example - bookinfo - -## Installation - -https://istio.io/latest/docs/examples/bookinfo/ - -## Namespace Telemetry - -```kubectl create -f telemetry.yaml``` - -## API Request Generation - -```curl http://bookinfo_Address:9080/productpage``` diff --git a/examples/bookinfo/telemetry.yaml b/examples/bookinfo/telemetry.yaml deleted file mode 100644 index 7c434eb..0000000 --- a/examples/bookinfo/telemetry.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: telemetry.istio.io/v1alpha1 -kind: Telemetry -metadata: - namespace: bookinfo - name: bookinfo-logging -spec: - accessLogging: - - providers: - - name: sentryflow diff --git a/examples/httpbin/README.md b/examples/httpbin/README.md deleted file mode 100644 index 6b5e03e..0000000 --- a/examples/httpbin/README.md +++ /dev/null @@ -1,111 +0,0 @@ -# Single HTTP Requests - -This document demonstrates how SentryFlow effectively captures API logs for simple HTTP requests, using Istio's `sleep` and `httpbin` examples for illustration. - -It is essential to ensure that the `sleep` and `httpbin` deployments are correctly configured and that the default namespace has [Istio injection enabled](https://istio.io/latest/docs/setup/additional-setup/sidecar-injection/#automatic-sidecar-injection) for the setup to function properly. - -## Step 1. Verify Services - -To confirm that Istio is set up correctly, start by verifying if the `default` namespace has Istio injection enabled. This can be done using the following command: - -```bash -kubectl describe namespace default - -Name: default -Labels: istio-injection=enabled -``` - -If the namespace `default` has label `istio-injection=enabled`, this was set properly. Now, apply the `telemetry.yaml` in this directory by following command: - -```bash -kubectl create -f telemetry.yaml - -telemetry.telemetry.istio.io/sleep-logging created -``` - -Executing this command will configure `telemetry` for Istio, instructing Envoy proxies to forward access logs to SentryFlow.
- -> **Note**: Configuring telemetry could require some time to be fully implemented throughout the entire cluster. - -To ensure that the pods in the `default` namespace are operational, execute the following command: - -```bash -kubectl get pods -n default - -NAME READY STATUS RESTARTS AGE -httpbin-545f698b64-ncvq9 2/2 Running 0 44s -sleep-75bbc86479-fmf4p 2/2 Running 0 35s -``` - -## Step 2. Sending API Requests - -Going forward, the `sleep` pod will initiate API requests to the `httpbin` service, which can be done using the following command: - -```bash -export SOURCE_POD=$(kubectl get pod -l app=sleep -o jsonpath={.items..metadata.name}) -kubectl exec "$SOURCE_POD" -c sleep -- curl -sS -v httpbin:8000/status/418 -``` - -## Step 3. Checking Logs - -There are two methods of checking logs with SentryFlow clients. - -### 1. Log Client - -To examine the logs exported by SentryFlow, you can use the following command: - -```bash -kubectl logs -n sentryflow -l app=log-client - -YYYY/MM/DD 17:03:37 [gRPC] Successfully connected to sentryflow.sentryflow.svc.cluster.local:8080 -YYYY/MM/DD 17:40:28 [Client] Received log: timeStamp:"[YYYY/MM/DDT17:40:27.225Z]" id:1707929670787152 srcNamespace:"default" srcName:"sleep-75bbc86479-fmf4p" srcLabel:{key:"app" value:"sleep"} srcLabel:{key:"pod-template-hash" value:"75bbc86479"} srcLabel:{key:"security.istio.io/tlsMode" value:"istio"} srcLabel:{key:"service.istio.io/canonical-name" value:"sleep"} srcLabel:{key:"service.istio.io/canonical-revision" value:"latest"} srcIP:"10.244.140.11" srcPort:"44126" srcType:"Pod" dstNamespace:"default" dstName:"httpbin" dstLabel:{key:"app" value:"httpbin"} dstLabel:{key:"service" value:"httpbin"} dstIP:"10.105.103.198" dstPort:"8000" dstType:"Service" protocol:"HTTP/1.1" method:"GET" path:"/status/418" responseCode:418 -YYYY/MM/DD 17:40:29 [Client] Received log: timeStamp:"[YYYY/MM/DDT17:40:28.845Z]" id:1707929670787154 srcNamespace:"default" srcName:"sleep-75bbc86479-fmf4p" srcLabel:{key:"app" value:"sleep"} srcLabel:{key:"pod-template-hash" value:"75bbc86479"} srcLabel:{key:"security.istio.io/tlsMode" value:"istio"} srcLabel:{key:"service.istio.io/canonical-name" value:"sleep"} srcLabel:{key:"service.istio.io/canonical-revision" value:"latest"} srcIP:"10.244.140.11" srcPort:"44158" srcType:"Pod" dstNamespace:"default" dstName:"httpbin" dstLabel:{key:"app" value:"httpbin"} dstLabel:{key:"service" value:"httpbin"} dstIP:"10.105.103.198" dstPort:"8000" dstType:"Service" protocol:"HTTP/1.1" method:"GET" path:"/status/418" responseCode:418 -``` - -As expected, we should be able to observe the `/status/418` API request being made from the `sleep` pod to the `httpbin` service. - -### 2. MongoDB Client - -To inspect the data stored in MongoDB by SentryFlow, you can use the following command: - -```bash -export MONGODB_POD=$(kubectl get pod -n sentryflow -l app=mongodb -o jsonpath='{.items[0].metadata.name}') -kubectl exec -it $MONGODB_POD -n sentryflow mongosh -``` - -Initiating this command will launch an interactive shell that can be used to explore the contents stored within the database. To examine the data in the database, refer to the subsequent commands provided. 
- -``` -test> use sentryflow; -switched to db sentryflow -sentryflow> db["APILogs"].find() -[ - { - _id: ObjectId('65ccfa872b80bf0cec7dab83'), - timestamp: '[YYYY-MM-DDT17:38:14.330Z]', - id: Long('1707929670787151'), - srcnamespace: 'default', - srcname: 'sleep-75bbc86479-fmf4p', - srclabel: { - app: 'sleep', - 'pod-template-hash': '75bbc86479', - 'security.istio.io/tlsMode': 'istio', - 'service.istio.io/canonical-name': 'sleep', - 'service.istio.io/canonical-revision': 'latest' - }, - srcip: '10.244.140.11', - srcport: '47996', - srctype: 'Pod', - dstnamespace: 'default', - dstname: 'httpbin', - dstlabel: { app: 'httpbin', service: 'httpbin' }, - dstip: '10.105.103.198', - dstport: '8000', - dsttype: 'Service', - protocol: 'HTTP/1.1', - method: 'GET', - path: '/status/418', - responsecode: Long('418') - } -] -``` diff --git a/examples/httpbin/telemetry.yaml b/examples/httpbin/telemetry.yaml deleted file mode 100644 index 78a6809..0000000 --- a/examples/httpbin/telemetry.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: telemetry.istio.io/v1alpha1 -kind: Telemetry -metadata: - name: sleep-logging -spec: - selector: - matchLabels: - app: sleep - accessLogging: - - providers: - - name: sentryflow diff --git a/examples/robotshop/README.md b/examples/robotshop/README.md deleted file mode 100644 index 635a596..0000000 --- a/examples/robotshop/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Example - robotshop - -## Installation - -https://github.com/instana/robot-shop - -Simple Microservice application. this is installed using helm. - -## Namespace Telemetry - -```kubectl create -f telemetry.yaml``` - -## Accessing the Store - -Connect to http://localhost:8080 and make API requests. diff --git a/examples/robotshop/telemetry.yaml b/examples/robotshop/telemetry.yaml deleted file mode 100644 index 5cc4e8f..0000000 --- a/examples/robotshop/telemetry.yaml +++ /dev/null @@ -1,9 +0,0 @@ -apiVersion: telemetry.istio.io/v1alpha1 -kind: Telemetry -metadata: - namespace: robot-shop - name: robot-shop-logging -spec: - accessLogging: - - providers: - - name: sentryflow