moshemalawach · amiller68 · Dec 27, 2023 · Dec 28, 2023 · Jan 5, 2024 · Jan 5, 2024
diff --git a/.env.sample b/.env.sample
@@ -0,0 +1,8 @@
+# Your Telegram Bot Token
+TG_TOKEN=123456789:ABCdefGhIjKlmnOpqRsTuvwXyz123456789
+# Database URL
+DATABASE_URL=sqlite:///./data/app.db
+# Log Path
+LOG_PATH=./data/app.log
+# Debug
+DEBUG=False
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,3 @@
-logs.json
+.idea
 .env
-__pycache__
+__pycache__
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c) 2024 Alexander Miller
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/README.md b/README.md
@@ -0,0 +1,140 @@
+# Telegram Chat Bot
+
+This is a simple Telegram Chat Bot that can be used to send messages to a Telegram group or channel.
+It utilizes a Basic AI agent in order to respond to messages sent to the bot.
+It works by building up a knowledge base of messages sent to the bot and then using that knowledge base to respond to
+messages sent to the bot.
+It also implements a function interface for the bot to use in order to respond to messages that require further
+information.
+It utilizes [Libertai's decentralized LLM API](https://libertai.io/apis/text-generation/) for generating context-aware responses to user queries.
+It specifically targets the [Nous Hermes 2 Pro](https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B) model for generating responses. This model is fine-tuned for handling function calls.
+
+## Requirements
+
+- Python3 + virtualenv
+
+## Setup
+
+### Configuration
+
+#### Note on Environment Variables
+
+The bot is configured using environment variables.
+
+It is best to do this in a `.env` file at the root of this repository. Our scripts will look for this file and use it to set the environment variables.
+
+You can override the defaults you set in your `.env` by setting the environment variables with `export` prior to running the bot.
+
+#### Telegram Bot Token
+
+You must have a valid Telegram Bot Token in order to use this bot. You can get one by talking to
+the [BotFather](https://t.me/botfather) on Telegram.
+
+Name this variable `TG_TOKEN` within your environment.
+
+#### Logging
+
+The logging is controlled by the `LOG_PATH` environment variable. This is the path to the log file that the bot will write to.
+
+If this is not set, the bot will default to writing logs out to stdout only.
+
+A good default is to set this to `./data/app.log` in the `.env` file.
+
+#### Sqlite Database
+
+The bot uses a sqlite database to store the knowledge base of messages that it has received.
+
+The path to the database is controlled by the `DATABASE_PATH` environment variable. This variable should point to where our sqlite database is located.
+
+NOTE: We explicitly don't set the full url because some tasks require `sqlite+aiosqlite` to be specified as the protocol. Rather than make the user specify this, we just ask for the path to the database file. `:memory:` is a valid option for this variable.
+
+If this is not set, the bot will default to using `:memory:` which will create an in-memory database that will be lost when the bot is stopped.
+
+A good default is to set this to `./data/app.db` in the `.env` file.
+
+#### Debug Mode
+
+If you want to run the bot in debug mode, you can set the `DEBUG` environment variable to `True`.
+
+This will log debug events related to message handling. This is very useful when developing new features.
+
+#### Agent
+
+The bot uses an AI agent to generate responses to user queries.
+
+See `./agent.yaml` for the default configuration. The bot will load this file and use it to configure the agent when it starts.
+
+If you want to change the agent configuration, you can do so by editing this file or setting the `AGENT_CONFIG_PATH` environment variable to the path of the file you want to use. This file must contain a valid yaml configuration for the agent.
+
+See `./agent.yaml` for the default configuration and documentation on the available options.
+
+## Installation
+
+This command sets up a virtual environment and installs the required dependencies within it for the bot to run.
+
+```
+./scripts/install.sh
+```
+
+If you would like to run the bot please ensure you use the virtual environment created by the install script.
+
+```
+source venv/bin/activate
+python3 src/bot.py
+```
+
+## Usage
+
+### Development
+
+We provide a script to run the bot in development mode. This will run the bot in debug mode, against an in-memory database and write logs to stdout.
+
+```
+./scripts/dev.sh
+```
+
+All you have to do is make sure you have a valid Telegram Bot Token set in your environment as `TG_TOKEN`.
+
+After you have launched the bot, you can search for it on Telegram using its username and start a conversation with it.
+
+### Production-ish
+
+#### Note on Database Migrations
+
+The bot uses an sqlite database to store the knowledge base of messages that it has received.
+
+You can use the `alembic` tool to manage the database schema. We provide scripts for doing so within the virtual environment.
+
+You can run the following command to generate new migrations if you have made changes to the database schema:
+
+```
+./scripts/prepare_migrations.sh
+```
+
+This will generate a new migration file in the `./alembic/versions` directory.
+
+Include these updated migrations in your pull request when you make changes to the database schema.
+
+NOTE: This script is also controlled by the `DATABASE_PATH` environment variable. If you do not set this, the script will default to using `./data/app.db` as the database path.
+
+You can run the following command to apply the migrations to the database:
+
+```
+./scripts/migrate.sh
+```
+
+This will apply any new migrations to the database.
+
+Like the previous script, this script is also controlled by the `DATABASE_PATH` environment variable. If you do not set this, the script will default to using `./data/app.db` as the database path.
+
+NOTE: the bot does not run migrations automatically. You must remember to responsibly run the `migrate.sh` script when you have new migrations to apply.
+
+#### Running the Bot
+
+We provide a script to run the bot in production mode. This will deactivate debug mode, and allow you to configure the logging and database path. If neither of these is set, the bot will default to writing logs to `./data/app.log` and using `./data/app.db` as the database path.
+
+```
+./scripts/run.sh
+```
+
+NOTE: once again, it is your responsibility to set the `TG_TOKEN` environment variable and to run the `migrate.sh` script when you have new migrations to apply.
diff --git a/agent.yaml b/agent.yaml
@@ -0,0 +1,35 @@
+# Everything having to do with LLM resources and their parameterization
+model:
+  name: "Nouse-Hermes-2-Pro"
+  api_url: "https://altostratus.everythingwilldecentrali.se/vm/5e413862959ee4e48dda56f9efa0f128e93fb7b0dd6ce660f89172c6f2c96b88/completion"
+  engine: "llamacpp"
+
+  # Max Token count of prompts
+  max_prompt_tokens: 16384
+  # Max Token count of completions
+  max_completion_tokens: 250
+
+  # Model parameterizing
+  temperature: 0.7
+  sampler_order: [6, 0, 1, 3, 4, 2, 5]
+  top_p: 0.9
+  top_k: 40
+
+# Agent prompt configuration
+agent:
+  # Max number of attempts to complete on a prompt
+  max_completion_tries: 3
+  # Max number of times the agent can call back to itself
+  max_self_recurse_depth: 5
+  # System Prompt Template. See the default template for further info
+  system_prompt_template: "./templates/system.yaml"
+
+# ChatML configuration
+chat_ml:
+  user_prepend: "<|im_start|>"
+  user_append: "<|im_end|>"
+  stop_sequences:
+    - "<|im_end|>"
+    - "<|endoftext|>"
+    - "</assistant"
+    - "</user"
diff --git a/alembic.ini b/alembic.ini
@@ -0,0 +1,116 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the
+# "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+version_path_separator = os  # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = ${DATABASE_URL}
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts.  See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S