From 15b832b3495230b4cb2cd0db489ebda1018bad2a Mon Sep 17 00:00:00 2001 From: Andrey Date: Tue, 10 Sep 2024 11:53:14 +0300 Subject: [PATCH 01/51] Fix retrieval skill: getting pgvector params in runtime (#9675) --- .../loaders/vector_store_loader/pgvector.py | 84 +++++++++++++++++++ .../vector_store_loader.py | 64 +------------- mindsdb/interfaces/agents/tools.py | 8 +- .../interfaces/knowledge_base/controller.py | 12 +-- tests/scripts/check_requirements.py | 2 +- 5 files changed, 101 insertions(+), 69 deletions(-) create mode 100644 mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py new file mode 100644 index 00000000000..992605239b9 --- /dev/null +++ b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py @@ -0,0 +1,84 @@ +from typing import Any, List, Optional, Dict + +from langchain_community.vectorstores import PGVector +from langchain_community.vectorstores.pgvector import Base + +from pgvector.sqlalchemy import Vector +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import JSON + +from sqlalchemy.orm import Session + + +_generated_sa_tables = {} + + +class PGVectorMDB(PGVector): + """ + langchain_community.vectorstores.PGVector adapted for mindsdb vector store table structure + """ + + def __post_init__( + self, + ) -> None: + + collection_name = self.collection_name + + if collection_name not in _generated_sa_tables: + + class EmbeddingStore(Base): + """Embedding store.""" + + __tablename__ = collection_name + + id = sa.Column(sa.Integer, primary_key=True) + embedding: Vector = sa.Column('embeddings', Vector()) + document = sa.Column('content', sa.String, nullable=True) + cmetadata = sa.Column('metadata', JSON, nullable=True) + + _generated_sa_tables[collection_name] = EmbeddingStore + + self.EmbeddingStore = _generated_sa_tables[collection_name] + + def __query_collection( + self, + embedding: List[float], + k: int = 4, + filter: Optional[Dict[str, str]] = None, + ) -> List[Any]: + """Query the collection.""" + with Session(self._bind) as session: + + results: List[Any] = ( + session.query( + self.EmbeddingStore, + self.distance_strategy(embedding).label("distance"), + ) + .order_by(sa.asc("distance")) + .limit(k) + .all() + ) + for rec, _ in results: + if not bool(rec.cmetadata): + rec.cmetadata = {0: 0} + + return results + + # aliases for different langchain versions + def _PGVector__query_collection(self, *args, **kwargs): + return self.__query_collection(*args, **kwargs) + + def _query_collection(self, *args, **kwargs): + return self.__query_collection(*args, **kwargs) + + def create_collection(self): + raise RuntimeError('Forbidden') + + def delete_collection(self): + raise RuntimeError('Forbidden') + + def delete(self, *args, **kwargs): + raise RuntimeError('Forbidden') + + def add_embeddings(self, *args, **kwargs): + raise RuntimeError('Forbidden') diff --git a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py index b2c6966642a..8ca8e8cad79 100644 --- a/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py +++ b/mindsdb/integrations/utilities/rag/loaders/vector_store_loader/vector_store_loader.py @@ -1,20 +1,13 @@ -import ast -import uuid from langchain_core.embeddings import Embeddings from 
langchain_community.vectorstores import Chroma, PGVector from langchain_core.vectorstores import VectorStore + from pydantic import BaseModel -from mindsdb.integrations.libs.vectordatabase_handler import TableField from mindsdb.integrations.utilities.rag.settings import VectorStoreType, VectorStoreConfig from mindsdb.utilities import log -import pandas as pd - -from sqlalchemy import create_engine -from sqlalchemy.orm import scoped_session, sessionmaker -from sqlalchemy.exc import DisconnectionError logger = log.getLogger(__name__) @@ -59,60 +52,9 @@ def _load_chromadb_store(embedding_model: Embeddings, settings) -> Chroma: @staticmethod def _load_pgvector_store(embedding_model: Embeddings, settings) -> PGVector: - # create an empty store if collection_name does not exist otherwise load the existing collection - store = PGVector( + from .pgvector import PGVectorMDB + return PGVectorMDB( connection_string=settings.connection_string, collection_name=settings.collection_name, embedding_function=embedding_model ) - return VectorStoreFactory._load_data_into_langchain_pgvector(settings, store) - - @staticmethod - def _load_data_into_langchain_pgvector(settings, vectorstore: PGVector) -> PGVector: - """ - Fetches data from the existing pgvector table and loads it into the langchain pgvector vector store - :param settings: - :param vectorstore: - :return: - """ - df = VectorStoreFactory._fetch_data_from_db(settings) - - df[TableField.EMBEDDINGS] = df[TableField.EMBEDDINGS].apply(ast.literal_eval) - df[TableField.METADATA] = df[TableField.METADATA].apply(ast.literal_eval) - - metadata = df[TableField.METADATA].tolist() - embeddings = df[TableField.EMBEDDINGS].tolist() - texts = df[TableField.CONTENT].tolist() - ids = [str(uuid.uuid1()) for _ in range(len(df))] \ - if TableField.ID not in df.columns else df[TableField.ID].tolist() - - vectorstore.add_embeddings( - texts=texts, - embeddings=embeddings, - metadatas=metadata, - ids=ids - ) - return vectorstore - - @staticmethod - def _fetch_data_from_db(settings: VectorStoreConfig) -> pd.DataFrame: - """ - Fetches data from the database using the provided connection_string in the settings - :param settings: - :return: - """ - try: - engine = create_engine(settings.connection_string) - db = scoped_session(sessionmaker(bind=engine)) - - df = pd.read_sql(f"SELECT * FROM {settings.collection_name}", engine) - - return df - except DisconnectionError as e: - logger.error("Unable to connect to the database. 
Please check your connection string and try again.") - raise e - except Exception as e: - logger.error(f"An error occurred while fetching data from the database: {e}") - raise e - finally: - db.close() diff --git a/mindsdb/interfaces/agents/tools.py b/mindsdb/interfaces/agents/tools.py index 043406b2699..cda2d1c1fe9 100644 --- a/mindsdb/interfaces/agents/tools.py +++ b/mindsdb/interfaces/agents/tools.py @@ -123,7 +123,13 @@ def _build_vector_store_config_from_knowledge_base(rag_params: Dict, knowledge_b elif vector_store_type == VectorStoreType.PGVECTOR.value: # For pgvector, we get connection string # todo requires further testing - connection_params = knowledge_base.vector_database.data + + # get pgvector runtime data + kb_table = executor.session.kb_controller.get_table(knowledge_base.name, knowledge_base.project_id) + vector_db = kb_table.get_vector_db() + connection_params = vector_db.connection_args + vector_store_config['collection_name'] = vector_db._check_table(knowledge_base.vector_database_table) + vector_store_config['connection_string'] = _create_conn_string(connection_params) else: diff --git a/mindsdb/interfaces/knowledge_base/controller.py b/mindsdb/interfaces/knowledge_base/controller.py index d31f6cff1df..768679268d1 100644 --- a/mindsdb/interfaces/knowledge_base/controller.py +++ b/mindsdb/interfaces/knowledge_base/controller.py @@ -65,7 +65,7 @@ def select_query(self, query: Select) -> pd.DataFrame: query.targets = targets # send to vectordb - db_handler = self._get_vector_db() + db_handler = self.get_vector_db() resp = db_handler.query(query) return resp.data_frame @@ -91,7 +91,7 @@ def update_query(self, query: Update): query.table = Identifier(parts=[self._kb.vector_database_table]) # send to vectordb - db_handler = self._get_vector_db() + db_handler = self.get_vector_db() db_handler.query(query) def delete_query(self, query: Delete): @@ -106,7 +106,7 @@ def delete_query(self, query: Delete): query.table = Identifier(parts=[self._kb.vector_database_table]) # send to vectordb - db_handler = self._get_vector_db() + db_handler = self.get_vector_db() db_handler.query(query) def clear(self): @@ -114,7 +114,7 @@ def clear(self): Clear data in KB table Sends delete to vector db table """ - db_handler = self._get_vector_db() + db_handler = self.get_vector_db() db_handler.delete(self._kb.vector_database_table) def insert(self, df: pd.DataFrame): @@ -134,7 +134,7 @@ def insert(self, df: pd.DataFrame): df = pd.concat([df, df_emb], axis=1) # send to vector db - db_handler = self._get_vector_db() + db_handler = self.get_vector_db() db_handler.do_upsert(self._kb.vector_database_table, df) def _adapt_column_names(self, df: pd.DataFrame) -> pd.DataFrame: @@ -251,7 +251,7 @@ def _replace_query_content(self, node, **kwargs): node.args[0].parts = [TableField.EMBEDDINGS.value] node.args[1].value = [self._content_to_embeddings(node.args[1].value)] - def _get_vector_db(self): + def get_vector_db(self): """ helper to get vector db handler """ diff --git a/tests/scripts/check_requirements.py b/tests/scripts/check_requirements.py index 5d0dc5a0e58..5a76c930d11 100644 --- a/tests/scripts/check_requirements.py +++ b/tests/scripts/check_requirements.py @@ -39,7 +39,7 @@ def get_requirements_from_file(path): # transformers is required for langchain_core and not explicitly imported by mindsdb. 
MAIN_RULE_IGNORES = { "DEP003": ["torch"], - "DEP001": ["torch"], + "DEP001": ["torch", "pgvector"], "DEP002": ["psycopg2-binary", "lark", "transformers"], } From c0b4f3cc419f459b77aee0fa49d35cc3fb2d30e5 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Tue, 10 Sep 2024 12:13:33 +0300 Subject: [PATCH 02/51] fix BYOM model creation (#9706) --- mindsdb/integrations/handlers/byom_handler/byom_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mindsdb/integrations/handlers/byom_handler/byom_handler.py b/mindsdb/integrations/handlers/byom_handler/byom_handler.py index db74fe34f63..4b3b8427ee9 100644 --- a/mindsdb/integrations/handlers/byom_handler/byom_handler.py +++ b/mindsdb/integrations/handlers/byom_handler/byom_handler.py @@ -452,7 +452,7 @@ def func_call(self, func_name, args): func = getattr(self.module, func_name) return func(*args) - def check(self, mode): + def check(self, mode: str = None): methods = check_module(self.module, mode) return methods @@ -609,7 +609,7 @@ def _run_command(self, params): raise RuntimeError(p.stderr.read()) return ret - def check(self, mode): + def check(self, mode: str = None): params = { 'method': BYOM_METHOD.CHECK.value, 'code': self.code, From 56acc3fe9225158808a6c7496f94ad019370b87b Mon Sep 17 00:00:00 2001 From: Andrey Date: Tue, 10 Sep 2024 12:52:29 +0300 Subject: [PATCH 03/51] Use default ssl certificate for mongo server (#9699) --- mindsdb/api/mongo/server.py | 11 +---------- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py | 2 ++ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/mindsdb/api/mongo/server.py b/mindsdb/api/mongo/server.py index cb21f136f99..456e6055b7e 100644 --- a/mindsdb/api/mongo/server.py +++ b/mindsdb/api/mongo/server.py @@ -251,18 +251,9 @@ class MongoRequestHandler(SocketServer.BaseRequestHandler): def _init_ssl(self): import ssl - import tempfile - import atexit - import os - from mindsdb.utilities.wizards import make_ssl_cert + ssl_context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH) - CERT_PATH = tempfile.mkstemp(prefix='mindsdb_cert_', text=True)[1] - make_ssl_cert(CERT_PATH) - atexit.register(lambda: os.remove(CERT_PATH)) - - ssl_context = ssl.SSLContext() - ssl_context.load_cert_chain(CERT_PATH) ssl_socket = ssl_context.wrap_socket( self.request, server_side=True, diff --git a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py b/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py index 2324be4b171..8320e4a4edb 100644 --- a/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +++ b/mindsdb/api/mysql/mysql_proxy/mysql_proxy.py @@ -880,6 +880,8 @@ def startProxy(): cert_path = tempfile.mkstemp(prefix="mindsdb_cert_", text=True)[1] make_ssl_cert(cert_path) atexit.register(lambda: os.remove(cert_path)) + elif not os.path.exists(cert_path): + logger.error("Certificate defined in 'certificate_path' setting does not exist") # TODO make it session local server_capabilities.set(CAPABILITIES.CLIENT_SSL, config["api"]["mysql"]["ssl"]) From 5fccd9c71d918611439645aad9edfc9fcd264347 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Tue, 10 Sep 2024 12:21:07 +0200 Subject: [PATCH 04/51] Update docker-bake.hcl (#9707) --- docker/docker-bake.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index f4fba835e32..37fb41741e0 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -90,7 +90,7 @@ target "images" { }, { name = "cloud" - extras = 
".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,mssql,youtube,gmail,pgvector,writer,rag,github,snowflake,clickhouse,bigquery,elasticsearch,s3,dynamodb,databricks,oracle] darts datasetsforecast" + extras = ".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,mssql,youtube,gmail,pgvector,writer,rag,github,snowflake,clickhouse,bigquery,elasticsearch,s3,dynamodb,databricks,oracle,db2,teradata,hive] darts datasetsforecast" target = "" }, ] From 3f7768c6050df419d175ae84fac4c69e9a307bd1 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Tue, 10 Sep 2024 12:41:46 +0200 Subject: [PATCH 05/51] New Release (#9708) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index b96b4e4aff1..b2b972497a5 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.1.0' +__version__ = '24.9.2.0' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 0ebcdfbfd1a2acdc8a04c78baf054e8827754172 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Tue, 10 Sep 2024 18:53:27 +0200 Subject: [PATCH 06/51] PAtch release (#9709) --- docker/docker-bake.hcl | 2 +- mindsdb/__about__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl index 37fb41741e0..20585774e13 100644 --- a/docker/docker-bake.hcl +++ b/docker/docker-bake.hcl @@ -90,7 +90,7 @@ target "images" { }, { name = "cloud" - extras = ".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,mssql,youtube,gmail,pgvector,writer,rag,github,snowflake,clickhouse,bigquery,elasticsearch,s3,dynamodb,databricks,oracle,db2,teradata,hive] darts datasetsforecast" + extras = ".[lightwood,huggingface,statsforecast-extra,neuralforecast-extra,timegpt,mssql,youtube,gmail,pgvector,writer,rag,github,snowflake,clickhouse,bigquery,elasticsearch,s3,dynamodb,databricks,oracle,teradata,hive] darts datasetsforecast" target = "" }, ] diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index b2b972497a5..6d3160767ab 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.2.0' +__version__ = '24.9.2.1' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 3bfb86f7c0bee482a921f60830acad1654dea7fc Mon Sep 17 00:00:00 2001 From: Daniel Usvyat Date: Wed, 11 Sep 2024 18:09:47 +0300 Subject: [PATCH 07/51] Extract generated query into response (#9698) --- mindsdb/api/http/namespaces/agents.py | 5 +++++ mindsdb/interfaces/agents/callback_handlers.py | 4 ++++ mindsdb/interfaces/agents/langchain_agent.py | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/mindsdb/api/http/namespaces/agents.py b/mindsdb/api/http/namespaces/agents.py index 68957e0f1fd..f31be7a8f62 100644 --- a/mindsdb/api/http/namespaces/agents.py +++ b/mindsdb/api/http/namespaces/agents.py @@ -306,6 +306,9 @@ def json_serialize(data): elif chunk.get('type') == 'context': # Handle context message yield json_serialize({"type": "context", "content": chunk.get('content')}) + elif chunk.get('type') == 'sql': + # Handle SQL query message + yield 
json_serialize({"type": "sql", "content": chunk.get('content')}) else: # Process and yield other types of chunks chunk_obj = {} @@ -328,6 +331,8 @@ def json_serialize(data): chunk_obj['steps'] = [{'observation': getattr(s, 'observation', str(s))} for s in chunk['steps']] if 'context' in chunk: chunk_obj['context'] = chunk['context'] + if 'sql' in chunk: + chunk_obj['sql'] = chunk['sql'] yield json_serialize(chunk_obj) else: diff --git a/mindsdb/interfaces/agents/callback_handlers.py b/mindsdb/interfaces/agents/callback_handlers.py index 47f3f34d967..ab836a61d27 100644 --- a/mindsdb/interfaces/agents/callback_handlers.py +++ b/mindsdb/interfaces/agents/callback_handlers.py @@ -28,6 +28,7 @@ def __init__(self, logger: logging.Logger): logger.setLevel('DEBUG') self.logger = logger self._num_running_chains = 0 + self.generated_sql = None def on_llm_start( self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any @@ -112,6 +113,9 @@ def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: '''Run on agent action.''' self.logger.debug(f'Running tool {action.tool} with input:') self.logger.debug(action.tool_input) + if action.tool.startswith("sql_db_query"): + # Save the generated SQL query + self.generated_sql = action.tool_input def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any: '''Run on agent end.''' diff --git a/mindsdb/interfaces/agents/langchain_agent.py b/mindsdb/interfaces/agents/langchain_agent.py index c39259e5c87..7a164010d05 100644 --- a/mindsdb/interfaces/agents/langchain_agent.py +++ b/mindsdb/interfaces/agents/langchain_agent.py @@ -613,6 +613,10 @@ def stream_agent(self, df: pd.DataFrame, agent_executor: AgentExecutor, args: Di if captured_context: yield {"type": "context", "content": captured_context} + if self.log_callback_handler.generated_sql: + # Yield generated SQL if available + yield {"type": "sql", "content": self.log_callback_handler.generated_sql} + if self.run_completion_span is not None: self.run_completion_span.end() self.api_trace.update() From 703795e0beb97a7b3e61f69b53f28a8cfebaf105 Mon Sep 17 00:00:00 2001 From: martyna-mindsdb <109554435+martyna-mindsdb@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:24:41 +0200 Subject: [PATCH 08/51] fix docs mint.json (#9711) --- docs/mint.json | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/mint.json b/docs/mint.json index 9a9951e55a2..00897a3c48b 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -16,7 +16,6 @@ "raiseIssue": true, "thumbsRating": true }, - "openapi": "https://raw.githubusercontent.com/mindsdb/mindsdb/openapi-specs/mindsdb/api/http/openapi.yml", "api": { "baseUrl": "http://127.0.0.1:47334" }, From 2151d53585121e9339adad23e1980a58f983a446 Mon Sep 17 00:00:00 2001 From: Lucas Koontz Date: Thu, 12 Sep 2024 05:15:19 -0400 Subject: [PATCH 09/51] Add `created_at` field to skills response (#9713) --- mindsdb/interfaces/storage/db.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mindsdb/interfaces/storage/db.py b/mindsdb/interfaces/storage/db.py index e5a0107e60c..f9b5db08599 100644 --- a/mindsdb/interfaces/storage/db.py +++ b/mindsdb/interfaces/storage/db.py @@ -452,6 +452,7 @@ def as_dict(self) -> Dict: "agent_ids": [a.id for a in self.agents], "type": self.type, "params": self.params, + "created_at": self.created_at, } From a4ec5025a23cb5303d2a218a7cedcf67ad508ac4 Mon Sep 17 00:00:00 2001 From: martyna-mindsdb <109554435+martyna-mindsdb@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:02:20 +0200 Subject: [PATCH 10/51] updated the usage of openai (#9718) 
---
 docs/integrations/ai-engines/openai.mdx       | 34 +++++++++++++++++++
 .../handlers/openai_handler/README.md         | 34 +++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/docs/integrations/ai-engines/openai.mdx b/docs/integrations/ai-engines/openai.mdx
index 52c94021cf7..f99b97f77ff 100644
--- a/docs/integrations/ai-engines/openai.mdx
+++ b/docs/integrations/ai-engines/openai.mdx
@@ -38,6 +38,9 @@ USING
     question_column = 'question', -- optional, column name that stores user input
     context_column = 'context', -- optional, column that stores context of the user input
     prompt_template = 'input message to the model here', -- optional, user provides instructions to the model here
+    user_column = 'user_input', -- optional, stores user input
+    assistant_column = 'conversation_context', -- optional, stores conversation context
+    prompt = 'instruction to the model', -- optional, stores instruction to the model
     max_tokens = 100, -- optional, token limit for answer
     temperature = 0.3, -- temp
     json_struct = {
@@ -118,6 +121,12 @@ The following parameters are available to use when creating an OpenAI model:
 
 ## Usage
 
+Here are the combinations of parameters for creating a model:
+
+1. Provide a `prompt_template` alone.
+2. Provide a `question_column` and optionally a `context_column`.
+3. Provide a `prompt`, `user_column`, and `assistant_column` to create a model in the conversational mode.
+
 The following usage examples utilize `openai_engine` to create a model with the `CREATE MODEL` statement.
 
@@ -213,6 +222,31 @@ The following usage examples utilize `openai_engine` to create a model with the
 +----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 ```
 
+
+
+  Here is how to create a model in the conversational mode.
+
+  ```sql
+  CREATE MODEL openai_chat_model
+  PREDICT response
+  USING
+     engine = 'openai_engine',
+     mode = 'conversational',
+     model_name = 'gpt-3.5-turbo',
+     user_column = 'user_input',
+     assistant_column = 'conversation_history',
+     prompt = 'Answer the question in a helpful way.';
+  ```
+
+  And here is how to query this model:
+
+  ```sql
+  SELECT response
+  FROM openai_chat_model
+  WHERE user_input = ''
+  AND conversation_history = '';
+  ```
+
diff --git a/mindsdb/integrations/handlers/openai_handler/README.md b/mindsdb/integrations/handlers/openai_handler/README.md
index f80c235bb2e..b63ac10fcad 100644
--- a/mindsdb/integrations/handlers/openai_handler/README.md
+++ b/mindsdb/integrations/handlers/openai_handler/README.md
@@ -38,6 +38,9 @@ USING
     question_column = 'question', -- optional, column name that stores user input
     context_column = 'context', -- optional, column that stores context of the user input
     prompt_template = 'input your query here', -- optional, user provides instructions to the model here
+    user_column = 'user_input', -- optional, stores user input
+    assistant_column = 'conversation_context', -- optional, stores conversation context
+    prompt = 'instruction to the model', -- optional, stores instruction to the model
     max_tokens = 100, -- optional, token limit for answer
 
     temperature = 0.3, -- temp
@@ -66,6 +69,12 @@ The following parameters are available to use when creating an OpenAI model:
 
 ## Usage
 
+Here are the combinations of parameters for creating a model:
+
+1. Provide a `prompt_template` alone.
+2. Provide a `question_column` and optionally a `context_column`.
+3. 
Provide a `prompt`, `user_column`, and `assistant_column` to create a model in the conversational mode. + The following usage examples utilize `openai_engine` to create a model with the `CREATE MODEL` statement. ### Answering questions without context @@ -158,6 +167,31 @@ On execution, we get: +----------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ``` +### Conversational mode + +Here is how to create a model in the conversational mode. + +```sql +CREATE MODEL openai_chat_model +PREDICT response +USING + engine = 'openai_engine', + mode = 'conversational', + model_name = 'gpt-3.5-turbo', + user_column = 'user_input', + assistant_column = 'conversation_history', + prompt = 'Answer the question in a helpful way.'; +``` + +And here is how to query this model: + +```sql +SELECT response +FROM openai_chat_model +WHERE user_input = '' +AND conversation_history = ''; +``` + ## Next Steps Follow [this tutorial on sentiment analysis](https://docs.mindsdb.com/use-cases/data_enrichment/sentiment-analysis-inside-mysql-with-openai) and [this tutorial on finetuning OpenAI models](https://docs.mindsdb.com/use-cases/automated_finetuning/openai) to see more use case examples. From 3026e52f4a6387b280e6dcb9ee299a3e547d01c5 Mon Sep 17 00:00:00 2001 From: martyna-mindsdb <109554435+martyna-mindsdb@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:52:49 +0200 Subject: [PATCH 11/51] removed obsolete params from chatbot via slack/msteams (#9720) --- .../app-integrations/microsoft-teams.mdx | 7 +------ docs/mindsdb_sql/agents/chatbot.mdx | 16 ++++++++++------ docs/use-cases/ai_agents/create-chatbot-kb.mdx | 3 +-- docs/use-cases/ai_agents/create-chatbot.mdx | 3 +-- .../handlers/ms_teams_handler/README.md | 5 +---- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/docs/integrations/app-integrations/microsoft-teams.mdx b/docs/integrations/app-integrations/microsoft-teams.mdx index 58224075788..559ba90389c 100644 --- a/docs/integrations/app-integrations/microsoft-teams.mdx +++ b/docs/integrations/app-integrations/microsoft-teams.mdx @@ -163,10 +163,5 @@ Finally, create a chatbot using the agent and the Microsoft Teams data source cr CREATE CHATBOT teams_chatbot USING database = 'teams_datasource', - agent = 'convo_agent', - enable_dms = true + agent = 'convo_agent'; ``` - - -The `enable_dms` parameter is optional and is the initially supported mode of talking to a chatbot. A chatbot responds to direct messages. - \ No newline at end of file diff --git a/docs/mindsdb_sql/agents/chatbot.mdx b/docs/mindsdb_sql/agents/chatbot.mdx index cc5099fbe68..3e26b93b25d 100644 --- a/docs/mindsdb_sql/agents/chatbot.mdx +++ b/docs/mindsdb_sql/agents/chatbot.mdx @@ -26,9 +26,6 @@ A chatbot can be created, deleted, queried, and updated. Here is how you can do USING database = 'my_slack', -- this must be created with CREATE DATABASE agent = 'customer_support_agent', -- this must be created with CREATE AGENT - included_channels = ['support', 'help'], -- default is all - excluded_channels = [], -- default is none - enable_dms = true, -- default is true is_running = true; -- default is true ``` @@ -36,14 +33,22 @@ A chatbot can be created, deleted, queried, and updated. 
Here is how you can do
 
 * `database` stores connection to a chat app (like [Slack](/integrations/app-integrations/slack) or [MS Teams](/integrations/app-integrations/microsoft-teams)) that should be created with the `CREATE DATABASE` statement.
 * `agent` is an [AI agent](/agents/agent) created with the `CREATE AGENT` command. It consists of an AI model trained with defined data sets.
-  * `included_channels` and `excluded_channels` are optional and store channel names where the bot will or will not respond.
-  * `enable_dms` is the initially supported mode of talking to a chatbot. A chatbot responds to direct messages.
 * `is_running` indicates whether or not to start the chatbot upon creation.
 
 If you want to use Slack in the [`CREATE CHATBOT`](/agents/chatbot) syntax, use [this method of connecting Slack to MindsDB](/integrations/app-integrations/slack#method-1-chatbot-responds-in-direct-messages-to-a-slack-app).
 
+
+ **How to connect the chatbot to multiple Slack channels?**
+
+ Open your Slack application and add the App/Bot to one or more channels:
+ - Go to the channel where you want to use the bot.
+ - Right-click on the channel and select *View Channel Details*.
+ - Select *Integrations*.
+ - Click on *Add an App*.
+
+
 * Deleting a chatbot:
 
     ```sql
@@ -74,7 +79,6 @@ CREATE CHATBOT text_to_sql_chatbot
 USING
    database = 'my_slack', -- this must be created with CREATE DATABASE
    agent = 'text_to_sql_agent', -- this must be created with CREATE AGENT
-   enable_dms = true,
    is_running = true;

diff --git a/docs/use-cases/ai_agents/create-chatbot-kb.mdx b/docs/use-cases/ai_agents/create-chatbot-kb.mdx
index 2466a2870c4..32e92775498 100644
--- a/docs/use-cases/ai_agents/create-chatbot-kb.mdx
+++ b/docs/use-cases/ai_agents/create-chatbot-kb.mdx
@@ -128,8 +128,7 @@ CREATE CHATBOT my_chatbot
 USING
     database = 'chat_app', -- this parameter stores a connection to a chat app, like Slack or MS Teams
     agent = 'support_agent', -- this parameter stores an agent name, which was created with CREATE AGENT
-    enable_dms = true, -- this parameter is optional and enable direct messages with the chatbot
-    running = true; -- this parameter is optional and set to true by default, meaning that the chatbot is running
+    is_running = true; -- this parameter is optional and set to true by default, meaning that the chatbot is running
 ```
 
 The `database` parameter stores connection to a chat app. And the `agent` parameter stores an AI agent created by passing a model and training data.

diff --git a/docs/use-cases/ai_agents/create-chatbot.mdx b/docs/use-cases/ai_agents/create-chatbot.mdx
index a9489a85755..71d3c55c2ee 100644
--- a/docs/use-cases/ai_agents/create-chatbot.mdx
+++ b/docs/use-cases/ai_agents/create-chatbot.mdx
@@ -93,8 +93,7 @@ CREATE CHATBOT my_chatbot
 USING
     database = 'chat_app', -- this parameter stores a connection to a chat app, like Slack or MS Teams
     agent = 'support_agent', -- this parameter stores an agent name, which was created with CREATE AGENT
-    enable_dms = true, -- this parameter is optional and enable direct messages with the chatbot
-    running = true; -- this parameter is optional and set to true by default, meaning that the chatbot is running
+    is_running = true; -- this parameter is optional and set to true by default, meaning that the chatbot is running
 ```
 
 The `database` parameter stores connection to a chat app. And the `agent` parameter stores an AI agent created by passing a model and training data. 
diff --git a/mindsdb/integrations/handlers/ms_teams_handler/README.md b/mindsdb/integrations/handlers/ms_teams_handler/README.md index b54ed8a53b5..b222e7b490c 100644 --- a/mindsdb/integrations/handlers/ms_teams_handler/README.md +++ b/mindsdb/integrations/handlers/ms_teams_handler/README.md @@ -197,8 +197,5 @@ Finally, create a chatbot using the agent and the Microsoft Teams data source cr CREATE CHATBOT teams_chatbot USING database = 'teams_datasource', - agent = 'convo_agent', - enable_dms = true + agent = 'convo_agent'; ~~~~ - -Note: The `enable_dms` parameter is optional and is the initially supported mode of talking to a chatbot. A chatbot responds to direct messages. From 88cfe2261b8459eab7ff242736cac48cf4f32957 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Sat, 14 Sep 2024 13:39:40 +0200 Subject: [PATCH 12/51] Update CLA (#9722) --- .../entity-contributor.md | 120 +++++++++++++---- .../individual-contributor.md | 125 +++++++++++++----- 2 files changed, 182 insertions(+), 63 deletions(-) diff --git a/assets/contributions-agreement/entity-contributor.md b/assets/contributions-agreement/entity-contributor.md index c874a0cafd7..1135f865e15 100644 --- a/assets/contributions-agreement/entity-contributor.md +++ b/assets/contributions-agreement/entity-contributor.md @@ -1,29 +1,93 @@ -# MindsDB Entity Contributor License Agreement +# MindsDB, Inc. Contributor License Agreement -Thank you for your interest in contributing to MindsDB.This contributor agreement documents the rights granted by contributors to Us. Tomake this document effective, please sign it and send it to Us by email, following the instructions atcommunity@mindsdb.com. This is a legally binding document, so please read it carefully beforeagreeing to it. The Agreement may cover more than one software project managed by Us. -1. Definitions"You" means any Legal Entity on behalf of whom a Contribution has been received by Us. "LegalEntity" means an entity which is not a natural person. "Affiliates" means other Legal Entities thatcontrol, are controlled by, or under common control with that Legal Entity. For the purposes ofthis definition, "control" means (i) the power, direct or indirect, to cause the direction ormanagement of such Legal Entity, whether by contract or otherwise, (ii) ownership of fiftypercent (50%) or more of the outstanding shares or securities which vote to elect the managementor other persons who direct such Legal Entity or (iii) beneficial ownership of such entity."Contribution" means any work of authorship that is Submitted by You to Us in which You ownor assert ownership of the Copyright. If You do not own the Copyright in the entire work ofauthorship, please follow the instructions in community@mindsdb.com."Copyright" means all rights protecting works of authorship owned or controlled by You or YourAffiliates, including copyright, moral and neighboring rights, as appropriate, for the full term oftheir existence including any extensions by You."Material" means the work of authorship which is made available by Us to third parties. Whenthis Agreement covers more than one software project, the Material means the work of authorshipto which the Contribution was Submitted. 
After You Submit the Contribution, it may be includedin the Material."Submit" means any form of electronic, verbal, or written communication sent to Us or ourrepresentatives, including but not limited to electronic mailing lists, source code control systems,and issue tracking systems that are managed by, or on behalf of, Us for the purpose of discussingand improving the Material, but excluding communication that is conspicuously marked orotherwise designated in writing by You as "Not a Contribution.""Submission Date" means the date on which You Submit a Contribution to Us."Effective Date" means the date You execute this Agreement or the date You first Submit aContribution to Us, whichever is earlier."Media" means any portion of a Contribution which is not software. -2. Grant of Rights -2.1 Copyright License -(a) You retain ownership of the Copyright in Your Contribution and have the same rights to use orlicense the Contribution which You would have had without entering into the Agreement. -(b) To the maximum extent permitted by the relevant law, You grant to Us a perpetual, worldwide,non-exclusive, transferable, royalty-free, irrevocable license under the Copyright covering the1 of 4Harmony (HA-CLA-E) Version 1.0 -Contribution, with the right to sublicense such rights through multiple tiers of sublicensees, toreproduce, modify, display, perform and distribute the Contribution as part of the Material; providedthat this license is conditioned upon compliance with Section 2.3. -2.2 Patent LicenseFor patent claims including, without limitation, method, process, and apparatus claims which You orYour Affiliates own, control or have the right to grant, now or in the future, You grant to Us aperpetual, worldwide, non-exclusive, transferable, royalty-free, irrevocable patent license, with theright to sublicense these rights to multiple tiers of sublicensees, to make, have made, use, sell, offerfor sale, import and otherwise transfer the Contribution and the Contribution in combination with theMaterial (and portions of such combination). This license is granted only to the extent that theexercise of the licensed rights infringes such patent claims; and provided that this license isconditioned upon compliance with Section 2.3. -2.3 Outbound LicenseAs a condition on the grant of rights in Sections 2.1 and 2.2, We agree to license the Contribution onlyunder the terms of the license or licenses which We are using on the Submission Date for the Material(including any rights to adopt any future version of a license if permitted).In addition, We may use the following licenses for Media in the Contribution: (including any right toadopt any future version of a license if permitted). -2.4 Moral Rights. If moral rights apply to the Contribution, to the maximum extent permitted by law,You waive and agree not to assert such moral rights against Us or our successors in interest, or any ofour licensees, either direct or indirect. -2.5 Our Rights. You acknowledge that We are not obligated to use Your Contribution as part of theMaterial and may decide to include any Contribution We consider appropriate. -2.6 Reservation of Rights. Any rights not expressly assigned or licensed under this section areexpressly reserved by You. -3. Agreement -You confirm that: -(a) You have the legal authority to enter into this Agreement. -(b) You or Your Affiliates own the Copyright and patent claims covering the Contribution which arerequired to grant the rights under Section 2. 
-(c) The grant of rights under Section 2 does not violate any grant of rights which You or YourAffiliates have made to third parties. -(d) You have followed the instructions in community@mindsdb.com, if You do not own the Copyrightin the entire work of authorship Submitted. -4. DisclaimerEXCEPT FOR THE EXPRESS WARRANTIES IN SECTION 3, THE CONTRIBUTION ISPROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIESINCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF MERCHANTABILITY,2 of 4Harmony (HA-CLA-E) Version 1.0 -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT ARE EXPRESSLYDISCLAIMED BY YOU TO US AND BY US TO YOU. TO THE EXTENT THAT ANY SUCHWARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATIONTO THE MINIMUM PERIOD PERMITTED BY LAW. -5. Consequential Damage WaiverTO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILLYOU OR US BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF ANTICIPATED SAVINGS,LOSS OF DATA, INDIRECT, SPECIAL, INCIDENTAL, CONSEQUENTIAL AND EXEMPLARYDAMAGES ARISING OUT OF THIS AGREEMENT REGARDLESS OF THE LEGAL OREQUITABLE THEORY (CONTRACT, TORT OR OTHERWISE) UPON WHICH THE CLAIM ISBASED. -6. Miscellaneous -6.1 This Agreement will be governed by and construed in accordance with the laws of Californiaexcluding its conflicts of law provisions. Under certain circumstances, the governing law in thissection might be superseded by the United Nations Convention on Contracts for the International Saleof Goods ("UN Convention") and the parties intend to avoid the application of the UN Convention tothis Agreement and, thus, exclude the application of the UN Convention in its entirety to thisAgreement. -6.2 This Agreement sets out the entire agreement between You and Us for Your Contributions to Usand overrides all other agreements or understandings. -6.3 If You or We assign the rights or obligations received through this Agreement to a third party, as acondition of the assignment, that third party must agree in writing to abide by all the rights andobligations in the Agreement. -6.4 The failure of either party to require performance by the other party of any provision of thisAgreement in one situation shall not affect the right of a party to require such performance at any timein the future. A waiver of performance under a provision in one situation shall not be considered awaiver of the performance of the provision in the future or a waiver of the provision in its entirety. -6.5 If any provision of this Agreement is found void and unenforceable, such provision will bereplaced to the extent possible with a provision that comes closest to the meaning of the originalprovision and which is enforceable. The terms and conditions set forth in this Agreement shall applynotwithstanding any failure of essential purpose of this Agreement or any limited remedy to themaximum extent possible under law. \ No newline at end of file +By Submitting a Contribution to a Project, you hereby accept and agree to the following terms and +conditions (this “Agreement”) for your prior, present and future Contributions submitted to MindsDB, Inc. +(“Company”). Except for the licenses granted herein to Company, you reserve all right, title, and interest +in and to your Contributions. + +1. Definitions + + 1.1. 
“Submit” or “Submitting” is the act of uploading, submitting, transmitting, or distributing code or
+other content to any Project, including but not limited to communication on electronic mailing
+lists, source code control systems, and issue tracking systems that are managed by, or on behalf
+of, the Project for the purpose of discussing and improving that Project.
+
+ 1.2. “Contribution” shall mean the code, documentation, or other original works of authorship,
+including any modifications or additions to an existing work, that is Submitted by you to
+Company for inclusion in, or documentation of, any of the works, products or projects owned or
+managed by Company (each a “Project”).
+2. Your Employer. References to “employer” in this Agreement include your employer or anyone else
+for whom you are acting in making or Submitting your Contribution, e.g. as a contractor, vendor, or
+agent, and any other entity that controls, is controlled by, or is under common control with that entity.
+For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty
+percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. If your
+Contribution is made in the course of your work for an employer or your employer has
+intellectual property rights in your Contribution by contract or applicable law, you must
+secure permission from your employer to make the Contribution before Submitting the
+Contribution. In that case, the term “you” in this Agreement will refer to you and the employer
+collectively. If you change employers in the future and desire to Submit additional Contributions for
+the new employer, then you agree to secure permission from the new employer before Submitting
+those Contributions.
+3. Originality of Work. You represent and warrant that each of your Contributions is entirely your
+original work, and that the Contribution, whether alone or in combination with the Project, will not infringe
+upon or misappropriate the intellectual property or other rights of any third party. Should you wish to
+Submit materials that are not your original work, you may Submit them separately to the Project if you
+(i) retain all copyright and license information that was in the materials as you received them, (ii) in
+the description accompanying your Contribution, include the phrase “Contribution containing
+materials of a third party:” followed by the names of the third party and any licenses or other
+restrictions of which you are aware, and (iii) follow any other instructions in the Project’s written
+guidelines concerning Contributions.
+4. Licenses
+
+ 4.1. Copyright License. You grant Company, and those who receive the Contribution directly or
+indirectly from Company, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license
+in the Contribution to reproduce, prepare derivative works of, publicly display, publicly perform,
+and distribute the Contribution and such derivative works, and to sublicense any or all of the
+foregoing rights to third parties.
+
+ 4.2. Patent License. 
You grant Company, and those who receive the Contribution directly or indirectly +from Company, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license under +your patent claims that are necessarily infringed by the Contribution or the combination of the +Contribution with the Project to which it was Submitted to make, have made, use, offer to sell, +sell and import or otherwise dispose of the Contribution alone or with the Project. + + 4.3. Other Rights Reserved. Each party reserves all rights not expressly granted in this Agreement. +No additional licenses or rights whatsoever (including, without limitation, any implied licenses) +are granted by implication, exhaustion, estoppel or otherwise. + +5. Prior Contributions. You agree that if you submitted a Contribution to a Project prior to the Effective +Date (each a “Prior Contribution”), then, regardless of whether you had an agreement in place with +Company governing such Prior Contributions, such Prior Contributions are deemed “Contributions” +hereunder and are hereby licensed to Company pursuant to Section 4 as of the date you Submitted +the Prior Contribution. +6. Representations and Warranties. You represent and warrant that you are legally entitled to grant +the above licenses. You represent and warrant that each of your Contributions is entirely your original +work (except as you may have disclosed in accordance with Section 3 ). You represent and warrant +that you have secured permission from your employer to make the Contribution in cases where your +Contribution is made in the course of your work for your employer or your employer has intellectual +property rights in your Contribution (or any portion thereof) by contract or applicable law. If you are +agreeing to this Agreement on behalf of your employer, you represent and warrant that you have the +necessary authority to bind the listed employer to the obligations contained in this Agreement. You +are not expected to provide support for your Contribution unless you choose to do so. UNLESS +REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING, AND EXCEPT FOR THE +WARRANTIES EXPRESSLY STATED IN SECTIONS 3 AND 6 , THE CONTRIBUTION PROVIDED +UNDER THIS AGREEMENT IS PROVIDED WITHOUT WARRANTY OF ANY KIND, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTY OF MERCHANTABILITY, OR FITNESS FOR A +PARTICULAR PURPOSE. +7. Notice to Company. You agree to notify Company in writing of any facts or circumstances of which +you later become aware that would make your representations or warranties in this Agreement +inaccurate in any respect. You may contact Company at [EMAIL]. +8. Information about Contributions. You agree that Contributions to Projects and information about +Contributions may be maintained indefinitely and disclosed publicly, including your name and other +information that you submit with your Contribution. +9. Governing Law/Jurisdiction. This Agreement is governed by the laws of the State of California, and +the parties consent to exclusive jurisdiction and venue in the federal courts sitting in the City and +County of San Francisco. The parties waive all defenses of lack of personal jurisdiction and forum +non-conveniens. +10. Entire Agreement/Assignment. This Agreement is the entire agreement between the parties, and +supersedes any and all prior agreements, understandings or communications, written or oral, +between the parties relating to the subject matter hereof, including any of the foregoing as applicable +to Prior Contributions. 
This Agreement may be freely assigned by Company, without notice to you. +The parties accept and agree to be bound by the terms of this Agreement as of the date of the last +signature below (the “Effective Date”). diff --git a/assets/contributions-agreement/individual-contributor.md b/assets/contributions-agreement/individual-contributor.md index 2a0f6265d93..1135f865e15 100644 --- a/assets/contributions-agreement/individual-contributor.md +++ b/assets/contributions-agreement/individual-contributor.md @@ -1,38 +1,93 @@ -# MindsDB Individual Contributor License Agreement +# MindsDB, Inc. Contributor License Agreement -Thank you for your interest in contributing to MindsDB ("We" or "Us").This contributor agreement ("Agreement") documents the rights granted by contributors to Us. To make this document effective, please sign it and send it to Us by email, following the instructions at community@mindsdb.com. This is a legally binding document, so please read it carefully before agreeing to it. The Agreement may cover more than one software project managed by Us. +By Submitting a Contribution to a Project, you hereby accept and agree to the following terms and +conditions (this “Agreement”) for your prior, present and future Contributions submitted to MindsDB, Inc. +(“Company”). Except for the licenses granted herein to Company, you reserve all right, title, and interest +in and to your Contributions. 1. Definitions -"You" means the individual who Submits a Contribution to Us."Contribution" means any work of authorship that is Submitted by You to Us in which You own or assert ownership of the Copyright. If You do not own the Copyright in the entire work of authorship, please follow the instructions in community@mindsdb.com."Copyright" means all rights protecting works of authorship owned or controlled by You,including copyright, moral and neighboring rights, as appropriate, for the full term of their existence including any extensions by You. -"Material" means the work of authorship which is made available by Us to third parties. When this Agreement covers more than one software project, the Material means the work of authorship to which the Contribution was Submitted. After You Submit the Contribution, it may be included in the Material."Submit" means any form of electronic, verbal, or written communication sent to Us or our representatives, including but not limited to electronic mailing lists, source code control systems,and issue tracking systems that are managed by, or on behalf of, Us for the purpose of discussing and improving the Material, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution. -""Submission Date" means the date on which You Submit a Contribution to Us. -"Effective Date" means the date You execute this Agreement or the date You first Submit aContribution to Us, whichever is earlier."Media" means any portion of a Contribution which is not software. -2. Grant of Rights -2.1 Copyright License -(a) You retain ownership of the Copyright in Your Contribution and have the same rights to use or license the Contribution which You would have had without entering into the Agreement. 
-(b) To the maximum extent permitted by the relevant law, You grant to Us a perpetual, worldwide,non-exclusive, transferable, royalty-free, irrevocable license under the Copyright covering the Contribution, with the right to sublicense such rights through multiple tiers of sublicensees, to reproduce, modify, display, perform and distribute the Contribution as part of the Material; providedthat this license is conditioned upon compliance with Section 2.3. -2.2 Patent License -For patent claims including, without limitation, method, process, and apparatus claims which You own, control or have the right to grant, now or in the future, You grant to Us a perpetual, worldwide,non-exclusive, transferable, royalty-free, irrevocable patent license, with the right to sublicense theserights to multiple tiers of sublicensees, to make, have made, use, sell, offer for sale, import and otherwise transfer the Contribution and the Contribution in combination with the Material (andportions of such combination). This license is granted only to the extent that the exercise of thelicensed rights infringes such patent claims; and provided that this license is conditioned uponcompliance with Section 2.3. -2.3 Outbound License -As a condition on the grant of rights in Sections 2.1 and 2.2, We agree to license the Contribution only under the terms of the license or licenses which We are using on the Submission Date for the Material(including any rights to adopt any future version of a license if permitted).In addition, We may use the following licenses for Media in the Contribution: (including any right toadopt any future version of a license if permitted). -2.4 Moral Rights. -If moral rights apply to the Contribution, to the maximum extent permitted by law,You waive and agree not to assert such moral rights against Us or our successors in interest, or any ofour licensees, either direct or indirect. -2.5 Our Rights. -You acknowledge that We are not obligated to use Your Contribution as part of the Material and may decide to include any Contribution We consider appropriate.2.6 Reservation of Rights. Any rights not expressly assigned or licensed under this section areexpressly reserved by You. -3. Agreement -You confirm that: -(a) You have the legal authority to enter into this Agreement. -(b) You own the Copyright and patent claims covering the Contribution which are required to grant the rights under Section 2. -(c) The grant of rights under Section 2 does not violate any grant of rights which You have made to third parties, including Your employer. If You are an employee, You have had Your employer approvethis Agreement or sign the Entity version of this document. If You are less than eighteen years old,please have Your parents or guardian sign the Agreement. -(d) You have followed the instructions in community@mindsdb.com, if You do not own the Copyright in the entire work of authorship Submitted. -4. Disclaimer -EXCEPT FOR THE EXPRESS WARRANTIES IN SECTION 3, THE CONTRIBUTION ISPROVIDED "AS IS". MORE PARTICULARLY, ALL EXPRESS OR IMPLIED WARRANTIESINCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTY OF MERCHANTABILITY,FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT ARE EXPRESSLYDISCLAIMED BY YOU TO US AND BY US TO YOU. TO THE EXTENT THAT ANY SUCHWARRANTIES CANNOT BE DISCLAIMED, SUCH WARRANTY IS LIMITED IN DURATION -TO THE MINIMUM PERIOD PERMITTED BY LAW. -5. 
Consequential Damage Waiver -TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILLYOU OR US BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF ANTICIPATED SAVINGS,LOSS OF DATA, INDIRECT, SPECIAL, INCIDENTAL, CONSEQUENTIAL AND EXEMPLARY DAMAGES ARISING OUT OF THIS AGREEMENT REGARDLESS OF THE LEGAL OR EQUITABLE THEORY (CONTRACT, TORT OR OTHERWISE) UPON WHICH THE CLAIM IS BASED. -6. Miscellaneous -6.1 This Agreement will be governed by and construed in accordance with the laws of California excluding its conflicts of law provisions. Under certain circumstances, the governing law in this section might be superseded by the United Nations Convention on Contracts for the International Saleof Goods ("UN Convention") and the parties intend to avoid the application of the UN Convention to this Agreement and, thus, exclude the application of the UN Convention in its entirety to this Agreement. -6.2 This Agreement sets out the entire agreement between You and Us for Your Contributions to Usand overrides all other agreements or understandings. -6.3 If You or We assign the rights or obligations received through this Agreement to a third party, as acondition of the assignment, that third party must agree in writing to abide by all the rights andobligations in the Agreement. -6.4 The failure of either party to require performance by the other party of any provision of this Agreement in one situation shall not affect the right of a party to require such performance at any timein the future. A waiver of performance under a provision in one situation shall not be considered awaiver of the performance of the provision in the future or a waiver of the provision in its entirety. -6.5 If any provision of this Agreement is found void and unenforceable, such provision will bereplaced to the extent possible with a provision that comes closest to the meaning of the original provision and which is enforceable. The terms and conditions set forth in this Agreement shall apply not withstanding any failure of essential purpose of this Agreement or any limited remedy to the maximum extent possible under law. + + 1.1. “Submit” or “Submitting” is the act of uploading, submitting, transmitting, or distributing code or +other content to any Project, including but not limited to communication on electronic mailing +lists, source code control systems, and issue tracking systems that are managed by, or on behalf +of, the Project for the purpose of discussing and improving that Project. + + 1.2. “Contribution” shall mean the code, documentation, or other original works of authorship, +including any modifications or additions to an existing work, that is Submitted by you to +Company for inclusion in, or documentation of, any of the works, products or projects owned or +managed by Company (each a “Project”). +2. Your Employer. References to “employer” in this Agreement include your employer or anyone else +for whom you are acting in making or Submitting your Contribution, e.g. as a contractor, vendor, or +agent, and any other entity that controls, is controlled by, or is under common control with that entity. +For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty +percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
+Contribution is made in the course of your work for an employer or your employer has
+intellectual property rights in your Contribution by contract or applicable law, you must
+secure permission from your employer to make the Contribution before Submitting the
+Contribution. In that case, the term “you” in this Agreement will refer to you and the employer
+collectively. If you change employers in the future and desire to Submit additional Contributions for
+the new employer, then you agree to secure permission from the new employer before Submitting
+those Contributions.
+3. Originality of Work. You represent and warrant that each of your Contributions is entirely your
+original work, and that the Contribution, whether alone or in combination with the Project, will not
+infringe upon or misappropriate the intellectual property or other rights of any third party. Should you wish to
+Submit materials that are not your original work, you may Submit them separately to the Project if you
+(i) retain all copyright and license information that was in the materials as you received them, (ii) in
+the description accompanying your Contribution, include the phrase “Contribution containing
+materials of a third party:” followed by the names of the third party and any licenses or other
+restrictions of which you are aware, and (iii) follow any other instructions in the Project’s written
+guidelines concerning Contributions.
+4. Licenses
+
+ 4.1. Copyright License. You grant Company, and those who receive the Contribution directly or
+indirectly from Company, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license
+in the Contribution to reproduce, prepare derivative works of, publicly display, publicly perform,
+and distribute the Contribution and such derivative works, and to sublicense any or all of the
+foregoing rights to third parties.
+
+ 4.2. Patent License. You grant Company, and those who receive the Contribution directly or indirectly
+from Company, a perpetual, worldwide, non-exclusive, royalty-free, irrevocable license under
+your patent claims that are necessarily infringed by the Contribution or the combination of the
+Contribution with the Project to which it was Submitted to make, have made, use, offer to sell,
+sell and import or otherwise dispose of the Contribution alone or with the Project.
+
+ 4.3. Other Rights Reserved. Each party reserves all rights not expressly granted in this Agreement.
+No additional licenses or rights whatsoever (including, without limitation, any implied licenses)
+are granted by implication, exhaustion, estoppel or otherwise.
+
+5. Prior Contributions. You agree that if you submitted a Contribution to a Project prior to the Effective
+Date (each a “Prior Contribution”), then, regardless of whether you had an agreement in place with
+Company governing such Prior Contributions, such Prior Contributions are deemed “Contributions”
+hereunder and are hereby licensed to Company pursuant to Section 4 as of the date you Submitted
+the Prior Contribution.
+6. Representations and Warranties. You represent and warrant that you are legally entitled to grant
+the above licenses. You represent and warrant that each of your Contributions is entirely your original
+work (except as you may have disclosed in accordance with Section 3). You represent and warrant
+that you have secured permission from your employer to make the Contribution in cases where your
+Contribution is made in the course of your work for your employer or your employer has intellectual
+property rights in your Contribution (or any portion thereof) by contract or applicable law. If you are
+agreeing to this Agreement on behalf of your employer, you represent and warrant that you have the
+necessary authority to bind the listed employer to the obligations contained in this Agreement. You
+are not expected to provide support for your Contribution unless you choose to do so. UNLESS
+REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING, AND EXCEPT FOR THE
+WARRANTIES EXPRESSLY STATED IN SECTIONS 3 AND 6, THE CONTRIBUTION PROVIDED
+UNDER THIS AGREEMENT IS PROVIDED WITHOUT WARRANTY OF ANY KIND, INCLUDING,
+BUT NOT LIMITED TO, ANY WARRANTY OF MERCHANTABILITY, OR FITNESS FOR A
+PARTICULAR PURPOSE.
+7. Notice to Company. You agree to notify Company in writing of any facts or circumstances of which
+you later become aware that would make your representations or warranties in this Agreement
+inaccurate in any respect. You may contact Company at [EMAIL].
+8. Information about Contributions. You agree that Contributions to Projects and information about
+Contributions may be maintained indefinitely and disclosed publicly, including your name and other
+information that you submit with your Contribution.
+9. Governing Law/Jurisdiction. This Agreement is governed by the laws of the State of California, and
+the parties consent to exclusive jurisdiction and venue in the federal courts sitting in the City and
+County of San Francisco. The parties waive all defenses of lack of personal jurisdiction and forum
+non-conveniens.
+10. Entire Agreement/Assignment. This Agreement is the entire agreement between the parties, and
+supersedes any and all prior agreements, understandings or communications, written or oral,
+between the parties relating to the subject matter hereof, including any of the foregoing as applicable
+to Prior Contributions. This Agreement may be freely assigned by Company, without notice to you.
+The parties accept and agree to be bound by the terms of this Agreement as of the date of the last
+signature below (the “Effective Date”).

From dbf513962bf2d5ba7c4d1d56df92098a16909bb1 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com>
Date: Mon, 16 Sep 2024 15:37:20 +0530
Subject: [PATCH 13/51] Updated the Permission Scopes for the MS Teams Integration (#9717)

---
 .../app-integrations/microsoft-teams.mdx  | 40 +++++++++++++++----
 .../handlers/ms_teams_handler/README.md   | 40 +++++++++++++++----
 .../handlers/ms_teams_handler/settings.py |  6 +--
 3 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/docs/integrations/app-integrations/microsoft-teams.mdx b/docs/integrations/app-integrations/microsoft-teams.mdx
index 559ba90389c..395e937dc13 100644
--- a/docs/integrations/app-integrations/microsoft-teams.mdx
+++ b/docs/integrations/app-integrations/microsoft-teams.mdx
@@ -37,14 +37,38 @@ The parameters given above can be obtained by registering an application in Entr
 3. Click on **App registrations** and then click on **New registration**.
 4. Enter a name for your application and select the **Accounts in this organizational directory only** option for the **Supported account types** field.
 5. Keep the **Redirect URI** field empty and click on **Register**.
-6. Copy the **Application (client) ID** and record it as the `client_id` parameter, and copy the **Directory (tenant) ID** and record it as the `tenant_id` parameter.
-7. Click on **Certificates & secrets** and then click on **New client secret**.
-8. Enter a description for your client secret and select an expiration period.
-9. Click on **Add** and copy the generated client secret and record it as the `client_secret` parameter.
-10. Click on **Authentication** and then click on **Add a platform**.
-11. Select **Web** and enter the following URLs in the **Redirect URIs** field:
-    - `https://cloud.mindsdb.com/verify-auth`
-    - `http://localhost:47334/verify-auth` (for local development)
+6. Click on **API permissions** and then click on **Add a permission**.
+7. Select **Microsoft Graph** and then click on **Delegated permissions**.
+8. Select the following permissions based on the data you want to access and the operations you want to perform:
+    - Chats:
+        - Chat.ReadBasic
+        - Chat.Read
+        - Chat.ReadWrite
+
+    - Chat messages:
+        - Chat.Read
+        - Chat.ReadWrite
+        - Group.ReadWrite.All
+
+    - Channels:
+        - ChannelSettings.Read.All
+        - ChannelSettings.ReadWrite.All
+        - Directory.Read.All
+        - Directory.ReadWrite.All
+        - Group.Read.All
+        - Group.ReadWrite.All
+
+    - Channel messages:
+        - ChannelMessage.Read.All
+        - Group.Read.All
+        - Group.ReadWrite.All
+9. Click on **Add permissions**.
+10. Copy the **Application (client) ID** and record it as the `client_id` parameter, and copy the **Directory (tenant) ID** and record it as the `tenant_id` parameter.
+11. Click on **Certificates & secrets** and then click on **New client secret**.
+12. Enter a description for your client secret and select an expiration period.
+13. Click on **Add** and copy the generated client secret and record it as the `client_secret` parameter.
+14. Click on **Authentication** and then click on **Add a platform**.
+15. Select **Web** and enter the URL where MindsDB has been deployed, followed by `/verify-auth`, in the **Redirect URIs** field. For example, if you are running MindsDB locally (on `http://localhost:47334`), enter `http://localhost:47334/verify-auth` in the **Redirect URIs** field.
 
 You can find more information about creating app registrations [here](https://docs.microsoft.com/en-us/graph/auth-register-app-v2).

diff --git a/mindsdb/integrations/handlers/ms_teams_handler/README.md b/mindsdb/integrations/handlers/ms_teams_handler/README.md
index b222e7b490c..f1b47c5817d 100644
--- a/mindsdb/integrations/handlers/ms_teams_handler/README.md
+++ b/mindsdb/integrations/handlers/ms_teams_handler/README.md
@@ -43,14 +43,38 @@ The parameters given above can be obtained by registering an application in Micr
 3. Click on **App registrations** and then click on **New registration**.
 4. Enter a name for your application and select the **Accounts in this organizational directory only** option for the **Supported account types** field.
 5. Keep the **Redirect URI** field empty and click on **Register**.
-6. Copy the **Application (client) ID** and paste it as the `client_id` parameter, and copy the **Directory (tenant) ID** and paste it as the `tenant_id` parameter.
-7. Click on **Certificates & secrets** and then click on **New client secret**.
-8. Enter a description for your client secret and select an expiration period.
-9. Click on **Add** and copy the generated client secret and paste it as the `client_secret` parameter.
-10. Click on **Authentication** and then click on **Add a platform**.
-11. Select **Web** and enter the following URLs in the **Redirect URIs** field:
-    - `https://cloud.mindsdb.com/verify-auth`
-    - `http://localhost:47334/verify-auth` (for local development)
+6. Click on **API permissions** and then click on **Add a permission**.
+7. Select **Microsoft Graph** and then click on **Delegated permissions**.
+8. Select the following permissions based on the data you want to access and the operations you want to perform:
+    - Chats:
+        - Chat.ReadBasic
+        - Chat.Read
+        - Chat.ReadWrite
+
+    - Chat messages:
+        - Chat.Read
+        - Chat.ReadWrite
+        - Group.ReadWrite.All
+
+    - Channels:
+        - ChannelSettings.Read.All
+        - ChannelSettings.ReadWrite.All
+        - Directory.Read.All
+        - Directory.ReadWrite.All
+        - Group.Read.All
+        - Group.ReadWrite.All
+
+    - Channel messages:
+        - ChannelMessage.Read.All
+        - Group.Read.All
+        - Group.ReadWrite.All
+9. Click on **Add permissions**.
+10. Copy the **Application (client) ID** and record it as the `client_id` parameter, and copy the **Directory (tenant) ID** and record it as the `tenant_id` parameter.
+11. Click on **Certificates & secrets** and then click on **New client secret**.
+12. Enter a description for your client secret and select an expiration period.
+13. Click on **Add** and copy the generated client secret and record it as the `client_secret` parameter.
+14. Click on **Authentication** and then click on **Add a platform**.
+15. Select **Web** and enter the URL where MindsDB has been deployed, followed by `/verify-auth`, in the **Redirect URIs** field. For example, if you are running MindsDB locally (on `http://localhost:47334`), enter `http://localhost:47334/verify-auth` in the **Redirect URIs** field.
 
 You can find more information about creating app registrations [here](https://docs.microsoft.com/en-us/graph/auth-register-app-v2).

diff --git a/mindsdb/integrations/handlers/ms_teams_handler/settings.py b/mindsdb/integrations/handlers/ms_teams_handler/settings.py
index 99ea7f4bacb..c905904e6de 100644
--- a/mindsdb/integrations/handlers/ms_teams_handler/settings.py
+++ b/mindsdb/integrations/handlers/ms_teams_handler/settings.py
@@ -53,11 +53,7 @@ class MSTeamsHandlerConfig(BaseSettings):
     """

     DEFAULT_SCOPES: List = [
-        'https://graph.microsoft.com/User.Read',
-        'https://graph.microsoft.com/Group.Read.All',
-        'https://graph.microsoft.com/ChannelMessage.Send',
-        'https://graph.microsoft.com/Chat.Read',
-        'https://graph.microsoft.com/ChatMessage.Send',
+        "https://graph.microsoft.com/.default",
     ]

     CHATS_TABLE_COLUMNS: List = [

From f1741d9572592178974c27e6756b5b4c2eac3bbd Mon Sep 17 00:00:00 2001
From: Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com>
Date: Tue, 17 Sep 2024 16:13:11 +0700
Subject: [PATCH 14/51] docs(plaid): intended API page link (#9723)

---
 docs/integrations/app-integrations/plaid.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/integrations/app-integrations/plaid.mdx b/docs/integrations/app-integrations/plaid.mdx
index bb6d5777319..24e9ac912b5 100644
--- a/docs/integrations/app-integrations/plaid.mdx
+++ b/docs/integrations/app-integrations/plaid.mdx
@@ -28,7 +28,7 @@ The required arguments to establish a connection are as follows:

 You can get the `client_id`, `secret`, and `access_token` values [here](https://dashboard.plaid.com/team/keys) once you sign in to your Plaid account.

-And [here](https://plaid.com/docs/api/tokens/#itempublic_tokenexchange) is how you generate the `access_token` value.
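As a rough illustration of the exchange that the corrected link below documents, here is a minimal sketch against Plaid's REST endpoint `/item/public_token/exchange`. The sandbox host and all credential values are placeholders, and it assumes a Plaid Link session has already produced a `public_token`.

```python
# Sketch: exchange a Plaid Link public_token for the long-lived access_token
# that the MindsDB Plaid handler expects. The host, client_id, secret, and
# public_token below are assumed placeholder values.
import requests

resp = requests.post(
    "https://sandbox.plaid.com/item/public_token/exchange",
    json={
        "client_id": "YOUR_CLIENT_ID",          # placeholder, from the Plaid dashboard
        "secret": "YOUR_SECRET",                # placeholder, from the Plaid dashboard
        "public_token": "public-sandbox-xxxx",  # placeholder, produced by Plaid Link
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["access_token"])  # use as the handler's access_token parameter
```

The exchange is a one-time call: Plaid Link hands the client a short-lived `public_token`, and this endpoint swaps it for the durable `access_token` that server-side integrations store.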
+And [here](https://plaid.com/docs/api/items/#itempublic_tokenexchange) is how you generate the `access_token` value. In order to make use of this handler and connect the Plaid app to MindsDB, the following syntax can be used: From f1741d9572592178974c27e6756b5b4c2eac3bbd Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Tue, 17 Sep 2024 15:58:04 +0200 Subject: [PATCH 15/51] New release (#9728) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index 6d3160767ab..70d1ec180e6 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.2.1' +__version__ = '24.9.3.0' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 578c30c04c8a19c02ee036a66bee961ecabc2b61 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 18 Sep 2024 12:11:52 +0300 Subject: [PATCH 16/51] Fix: Error fetching table info: Table ... not found in database (#9729) --- mindsdb/interfaces/skills/sql_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/interfaces/skills/sql_agent.py b/mindsdb/interfaces/skills/sql_agent.py index dd702031537..71e8e650ed6 100644 --- a/mindsdb/interfaces/skills/sql_agent.py +++ b/mindsdb/interfaces/skills/sql_agent.py @@ -116,7 +116,7 @@ def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifi for table_name in table_names: # Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools. - table_name = table_name.strip(' `"\'') + table_name = table_name.strip(' `"\'\n\r') table = Identifier(table_name) # resolved table From c1509927e58ce90ac3331af2143b1ac65356ee31 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 18 Sep 2024 16:40:18 +0300 Subject: [PATCH 17/51] Fix describe of ray server handler (#9733) --- .../integrations/libs/ml_handler_process/describe_process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mindsdb/integrations/libs/ml_handler_process/describe_process.py b/mindsdb/integrations/libs/ml_handler_process/describe_process.py index bd3bdbe473c..4f3c44b402f 100644 --- a/mindsdb/integrations/libs/ml_handler_process/describe_process.py +++ b/mindsdb/integrations/libs/ml_handler_process/describe_process.py @@ -71,7 +71,7 @@ def describe_process(integration_id: int, attribute: Optional[Union[str, list]], engine_storage=handlerStorage, model_storage=modelStorage ) - return ml_handler.describe(attribute=attribute) + return ml_handler.describe(attribute) except NotImplementedError: return DataFrame() except Exception as e: @@ -96,7 +96,7 @@ def describe_process(integration_id: int, attribute: Optional[Union[str, list]], engine_storage=handlerStorage, model_storage=modelStorage ) - attrs_df = ml_handler.describe(attribute=attribute) + attrs_df = ml_handler.describe(attribute) except NotImplementedError: pass except Exception as e: From 9f45fb795f55315880febd9a8b3c8f9ec78ca5f9 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Wed, 18 Sep 2024 16:44:55 +0200 Subject: [PATCH 18/51] Bump version (#9734) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index 70d1ec180e6..d18b640e42e 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 
'mindsdb' -__version__ = '24.9.3.0' +__version__ = '24.9.3.1' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 8f70cab44b8ac68f66a8c4ea5344d7fe83d2c091 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Wed, 18 Sep 2024 19:35:11 +0300 Subject: [PATCH 19/51] Set pymongo requirement == 4.8.0 (#9736) --- mindsdb/__about__.py | 2 +- requirements/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index d18b640e42e..a28c7efff05 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.3.1' +__version__ = '24.9.3.2' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' diff --git a/requirements/requirements.txt b/requirements/requirements.txt index c54670af8b3..bf76b4a7ed0 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -8,7 +8,7 @@ pyparsing == 2.3.1 cryptography>=35.0 psycopg[binary] waitress >= 1.4.4 -pymongo[srv] >= 4.1.1 +pymongo[srv] == 4.8.0 psutil sqlalchemy >= 2.0.0, < 3.0.0 psycopg2-binary # This is required for using sqlalchemy with postgres From eae19d333a478baac3021fedc04c73bfa3720c79 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Thu, 19 Sep 2024 14:42:56 +0300 Subject: [PATCH 20/51] Option to check connection for `GET /database` endpoint (#9735) --- mindsdb/api/http/namespaces/databases.py | 25 +++++++++++++++++-- .../handlers/mysql_handler/mysql_handler.py | 3 +++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/mindsdb/api/http/namespaces/databases.py b/mindsdb/api/http/namespaces/databases.py index 80e6356db93..0d5367a1861 100644 --- a/mindsdb/api/http/namespaces/databases.py +++ b/mindsdb/api/http/namespaces/databases.py @@ -81,14 +81,20 @@ class DatabaseResource(Resource): def get(self, database_name): '''Gets a database by name''' session = SessionController() + check_connection = request.args.get('check_connection', 'false').lower() in ('1', 'true') try: project = session.database_controller.get_project(database_name) - return { + result = { 'name': database_name, 'type': 'project', 'id': project.id, 'engine': None } + if check_connection: + result['connection_status'] = { + 'success': True, + 'error_message': None + } except (ValueError, EntityNotExistsError): integration = session.integration_controller.get(database_name) if integration is None: @@ -96,7 +102,22 @@ def get(self, database_name): HTTPStatus.NOT_FOUND, 'Database not found', f'Database with name {database_name} does not exist.' 
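                # Illustrative only (values assumed): with ?check_connection=true, the
                # found-integration path below attaches a payload of the shape
                #   "connection_status": {"success": true, "error_message": null}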
) - return integration + result = integration + if check_connection: + integration['connection_status'] = { + 'success': False, + 'error_message': None + } + try: + handler = session.integration_controller.get_data_handler(database_name) + status = handler.check_connection() + integration['connection_status']['success'] = status.success + integration['connection_status']['error_message'] = status.error_message + except Exception as e: + integration['connection_status']['success'] = False + integration['connection_status']['error_message'] = str(e) + + return result @ns_conf.doc('update_database') @api_endpoint_metrics('PUT', '/databases/database') diff --git a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py index 609697ce795..daed6f17bae 100644 --- a/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +++ b/mindsdb/integrations/handlers/mysql_handler/mysql_handler.py @@ -77,6 +77,9 @@ def connect(self): if 'conn_attrs' in self.connection_data: config['conn_attrs'] = self.connection_data['conn_attrs'] + if 'connection_timeout' not in config: + config['connection_timeout'] = 10 + ssl = self.connection_data.get('ssl') if ssl is True: ssl_ca = self.connection_data.get('ssl_ca') From 31b1a1e71d57fba2235e325737e2bd72a0cc6a62 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Fri, 20 Sep 2024 13:27:23 +0200 Subject: [PATCH 21/51] Upgrade mintlify to latest (#9744) --- docs/package-lock.json | 286 +++++++++++++++++++++-------------------- docs/package.json | 2 +- 2 files changed, 147 insertions(+), 141 deletions(-) diff --git a/docs/package-lock.json b/docs/package-lock.json index 8e426efcdee..fc86f49a227 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "mintlify": "^4.0.217", + "mintlify": "^4.0.223", "sharp": "^0.33.2" } }, @@ -654,16 +654,16 @@ } }, "node_modules/@mintlify/cli": { - "version": "4.0.217", - "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.217.tgz", - "integrity": "sha512-3Jhxsq7DtKlT4EFHpiGJ+bemSkleGF/PFv5ObkVSN1mugtEp+Sfcv0prQ31SU+UZ2jPHn5FSHaws2D5E9BiGMQ==", + "version": "4.0.223", + "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.223.tgz", + "integrity": "sha512-LOCvk0IFXqz3vR2BQHV4UaDPA0GbPwiQP1V6WOpuJ1MMkuLftmByRi/veLF+dZkEhrUoL4yaXINKjRtGWDlOSw==", "dependencies": { "@apidevtools/swagger-parser": "^10.1.0", - "@mintlify/link-rot": "3.0.219", - "@mintlify/models": "0.0.118", - "@mintlify/prebuild": "1.0.218", - "@mintlify/previewing": "4.0.211", - "@mintlify/validation": "0.1.184", + "@mintlify/link-rot": "3.0.225", + "@mintlify/models": "0.0.123", + "@mintlify/prebuild": "1.0.224", + "@mintlify/previewing": "4.0.217", + "@mintlify/validation": "0.1.190", "chalk": "^5.2.0", "detect-port": "^1.5.1", "fs-extra": "^11.1.1", @@ -680,14 +680,14 @@ } }, "node_modules/@mintlify/common": { - "version": "1.0.139", - "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.139.tgz", - "integrity": "sha512-1WyLwiYwhwH4sIYlQziYN/zYo/ET2ZCLUUeuxIg/E0AuZ5KzMvTMqmp36M0HhZk/+cwaSwWCaT4ykNmZuCIxuw==", + "version": "1.0.145", + "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.145.tgz", + "integrity": "sha512-GHdzVdb+wSKsw3OvmDLbq/lLRvh1odWByx0TaLjGQ1Kg2vFG8QrGqIkgHemeTcCTz14AkORVmjWl4IjSrAn2pw==", "dependencies": { "@apidevtools/swagger-parser": "^10.1.0", "@mintlify/mdx": "0.0.46", - "@mintlify/models": "0.0.118", - "@mintlify/validation": "0.1.184", + 
"@mintlify/models": "0.0.123", + "@mintlify/validation": "0.1.190", "@sindresorhus/slugify": "^2.1.1", "acorn": "^8.11.2", "acorn-jsx": "^5.3.2", @@ -726,13 +726,13 @@ } }, "node_modules/@mintlify/link-rot": { - "version": "3.0.219", - "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.219.tgz", - "integrity": "sha512-h64lVQwjwJ72V0o5ctIbLSR7ubrxYEh3VgKrn6gSkgE/wd7/huAGxxtZ2sdi2rdKce0BPIwiUwDXhicHre2p2g==", + "version": "3.0.225", + "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.225.tgz", + "integrity": "sha512-SOB0P0IR9iaPEnKH97zbCc1RQcYTfStumFvVN6X4+9tF/DvXKpyClhhuzqB00clgkVLVe0R7wV4jb4XIbjwESQ==", "dependencies": { "@apidevtools/swagger-parser": "10.x", - "@mintlify/common": "1.0.139", - "@mintlify/prebuild": "1.0.218", + "@mintlify/common": "1.0.145", + "@mintlify/prebuild": "1.0.224", "chalk": "^5.1.0", "fs-extra": "^11.1.0", "gray-matter": "^4.0.3", @@ -760,9 +760,9 @@ } }, "node_modules/@mintlify/models": { - "version": "0.0.118", - "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.118.tgz", - "integrity": "sha512-h8aYU4I8wCkV2XnI6ktAMReJ5tbKTuYCSpdFrpdC6xPXH5dB4Mox11yyTtTxU9QQAjXTTjqIFz3MFjrxYntvwg==", + "version": "0.0.123", + "resolved": "https://registry.npmjs.org/@mintlify/models/-/models-0.0.123.tgz", + "integrity": "sha512-1hCxHLmaeBDhg017Uw7Rxc9skakGwnQndXXN4+pEMS6mfqspDyaCTvxzgfvrLEwVKLGruy9VnpEfWNtmwuZfjA==", "dependencies": { "axios": "^1.4.0", "openapi-types": "12.x" @@ -772,14 +772,14 @@ } }, "node_modules/@mintlify/prebuild": { - "version": "1.0.218", - "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.218.tgz", - "integrity": "sha512-jEv8/SHK0J+uTkvkPeOf6WsY6Jpk80wClMAKm1JxEEzPMApsslU9xn+eoQSHIpdRCUcro2PT7X3ek95BZWkEXw==", + "version": "1.0.224", + "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.224.tgz", + "integrity": "sha512-hZR427ASdh9s1c3w3GGBkU3/Iftz7Z6R1HaN89BBAsaNVE80OlDg30qsEyl+XMOkrWuD5iJoJcqM30A2w6XIrA==", "dependencies": { "@apidevtools/swagger-parser": "10.x", - "@mintlify/common": "1.0.139", - "@mintlify/scraping": "3.0.162", - "@mintlify/validation": "0.1.184", + "@mintlify/common": "1.0.145", + "@mintlify/scraping": "3.0.168", + "@mintlify/validation": "0.1.190", "axios": "^1.6.2", "chalk": "^5.3.0", "favicons": "^7.0.2", @@ -808,14 +808,14 @@ } }, "node_modules/@mintlify/previewing": { - "version": "4.0.211", - "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.211.tgz", - "integrity": "sha512-R6iXp1CJN29zSFXMUJkcCWS++PyHkW9I70+CL1dGtjWUHKa9tEZRPyEDdJozlZI9Rt8Kdxgu2qlUtyaOVD3a3g==", + "version": "4.0.217", + "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.217.tgz", + "integrity": "sha512-uIBqNmaAy8izPn/7rA6rsLMQDzF8CQXSOmVW5Zk0rScp7AOq/LLlotFxV8EZi5raOL0gPcnjEWFGWbSU9eu7MQ==", "dependencies": { "@apidevtools/swagger-parser": "^10.1.0", - "@mintlify/common": "1.0.139", - "@mintlify/prebuild": "1.0.218", - "@mintlify/validation": "0.1.184", + "@mintlify/common": "1.0.145", + "@mintlify/prebuild": "1.0.224", + "@mintlify/validation": "0.1.190", "@octokit/rest": "^19.0.5", "chalk": "^5.1.0", "chokidar": "^3.5.3", @@ -838,12 +838,12 @@ } }, "node_modules/@mintlify/scraping": { - "version": "3.0.162", - "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-3.0.162.tgz", - "integrity": "sha512-Pi3MNF8uEd0XT3EqeHg0PuK588MjDlRN7CISz4YVcSEdcHCcFxOniipUN8pL26aRAqpTKQ3p/NaCSXZXzmDl2A==", + "version": "3.0.168", + "resolved": 
"https://registry.npmjs.org/@mintlify/scraping/-/scraping-3.0.168.tgz", + "integrity": "sha512-928NqvZ9/MsZ+YZ/BUNksOpUCQHH+zxhcXiMCcH7w3OPL1N1/jpsCDkYWasmYuYN6pP4T+n1gjuHKQo/yiCmhw==", "dependencies": { "@apidevtools/swagger-parser": "^10.1.0", - "@mintlify/common": "1.0.139", + "@mintlify/common": "1.0.145", "axios": "^1.2.2", "cheerio": "^1.0.0-rc.12", "fs-extra": "^11.1.1", @@ -860,11 +860,11 @@ } }, "node_modules/@mintlify/validation": { - "version": "0.1.184", - "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.184.tgz", - "integrity": "sha512-iAc+Cw2APDrRgujvVZrJe7YliBaao3+8S//bKV2bIPeZ7XkVqzdwaaFdu5DD8jHk6CLxHGIGZbMm2VdtI3aLjw==", + "version": "0.1.190", + "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.190.tgz", + "integrity": "sha512-n2s1VnBHQWNTkaOtCfyWpKOyyMd90cp0d6R4cCs6R1JCnFV/kt/1QlcdD3z49sO+P6LpzXhWJ7bUWQhrSffuiQ==", "dependencies": { - "@mintlify/models": "0.0.118", + "@mintlify/models": "0.0.123", "lcm": "^0.0.3", "lodash": "^4.17.21", "openapi-types": "12.x", @@ -1141,9 +1141,9 @@ } }, "node_modules/@types/estree": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", - "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==" + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", + "integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==" }, "node_modules/@types/estree-jsx": { "version": "1.0.5", @@ -1218,9 +1218,9 @@ "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==" }, "node_modules/@types/node": { - "version": "22.5.4", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.4.tgz", - "integrity": "sha512-FDuKUJQm/ju9fT/SeX/6+gBzoPzlVCzfzmGkwKvRHQVxi4BntVbyIwf6a4Xn62mrvndLiml6z/UBXIdEVjQLXg==", + "version": "22.5.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.5.5.tgz", + "integrity": "sha512-Xjs4y5UPO/CLdzpgR6GirZJx36yScjh73+2NlLlkFRSoQN8B0DpfXPdZGnvVmLRLOsqDpOfTNv7D9trgGhmOIA==", "dependencies": { "undici-types": "~6.19.2" } @@ -1236,14 +1236,14 @@ "integrity": "sha512-rlAnzkW2sZOjbqZ743IHUhFcvzaGbqijwOu8QZnZCjfQzBqFE3s4lOTJEsxikImav9uzz/42I+O7YUs1mWgMlg==" }, "node_modules/@types/prop-types": { - "version": "15.7.12", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz", - "integrity": "sha512-5zvhXYtRNRluoE/jAp4GVsSduVUzNWKkOZrCDBWYtE7biZywwdC2AcEzg+cSMLFRfVgeAFqpfNabiPjxFddV1Q==" + "version": "15.7.13", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.13.tgz", + "integrity": "sha512-hCZTSvwbzWGvhqxp/RqVqwU999pBf2vp7hzIjiYOsl8wqOmUxkQ6ddw1cV3l8811+kdUFus/q4d1Y3E3SyEifA==" }, "node_modules/@types/react": { - "version": "18.3.5", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.5.tgz", - "integrity": "sha512-WeqMfGJLGuLCqHGYRGHxnKrXcTitc6L/nBUWfWPcTarG3t9PsquqUMuVeXZeca+mglY4Vo5GZjCi0A3Or2lnxA==", + "version": "18.3.7", + "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.7.tgz", + "integrity": "sha512-KUnDCJF5+AiZd8owLIeVHqmW9yM4sqmDVf2JRJiBMFkGvkoZ4/WyV2lL4zVsoinmRS/W3FeEdZLEWFRofnT2FQ==", "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -1357,9 +1357,9 @@ } }, "node_modules/ansi-regex": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", - "integrity": 
"sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", + "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", "engines": { "node": ">=12" }, @@ -1470,9 +1470,9 @@ "optional": true }, "node_modules/bare-fs": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.3.tgz", - "integrity": "sha512-7RYKL+vZVCyAsMLi5SPu7QGauGGT8avnP/HO571ndEuV4MYdGXvLhtW67FuLPeEI8EiIY7zbbRR9x7x7HU0kgw==", + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.5.tgz", + "integrity": "sha512-SlE9eTxifPDJrT6YgemQ1WGFleevzwY+XAP1Xqgl56HtcrisC2CHCZ2tq6dBpcH2TnNxwUEUGhweo+lrQtYuiw==", "optional": true, "dependencies": { "bare-events": "^2.0.0", @@ -1481,9 +1481,9 @@ } }, "node_modules/bare-os": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.2.tgz", - "integrity": "sha512-HZoJwzC+rZ9lqEemTMiO0luOePoGYNBgsLLgegKR/cljiJvcDNhDZQkzC+NC5Oh0aHbdBNSOHpghwMuB5tqhjg==", + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.4.tgz", + "integrity": "sha512-z3UiI2yi1mK0sXeRdc4O1Kk8aOa/e+FNWZcTiPB/dfTWyLypuE99LibgRaQki914Jq//yAWylcAt+mknKdixRQ==", "optional": true }, "node_modules/bare-path": { @@ -1496,13 +1496,13 @@ } }, "node_modules/bare-stream": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.2.1.tgz", - "integrity": "sha512-YTB47kHwBW9zSG8LD77MIBAAQXjU2WjAkMHeeb7hUplVs6+IoM5I7uEVQNPMB7lj9r8I76UMdoMkGnCodHOLqg==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.3.0.tgz", + "integrity": "sha512-pVRWciewGUeCyKEuRxwv06M079r+fRjAQjBEK2P6OYGrO43O+Z0LrPZZEjlc4mB6C2RpZ9AxJ1s7NLEtOHO6eA==", "optional": true, "dependencies": { "b4a": "^1.6.6", - "streamx": "^2.18.0" + "streamx": "^2.20.0" } }, "node_modules/base64-js": { @@ -1567,9 +1567,9 @@ } }, "node_modules/body-parser": { - "version": "1.20.2", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz", - "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==", + "version": "1.20.3", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz", + "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==", "dependencies": { "bytes": "3.1.2", "content-type": "~1.0.5", @@ -1579,7 +1579,7 @@ "http-errors": "2.0.0", "iconv-lite": "0.4.24", "on-finished": "2.4.1", - "qs": "6.11.0", + "qs": "6.13.0", "raw-body": "2.5.2", "type-is": "~1.6.18", "unpipe": "1.0.0" @@ -2133,11 +2133,11 @@ } }, "node_modules/debug": { - "version": "4.3.6", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz", - "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -2412,9 +2412,9 @@ "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" }, "node_modules/encodeurl": { - "version": "1.0.2", - "resolved": 
"https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", - "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", "engines": { "node": ">= 0.8" } @@ -2736,36 +2736,36 @@ } }, "node_modules/express": { - "version": "4.19.2", - "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz", - "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==", + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/express/-/express-4.21.0.tgz", + "integrity": "sha512-VqcNGcj/Id5ZT1LZ/cfihi3ttTn+NJmkli2eZADigjq29qTlWi/hAQ43t/VLPq8+UX06FCEx3ByOYet6ZFblng==", "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", - "body-parser": "1.20.2", + "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", "cookie": "0.6.0", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", - "finalhandler": "1.2.0", + "finalhandler": "1.3.1", "fresh": "0.5.2", "http-errors": "2.0.0", - "merge-descriptors": "1.0.1", + "merge-descriptors": "1.0.3", "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", + "path-to-regexp": "0.1.10", "proxy-addr": "~2.0.7", - "qs": "6.11.0", + "qs": "6.13.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", - "send": "0.18.0", - "serve-static": "1.15.0", + "send": "0.19.0", + "serve-static": "1.16.2", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", @@ -2898,12 +2898,12 @@ } }, "node_modules/finalhandler": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz", - "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz", + "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==", "dependencies": { "debug": "2.6.9", - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "on-finished": "2.4.1", "parseurl": "~1.3.3", @@ -2928,9 +2928,9 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, "node_modules/follow-redirects": { - "version": "1.15.8", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.8.tgz", - "integrity": "sha512-xgrmBhBToVKay1q2Tao5LI26B83UhrB/vM1avwVSDzt8rx3rO6AizBAaF46EgksTVr+rFTQaqZZ9MVBfUe4nig==", + "version": "1.15.9", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.9.tgz", + "integrity": "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==", "funding": [ { "type": "individual", @@ -3282,9 +3282,9 @@ } }, "node_modules/hast-util-from-html-isomorphic/node_modules/hast-util-from-html": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.2.tgz", - "integrity": "sha512-HwOHwxdt2zC5KQ/CNoybBntRook2zJvfZE/u5/Ap7aLPe22bDqen7KwGkOqOyzL5zIqKwiYX/OTtE0FWgr6XXA==", + "version": "2.0.3", + "resolved": 
"https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz", + "integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==", "dependencies": { "@types/hast": "^3.0.0", "devlop": "^1.1.0", @@ -4649,9 +4649,12 @@ } }, "node_modules/merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==" + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } }, "node_modules/methods": { "version": "1.1.2", @@ -5571,11 +5574,11 @@ } }, "node_modules/mintlify": { - "version": "4.0.217", - "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.0.217.tgz", - "integrity": "sha512-75kuiAomaFxpkhYu55/ON2PvsT/H/l3o0LgIepvhj7Je17obAXZRDMR8VYMdgHHRmDcdbSSRzlpXlJzmhhimEg==", + "version": "4.0.223", + "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.0.223.tgz", + "integrity": "sha512-qXoRS9LSkhNnEVUFVKgrCs+T4/rDmvkjHwTbDWCM71+oji5qsHvOQBJfYqnwDUGkkxSUqUk/32fNbqNCZzHiKA==", "dependencies": { - "@mintlify/cli": "4.0.217" + "@mintlify/cli": "4.0.223" }, "bin": { "mintlify": "index.js" @@ -5609,9 +5612,9 @@ } }, "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/negotiator": { "version": "0.6.3", @@ -6012,9 +6015,9 @@ } }, "node_modules/path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==" + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz", + "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==" }, "node_modules/pend": { "version": "1.2.0", @@ -6140,9 +6143,9 @@ } }, "node_modules/pump": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.2.tgz", + "integrity": "sha512-tUPXtzlGM8FE3P0ZL6DVs/3P58k9nk8/jZeQCurTJylQA8qFYzHFfhBJkuqyE0FifOsQ0uKWekiZ5g8wtr28cw==", "dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" @@ -6182,11 +6185,11 @@ } }, "node_modules/qs": { - "version": "6.11.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz", - "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==", + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", "dependencies": { - "side-channel": "^1.0.4" + 
"side-channel": "^1.0.6" }, "engines": { "node": ">=0.6" @@ -6813,9 +6816,9 @@ } }, "node_modules/send": { - "version": "0.18.0", - "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", - "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz", + "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==", "dependencies": { "debug": "2.6.9", "depd": "2.0.0", @@ -6848,20 +6851,23 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, - "node_modules/send/node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + "node_modules/send/node_modules/encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==", + "engines": { + "node": ">= 0.8" + } }, "node_modules/serve-static": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz", - "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==", + "version": "1.16.2", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz", + "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==", "dependencies": { - "encodeurl": "~1.0.2", + "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "parseurl": "~1.3.3", - "send": "0.18.0" + "send": "0.19.0" }, "engines": { "node": ">= 0.8.0" @@ -7095,9 +7101,9 @@ } }, "node_modules/streamx": { - "version": "2.20.0", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.20.0.tgz", - "integrity": "sha512-ZGd1LhDeGFucr1CUCTBOS58ZhEendd0ttpGT3usTvosS4ntIwKN9LJFp+OeCSprsCPL14BXVRZlHGRY1V9PVzQ==", + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.20.1.tgz", + "integrity": "sha512-uTa0mU6WUC65iUvzKH4X9hEdvSW7rbPxPtwfWiLMSj3qTdQbAiUboZTxauKfpFuGIGa1C2BYijZ7wgdUXICJhA==", "dependencies": { "fast-fifo": "^1.3.2", "queue-tick": "^1.0.1", @@ -7241,9 +7247,9 @@ } }, "node_modules/text-decoder": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.1.1.tgz", - "integrity": "sha512-8zll7REEv4GDD3x4/0pW+ppIxSNs7H1J10IKFZsuOMscumCdM2a+toDGLPA3T+1+fLBql4zbt5z83GEQGGV5VA==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.0.tgz", + "integrity": "sha512-n1yg1mOj9DNpk3NeZOx7T6jchTbyJS3i3cucbNN6FcdPriMZx7NsgrGpWWdWZZGxD7ES1XB+3uoqHMgOKaN+fg==", "dependencies": { "b4a": "^1.6.4" } @@ -7985,9 +7991,9 @@ } }, "node_modules/zod-to-json-schema": { - "version": "3.23.2", - "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.2.tgz", - "integrity": "sha512-uSt90Gzc/tUfyNqxnjlfBs8W6WSGpNBv0rVsNxP/BVSMHMKGdthPYff4xtCHYloJGM0CFxFsb3NbC0eqPhfImw==", + "version": "3.23.3", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.3.tgz", + "integrity": 
"sha512-TYWChTxKQbRJp5ST22o/Irt9KC5nj7CdBKYB/AosCRdj/wxEMvv4NNaj9XVUHDOIp53ZxArGhnw5HMZziPFjog==", "peerDependencies": { "zod": "^3.23.3" } diff --git a/docs/package.json b/docs/package.json index d01ac63b0f6..a52ec5a457c 100644 --- a/docs/package.json +++ b/docs/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "mintlify": "^4.0.217", + "mintlify": "^4.0.223", "sharp": "^0.33.2" } } From a3f103c844bad69c8d0b8d5e7beb1a14fe33bcb6 Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 23 Sep 2024 15:52:52 +0300 Subject: [PATCH 22/51] Fix Teams: don't connect when handler files are imported (#9721) --- mindsdb/api/executor/command_executor.py | 2 +- mindsdb/interfaces/database/integrations.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mindsdb/api/executor/command_executor.py b/mindsdb/api/executor/command_executor.py index 42bb51cda70..be4f6093c3d 100644 --- a/mindsdb/api/executor/command_executor.py +++ b/mindsdb/api/executor/command_executor.py @@ -1096,7 +1096,7 @@ def _create_integration(self, name: str, engine: str, connection_args: dict): self.session.integration_controller.add(name, engine, connection_args) if storage: - handler = self.session.integration_controller.get_data_handler(name) + handler = self.session.integration_controller.get_data_handler(name, connect=False) handler.handler_storage.import_files(storage) def answer_create_ml_engine(self, name: str, handler: str, params: dict = None, if_not_exists=False): diff --git a/mindsdb/interfaces/database/integrations.py b/mindsdb/interfaces/database/integrations.py index 0bb877b99d3..d34696ca6d7 100644 --- a/mindsdb/interfaces/database/integrations.py +++ b/mindsdb/interfaces/database/integrations.py @@ -505,7 +505,7 @@ def get_ml_handler(self, name: str, case_sensitive: bool = False) -> BaseMLEngin return handler @profiler.profile() - def get_data_handler(self, name: str, case_sensitive: bool = False) -> BaseHandler: + def get_data_handler(self, name: str, case_sensitive: bool = False, connect=True) -> BaseHandler: """Get DATA handler (DB or API) by name Args: name (str): name of the handler @@ -587,7 +587,8 @@ def get_data_handler(self, name: str, case_sensitive: bool = False) -> BaseHandl ) HandlerClass = self.handler_modules[integration_engine].Handler handler = HandlerClass(**handler_ars) - self.handlers_cache.set(handler) + if connect: + self.handlers_cache.set(handler) return handler From fa1bc434cca6fc2face9870e55d47b205158a027 Mon Sep 17 00:00:00 2001 From: MOHD NEHAL KHAN Date: Mon, 23 Sep 2024 18:26:36 +0530 Subject: [PATCH 23/51] Added the misspelled nto to into (#9748) --- docs/sdks/javascript/get_database.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sdks/javascript/get_database.mdx b/docs/sdks/javascript/get_database.mdx index 2925387ae52..0032cff3578 100644 --- a/docs/sdks/javascript/get_database.mdx +++ b/docs/sdks/javascript/get_database.mdx @@ -3,7 +3,7 @@ title: Get a Data Source sidebarTitle: Get a Data Source --- -You can save a data sources nto a variable using the code below. +You can save a data sources into a variable using the code below. 
```
const db = await MindsDB.Databases.getDatabase('mysql_datasource');

From aa978f05aa9261f1bca1fe4c706d0d5d951ad7aa Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com>
Date: Mon, 23 Sep 2024 18:27:23 +0530
Subject: [PATCH 24/51] Updated the Pydantic Model for Amazon Bedrock to Dismiss Warning (#9750)

---
 .../handlers/bedrock_handler/bedrock_handler.py | 14 ++++++++++----
 .../handlers/bedrock_handler/settings.py        | 10 +++++-----
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py b/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py
index 723a1ad99c6..0dc3f98ac23 100644
--- a/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py
+++ b/mindsdb/integrations/handlers/bedrock_handler/bedrock_handler.py
@@ -56,8 +56,14 @@ def create(self, target, args: Dict = None, **kwargs: Any) -> None:
         if 'using' not in args:
             raise MissingConnectionParams("Amazon Bedrock engine requires a USING clause! Refer to its documentation for more details.")
         else:
-            args = args['using']
-            handler_model_config = AmazonBedrockHandlerModelConfig(**args, connection_args=self.engine_storage.get_connection_args())
+            model_args = args['using']
+            # Replace 'model_id' with 'id' to match the Amazon Bedrock handler model configuration.
+            # This is done to avoid the Pydantic warning regarding conflicts with the protected 'model_' namespace.
+            if 'model_id' in model_args:
+                model_args['id'] = model_args['model_id']
+                del model_args['model_id']
+
+            handler_model_config = AmazonBedrockHandlerModelConfig(**model_args, connection_args=self.engine_storage.get_connection_args())

         # Save the model configuration to the storage.
         handler_model_params = handler_model_config.model_dump()
@@ -84,7 +90,7 @@ def predict(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None
         args = self.model_storage.json_get('args')
         handler_model_params = args['handler_model_params']
         mode = handler_model_params['mode']
-        model_id = handler_model_params['model_id']
+        model_id = handler_model_params['id']
         inference_config = handler_model_params.get('inference_config')

         target = args['target']
@@ -306,7 +312,7 @@ def describe(self, attribute: Optional[Text] = None) -> pd.DataFrame:
             return pd.DataFrame(args.items(), columns=['key', 'value'])

         elif attribute == 'metadata':
-            model_id = args.get('model_id')
+            model_id = args['handler_model_params']['id']
             try:
                 bedrock_client = create_amazon_bedrock_client(
                     'bedrock',
diff --git a/mindsdb/integrations/handlers/bedrock_handler/settings.py b/mindsdb/integrations/handlers/bedrock_handler/settings.py
index dcb56a5fa96..21d6f95640a 100644
--- a/mindsdb/integrations/handlers/bedrock_handler/settings.py
+++ b/mindsdb/integrations/handlers/bedrock_handler/settings.py
@@ -109,7 +109,7 @@ class AmazonBedrockHandlerModelConfig(BaseModel):

     Attributes
     ----------
-    model_id : Text
+    id : Text
         The ID of the model in Amazon Bedrock.

     mode : Optional[Text]
@@ -140,7 +140,7 @@ class AmazonBedrockHandlerModelConfig(BaseModel):
         The connection arguments required to connect to Amazon Bedrock. These are AWS credentials provided when creating the engine.
     """
     # User-provided Handler Model Parameters: These are parameters specific to the MindsDB handler for Amazon Bedrock provided by the user.
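    # Note: Pydantic v2 reserves the "model_" prefix as a protected namespace and
    # warns about fields such as model_id at class definition time; renaming the
    # field to plain "id" below is what dismisses that warning (relaxing
    # protected_namespaces in model_config would be an alternative).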
- model_id: Text = Field(None) + id: Text = Field(None) mode: Optional[Text] = Field(AmazonBedrockHandlerSettings.DEFAULT_MODE) prompt_template: Optional[Text] = Field(None) question_column: Optional[Text] = Field(None) @@ -205,9 +205,9 @@ def check_if_model_id_is_valid_and_correct_for_mode(cls, model: BaseModel) -> Ba ValueError: If the model ID provided is invalid or the parameters provided are invalid for the chosen model. """ # TODO: Set the default model ID for other modes. - if model.model_id is None: + if model.id is None: if model.mode in ['default', 'conversational']: - model.model_id = AmazonBedrockHandlerSettings.DEFAULT_TEXT_MODEL_ID + model.id = AmazonBedrockHandlerSettings.DEFAULT_TEXT_MODEL_ID bedrock_client = create_amazon_bedrock_client( "bedrock", @@ -216,7 +216,7 @@ def check_if_model_id_is_valid_and_correct_for_mode(cls, model: BaseModel) -> Ba try: # Check if the model ID is valid and accessible. - response = bedrock_client.get_foundation_model(modelIdentifier=model.model_id) + response = bedrock_client.get_foundation_model(modelIdentifier=model.id) except ClientError as e: raise ValueError(f"Invalid Amazon Bedrock model ID: {e}!") From a35b72042f68f049a28fdbe96867c7970a321f96 Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 23 Sep 2024 18:39:06 +0300 Subject: [PATCH 25/51] Agent permissions (#9747) --- mindsdb/interfaces/skills/sql_agent.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/mindsdb/interfaces/skills/sql_agent.py b/mindsdb/interfaces/skills/sql_agent.py index 71e8e650ed6..c308f1e3d0c 100644 --- a/mindsdb/interfaces/skills/sql_agent.py +++ b/mindsdb/interfaces/skills/sql_agent.py @@ -2,7 +2,7 @@ import re import hashlib - +from mindsdb_sql.parser.ast import Select import pandas as pd from mindsdb_sql import parse_sql @@ -44,7 +44,7 @@ def _call_engine(self, query: str, database=None): # switch database ast_query = parse_sql(query.strip('`')) - self._check_tables(ast_query) + self._check_permissions(ast_query) if database is None: database = self._database @@ -55,15 +55,21 @@ def _call_engine(self, query: str, database=None): ) return ret - def _check_tables(self, ast_query): + def _check_permissions(self, ast_query): + + # check type of query + if not isinstance(ast_query, Select): + raise ValueError("Only SELECT is allowed") - def _check_f(node, is_table=None, **kwargs): - if is_table and isinstance(node, Identifier): - table = node.parts[-1] - if table not in self._tables_to_include: - ValueError(f"Table {table} not found. Available tables: {', '.join(self._tables_to_include)}") + # Check tables + if self._tables_to_include: + def _check_f(node, is_table=None, **kwargs): + if is_table and isinstance(node, Identifier): + table = node.parts[-1] + if table not in self._tables_to_include: + raise ValueError(f"Table {table} not found. 
Available tables: {', '.join(self._tables_to_include)}") - query_traversal(ast_query, _check_f) + query_traversal(ast_query, _check_f) def get_usable_table_names(self) -> Iterable[str]: From c69d49ffd8312d6ca08e86b7180c7a9d360d9fc0 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 25 Sep 2024 13:23:31 +0300 Subject: [PATCH 26/51] LLM function (#9731) --- .../controllers/session_controller.py | 4 +- mindsdb/interfaces/functions/controller.py | 117 ++++++++++++++---- requirements/requirements.txt | 2 +- 3 files changed, 98 insertions(+), 25 deletions(-) diff --git a/mindsdb/api/executor/controllers/session_controller.py b/mindsdb/api/executor/controllers/session_controller.py index 80fb56ee032..860ecf32417 100644 --- a/mindsdb/api/executor/controllers/session_controller.py +++ b/mindsdb/api/executor/controllers/session_controller.py @@ -14,7 +14,7 @@ from mindsdb.interfaces.model.model_controller import ModelController from mindsdb.interfaces.database.database import DatabaseController from mindsdb.interfaces.skills.skills_controller import SkillsController -from mindsdb.interfaces.functions.controller import BYOMFunctionsController +from mindsdb.interfaces.functions.controller import FunctionController from mindsdb.utilities import log @@ -46,7 +46,7 @@ def __init__(self, api_type='http') -> object: self.database_controller = DatabaseController() self.skills_controller = SkillsController() - self.function_controller = BYOMFunctionsController(self) + self.function_controller = FunctionController(self) # to prevent circular imports from mindsdb.interfaces.knowledge_base.controller import KnowledgeBaseController diff --git a/mindsdb/interfaces/functions/controller.py b/mindsdb/interfaces/functions/controller.py index e160ac771d2..39851a126ec 100644 --- a/mindsdb/interfaces/functions/controller.py +++ b/mindsdb/interfaces/functions/controller.py @@ -1,3 +1,5 @@ +import os + from duckdb.typing import BIGINT, DOUBLE, VARCHAR, BLOB, BOOLEAN from mindsdb.interfaces.storage.model_fs import HandlerStorage @@ -41,6 +43,8 @@ def __init__(self, session): self.byom_methods = {} self.byom_handlers = {} + self.callbacks = {} + def get_engines(self): # get all byom engines if self.byom_engines is None: @@ -63,6 +67,43 @@ def get_methods(self, engine): return self.byom_methods[engine] + def check_function(self, node): + engine = node.namespace + if engine not in self.get_engines(): + return + + methods = self.get_methods(engine) + + fnc_name = node.op.lower() + if fnc_name not in methods: + # do nothing + return + + new_name = f'{node.namespace}_{fnc_name}' + node.op = new_name + + if new_name in self.callbacks: + # already exists + return self.callbacks[new_name] + + def callback(*args): + return self.method_call(engine, fnc_name, args) + + input_types = [ + param['type'] + for param in methods[fnc_name]['input_params'] + ] + + meta = { + 'name': new_name, + 'callback': callback, + 'input_types': input_types, + 'output_type': methods[fnc_name]['output_type'] + } + + self.callbacks[new_name] = meta + return meta + def method_call(self, engine, method_name, args): return self.byom_handlers[engine].function_call(method_name, args) @@ -70,6 +111,51 @@ def create_function_set(self): return DuckDBFunctions(self) +class FunctionController(BYOMFunctionsController): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def check_function(self, node): + meta = super().check_function(node) + if meta is not None: + return meta + + # builtin function + if node.op.lower() == 'llm': + return 
self.llm_call_function(node) + + def llm_call_function(self, node): + name = node.op.lower() + + model_name = os.getenv('LLM_FUNCTION_MODEL') + if model_name is None: + return + + try: + from langchain_core.messages import HumanMessage + from mindsdb.interfaces.agents.langchain_agent import create_chat_model + except ImportError: + return + + if name in self.callbacks: + return self.callbacks[name] + + def callback(question): + llm = create_chat_model({'model_name': model_name, 'provider': 'openai'}) + resp = llm([HumanMessage(question)]) + return resp.content + + meta = { + 'name': name, + 'callback': callback, + 'input_types': ['str'], + 'output_type': 'str' + } + self.callbacks[name] = meta + return meta + + class DuckDBFunctions: def __init__(self, controller): self.controller = controller @@ -77,37 +163,24 @@ def __init__(self, controller): def check_function(self, node): - engine = node.namespace - if engine not in self.controller.get_engines(): + meta = self.controller.check_function(node) + if meta is None: return - methods = self.controller.get_methods(engine) + name = meta['name'] - fnc_name = node.op.lower() - if fnc_name not in methods: - # do nothing + if name in self.functions: return - new_name = f'{node.namespace}_{fnc_name}' - if new_name in self.functions: - # already exists - return - - node.op = new_name - - def callback(*args): - return self.controller.method_call(engine, fnc_name, args) - input_types = [ - python_to_duckdb_type(param['type']) - for param in methods[fnc_name]['input_params'] + python_to_duckdb_type(param) + for param in meta['input_types'] ] - output_type = methods[fnc_name]['output_type'] - self.functions[new_name] = { - 'callback': function_maker(len(input_types), callback), + self.functions[name] = { + 'callback': function_maker(len(input_types), meta['callback']), 'input': input_types, - 'output': python_to_duckdb_type(output_type) + 'output': python_to_duckdb_type(meta['output_type']) } def register(self, connection): diff --git a/requirements/requirements.txt b/requirements/requirements.txt index bf76b4a7ed0..462fbe38c52 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -17,7 +17,7 @@ redis >=5.0.0, < 6.0.0 walrus==0.9.3 flask-compress >= 1.0.0 appdirs >= 1.0.0 -mindsdb-sql ~= 0.18.1 +mindsdb-sql ~= 0.19.0 pydantic >= 2.7.0 mindsdb-evaluator >= 0.0.7, < 0.1.0 checksumdir >= 1.2.0 From c282fc6962941ea2eef9f4ed64c5aa10d0fcbdc2 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Wed, 25 Sep 2024 13:29:56 +0300 Subject: [PATCH 27/51] Allow to use/redefine any connection parameters for Postgres handler (#9746) --- .../handlers/postgres_handler/connection_args.py | 6 ++++++ .../handlers/postgres_handler/postgres_handler.py | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/mindsdb/integrations/handlers/postgres_handler/connection_args.py b/mindsdb/integrations/handlers/postgres_handler/connection_args.py index a06fc0fe3ee..1c3a66f3fca 100644 --- a/mindsdb/integrations/handlers/postgres_handler/connection_args.py +++ b/mindsdb/integrations/handlers/postgres_handler/connection_args.py @@ -46,6 +46,12 @@ 'description': 'sslmode that will be used for connection.', 'required': False, 'label': 'sslmode' + }, + connection_parameters={ + 'type': ARG_TYPE.DICT, + 'description': 'Connection string parameters', + 'required': False, + 'label': 'connection_parameters' } ) diff --git a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py 
b/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py index 7bc5a138e7e..72cbb69960e 100644 --- a/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +++ b/mindsdb/integrations/handlers/postgres_handler/postgres_handler.py @@ -57,6 +57,14 @@ def _make_connection_args(self): 'dbname': self.connection_args.get('database') } + # https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS + connection_parameters = self.connection_args.get('connection_parameters') + if isinstance(connection_parameters, dict) is False: + connection_parameters = {} + if 'connect_timeout' not in connection_parameters: + connection_parameters['connect_timeout'] = 10 + config.update(connection_parameters) + if self.connection_args.get('sslmode'): config['sslmode'] = self.connection_args.get('sslmode') @@ -81,7 +89,7 @@ def connect(self): config = self._make_connection_args() try: - self.connection = psycopg.connect(**config, connect_timeout=10) + self.connection = psycopg.connect(**config) self.is_connected = True return self.connection except psycopg.Error as e: From 9dbcaedcb92efe6e49b323ca35608313a1dfae95 Mon Sep 17 00:00:00 2001 From: Ebrahim Kareem <32773511+ebrahim-2@users.noreply.github.com> Date: Wed, 25 Sep 2024 13:30:46 +0300 Subject: [PATCH 28/51] fix(docs): database name typo (#9749) --- docs/integrations/ai-overview.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/ai-overview.mdx b/docs/integrations/ai-overview.mdx index c861700ff6c..8b4a9606e45 100644 --- a/docs/integrations/ai-overview.mdx +++ b/docs/integrations/ai-overview.mdx @@ -10,7 +10,7 @@ MindsDB integrates with numerous AI frameworks, facilitating [deployment and man

-MindsDB offers a wide range of AI engines used to create models and incorporate them in the data landscape as virtual [AI tables](/generative-ai-tables). MinsdDB abstracts AI models as virtual tables, or Generative AI Tables, that can generate data from the underlying model upon being queried. +MindsDB offers a wide range of AI engines used to create models and incorporate them in the data landscape as virtual [AI tables](/generative-ai-tables). MindsDB abstracts AI models as virtual tables, or Generative AI Tables, that can generate data from the underlying model upon being queried. This section contains instructions on how to create and deploy models within MindsDB, utilizing different AI/ML frameworks. From fd8ecd6aad82ec24f2a0914f2eb19077f5245bbe Mon Sep 17 00:00:00 2001 From: Ty <124617566+tmichaeldb@users.noreply.github.com> Date: Wed, 25 Sep 2024 03:36:30 -0700 Subject: [PATCH 29/51] Add Flush to Langfuse & Handle Errors Properly for Getting Traces (#9755) --- mindsdb/interfaces/agents/langchain_agent.py | 14 +++++++++++--- .../interfaces/agents/langfuse_callback_handler.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/mindsdb/interfaces/agents/langchain_agent.py b/mindsdb/interfaces/agents/langchain_agent.py index 7a164010d05..807614f4f7e 100644 --- a/mindsdb/interfaces/agents/langchain_agent.py +++ b/mindsdb/interfaces/agents/langchain_agent.py @@ -22,6 +22,7 @@ from langchain_core.prompts import PromptTemplate from langchain_core.tools import Tool from langfuse import Langfuse +from langfuse.api.resources.commons.errors.not_found_error import NotFoundError as TraceNotFoundError from langfuse.callback import CallbackHandler from mindsdb.integrations.handlers.openai_handler.constants import ( @@ -288,9 +289,16 @@ def get_completion(self, messages, stream: bool = False): self.api_trace.update(output=response) # update metadata with tool usage - trace = self.langfuse.get_trace(self.trace_id) - trace_metadata['tool_usage'] = get_tool_usage(trace) - self.api_trace.update(metadata=trace_metadata) + try: + # Ensure all batched traces are sent before fetching. 
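+            # (Langfuse's client batches events and sends them from a background
+            # thread, so without an explicit flush the trace may not yet be
+            # available to get_trace below.)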
+ self.langfuse.flush() + trace = self.langfuse.get_trace(self.trace_id) + trace_metadata['tool_usage'] = get_tool_usage(trace) + self.api_trace.update(metadata=trace_metadata) + except TraceNotFoundError: + logger.warning(f'Langfuse trace {self.trace_id} not found') + except Exception as e: + logger.error(f'Something went wrong while processing Langfuse trace {self.trace_id}: {str(e)}') return response def _get_completion_stream( diff --git a/mindsdb/interfaces/agents/langfuse_callback_handler.py b/mindsdb/interfaces/agents/langfuse_callback_handler.py index 13c1730bd11..c4c494b46a1 100644 --- a/mindsdb/interfaces/agents/langfuse_callback_handler.py +++ b/mindsdb/interfaces/agents/langfuse_callback_handler.py @@ -29,6 +29,8 @@ def on_tool_start( """Run when tool starts running.""" parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex action_span = self.action_uuid_to_span.get(parent_run_uuid) + if action_span is None: + return metadata = { 'tool_name': serialized.get("name", "tool"), 'started': datetime.datetime.now().isoformat() @@ -39,6 +41,8 @@ def on_tool_end(self, output: str, **kwargs: Any) -> Any: """Run when tool ends running.""" parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex action_span = self.action_uuid_to_span.get(parent_run_uuid) + if action_span is None: + return action_span.update( output=output, # tool output is action output (unless superseded by a global action output) metadata={'finished': datetime.datetime.now().isoformat()} @@ -50,6 +54,8 @@ def on_tool_error( """Run when tool errors.""" parent_run_uuid = kwargs.get('parent_run_id', uuid4()).hex action_span = self.action_uuid_to_span.get(parent_run_uuid) + if action_span is None: + return try: error_str = str(error) except Exception: @@ -75,6 +81,8 @@ def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any: if chain_uuid not in self.chain_uuid_to_span: return chain_span = self.chain_uuid_to_span.pop(chain_uuid) + if chain_span is None: + return chain_span.update(output=str(outputs)) chain_span.end() @@ -102,6 +110,8 @@ def on_agent_finish(self, finish, **kwargs: Any) -> Any: if run_uuid not in self.action_uuid_to_span: return action_span = self.action_uuid_to_span.pop(run_uuid) + if action_span is None: + return if finish is not None: action_span.update(output=finish) # supersedes tool output action_span.end() From 171c3046309a49b39765c16f1f2f2be1e7b1b3e9 Mon Sep 17 00:00:00 2001 From: Dasek Joiakim Date: Wed, 25 Sep 2024 12:36:54 +0200 Subject: [PATCH 30/51] docs: typo correction (#9757) --- docs/sql/table-structure.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql/table-structure.mdx b/docs/sql/table-structure.mdx index 316b3dcbc9b..77480ded4b5 100644 --- a/docs/sql/table-structure.mdx +++ b/docs/sql/table-structure.mdx @@ -8,7 +8,7 @@ Initially, MindsDB comprises three system databases and one default project, as - `information_schema` stores metadata of all the objects such as handlers, databases, AI engines, models, jobs, and more. - `log` stores log data of models and jobs. - `files`, which is initially empty, stores all files uploaded to MindsDB. -- `mindsdb` is the default project for storing models, views, jobs, triggers, adn agents. +- `mindsdb` is the default project for storing models, views, jobs, triggers, and agents. 
List all databases by running the following SQL commands: From 21dcd509f4a3f90b8ec1ed17516a606f50c1fae8 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 25 Sep 2024 15:13:16 +0300 Subject: [PATCH 31/51] Fix Agent permissions: added describe, show, explain (#9761) --- mindsdb/interfaces/skills/sql_agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mindsdb/interfaces/skills/sql_agent.py b/mindsdb/interfaces/skills/sql_agent.py index c308f1e3d0c..383185b8ccf 100644 --- a/mindsdb/interfaces/skills/sql_agent.py +++ b/mindsdb/interfaces/skills/sql_agent.py @@ -2,7 +2,7 @@ import re import hashlib -from mindsdb_sql.parser.ast import Select +from mindsdb_sql.parser.ast import Select, Show, Describe, Explain import pandas as pd from mindsdb_sql import parse_sql @@ -58,8 +58,8 @@ def _call_engine(self, query: str, database=None): def _check_permissions(self, ast_query): # check type of query - if not isinstance(ast_query, Select): - raise ValueError("Only SELECT is allowed") + if not isinstance(ast_query, (Select, Show, Describe, Explain)): + raise ValueError(f"Query is not allowed: {ast_query.to_string()}") # Check tables if self._tables_to_include: From 783878df07c50aa6ab5a11ecf322ed3c8baa2d48 Mon Sep 17 00:00:00 2001 From: Andrey Date: Wed, 25 Sep 2024 15:40:53 +0300 Subject: [PATCH 32/51] Ray serve: return all columns from prediciton (#9760) --- .../mongodb_handler/tests/test_mongodb_handler.py | 12 +++++++----- .../handlers/ray_serve_handler/ray_serve_handler.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py b/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py index ff2eea3e6b9..9914c7df5a3 100644 --- a/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py +++ b/mindsdb/integrations/handlers/mongodb_handler/tests/test_mongodb_handler.py @@ -2,7 +2,7 @@ import json from pymongo import MongoClient -from mindsdb_sql.parser.ast import CreateTable, DropTables, Identifier, Select, Star +from mindsdb_sql.parser.ast import Identifier, Select, Star from mindsdb.integrations.handlers.mongodb_handler.mongodb_handler import MongoDBHandler from mindsdb.integrations.libs.response import RESPONSE_TYPE @@ -25,12 +25,13 @@ def seed_db(): creds = HANDLER_KWARGS["connection_data"] uri = f"mongodb://{creds['username']}:{creds['password']}@{creds['host']}" conn = MongoClient(uri) - db = conn[HANDLER_KWARGS["connection_data"]["database"]] + db = conn[HANDLER_KWARGS["connection_data"]["database"]] # noqa with open("mindsdb/integrations/handlers/mongodb_handler/tests/seed.json", "r") as f: - seed = json.load(f) + json.load(f) conn.close() + @pytest.fixture(scope="module") def handler(request): seed_db() @@ -53,6 +54,7 @@ def check_valid_response(res): # TODO - Subscribe + class TestMongoDBConnection: def test_connect(self, handler): handler.connect() @@ -60,7 +62,7 @@ def test_connect(self, handler): def test_check_connection(self, handler): res = handler.check_connection() - assert res.success == True, res.error_message + assert res.success is True, res.error_message # TODO - Subscribe @@ -120,4 +122,4 @@ def test_get_columns(self, handler): class TestMongoDBDisconnect: def test_disconnect(self, handler): handler.disconnect() - assert handler.is_connected == False, "failed to disconnect" + assert handler.is_connected is False, "failed to disconnect" diff --git a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py 
b/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py index a2627b58d17..a56c2d6f11f 100644 --- a/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py +++ b/mindsdb/integrations/handlers/ray_serve_handler/ray_serve_handler.py @@ -46,8 +46,14 @@ def predict(self, df, args=None): resp = requests.post(args['predict_url'], json={'df': df.to_json(orient='records')}, headers={'content-type': 'application/json; format=pandas-records'}) - answer = resp.json() - predictions = pd.DataFrame({args['target']: answer['prediction']}) + response = resp.json() + + target = args['target'] + if target != 'prediction': + # rename prediction to target + response[target] = response.pop('prediction') + + predictions = pd.DataFrame(response) return predictions def describe(self, key: Optional[str] = None) -> pd.DataFrame: From f37840fc9ab4506a0f702a86560ce478780f5089 Mon Sep 17 00:00:00 2001 From: Daniel Usvyat Date: Wed, 25 Sep 2024 15:15:00 +0100 Subject: [PATCH 33/51] Refactor SQL toolkit descriptions for clarity and usability (#9737) --- .../custom/text2sql/mindsdb_sql_toolkit.py | 68 +++++++++++++------ 1 file changed, 47 insertions(+), 21 deletions(-) diff --git a/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py b/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py index 373911e4601..8979b9a4f7c 100644 --- a/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +++ b/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py @@ -10,45 +10,71 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit): def get_tools(self, prefix='') -> List[BaseTool]: - # Return the tools that this toolkit provides as well as MindDB's SQL validator tool - """Get the tools in the toolkit.""" list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db) + info_sql_database_tool_description = ( - "Input to this tool is a comma-separated list of tables, output is the " - "schema and sample rows for those tables. " - "Be sure that the tables actually exist by calling " - f"{list_sql_database_tool.name} first! " + "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. " + f"Ensure tables exist by calling {list_sql_database_tool.name} first. " + "Use this tool to investigate table schemas for needed columns. " + "Get sample data with 'SELECT * FROM table LIMIT 3' before answering questions. " "Example Input: table1, table2, table3" ) info_sql_database_tool = InfoSQLDatabaseTool( name=f'sql_db_schema{prefix}', db=self.db, description=info_sql_database_tool_description ) + query_sql_database_tool_description = ( - "Input to this tool is a detailed and correct SQL query, output is a " - "result from the database. If the query is not correct, an error message " - "will be returned. If an error is returned, rewrite the query, check the " - "query, and try again. If you encounter an issue with Unknown column " - f"'xxxx' in 'field list', use {info_sql_database_tool.name} " - "to query the correct table fields." + "Input: A detailed SQL query. Output: Database result or error message. " + "For errors, rewrite and retry the query. For 'Unknown column' errors, use " + f"{info_sql_database_tool.name} to check table fields. " + "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. " + "Follow these instructions with utmost precision: " + "1. Query Output Format: " + " - Always return results in well-formatted **Markdown tables**. 
" + " - Ensure clarity and proper structure for easy readability. " + "2. Sample Data: " + " - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. " + "3. Categorical Data: " + " - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. " + " - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. " + "4. Result Limiting and Counting: " + " - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. " + " - If the count is greater than 10, limit the query to return only 10 results initially. " + " - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. " + " - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. " + "5. Date Handling: " + " - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. " + " - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` " + " - Do not compare date values without casting columns to date. " + " - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: " + " SELECT NOW() + INTERVAL 5 DAY; " + " SELECT NOW() - INTERVAL 3 HOUR; " + " SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; " + " SELECT NOW() - INTERVAL 1 YEAR; " + "6. Query Best Practices: " + " - Query only necessary columns, not all. " + " - Use only existing column names from correct tables. " + " - Use database-specific syntax for date operations. " + "7. Error Handling: " + " - For errors, rewrite and retry the query. " + " - For 'Unknown column' errors, check table fields using info_sql_database_tool. " + "Adhere to these guidelines for all queries and responses. Ask for clarification if needed." ) + query_sql_database_tool = QuerySQLDataBaseTool( name=f'sql_db_query{prefix}', db=self.db, description=query_sql_database_tool_description ) mindsdb_sql_parser_tool_description = ( - "Use this tool to ensure that a SQL query passes the MindsDB SQL parser." - "If the query is not correct, it will be corrected and the new query will be returned. Use this new query." - "If the query is not correct and cannot be corrected, an error will be returned." - "In this case an error is returned, rewrite the query, check the query, and try again." - "If query is correct, the query will be parsed and returned." - "This tool should ALWAYS be run before executing a query with the tool " - f"{query_sql_database_tool.name}!" - "" + "Use this tool to ensure a SQL query passes the MindsDB SQL parser. " + "If the query is not correct, it will be corrected and returned. Use the new query. " + "If the query can't be corrected, an error is returned. In this case, rewrite and retry. " + "If the query is correct, it will be parsed and returned. 
" + f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. " ) - mindsdb_sql_parser_tool = MindsDBSQLParserTool( name=f'mindsdb_sql_parser_tool{prefix}', description=mindsdb_sql_parser_tool_description From f1cc2cd3741b305f45adbda6dcef57e8d651e423 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com> Date: Wed, 25 Sep 2024 19:54:50 +0530 Subject: [PATCH 34/51] SAP Hana Integration Improvements (#9719) --- .flake8 | 1 - .github/workflows/test_on_push.yml | 3 +- .../data-integrations/sap-hana.mdx | 134 ++++++---- .../handlers/hana_handler/README.md | 142 ++++++---- .../handlers/hana_handler/connection_args.py | 100 ++----- .../handlers/hana_handler/hana_handler.py | 245 +++++++++++------- tests/unit/handlers/test_hana.py | 73 ++++++ 7 files changed, 414 insertions(+), 284 deletions(-) create mode 100644 tests/unit/handlers/test_hana.py diff --git a/.flake8 b/.flake8 index be2dac9a86c..a847b51e252 100644 --- a/.flake8 +++ b/.flake8 @@ -92,7 +92,6 @@ exclude = mindsdb/integrations/handlers/altibase_handler/* mindsdb/integrations/handlers/ms_teams_handler/* mindsdb/integrations/handlers/discord_handler/* - mindsdb/integrations/handlers/hana_handler/* mindsdb/integrations/handlers/sheets_handler/* mindsdb/integrations/handlers/autosklearn_handler/* mindsdb/integrations/handlers/twitter_handler/* diff --git a/.github/workflows/test_on_push.yml b/.github/workflows/test_on_push.yml index 7767c417af4..fc9afa09aff 100644 --- a/.github/workflows/test_on_push.yml +++ b/.github/workflows/test_on_push.yml @@ -162,6 +162,7 @@ jobs: pip install .[db2] pip install .[hive] pip install .[teradata] + pip install .[hana] pip install .[minds_endpoint] pip freeze - name: Run unit tests @@ -177,7 +178,7 @@ jobs: - name: Run Handlers tests and submit Coverage to coveralls if: ${{ needs.changes.outputs.not-docs == 'true' }} run: | - handlers=("mysql" "postgres" "mssql" "clickhouse" "snowflake" "web" "redshift" "bigquery" "elasticsearch" "s3" "databricks" "dynamodb" "mariadb" "oracle" "mongodb" "db2" "hive" "teradata") + handlers=("mysql" "postgres" "mssql" "clickhouse" "snowflake" "web" "redshift" "bigquery" "elasticsearch" "s3" "databricks" "dynamodb" "mariadb" "oracle" "mongodb" "db2" "hive" "teradata" "hana") for handler in "${handlers[@]}" do pytest --cov=mindsdb/integrations/handlers/${handler}_handler tests/unit/handlers/test_${handler}.py diff --git a/docs/integrations/data-integrations/sap-hana.mdx b/docs/integrations/data-integrations/sap-hana.mdx index 4a35787f795..98249a7cb4d 100644 --- a/docs/integrations/data-integrations/sap-hana.mdx +++ b/docs/integrations/data-integrations/sap-hana.mdx @@ -3,84 +3,104 @@ title: SAP HANA sidebarTitle: SAP HANA --- -This is the implementation of the SAP HANA data handler for MindsDB. - -[SAP HANA](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html), where HANA stands for High-performance ANalytic Appliance, is a multi-model database that stores data in its memory instead of keeping it on a disk. The column-oriented in-memory database design used by SAP HANA allows users to run advanced analytics alongside high-speed transactions in a single system. +This documentation describes the integration of MindsDB with [SAP HANA](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html), a multi-model database with a column-oriented in-memory design that stores data in its memory instead of keeping it on a disk. 
+The integration allows MindsDB to access data from SAP HANA and enhance SAP HANA with AI capabilities.

## Prerequisites

Before proceeding, ensure the following prerequisites are met:

1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
2. To connect SAP HANA to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).

## Connection

Establish a connection to SAP HANA from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hana_handler) as an engine.

```sql
CREATE DATABASE sap_hana_datasource
WITH
    ENGINE = 'hana',
    PARAMETERS = {
      "host": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com",
      "port": "443",
      "user": "demo_user",
      "password": "demo_password",
      "encrypt": true
    };
```
Required connection parameters include the following:

* `host`: The hostname, IP address, or URL of the SAP HANA database.
* `port`: The port number for connecting to the SAP HANA database.
* `user`: The username for the SAP HANA database.
* `password`: The password for the SAP HANA database.

Optional connection parameters include the following:

* `database`: The name of the database to connect to. This parameter is not used for SAP HANA Cloud.
* `schema`: The database schema to use. Defaults to the user's default schema.
* `encrypt`: The setting to enable or disable encryption.
Defaults to `True`.

## Usage

Retrieve data from a specified table by providing the integration, schema and table names:

```sql
SELECT *
FROM sap_hana_datasource.schema_name.table_name
LIMIT 10;
```

Run SAP HANA SQL queries directly on the connected SAP HANA database:

```sql
SELECT * FROM sap_hana_datasource (

    --Native Query Goes Here
    SELECT customer, year, SUM(sales)
    FROM t1
    GROUP BY ROLLUP(customer, year);

    SELECT customer, year, SUM(sales)
    FROM t1
    GROUP BY GROUPING SETS
    (
       (customer, year),
       (customer)
    )
    UNION ALL
    SELECT NULL, NULL, SUM(sales)
    FROM t1;

);
```

The above examples utilize `sap_hana_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command.

## Troubleshooting

`Database Connection Error`

* **Symptoms**: Failure to connect MindsDB with the SAP HANA database.
* **Checklist**:
    1. Make sure the SAP HANA database is active.
    2. Confirm that address, port, user and password are correct. Try a direct connection using a client like DBeaver.
    3. Ensure a stable network between MindsDB and SAP HANA.

`SQL statement cannot be parsed by mindsdb_sql`

* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters.
* **Checklist**:
    1. Ensure table names with spaces or special characters are enclosed in backticks.
    2. Examples:
        * Incorrect: SELECT * FROM integration.travel-data
        * Incorrect: SELECT * FROM integration.'travel-data'
        * Correct: SELECT * FROM integration.\`travel-data\`

\ No newline at end of file
diff --git a/mindsdb/integrations/handlers/hana_handler/README.md b/mindsdb/integrations/handlers/hana_handler/README.md
index 992fb9353a6..98249a7cb4d 100644
--- a/mindsdb/integrations/handlers/hana_handler/README.md
+++ b/mindsdb/integrations/handlers/hana_handler/README.md
@@ -1,74 +1,106 @@
-# SAP HANA Handler
+---
+title: SAP HANA
+sidebarTitle: SAP HANA
+---

-This is the implementation of the SAP HANA handler for MindsDB.
+This documentation describes the integration of MindsDB with [SAP HANA](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html), a multi-model database with a column-oriented in-memory design that stores data in its memory instead of keeping it on a disk.
+The integration allows MindsDB to access data from SAP HANA and enhance SAP HANA with AI capabilities.

-## SAP HANA
+## Prerequisites

-SAP HANA (High-performance ANalytic Appliance) is a multi-model database that stores data in its memory instead of keeping it on a disk. The column-oriented in-memory database design used by SAP HANA allows users to run advanced analytics alongside high-speed transactions in a single system. [Read more](https://www.sap.com/products/technology-platform/hana/what-is-sap-hana.html).
+Before proceeding, ensure the following prerequisites are met:

-## Implementation
+1. Install MindsDB locally via [Docker](https://docs.mindsdb.com/setup/self-hosted/docker) or [Docker Desktop](https://docs.mindsdb.com/setup/self-hosted/docker-desktop).
+2. To connect SAP HANA to MindsDB, install the required dependencies following [this instruction](https://docs.mindsdb.com/setup/self-hosted/docker#install-dependencies).

-This handler was implemented using `hdbcli` - the Python driver for SAP HANA.
+## Connection

-The required arguments to establish a connection are,
+Establish a connection to SAP HANA from MindsDB by executing the following SQL command and providing its [handler name](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/hana_handler) as an engine.

-* `host`: the host name or IP address of the SAP HANA instance
-* `port`: the port number of the SAP HANA instance
-* `user`: specifies the user name
-* `password`: specifies the password for the user
-* `schema`: sets the current schema, which is used for identifiers without a schema
+```sql
+CREATE DATABASE sap_hana_datasource
+WITH
+    ENGINE = 'hana',
+    PARAMETERS = {
+      "host": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com",
+      "port": "443",
+      "user": "demo_user",
+      "password": "demo_password",
+      "encrypt": true
+    };
+```

-## Usage
-
-Assuming you created a schema in SAP HANA called `MINDSDB` and you have a table called `TEST` that was created using
-the following SQL statements:
-
-~~~~sql
-CREATE SCHEMA MINDSDB;
-
-CREATE TABLE MINDSDB.TEST
-(
-    ID INTEGER NOT NULL,
-    NAME NVARCHAR(1),
-    DESCRIPTION NVARCHAR(1)
-);
-
-CREATE UNIQUE INDEX MINDSDB.TEST_ID_INDEX
-    ON MINDSDB.TEST (ID);
-
-ALTER TABLE MINDSDB.TEST
-    ADD CONSTRAINT TEST_PK
-        PRIMARY KEY (ID);
-
-INSERT INTO MINDSDB.TEST
-VALUES (1, 'h', 'w');
-~~~~
+Required connection parameters include the following:

-In order to make use of this handler and connect to the SAP HANA database in MindsDB, the following syntax can be used:
+* `host`: The hostname, IP address, or URL of the SAP HANA database.
+* `port`: The port number for connecting to the SAP HANA database.
+* `user`: The username for the SAP HANA database.
+* `password`: The password for the SAP HANA database.

-~~~~sql
-CREATE DATABASE sap_hana_trial
-WITH ENGINE = 'hana',
-PARAMETERS = {
-    "user": "DBADMIN",
-    "password": "password",
-    "host": ".hana.trial-us10.hanacloud.ondemand.com",
-    "port": "443",
-    "schema": "MINDSDB",
-    "encrypt": true
-};
-~~~~
+Optional connection parameters include the following:

-**Note**: The above example assumes usage of SAP HANA Cloud, which requires the `encrypt` parameter to be set to `true` and uses port `443`.
+* `database`: The name of the database to connect to. This parameter is not used for SAP HANA Cloud.
+* `schema`: The database schema to use. Defaults to the user's default schema.
+* `encrypt`: The setting to enable or disable encryption. Defaults to `True`.

-Now, you can use this established connection to query your database as follows:
+## Usage

-~~~~sql
-SELECT * FROM sap_hana_trial.test
-~~~~
+Retrieve data from a specified table by providing the integration, schema and table names:

-| ID | NAME | DESCRIPTION |
-|----|------|-------------|
-| 1  | h    | w           |
+```sql
+SELECT *
+FROM sap_hana_datasource.schema_name.table_name
+LIMIT 10;
+```

-![MindsDB using SAP HANA Integration](https://i.imgur.com/okXNhoc.jpg)
+Run SAP HANA SQL queries directly on the connected SAP HANA database:
+
+```sql
+SELECT * FROM sap_hana_datasource (
+
+    --Native Query Goes Here
+    SELECT customer, year, SUM(sales)
+    FROM t1
+    GROUP BY ROLLUP(customer, year);
+
+    SELECT customer, year, SUM(sales)
+    FROM t1
+    GROUP BY GROUPING SETS
+    (
+       (customer, year),
+       (customer)
+    )
+    UNION ALL
+    SELECT NULL, NULL, SUM(sales)
+    FROM t1;
+
+);
+```
+
+The above examples utilize `sap_hana_datasource` as the datasource name, which is defined in the `CREATE DATABASE` command.
+ + +## Troubleshooting + + +`Database Connection Error` + +* **Symptoms**: Failure to connect MindsDB with the SAP HANA database. +* **Checklist**: + 1. Make sure the SAP HANA database is active. + 2. Confirm that address, port, user and password are correct. Try a direct connection using a client like DBeaver. + 3. Ensure a stable network between MindsDB and SAP HANA. + + + +`SQL statement cannot be parsed by mindsdb_sql` + +* **Symptoms**: SQL queries failing or not recognizing table names containing spaces or special characters. +* **Checklist**: + 1. Ensure table names with spaces or special characters are enclosed in backticks. + 2. Examples: + * Incorrect: SELECT * FROM integration.travel-data + * Incorrect: SELECT * FROM integration.'travel-data' + * Correct: SELECT * FROM integration.\`travel-data\` + \ No newline at end of file diff --git a/mindsdb/integrations/handlers/hana_handler/connection_args.py b/mindsdb/integrations/handlers/hana_handler/connection_args.py index 3cb7aeb1c3f..1e389bf2b86 100644 --- a/mindsdb/integrations/handlers/hana_handler/connection_args.py +++ b/mindsdb/integrations/handlers/hana_handler/connection_args.py @@ -1,105 +1,57 @@ -from textwrap import dedent from collections import OrderedDict from mindsdb.integrations.libs.const import HANDLER_CONNECTION_ARG_TYPE as ARG_TYPE connection_args = OrderedDict( - host={ + address={ 'type': ARG_TYPE.STR, - 'description': 'The IP address/host name of the SAP HANA instance host.' + 'description': 'The hostname, IP address, or URL of the SAP HANA database.', + 'required': True, + 'label': 'Address' }, port={ - 'type': ARG_TYPE.STR, - 'description': 'The port number of the SAP HANA instance.' + 'type': ARG_TYPE.INT, + 'description': 'The port number for connecting to the SAP HANA database.', + 'required': True, + 'label': 'Port' }, user={ 'type': ARG_TYPE.STR, - 'description': 'Specifies the user name.' + 'description': 'The username for the SAP HANA database.', + 'required': True, + 'label': 'User' }, password={ 'type': ARG_TYPE.PWD, - 'description': 'Specifies the password for the user.', - 'secret': True + 'description': 'The password for the SAP HANA database.', + 'secret': True, + 'required': True, + 'label': 'Password' }, schema={ 'type': ARG_TYPE.STR, - 'description': 'Sets the current schema, which is used for identifiers without a schema.' + 'description': "The database schema to use. Defaults to the user's default schema.", + 'required': False, + 'label': 'Schema' }, database={ 'type': ARG_TYPE.STR, - 'description': 'Specifies the name of the database to connect to. (Not used for SAP HANA Cloud)' - }, - autocommit={ - 'type': ARG_TYPE.BOOL, - 'description': 'Sets the autocommit mode for the connection.' - }, - properties={ - 'type': ARG_TYPE.STR, - 'description': 'Additional dictionary with special properties of the connection.' + 'description': 'The name of the database to connect to. This parameter is not used for SAP HANA Cloud.', + 'required': False, + 'label': 'Database' }, encrypt={ 'type': ARG_TYPE.BOOL, - 'description': 'Enables or disables TLS encryption.' - }, - sslHostNameInCertificate={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the host name used to verify server\'s identity.' - }, - sslValidateCertificate={ - 'type': ARG_TYPE.BOOL, - 'description': 'Specifies whether to validate the server\'s certificate.' - }, - sslCryptoProvider={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the cryptographic library provider used for TLS communication.' 
- }, - sslTrustStore={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the path to a trust store file that contains the server\'s public certificates.' - }, - sslKeyStore={ - 'type': ARG_TYPE.STR, - 'description': 'Specifies the path to the keystore file that contains the client\'s identity.' - }, - cseKeyStorePassword={ - 'type': ARG_TYPE.STR, - 'description': 'Provides the password for the local key store.' - }, - sslSNIHostname={ - 'type': ARG_TYPE.STR, - 'description': dedent("""Specifies the name of the host that is attempting to connect at the start of - the TLS handshaking process.""") - }, - sslSNIRequest={ - 'type': ARG_TYPE.BOOL, - 'description': 'Specifies whether SNI requests are enabled for TLS connections: TRUE/FALSE.' - }, - siteType={ - 'type': ARG_TYPE.STR, - 'description': dedent("""Specifies whether the connection is made to either the PRIMARY or SECONDARY - site in an Active/Active (read enabled) system.""") - }, - splitBatchCommands={ - 'type': ARG_TYPE.BOOL, - 'description': 'Allows split and parallel execution of batch commands on partitioned tables.' - }, - routeDirectExecute={ - 'type': ARG_TYPE.BOOL, - 'description': dedent("""Converts direct execute into prepare and execute (routed execute) if the - number of index servers is more than one and if statement routing is enabled.""") - }, - secondarySessionFallback={ - 'type': ARG_TYPE.BOOL, - 'description': dedent("""Forces the ongoing transaction on a non-anchor connection to fall back - to the anchor/primary connection if this connection is dropped by the network or server.""") + 'description': 'The setting to enable or disable encryption. Default is `True`.', + 'required': False, + 'label': 'Encrypt' } ) connection_args_example = OrderedDict( - host='.hana.trial-us10.hanacloud.ondemand.com', + host='123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com', port=30013, user='DBADMIN', - password='password', - schema='MINDSDB', + password='password' ) diff --git a/mindsdb/integrations/handlers/hana_handler/hana_handler.py b/mindsdb/integrations/handlers/hana_handler/hana_handler.py index 25f7fcda8ae..9e242d0aaf4 100644 --- a/mindsdb/integrations/handlers/hana_handler/hana_handler.py +++ b/mindsdb/integrations/handlers/hana_handler/hana_handler.py @@ -1,13 +1,11 @@ -from pandas import DataFrame +from typing import Any, Dict, Text from hdbcli import dbapi -import sqlalchemy_hana.dialect as hana_dialect - -from mindsdb_sql import parse_sql +from hdbcli.dbapi import Error, ProgrammingError from mindsdb_sql.parser.ast.base import ASTNode from mindsdb_sql.render.sqlalchemy_render import SqlalchemyRender - -from mindsdb.utilities import log +from pandas import DataFrame +import sqlalchemy_hana.dialect as hana_dialect from mindsdb.integrations.libs.base import DatabaseHandler from mindsdb.integrations.libs.response import ( @@ -15,105 +13,105 @@ HandlerResponse as Response, RESPONSE_TYPE ) +from mindsdb.utilities import log logger = log.getLogger(__name__) + class HanaHandler(DatabaseHandler): """ - This handler handles connection and execution of the SAP Hana statements. + This handler handles the connection and execution of SQL statements on SAP HANA. """ name = 'hana' - def __init__(self, name: str, connection_data: dict, **kwargs): - super().__init__(name) + def __init__(self, name: Text, connection_data: Dict, **kwargs: Any) -> None: + """ + Initializes the handler. - self.dialect = 'hana' - self.parser = parse_sql + Args: + name (Text): The name of the handler instance. 
+ connection_data (Dict): The connection data required to connect to the SAP HANA database. + kwargs: Arbitrary keyword arguments. + """ + super().__init__(name) self.connection_data = connection_data - self.renderer = SqlalchemyRender(hana_dialect.HANAHDBCLIDialect) - - self.address = self.connection_data.get('host') - self.port = self.connection_data.get('port') - self.user = self.connection_data.get('user') - self.password = self.connection_data.get('password') - self.autocommit = self.connection_data.get('autocommit', True) - self.properties = self.connection_data.get('properties') - self.currentSchema = self.connection_data.get('schema', 'CURRENTUSER') - self.databaseName = self.connection_data.get('database') - self.encrypt = self.connection_data.get('encrypt', False) - self.sslHostNameInCertificate = self.connection_data.get('sslHostNameInCertificate') - self.sslValidateCertificate = self.connection_data.get('sslValidateCertificate', False) - self.sslCryptoProvider = self.connection_data.get('sslCryptoProvider') - self.sslTrustStore = self.connection_data.get('sslTrustStore') - self.sslKeyStore = self.connection_data.get('sslKeyStore') - self.cseKeyStorePassword = self.connection_data.get('cseKeyStorePassword') - self.sslSNIHostname = self.connection_data.get('sslSNIHostname') - self.sslSNIRequest = self.connection_data.get('sslSNIRequest', True) - self.siteType = self.connection_data.get('siteType') - self.splitBatchCommands = self.connection_data.get('splitBatchCommands', True) - self.routeDirectExecute = self.connection_data.get('routeDirectExecute', False) - self.secondarySessionFallback = self.connection_data.get('secondarySessionFallback', True) + self.kwargs = kwargs self.connection = None self.is_connected = False def __del__(self): + """ + Closes the connection when the handler instance is deleted. + """ if self.is_connected is True: self.disconnect() - def connect(self): - """ - Handles the connection to a SAP Hana database insance. + def connect(self) -> dbapi.Connection: """ + Establishes a connection to the SAP HANA database. + Raises: + ValueError: If the expected connection parameters are not provided. + hdbcli.dbapi.Error: If an error occurs while connecting to the SAP HANA database. + + Returns: + hdbcli.dbapi.Connection: A connection object to the SAP HANA database. + """ if self.is_connected is True: return self.connection - connection = dbapi.connect( - address=self.address, - port=self.port, - user=self.user, - password=self.password, - autocommit=self.autocommit, - properties=self.properties, - currentSchema=self.currentSchema, - databaseName=self.databaseName, - encrypt=self.encrypt, - sslHostNameInCertificate=self.sslHostNameInCertificate, - sslValidateCertificate=self.sslValidateCertificate, - sslCryptoProvider=self.sslCryptoProvider, - sslTrustStore=self.sslTrustStore, - sslKeyStore=self.sslKeyStore, - cseKeyStorePassword=self.cseKeyStorePassword, - sslSNIHostname=self.sslSNIHostname, - sslSNIRequest=self.sslSNIRequest, - siteType=self.siteType, - splitBatchCommands=self.splitBatchCommands, - routeDirectExecute=self.routeDirectExecute, - secondarySessionFallback=self.secondarySessionFallback - ) - - self.is_connected = True - self.connection = connection - return self.connection - - def disconnect(self): - """ - Disconnects from the SAP HANA database - """ + # Mandatory connection parameters. 
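+        # Failing fast with a descriptive ValueError here is clearer than letting
+        # hdbcli reject the connect call with a lower-level driver error.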
+        if not all(key in self.connection_data for key in ['address', 'port', 'user', 'password']):
+            raise ValueError('Required parameters (address, port, user, password) must be provided.')
+
+        config = {
+            'address': self.connection_data['address'],
+            'port': self.connection_data['port'],
+            'user': self.connection_data['user'],
+            'password': self.connection_data['password'],
+        }
+
+        # Optional connection parameters.
+        if 'database' in self.connection_data:
+            config['databaseName'] = self.connection_data['database']

+        if 'schema' in self.connection_data:
+            config['currentSchema'] = self.connection_data['schema']
+
+        if 'encrypt' in self.connection_data:
+            config['encrypt'] = self.connection_data['encrypt']
+
+        try:
+            self.connection = dbapi.connect(
+                **config
+            )
+            self.is_connected = True
+            return self.connection
+        except Error as known_error:
+            logger.error(f'Error connecting to SAP HANA, {known_error}!')
+            raise
+        except Exception as unknown_error:
+            logger.error(f'Unknown error connecting to SAP HANA, {unknown_error}!')
+            raise
+
+    def disconnect(self) -> None:
+        """
+        Closes the connection to the SAP HANA database if it's currently open.
+        """
         if self.is_connected is True:
             self.connection.close()
         self.is_connected = False
 
     def check_connection(self) -> StatusResponse:
         """
-        Check the connection of the SAP HANA database
-        :return: success status and error message if error occurs
-        """
+        Checks the status of the connection to the SAP HANA database.
 
+        Returns:
+            StatusResponse: An object containing the success status and an error message if an error occurs.
+        """
         response = StatusResponse(False)
         need_to_close = self.is_connected is False
 
@@ -122,9 +120,12 @@ def check_connection(self) -> StatusResponse:
             with connection.cursor() as cur:
                 cur.execute('SELECT 1 FROM SYS.DUMMY')
             response.success = True
-        except dbapi.Error as e:
-            logger.error(f'Error connecting to SAP HANA {self.address}, {e}!')
-            response.error_message = e
+        except (Error, ProgrammingError, ValueError) as known_error:
+            logger.error(f'Connection check to SAP HANA failed, {known_error}!')
+            response.error_message = str(known_error)
+        except Exception as unknown_error:
+            logger.error(f'Connection check to SAP HANA failed due to an unknown error, {unknown_error}!')
+            response.error_message = str(unknown_error)
 
         if response.success is True and need_to_close:
             self.disconnect()
@@ -133,13 +134,16 @@ def check_connection(self) -> StatusResponse:
 
         return response
 
-    def native_query(self, query: str) -> Response:
-        """
-        Receive SQL query and runs it
-        :param query: The SQL query to run in SAP HANA
-        :return: returns the records from the current recordset
+    def native_query(self, query: Text) -> Response:
         """
+        Executes a native SQL query on the SAP HANA database and returns the result.
+
+        Args:
+            query (Text): The SQL query to be executed.
 
+        Returns:
+            Response: A response object containing the result of the query or an error message.
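+            Successful statements are committed; failures are rolled back before the error response is returned.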
+ """ need_to_close = self.is_connected is False connection = self.connect() @@ -158,12 +162,20 @@ def native_query(self, query: str) -> Response: ) ) connection.commit() - except Exception as e: + except ProgrammingError as programming_error: logger.error(f'Error running query: {query} on {self.address}!') response = Response( RESPONSE_TYPE.ERROR, error_code=0, - error_message=str(e) + error_message=str(programming_error) + ) + connection.rollback() + except Exception as unknown_error: + logger.error(f'Unknown error running query: {query} on {self.address}!') + response = Response( + RESPONSE_TYPE.ERROR, + error_code=0, + error_message=str(unknown_error) ) connection.rollback() @@ -174,33 +186,74 @@ def native_query(self, query: str) -> Response: def query(self, query: ASTNode) -> Response: """ - Retrieve the data from the SQL statement with eliminated rows that dont satisfy the WHERE condition - """ + Executes a SQL query represented by an ASTNode on the SAP HANA database and retrieves the data (if any). + + Args: + query (ASTNode): An ASTNode representing the SQL query to be executed. - query_str = self.renderer.get_string(query, with_failback=True) + Returns: + Response: The response from the `native_query` method, containing the result of the SQL query execution. + """ + renderer = SqlalchemyRender(hana_dialect.HANAHDBCLIDialect) + query_str = renderer.get_string(query, with_failback=True) return self.native_query(query_str) def get_tables(self) -> Response: """ - List all tables in SAP HANA in the current schema - """ + Retrieves a list of all non-system tables in the SAP HANA database. - return self.native_query(f""" + Returns: + Response: A response object containing a list of tables in the SAP HANA database. + """ + query = """ SELECT SCHEMA_NAME, TABLE_NAME, - TABLE_TYPE + 'BASE TABLE' AS TABLE_TYPE FROM SYS.TABLES - WHERE IS_SYSTEM_TABLE = 'FALSE' + WHERE IS_SYSTEM_TABLE = 'FALSE' AND IS_USER_DEFINED_TYPE = 'FALSE' AND IS_TEMPORARY = 'FALSE' - """) - def get_columns(self, table_name: str) -> Response: + UNION + + SELECT SCHEMA_NAME, + VIEW_NAME AS TABLE_NAME, + 'VIEW' AS TABLE_TYPE + FROM + SYS.VIEWS + WHERE SCHEMA_NAME <> 'SYS' + AND SCHEMA_NAME NOT LIKE '_SYS%' """ - List all columns in a table in SAP HANA in the current schema - :param table_name: the table name for which to list the columns - :return: returns the columns in the table + return self.native_query(query) + + def get_columns(self, table_name: Text) -> Response: """ + Retrieves column details for a specified table in the SAP HANA database. + + Args: + table_name (Text): The name of the table for which to retrieve column information. - return self.renderer.dialect.get_columns(table_name) + Raises: + ValueError: If the 'table_name' is not a valid string. + + Returns: + Response: A response object containing the column details. 
+ """ + if not table_name or not isinstance(table_name, str): + raise ValueError("Invalid table name provided.") + + query = f""" + SELECT COLUMN_NAME AS Field, + DATA_TYPE_NAME AS Type + FROM SYS.TABLE_COLUMNS + WHERE TABLE_NAME = '{table_name}' + + UNION ALL + + SELECT COLUMN_NAME AS Field, + DATA_TYPE_NAME AS Type + FROM SYS.VIEW_COLUMNS + WHERE VIEW_NAME = '{table_name}' + """ + return self.native_query(query) diff --git a/tests/unit/handlers/test_hana.py b/tests/unit/handlers/test_hana.py new file mode 100644 index 00000000000..c97d9f175ae --- /dev/null +++ b/tests/unit/handlers/test_hana.py @@ -0,0 +1,73 @@ +from collections import OrderedDict +import unittest +from unittest.mock import patch + +from hdbcli.dbapi import ProgrammingError + +from base_handler_test import BaseDatabaseHandlerTest +from mindsdb.integrations.handlers.hana_handler.hana_handler import HanaHandler + + +class TestHanaHandler(BaseDatabaseHandlerTest, unittest.TestCase): + + @property + def dummy_connection_data(self): + return OrderedDict( + address='123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com', + port=443, + user='example_user', + password='example_pass' + ) + + @property + def err_to_raise_on_connect_failure(self): + return ProgrammingError("Connection Failed") + + @property + def get_tables_query(self): + return """ + SELECT SCHEMA_NAME, + TABLE_NAME, + 'BASE TABLE' AS TABLE_TYPE + FROM + SYS.TABLES + WHERE IS_SYSTEM_TABLE = 'FALSE' + AND IS_USER_DEFINED_TYPE = 'FALSE' + AND IS_TEMPORARY = 'FALSE' + + UNION + + SELECT SCHEMA_NAME, + VIEW_NAME AS TABLE_NAME, + 'VIEW' AS TABLE_TYPE + FROM + SYS.VIEWS + WHERE SCHEMA_NAME <> 'SYS' + AND SCHEMA_NAME NOT LIKE '_SYS%' + """ + + @property + def get_columns_query(self): + return f""" + SELECT COLUMN_NAME AS Field, + DATA_TYPE_NAME AS Type + FROM SYS.TABLE_COLUMNS + WHERE TABLE_NAME = '{self.mock_table}' + + UNION ALL + + SELECT COLUMN_NAME AS Field, + DATA_TYPE_NAME AS Type + FROM SYS.VIEW_COLUMNS + WHERE VIEW_NAME = '{self.mock_table}' + """ + + def create_handler(self): + return HanaHandler('hana', connection_data=self.dummy_connection_data) + + def create_patcher(self): + return patch('hdbcli.dbapi.connect') + + +if __name__ == '__main__': + unittest.main() From 8aa064fec8506fe68369314b170d881b3a702067 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Wed, 25 Sep 2024 16:41:54 +0200 Subject: [PATCH 35/51] Bump version (#9762) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index a28c7efff05..8606b8d28ce 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.3.2' +__version__ = '24.9.4.0' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 093786b06773a53fe28a6af4fe66331d227c1e0e Mon Sep 17 00:00:00 2001 From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com> Date: Wed, 25 Sep 2024 20:56:20 +0530 Subject: [PATCH 36/51] Updated the Uses of host in the SAP Hana Integration Docs to address (#9763) --- docs/integrations/data-integrations/sap-hana.mdx | 4 ++-- mindsdb/integrations/handlers/hana_handler/README.md | 4 ++-- .../handlers/hana_handler/tests/test_hana_handler.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/integrations/data-integrations/sap-hana.mdx 
b/docs/integrations/data-integrations/sap-hana.mdx index 98249a7cb4d..cea583db9a2 100644 --- a/docs/integrations/data-integrations/sap-hana.mdx +++ b/docs/integrations/data-integrations/sap-hana.mdx @@ -22,7 +22,7 @@ CREATE DATABASE sap_hana_datasource WITH ENGINE = 'hana', PARAMETERS = { - "host": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", + "address": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", "port": "443", "user": "demo_user", "password": "demo_password", @@ -32,7 +32,7 @@ WITH Required connection parameters include the following: -* `host`: The hostname, IP address, or URL of the SAP HANA database. +* `address`: The hostname, IP address, or URL of the SAP HANA database. * `port`: The port number for connecting to the SAP HANA database. * `user`: The username for the SAP HANA database. * `password`: The password for the SAP HANA database. diff --git a/mindsdb/integrations/handlers/hana_handler/README.md b/mindsdb/integrations/handlers/hana_handler/README.md index 98249a7cb4d..cea583db9a2 100644 --- a/mindsdb/integrations/handlers/hana_handler/README.md +++ b/mindsdb/integrations/handlers/hana_handler/README.md @@ -22,7 +22,7 @@ CREATE DATABASE sap_hana_datasource WITH ENGINE = 'hana', PARAMETERS = { - "host": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", + "address": "123e4567-e89b-12d3-a456-426614174000.hana.trial-us10.hanacloud.ondemand.com", "port": "443", "user": "demo_user", "password": "demo_password", @@ -32,7 +32,7 @@ WITH Required connection parameters include the following: -* `host`: The hostname, IP address, or URL of the SAP HANA database. +* `address`: The hostname, IP address, or URL of the SAP HANA database. * `port`: The port number for connecting to the SAP HANA database. * `user`: The username for the SAP HANA database. * `password`: The password for the SAP HANA database. 
diff --git a/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py b/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py index 28899af66cf..d66a0be3030 100644 --- a/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py +++ b/mindsdb/integrations/handlers/hana_handler/tests/test_hana_handler.py @@ -31,7 +31,7 @@ class HanaHandlerTest(unittest.TestCase): @classmethod def setUpClass(cls): cls.kwargs = { - "host": os.environ.get('HANA_HOST', 'localhost'), + "address": os.environ.get('HANA_ADDRESS', 'localhost'), "port": os.environ.get('HANA_PORT', 30015), "user": "DBADMIN", "password": os.environ.get('HANA_PASSWORD'), From 6eb7e1de0e5bd3aa0fcff4466b90bb6bde5f8219 Mon Sep 17 00:00:00 2001 From: martyna-mindsdb <109554435+martyna-mindsdb@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:35:41 -0700 Subject: [PATCH 37/51] removed obsolete rag handler from docs (#9764) --- docs/integrations/ai-overview.mdx | 1 - docs/integrations/support.mdx | 1 - .../semantic_search.mdx | 155 ------------------ 3 files changed, 157 deletions(-) delete mode 100644 docs/use-cases/ai-powered_data_retrieval/semantic_search.mdx diff --git a/docs/integrations/ai-overview.mdx b/docs/integrations/ai-overview.mdx index 8b4a9606e45..49e42d2e1a2 100644 --- a/docs/integrations/ai-overview.mdx +++ b/docs/integrations/ai-overview.mdx @@ -32,7 +32,6 @@ This section contains instructions on how to create and deploy models within Min - diff --git a/docs/integrations/support.mdx b/docs/integrations/support.mdx index 4171970d150..84c72287ddf 100644 --- a/docs/integrations/support.mdx +++ b/docs/integrations/support.mdx @@ -56,7 +56,6 @@ Below is the list of all community integrations. | MariaDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mariadb_handler) | | TimeGPT | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/timegpt_handler) | | MongoDB | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/mongodb_handler) | -| RAG | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/rag_handler) | | X (Twitter) | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/twitter_handler) | | GitHub | DATA | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/github_handler) | | Hugging Face Inference API | AI | [Link](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/huggingface_api_handler) | diff --git a/docs/use-cases/ai-powered_data_retrieval/semantic_search.mdx b/docs/use-cases/ai-powered_data_retrieval/semantic_search.mdx deleted file mode 100644 index 84b54194b38..00000000000 --- a/docs/use-cases/ai-powered_data_retrieval/semantic_search.mdx +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: Semantic Search with RAG -sidebarTitle: Semantic Search ---- - -This tutorial uses the [RAG handler](https://github.com/mindsdb/mindsdb/tree/main/mindsdb/integrations/handlers/rag_handler) to search data. - - -Learn more about the [RAG handler here](/integrations/a-engines/rag). - - -## Tutorial - -The following examples illustrate various ways to integrate RAG with different data sources, including files, URLs, databases, and vector databases. - -### From a URL - -The following example utilize `rag_engine` to create a model with the `CREATE MODEL` statement. 
- -```sql -CREATE ML_ENGINE rag_engine -FROM rag -USING - openai_api_key = 'sk-xxx'; -``` - -Create a model using this engine: - -```sql -CREATE MODEL mindsdb_rag_model -predict answer -USING - engine = "rag_engine", - llm_type = "openai", - url='https://docs.mindsdb.com/what-is-mindsdb', - vector_store_folder_name = 'db_connection', - input_column = 'question'; -``` - -Check the status of the model. - -```sql -DESCRIBE mindsdb_rag_model; -``` - -Now you can use the model to answer your questions. - -```sql -SELECT * -FROM mindsdb_rag_model -WHERE question = 'What ML use cases does MindsDB support?'; -``` - -On execution, we get: - - -| answer | source_documents | question | -|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------------------------| -| MindsDB supports various machine learning use cases such as anomaly detection, forecasting, recommenders, classification, and regression. It also supports multimedia use cases such as video and text semantic search, text to audio, text to video, and text to image. | `{} ` | What ML use cases does MindsDB support? | - - -### From Database - -The following example utilize `rag_engine` to create a model with the `CREATE MODEL` statement and `MySQL` database as a knowlege base. - - -```sql -CREATE ML_ENGINE rag_engine -FROM rag -USING - openai_api_key = 'sk-xxx'; -``` - -Connect to MySQL database: - -```sql -CREATE DATABASE mysql_demo_db -WITH ENGINE = 'mysql', -PARAMETERS = { - "user": "user", - "password": "MindsDBUser123!", - "host": "samples.mindsdb.com", - "port": "3306", - "database": "public" -}; -``` - -Create a model using this engine and include the FORM clause: - -```sql -CREATE MODEL rag_handler_db -FROM mysql_demo_db - (SELECT * FROM demo_fda_context LIMIT 2) -PREDICT answer -USING - engine="rag_engine", - llm_type="openai", - vector_store_folder_name='test_db', - input_column='question'; -``` - -Now you can use the model to answer your questions. - -```sql -SELECT * -FROM rag_handler_db -WHERE question='what product is best for treating a cold?'; -``` - -On execution, we get: - -| answer | source_documents | question | -|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------------------------| -| ShopRite Arthritis Pain Acetaminophen is not specifically designed for treating a cold. It may help to temporarily relieve minor aches and pains associated with a cold, but it is not the best product for treating a cold. It is always best to consult with a doctor or pharmacist for the most appropriate medication for treating a cold. | `{"column":["full_ingredients","indications_and_usage","intended_purpose_of_product","active_ingredient"],"sources_content":["ShopRite Arthritis ..."],"sources_document":["dataframe"],"sources_row":[1,1,1,1]}` | what product is best for treating a cold? | - - -### From File - -The following example utilize `rag_engine` to create a model with the `CREATE MODEL` statement and uploaded file as a knowlege base. - - -```sql -CREATE ML_ENGINE rag_engine -FROM rag -USING - openai_api_key = 'sk-xxx'; -``` - -Upload a [file](mindsdb_sql/sql/create/file) using the GUI `Upload File` option. 
Create a model using this engine and include the FORM clause: - -```sql -CREATE MODEL rag_handler_files -FROM files - (SELECT * FROM uploaded_file) -PREDICT answer -USING - engine="rag_engine", - llm_type="openai", - vector_store_folder_name='test_db', - input_column='question'; -``` - -Now you can use the model to answer your questions. - -```sql -SELECT * -FROM rag_handler_files -WHERE question='what product is best for treating a cold?'; -``` - -On execution, we get: - -| answer | source_documents | question | -|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|-----------------------------------------| -| ShopRite Arthritis Pain Acetaminophen is not specifically designed for treating a cold. It may help to temporarily relieve minor aches and pains associated with a cold, but it is not the best product for treating a cold. It is always best to consult with a doctor or pharmacist for the most appropriate medication for treating a cold. | `{"column":["full_ingredients","indications_and_usage","intended_purpose_of_product","active_ingredient"],"sources_content":["ShopRite Arthritis ..."],"sources_document":["dataframe"],"sources_row":[1,1,1,1]}` | what product is best for treating a cold? | From 1613f8ee5bd9419c73bdd1626b88b7cc938da82e Mon Sep 17 00:00:00 2001 From: Lucas Koontz Date: Wed, 25 Sep 2024 14:42:35 -0400 Subject: [PATCH 38/51] Feature: Add sentry monitoring (#9756) --- docker-compose.yml | 7 ++-- mindsdb/__main__.py | 1 + mindsdb/api/http/initialize.py | 1 + mindsdb/interfaces/jobs/scheduler.py | 1 + mindsdb/utilities/ml_task_queue/consumer.py | 1 + mindsdb/utilities/ml_task_queue/producer.py | 1 + mindsdb/utilities/ml_task_queue/utils.py | 1 + mindsdb/utilities/sentry.py | 46 +++++++++++++++++++++ requirements/requirements.txt | 1 + 9 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 mindsdb/utilities/sentry.py diff --git a/docker-compose.yml b/docker-compose.yml index 10aba6321d6..546e758d10a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: "3.2" - services: mindsdb: @@ -15,11 +13,14 @@ services: - '47335:47335' - '47336:47336' # watchfiles will reload the app when python files are changed - command: bash -c "watchfiles --filter python 'python -m mindsdb' ." + entrypoint: bash -c "watchfiles --filter python 'python -m mindsdb' ." environment: MINDSDB_DOCKER_ENV: "True" MINDSDB_STORAGE_DIR: "/mindsdb/var" FLASK_DEBUG: 1 # This will make sure http requests are logged regardless of log level + SENTRY_IO_DSN: "" + SENTRY_IO_ENVIRONMENT: "local" + # SENTRY_IO_FORCE_RUN: "true" # Uncomment me to force-start sentry on local development. Good for profiling, but may annoy other devs on sentry.io with the "noise" # MINDSDB_LOG_LEVEL: "DEBUG" # OPENAI_API_KEY: "..." 
volumes: diff --git a/mindsdb/__main__.py b/mindsdb/__main__.py index 8813bc45005..56787f02e94 100644 --- a/mindsdb/__main__.py +++ b/mindsdb/__main__.py @@ -34,6 +34,7 @@ from mindsdb.utilities.telemetry import telemetry_file_exists, disable_telemetry from mindsdb.utilities.context import context as ctx from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 try: import torch.multiprocessing as mp diff --git a/mindsdb/api/http/initialize.py b/mindsdb/api/http/initialize.py index 9361c92b459..fde90446971 100644 --- a/mindsdb/api/http/initialize.py +++ b/mindsdb/api/http/initialize.py @@ -49,6 +49,7 @@ from mindsdb.utilities.json_encoder import CustomJSONEncoder from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true from mindsdb.utilities.telemetry import inject_telemetry_to_static +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 logger = log.getLogger(__name__) diff --git a/mindsdb/interfaces/jobs/scheduler.py b/mindsdb/interfaces/jobs/scheduler.py index 1052232f90a..c40051c1d01 100644 --- a/mindsdb/interfaces/jobs/scheduler.py +++ b/mindsdb/interfaces/jobs/scheduler.py @@ -8,6 +8,7 @@ from mindsdb.interfaces.storage import db from mindsdb.utilities import log from mindsdb.utilities.config import Config +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 logger = log.getLogger(__name__) diff --git a/mindsdb/utilities/ml_task_queue/consumer.py b/mindsdb/utilities/ml_task_queue/consumer.py index 4bdb9607e9f..3cebced41c2 100644 --- a/mindsdb/utilities/ml_task_queue/consumer.py +++ b/mindsdb/utilities/ml_task_queue/consumer.py @@ -27,6 +27,7 @@ TASKS_STREAM_CONSUMER_GROUP_NAME ) from mindsdb.utilities import log +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 logger = log.getLogger(__name__) diff --git a/mindsdb/utilities/ml_task_queue/producer.py b/mindsdb/utilities/ml_task_queue/producer.py index 8b5769f0db1..7c3ad845b12 100644 --- a/mindsdb/utilities/ml_task_queue/producer.py +++ b/mindsdb/utilities/ml_task_queue/producer.py @@ -14,6 +14,7 @@ ML_TASK_STATUS ) from mindsdb.utilities import log +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 logger = log.getLogger(__name__) diff --git a/mindsdb/utilities/ml_task_queue/utils.py b/mindsdb/utilities/ml_task_queue/utils.py index 9284de11d9a..9409f77325b 100644 --- a/mindsdb/utilities/ml_task_queue/utils.py +++ b/mindsdb/utilities/ml_task_queue/utils.py @@ -8,6 +8,7 @@ from mindsdb.utilities.context import context as ctx from mindsdb.utilities.ml_task_queue.const import ML_TASK_STATUS +from mindsdb.utilities.sentry import sentry_sdk # noqa: F401 def to_bytes(obj: object) -> bytes: diff --git a/mindsdb/utilities/sentry.py b/mindsdb/utilities/sentry.py new file mode 100644 index 00000000000..467a14c7f43 --- /dev/null +++ b/mindsdb/utilities/sentry.py @@ -0,0 +1,46 @@ +# Prepare sentry.io for error and exception tracking +import sentry_sdk +import os +from mindsdb.utilities import log + +logger = log.getLogger(__name__) + +# Provide your sentry.io DSN here +SENTRY_IO_DSN = os.environ.get("SENTRY_IO_DSN", "") +# Define the environment +SENTRY_IO_ENVIRONMENT = os.environ.get("SENTRY_IO_ENVIRONMENT", "local").lower() +# This is set to our SHA when deployed so we know what version this occurred in +SENTRY_IO_RELEASE = os.environ.get("SENTRY_IO_RELEASE", "local").lower() +# How often to capture traces, 1.0 means 100%. 
+SENTRY_IO_TRACE_SAMPLE_RATE = float(os.environ.get("SENTRY_IO_TRACE_SAMPLE_RATE", "1.0")) +# How often to capture profiling, 1.0 means 100%. +SENTRY_IO_PROFILING_SAMPLE_RATE = float(os.environ.get("SENTRY_IO_PROFILING_SAMPLE_RATE", "1.0")) +# By default we have sentry.io enabled on all envs, except for local which is disabled by default +# If you want to enable sentry.io on local for some reason (eg: profiling) please set SENTRY_IO_FORCE_RUN to true +SENTRY_IO_DISABLED = True if (os.environ.get("SENTRY_IO_DISABLED", "false").lower() == "true" or SENTRY_IO_ENVIRONMENT == "local") else False +SENTRY_IO_FORCE_RUN = True if os.environ.get("SENTRY_IO_FORCE_RUN", "false").lower() == "true" else False + + +# If we're not disabled, or if we have forced sentry to run +if SENTRY_IO_DSN and (not SENTRY_IO_DISABLED or SENTRY_IO_FORCE_RUN): + logger.info("Sentry.io enabled") + logger.info(f"SENTRY_IO_DSN: {SENTRY_IO_DSN}") + logger.info(f"SENTRY_IO_ENVIRONMENT: {SENTRY_IO_ENVIRONMENT}") + logger.info(f"SENTRY_IO_RELEASE: {SENTRY_IO_RELEASE}") + logger.info(f"SENTRY_IO_TRACE_SAMPLE_RATE: {SENTRY_IO_TRACE_SAMPLE_RATE * 100}%") + logger.info(f"SENTRY_IO_PROFILING_SAMPLE_RATE: {SENTRY_IO_PROFILING_SAMPLE_RATE * 100}%") + + sentry_sdk.init( + dsn=SENTRY_IO_DSN, + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for tracing. + traces_sample_rate=SENTRY_IO_TRACE_SAMPLE_RATE, + # Set profiles_sample_rate to 1.0 to profile 100% + # of sampled transactions. + # We recommend adjusting this value in production. + profiles_sample_rate=SENTRY_IO_PROFILING_SAMPLE_RATE, + # What environment we're on, by default development + environment=SENTRY_IO_ENVIRONMENT, + # What release/image/etc we're using, injected in Helm/Kubernetes to be the image tag + release=SENTRY_IO_RELEASE, + ) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 462fbe38c52..937e7b334c2 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -47,3 +47,4 @@ langfuse==2.35.0 lark prometheus-client==0.20.0 transformers >= 4.42.4 +sentry-sdk[flask] == 2.14.0 From 4605cbcfa4726f19c6815a4d0c71d6abf65be42a Mon Sep 17 00:00:00 2001 From: martyna-mindsdb <109554435+martyna-mindsdb@users.noreply.github.com> Date: Thu, 26 Sep 2024 06:52:25 -0700 Subject: [PATCH 39/51] updated rayserve docs (#9765) --- docs/integrations/ai-engines/ray-serve.mdx | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/docs/integrations/ai-engines/ray-serve.mdx b/docs/integrations/ai-engines/ray-serve.mdx index 2f65b9ee88a..5f35905389e 100644 --- a/docs/integrations/ai-engines/ray-serve.mdx +++ b/docs/integrations/ai-engines/ray-serve.mdx @@ -22,13 +22,8 @@ from ray import serve import time import pandas as pd import json -from sklearn.linear_model import LogisticRegression - app = FastAPI() -ray.init() -serve.start(detached=True) - async def parse_req(request: Request): data = await request.json() @@ -57,16 +52,15 @@ class MyModel: df, _ = await parse_req(request) X = df.loc[:, self.feature_cols] predictions = self.model.predict(X) - pred_dict = {'prediction': [float(x) for x in predictions]} + index = list(range(len(predictions))) + pred_dict = {'prediction': [float(x) for x in predictions], 'index': index} return pred_dict - -MyModel.deploy() - -while True: - time.sleep(1) +my_app = MyModel.bind() ``` +After saving the above code into `rayserve.py`, run it using `serve run rayserve:my_app`. + It is important to have the `/train` and `/predict` endpoints. 
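As a rough illustration of this contract before the parameter details below (a sketch only: it assumes the app from `rayserve.py` is served locally on the default `serve run` port 8000, and that the payload shape matches what `parse_req` above accepts), the two endpoints can be exercised with plain HTTP calls:

```python
# Sketch of calling the /train and /predict endpoints described below.
# Assumes the service is reachable at localhost:8000 (`serve run` default)
# and that parse_req accepts a record-oriented JSON dataframe.
import pandas as pd
import requests

df = pd.DataFrame(
    {"number_of_rooms": [1, 2, 3], "rental_price": [1100.0, 2000.0, 3100.0]}
)

# /train: a serialized dataframe plus the name of the target column.
requests.post(
    "http://127.0.0.1:8000/train",
    json={"df": df.to_json(orient="records"), "target": "rental_price"},
)

# /predict: a serialized dataframe; the response is a dict that now
# carries both 'prediction' and 'index' keys.
response = requests.post(
    "http://127.0.0.1:8000/predict",
    json={"df": df.drop(columns=["rental_price"]).to_json(orient="records")},
)
print(response.json())
```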
The `/train` endpoint accepts two parameters to be sent via POST: @@ -80,7 +74,7 @@ The `/predict` endpoint requires one parameter to be sent via POST: - `df` is a serialized dictionary that can be converted into a pandas dataframe. -It returns a dictionary containing the `prediction` key. It stores the +It returns a dictionary containing the `prediction` and `index` keys. It stores the predictions. Additional keys can be returned for confidence and confidence intervals. @@ -119,7 +113,7 @@ AND rental_price=3000; Or you can `JOIN` the model wth a data table to get bulk predictions. ```sql -SELECT tb.number_of_rooms, t.rental_price +SELECT tb.number_of_rooms, t.rental_price, tb.index FROM mydb.test_data.home_rentals AS t JOIN mindsdb.byom_ray_serve AS tb WHERE t.rental_price > 5300; From 1f4907b9041b3f5ebc8bc2f6ba12598553588371 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Thu, 26 Sep 2024 17:33:46 +0300 Subject: [PATCH 40/51] Reset index when join dataframes (#9766) --- mindsdb/api/executor/sql_query/result_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/api/executor/sql_query/result_set.py b/mindsdb/api/executor/sql_query/result_set.py index e2d34027a7b..0d277af493c 100644 --- a/mindsdb/api/executor/sql_query/result_set.py +++ b/mindsdb/api/executor/sql_query/result_set.py @@ -220,7 +220,7 @@ def add_raw_df(self, df): if self._df is None: self._df = df else: - self._df = pd.concat([self._df, df]) + self._df = pd.concat([self._df, df], ignore_index=True) def add_raw_values(self, values): From 815fdb9bd9696beb14e9bdcd9e9e93ee7db3fbf4 Mon Sep 17 00:00:00 2001 From: Andrey Date: Fri, 27 Sep 2024 17:37:25 +0300 Subject: [PATCH 41/51] Fix HF: new transformers version returns ValueError (#9770) --- .../handlers/huggingface_handler/huggingface_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py b/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py index 79c8a9a1457..61ca41162bb 100644 --- a/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +++ b/mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py @@ -120,7 +120,7 @@ def create(self, target, args=None, **kwargs): logger.debug(f'Model already downloaded!') #### # Otherwise download it - except OSError: + except (ValueError, OSError): try: logger.debug(f"Downloading {model_name}...") pipeline = transformers.pipeline(task=args['task_proper'], model=model_name) @@ -277,7 +277,7 @@ def predict(self, df, args=None): model=hf_model_storage_path, tokenizer=hf_model_storage_path, ) - except OSError: + except (ValueError, OSError): # load from engine storage (i.e. 
'common' models) hf_model_storage_path = self.engine_storage.folder_get( args["model_name"] From eafef88ee318d8b8219fba46bc621c848d45ab18 Mon Sep 17 00:00:00 2001 From: Andrey Date: Fri, 27 Sep 2024 18:48:12 +0300 Subject: [PATCH 42/51] Fix checking table lists in agent permissions (#9767) --- mindsdb/interfaces/skills/sql_agent.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mindsdb/interfaces/skills/sql_agent.py b/mindsdb/interfaces/skills/sql_agent.py index 383185b8ccf..396c46dea15 100644 --- a/mindsdb/interfaces/skills/sql_agent.py +++ b/mindsdb/interfaces/skills/sql_agent.py @@ -65,9 +65,11 @@ def _check_permissions(self, ast_query): if self._tables_to_include: def _check_f(node, is_table=None, **kwargs): if is_table and isinstance(node, Identifier): - table = node.parts[-1] - if table not in self._tables_to_include: - raise ValueError(f"Table {table} not found. Available tables: {', '.join(self._tables_to_include)}") + name1 = node.to_string() + name2 = '.'.join(node.parts) + name3 = node.parts[-1] + if not {name1, name2, name3}.intersection(self._tables_to_include): + raise ValueError(f"Table {name1} not found. Available tables: {', '.join(self._tables_to_include)}") query_traversal(ast_query, _check_f) From 6eba730ce167d6842e13274d4c42ec36dd30619e Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Fri, 27 Sep 2024 18:02:33 +0200 Subject: [PATCH 43/51] Release (#9771) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index 8606b8d28ce..9695eea425c 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.4.0' +__version__ = '24.9.4.1' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From b8b8058c4fc542366f2b33435ad0ca92970873c9 Mon Sep 17 00:00:00 2001 From: MindsDB DevOps / Infrastructure <144632297+mindsdb-devops@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:27:28 -0700 Subject: [PATCH 44/51] Langfuse minor fix during login/auth (#9772) --- docker-compose.yml | 8 ++++++++ mindsdb/interfaces/agents/langchain_agent.py | 15 ++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 546e758d10a..9ab06e2a2f0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,14 @@ services: # SENTRY_IO_FORCE_RUN: "true" # Uncomment me to force-start sentry on local development. Good for profiling, but may annoy other devs on sentry.io with the "noise" # MINDSDB_LOG_LEVEL: "DEBUG" # OPENAI_API_KEY: "..." + LANGFUSE_HOST: "OMITTED_FOR_SECURITY" + LANGFUSE_PUBLIC_KEY: "OMITTED_FOR_SECURITY" + LANGFUSE_SECRET_KEY: "OMITTED_FOR_SECURITY" + LANGFUSE_RELEASE: "local" + # LANGFUSE_DEBUG: "True" + LANGFUSE_TIMEOUT: "10" + LANGFUSE_SAMPLE_RATE: "1.0" + volumes: - type: bind source: . 
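The `langchain_agent.py` change that follows wraps Langfuse's `auth_check()` — a network round-trip — so that bad credentials or connectivity problems log an error instead of crashing the agent. In isolation, the guarded pattern looks roughly like this (a sketch; the env var names mirror the compose file above):

```python
# Sketch of the guarded Langfuse health check introduced in this patch;
# env var names mirror docker-compose.yml above.
import os
import logging

from langfuse.callback import CallbackHandler

logger = logging.getLogger(__name__)

handler = CallbackHandler(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
    host=os.getenv("LANGFUSE_HOST"),
)

try:
    # auth_check() calls out to the Langfuse host; never let a failure
    # here take the agent down.
    if not handler.auth_check():
        logger.error("Incorrect Langfuse credentials provided")
except Exception as e:
    logger.error(f"Langfuse auth_check failed: {e}")
```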
diff --git a/mindsdb/interfaces/agents/langchain_agent.py b/mindsdb/interfaces/agents/langchain_agent.py index 807614f4f7e..8a6ef48bcaa 100644 --- a/mindsdb/interfaces/agents/langchain_agent.py +++ b/mindsdb/interfaces/agents/langchain_agent.py @@ -201,7 +201,8 @@ def __init__(self, agent: db.Agents, model): self.langfuse = Langfuse( public_key=os.getenv('LANGFUSE_PUBLIC_KEY'), secret_key=os.getenv('LANGFUSE_SECRET_KEY'), - host=os.getenv('LANGFUSE_HOST') + host=os.getenv('LANGFUSE_HOST'), + release=os.getenv('LANGFUSE_RELEASE', 'local'), ) # agent is using current langchain model @@ -473,10 +474,14 @@ def _get_agent_callbacks(self, args: Dict) -> List: tags=get_tags(metadata), metadata=metadata, ) - if not self.langfuse_callback_handler.auth_check(): - logger.error( - f"Incorrect Langfuse credentials provided to Langchain handler. Full args: {args}" - ) + try: + # This try is critical to catch fatal errors which would otherwise prevent the agent from running properly + if not self.langfuse_callback_handler.auth_check(): + logger.error( + f"Incorrect Langfuse credentials provided to Langchain handler. Full args: {args}" + ) + except Exception as e: + logger.error(f'Something went wrong while running langfuse_callback_handler.auth_check {str(e)}') # custom tracer if self.mdb_langfuse_callback_handler is None: From b011cf84bb055d64b63961a26f41508906c8b1e8 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:18:19 +0530 Subject: [PATCH 45/51] Slack Integration Fixes: Pagination for Channels and Handling App Mentions (#9732) --- docs/integrations/app-integrations/slack.mdx | 72 ++++-- docs/mindsdb_sql/sql/create/trigger.mdx | 16 +- .../customer-reviews-notifications.mdx | 12 +- .../real-time-trading-forecasts.mdx | 12 +- .../ai_workflow_automation/slack-chatbot.mdx | 52 ++-- .../handlers/slack_handler/slack_handler.py | 240 +++++++++++++----- 6 files changed, 282 insertions(+), 122 deletions(-) diff --git a/docs/integrations/app-integrations/slack.mdx b/docs/integrations/app-integrations/slack.mdx index 0525de68254..c9d8f127197 100644 --- a/docs/integrations/app-integrations/slack.mdx +++ b/docs/integrations/app-integrations/slack.mdx @@ -75,7 +75,7 @@ Here is how to set up a Slack app and generate both a Slack bot token and a Slac 6. Go to *App Home* under *Features* and click on the checkbox to *Allow users to send Slash commands and messages from the messages tab*. 7. Go to *Event Subscriptions* under *Features*. - Toggle the button to *Enable Events*. - - Under *Subscribe to bot events*, click on *Add Bot User Event* and add `app_mention`, `message_channels`, and `message.im`. + - Under *Subscribe to bot events*, click on *Add Bot User Event* and add `app_mention` and `message.im`. - Click on *Save Changes*. 8. Now you can use tokens from points 3 and 5 to initialize the Slack handler in MindsDB. @@ -102,7 +102,7 @@ WITH }; ``` -It comes with the `channels` and `channel_lists` table. +It comes with the `conversations` and `messages` tables. ### Method 2: Chatbot responds on a defined Slack channel @@ -145,7 +145,7 @@ WITH }; ``` -It comes with the `channels` and `channel_lists` table. +It comes with the `conversations` and `messages` tables. ## Usage @@ -155,26 +155,40 @@ The following usage applies when **Connection Method 2** was used to connect Sla See the usage for **Connection Method 1** [via the `CREATE CHATBOT` syntax](/sql/tutorials/create-chatbot). 
-You can select all messages from the channel using the below query. +You can select all messages from a conversation using the below query. ```sql SELECT * -FROM mindsdb_slack.channels -WHERE channel=""; +FROM mindsdb_slack.messages +WHERE channel_id=""; ``` -Also, you can post messages to the channel like this: + +To find the channel ID of a conversation, you can use the `conversations` table: ```sql -INSERT INTO mindsdb_slack.channels (channel, text) -VALUES("", "Hey MindsDB, Thanks to you! Now I can respond to my Slack messages through SQL Queries."); +SELECT * +FROM mindsdb_slack.conversations +WHERE name = ""; +``` + +Please note that if your workspace has more than 1000 conversations, you may need to use the `LIMIT` clause to retrieve all conversations. More information on this can be found below. + +You can also find the channel ID by right-clicking on the conversation in Slack, selecting 'View conversation details' or 'View channel details,' and copying the channel ID from the bottom of the 'About' tab. + + +Also, you can post messages to the conversation like this: + +```sql +INSERT INTO mindsdb_slack.messages (channel_id, text) +VALUES("", "Hey MindsDB, Thanks to you! Now I can respond to my Slack messages through SQL Queries."); ``` And you can delete messages, but only the ones posted by the bot. ```sql -DELETE FROM mindsdb_slack.channels -WHERE channel = "" +DELETE FROM mindsdb_slack.messages +WHERE channel_id = "" AND ts = "1688863707.197229"; ``` @@ -182,8 +196,8 @@ Let's select 10 messages created after the specified timestamp: ```sql SELECT * -FROM mindsdb_slack.channels -WHERE channel="" +FROM mindsdb_slack.messages +WHERE channel_id = "" AND message_created_at > '2023-07-25 00:13:07' LIMIT 10; ``` @@ -192,17 +206,39 @@ We can also order the selected messages: ```sql SELECT * -FROM mindsdb_slack.channels -WHERE channel="" -ORDER BY messages ASC +FROM mindsdb_slack.messages +WHERE channel_id = "" +ORDER BY created_at ASC; LIMIT 5; ``` -List all channels by selecting from the `channel_lists` table. +List all conversations by selecting from the `conversations` table. + +```sql +SELECT * +FROM mindsdb_slack.conversations; +``` + + +The `conversations` table will return the first 1000 conversations by default. To retrieve more, use the `LIMIT` clause. + +When using the `LIMIT` clause to query additional conversations, you may encounter Slack API rate limits. + + +You can also filter conversations by providing the channel ID: + +```sql +SELECT * +FROM mindsdb_slack.conversations +WHERE channel_id = ""; +``` + +To select multiple conversations, provide a list of channel IDs in an `IN` clause: ```sql SELECT * -FROM mindsdb_slack.channel_lists; +FROM mindsdb_slack.conversations +WHERE channel_id IN ("", ""); ``` diff --git a/docs/mindsdb_sql/sql/create/trigger.mdx b/docs/mindsdb_sql/sql/create/trigger.mdx index d6eaf41322d..16078b8884a 100644 --- a/docs/mindsdb_sql/sql/create/trigger.mdx +++ b/docs/mindsdb_sql/sql/create/trigger.mdx @@ -81,8 +81,8 @@ Here is how to generate answers to Slack messages using the model: ```sql SELECT s.text AS question, m.answer FROM chatbot_model m -JOIN mindsdb_slack.channels s -WHERE s.channel = 'slack-bot-channel' +JOIN mindsdb_slack.messages s +WHERE s.channel_id = 'slack-bot-channel-id' AND s.user != 'U07J30KPAUF' AND s.created_at > LAST; ``` @@ -90,7 +90,7 @@ AND s.created_at > LAST; Let's analyze this query: - We select the question from the Slack connection and the answer generated by the model. 
-- We join the model with the `channels` table. +- We join the model with the `messages` table. - In the `WHERE` clause: - We provide the channel name where the app/bot is integrated. - We exclude the messages sent by the app/bot. You can find the user ID of the app/bot by querying the `mindsdb_slack.users` table. @@ -100,10 +100,10 @@ Finally, create a trigger that will insert an answer generated by the model ever ```sql CREATE TRIGGER slack_trigger -ON mindsdb_slack.channels +ON mindsdb_slack.messages ( - INSERT INTO mindsdb_slack.channels (channel, text) - SELECT 'slack-bot-channel' AS channel, answer AS text + INSERT INTO mindsdb_slack.messages (channel_id, text) + SELECT 'slack-bot-channel-id' AS channel_id, answer AS text FROM chatbot_model m JOIN TABLE_DELTA s WHERE s.user != 'U07J30KPAUF' @@ -113,8 +113,8 @@ ON mindsdb_slack.channels Let's analyze this statement: - We create a trigger named `slack_trigger`. -- The trigger is created on the `mindsdb_slack.channels` table. Therefore, every time when data is added or updated, the trigger will execute its code. +- The trigger is created on the `mindsdb_slack.messages` table. Therefore, every time when data is added or updated, the trigger will execute its code. - We provide the code to be executed by the trigger every time the triggering event takes place. - - We insert an answer generated by the model into the `channels` table. + - We insert an answer generated by the model into the `messages` table. - The `TABLE_DELTA` stands for the table on which the trigger has been created. - We exclude the messages sent by the app/bot. You can find the user ID of the app/bot by querying the `mindsdb_slack.users` table. diff --git a/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx b/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx index 984ee3b372f..40d47b3e751 100644 --- a/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx +++ b/docs/use-cases/ai_workflow_automation/customer-reviews-notifications.mdx @@ -104,8 +104,8 @@ PARAMETERS = { Send a test message to test the connection. ```sql -INSERT INTO customer_reviews_slack_app.channels (channel, text) -VALUES("customer-reviews", "Testing Slack connection"); +INSERT INTO customer_reviews_slack_app.messages (channel_id, text) +VALUES("customer-reviews-channel-id", "Testing Slack connection"); ``` [Create a job](/mindsdb_sql/sql/create/jobs) to send notification every time a negative review is received. 
@@ -113,8 +113,8 @@ VALUES("customer-reviews", "Testing Slack connection"); ```sql CREATE JOB customer_reviews_notifications ( - INSERT INTO customer_reviews_slack_app.channels (channel, text) - SELECT "customer-reviews" as channel, + INSERT INTO customer_reviews_slack_app.messages (channel_id, text) + SELECT "customer-reviews-channel-id" as channel_id, concat('Product: ', input.product_name, chr(10), 'Received negative review at: ', input.created_at, chr(10), 'Review: ', input.review) as text FROM local_postgres.demo.amazon_reviews AS input JOIN sentiment_classifier AS output @@ -184,8 +184,8 @@ JOIN response_model AS output; ```sql CREATE JOB customer_reviews_and_reponses_notifications ( - INSERT INTO customer_reviews_slack_app.channels (channel, text) - SELECT "customer-reviews" as channel, + INSERT INTO customer_reviews_slack_app.messages (channel_id, text) + SELECT "customer-reviews-channel-id" as channel_id, concat('---------', chr(10), 'Product: ', input.product_name, chr(10), 'Received ', input.sentiment, ' review at: ', input.created_at, chr(10), diff --git a/docs/use-cases/ai_workflow_automation/real-time-trading-forecasts.mdx b/docs/use-cases/ai_workflow_automation/real-time-trading-forecasts.mdx index 89a46748646..a7640142c67 100644 --- a/docs/use-cases/ai_workflow_automation/real-time-trading-forecasts.mdx +++ b/docs/use-cases/ai_workflow_automation/real-time-trading-forecasts.mdx @@ -107,8 +107,8 @@ PARAMETERS = { Here is how to send messages to a Slack channel: ```sql -INSERT INTO btcusdt_slack_app.channels (channel, text) -VALUES("slack-channel-name", "BTCUSDT forecasts coming soon."); +INSERT INTO btcusdt_slack_app.messages (channel_id, text) +VALUES("slack-channel-id", "BTCUSDT forecasts coming soon."); ``` So, let’s put it all together again. 
@@ -135,10 +135,10 @@ join_learn_process = true; -- step 2: make fresh forecasts for the following 10 minutes and insert it into slack -- -INSERT INTO btcusdt_slack_app.channels (channel, text) -VALUES("slack-channel-name", "Here are the BTCUSDT forecasts for the next 10 minutes:"); -INSERT INTO btcusdt_slack_app.channels (channel, text) -SELECT "slack-channel-name" AS channel, concat('timestamp: ', cast(to_timestamp(cast(m.open_time as bigint)) as string), ' -> open price: ', m.open_price) AS text +INSERT INTO btcusdt_slack_app.messages (channel_id, text) +VALUES("slack-channel-id", "Here are the BTCUSDT forecasts for the next 10 minutes:"); +INSERT INTO btcusdt_slack_app.messages (channel_id, text) +SELECT "slack-channel-id" AS channel_id, concat('timestamp: ', cast(to_timestamp(cast(m.open_time as bigint)) as string), ' -> open price: ', m.open_price) AS text FROM btcusdt_recent AS d JOIN cryptocurrency_forecast_model AS m WHERE d.open_time > LATEST; diff --git a/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx b/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx index ba7dfc2a4d5..9d687349438 100644 --- a/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx +++ b/docs/use-cases/ai_workflow_automation/slack-chatbot.mdx @@ -37,8 +37,8 @@ Here is how to retrieve the 10 messages after specific timestamp: ```sql SELECT * -FROM mindsdb_slack.channels -WHERE channel = "" +FROM mindsdb_slack.messages +WHERE channel_id = "" AND created_at > '2023-07-25 00:13:07' -- created_at stores the timestamp when the message was created LIMIT 10; ``` @@ -47,8 +47,8 @@ You can also retrieve messages in alphabetical order: ```sql SELECT * -FROM mindsdb_slack.channels -WHERE channel = "" +FROM mindsdb_slack.messages +WHERE channel_id = "" ORDER BY text ASC LIMIT 5; ``` @@ -58,17 +58,17 @@ By default, it retrieves by the order the messages were sent, unless specified a Here is how to post messages: ```sql -INSERT INTO mindsdb_slack.channels (channel, text) +INSERT INTO mindsdb_slack.messages (channel_id, text) VALUES - ("", "Hey MindsDB, Thanks to you! Now I can respond to my Slack messages through SQL Queries. 🚀 "), - ("", "It's never been that easy to build ML apps using MindsDB!"); + ("", "Hey MindsDB, Thanks to you! Now I can respond to my Slack messages through SQL Queries. 🚀 "), + ("", "It's never been that easy to build ML apps using MindsDB!"); ``` Whoops! Sent it by mistake? No worries! Use this to delete a specific message: ```sql -DELETE FROM mindsdb_slack.channels -WHERE channel = "" AND ts = "1688863707.197229"; +DELETE FROM mindsdb_slack.messages +WHERE channel_id = "" AND ts = "1688863707.197229"; ``` Now, let's roll up our sleeves and start building the GPT-4 Model together. @@ -163,18 +163,18 @@ WHERE text = 'if a time-traveling astronaut had a dance-off with a black hole, w ### 3. Let's Connect our GPT Model to Slack! -The `channels` table can be used to search for `channels`, `messages`, and `timestamps`, as well as to post messages into slack channels. These functionalities can also be done by using Slack API or Webhooks. +The `messages` table can be used to search for `channels`, `messages`, and `timestamps`, as well as to post messages into Slack conversations. These functionalities can also be done by using Slack API or Webhooks. 
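For reference, the direct `slack_sdk` equivalents of these table operations — the same `WebClient` API the handler wraps — look roughly like this (a sketch; the token and channel ID are placeholders):

```python
# Sketch of the raw slack_sdk calls behind the SQL examples above;
# the token and channel ID are placeholders.
from slack_sdk import WebClient

client = WebClient(token="xoxb-your-bot-token")

# Reading messages: the SELECT ... FROM messages equivalent.
history = client.conversations_history(channel="C0123456789", limit=10)
for message in history["messages"]:
    print(message.get("user"), message.get("text"))

# Posting a message: the INSERT INTO messages equivalent.
client.chat_postMessage(channel="C0123456789", text="Hello from the bot!")
```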
-Let's query the user's question and see how our GPT model responds to it, by joining the model with the `channels` table: +Let's query the user's question and see how our GPT model responds to it, by joining the model with the `messages` table: ```sql SELECT - t.channel as channel, + t.channel_id as channel_id, t.text as input_text, r.response as output_text -FROM mindsdb_slack.channels as t +FROM mindsdb_slack.messages as t JOIN mindsdb.whizfizz_model as r -WHERE t.channel = "" +WHERE t.channel_id = "" LIMIT 3; ``` @@ -183,13 +183,13 @@ LIMIT 3; We want to respond to the user's questions by posting the output of our newly created **WhizFizz** Model. Let's post the message by querying and joining the user's questions to our model: ```sql -INSERT INTO mindsdb_slack.channels(channel, text) +INSERT INTO mindsdb_slack.messages(channel_id, text) SELECT - t.channel as channel, + t.channel_id as channel_id, r.response as text - FROM mindsdb_slack.channels as t + FROM mindsdb_slack.messages as t JOIN mindsdb.whizfizz_model as r - WHERE t.channel = "" + WHERE t.channel_id = "" LIMIT 3; ``` @@ -212,13 +212,13 @@ Let's do it in single SQL statement: CREATE JOB mindsdb.gpt4_slack_job AS ( -- insert into channels the output of joining model and new responses - INSERT INTO mindsdb_slack.channels(channel, text) + INSERT INTO mindsdb_slack.messages(channel_id, text) SELECT - t.channel as channel, + t.channel_id as channel_id, r.response as text - FROM mindsdb_slack.channels as t + FROM mindsdb_slack.messages as t JOIN mindsdb.whizfizz_model as r - WHERE t.channel = "" + WHERE t.channel_id = "" AND t.created_at > LAST AND t.user = 'user_id' -- to avoid the bot replying to its own messages, include users to which bot should reply --AND t.user != 'bot_id' -- alternatively, to avoid the bot replying to its own messages, exclude the user id of the bot @@ -250,13 +250,13 @@ DROP JOB gpt4_slack_job; Alternatively, you can create a trigger on Slack, instead of scheduling a job. This way, every time new messages are posted, the trigger executes. 
```sql -CREATE TRIGGER slack_trigger ON mindsdb_slack.channels ( +CREATE TRIGGER slack_trigger ON mindsdb_slack.messages ( - INSERT INTO mindsdb_slack.channels(channel, text) - SELECT t.channel as channel, a.sentiment as text, + INSERT INTO mindsdb_slack.messages(channel_id, text) + SELECT t.channel_id as channel_id, a.sentiment as text, FROM data_table t JOIN model_table as a - WHERE t.channel = 'channel_name' + WHERE t.channel_id = '' AND t.user != 'bot_id' -- exclude bot ); ``` diff --git a/mindsdb/integrations/handlers/slack_handler/slack_handler.py b/mindsdb/integrations/handlers/slack_handler/slack_handler.py index 79cef3c334f..7ffb8a540c8 100644 --- a/mindsdb/integrations/handlers/slack_handler/slack_handler.py +++ b/mindsdb/integrations/handlers/slack_handler/slack_handler.py @@ -1,4 +1,5 @@ import os +import json import datetime as dt from typing import List import pandas as pd @@ -29,13 +30,41 @@ DATE_FORMAT = '%Y-%m-%d %H:%M:%S' -class SlackChannelListsTable(APIResource): +class SlackConversationsTable(APIResource): - def list(self, **kwargs) -> pd.DataFrame: - - client = self.handler.connect() + def list( + self, + conditions: List[FilterCondition] = None, + limit: int = None, + **kwargs + ) -> pd.DataFrame: + channels = [] + for condition in conditions: + value = condition.value + op = condition.op - channels = client.conversations_list(types="public_channel,private_channel")['channels'] + if condition.column == 'id': + if op not in [FilterOperator.EQUAL, FilterOperator.IN]: + raise ValueError(f"Unsupported operator '{op}' for column 'id'") + + if op == FilterOperator.EQUAL: + try: + channels = [self.handler.get_channel(value)] + condition.applied = True + except ValueError: + raise + + if op == FilterOperator.IN: + try: + channels = self.handler.get_channels( + value if isinstance(value, list) else [value] + ) + condition.applied = True + except ValueError: + raise + + if not channels: + channels = self.handler.get_limited_channels(limit) for channel in channels: channel['created_at'] = dt.datetime.fromtimestamp(channel['created']) @@ -47,8 +76,19 @@ def get_columns(self) -> List[str]: return [ 'id', 'name', + 'is_channel', + 'is_group', + 'is_im', + 'is_mpim', + 'is_private', + 'is_archived', + 'is_general', + 'is_shared', + 'is_ext_shared', + 'is_org_shared', + 'creator', 'created_at', - 'updated_at' + 'updated_at', ] @@ -77,12 +117,13 @@ def get_columns(self) -> List[str]: ] -class SlackChannelsTable(APIResource): +class SlackMessagesTable(APIResource): def list(self, - conditions: List[FilterCondition] = None, - limit: int = None, - **kwargs) -> pd.DataFrame: + conditions: List[FilterCondition] = None, + limit: int = None, + **kwargs + ) -> pd.DataFrame: """ Retrieves the data from the channel using SlackAPI @@ -97,13 +138,6 @@ def parse_utc_date(date_str): date_obj = dt.datetime.fromisoformat(date_str).replace(tzinfo=dt.timezone.utc) return date_obj - # Get the channels list and ids - channels = client.conversations_list(types="public_channel,private_channel")['channels'] - channel_ids = {c['name']: c['id'] for c in channels} - - # Extract comparison conditions from the query - channel_name = None - # Build the filters and parameters for the query params = {} @@ -111,12 +145,16 @@ def parse_utc_date(date_str): value = condition.value op = condition.op - if condition.column == 'channel': - if value in channel_ids: - params['channel'] = channel_ids[value] - channel_name = value + if condition.column == 'channel_id': + if op != FilterOperator.EQUAL: + raise 
ValueError(f"Unsupported operator '{op}' for column 'channel_id'") + + # Check if the channel exists + try: + channel = self.handler.get_channel(value) + params['channel'] = value condition.applied = True - else: + except SlackApiError as e: raise ValueError(f"Channel '{value}' not found") elif condition.column == 'created_at' and value is not None: @@ -134,6 +172,9 @@ def parse_utc_date(date_str): if limit: params['limit'] = limit + if 'channel' not in params: + raise Exception("To retrieve data from Slack, you need to provide the 'channel_id' parameter.") + # Retrieve the conversation history result = client.conversations_history(**params) @@ -144,7 +185,8 @@ def parse_utc_date(date_str): result = result[result['text'].notnull()] # Add the selected channel to the dataframe - result['channel'] = channel_name + result['channel_id'] = params['channel'] + result['channel_name'] = channel['name'] if 'name' in channel else None # translate the time stamp into a 'created_at' field result['created_at'] = pd.to_datetime(result['ts'].astype(float), unit='s').dt.strftime('%Y-%m-%d %H:%M:%S') @@ -153,6 +195,7 @@ def parse_utc_date(date_str): def get_columns(self) -> List[str]: return [ + 'channel_id', 'channel', 'client_msg_id', 'type', @@ -187,13 +230,13 @@ def insert(self, query): params = dict(zip(columns, row)) # check if required parameters are provided - if 'channel' not in params or 'text' not in params: - raise Exception("To insert data into Slack, you need to provide the 'channel' and 'text' parameters.") + if 'channel_id' not in params or 'text' not in params: + raise Exception("To insert data into Slack, you need to provide the 'channel_id' and 'text' parameters.") # post message to Slack channel try: response = client.chat_postMessage( - channel=params['channel'], + channel=params['channel_id'], text=params['text'] ) except SlackApiError as e: @@ -213,10 +256,6 @@ def update(self, query: ast.Update): """ client = self.handler.connect() - # Get the channels list and ids - channels = client.conversations_list(types="public_channel,private_channel")['channels'] - channel_ids = {c['name']: c['id'] for c in channels} - # Extract comparison conditions from the query conditions = extract_comparison_conditions(query.where) @@ -227,10 +266,12 @@ def update(self, query: ast.Update): # Build the filters and parameters for the query for op, arg1, arg2 in conditions: - if arg1 == 'channel': - if arg2 in channel_ids: - params['channel'] = channel_ids[arg2] - else: + if arg1 == 'channel_id': + # Check if the channel exists + try: + self.handler.get_channel(arg2) + params['channel'] = arg2 + except SlackApiError as e: raise ValueError(f"Channel '{arg2}' not found") if keys[0] == 'text': @@ -270,9 +311,6 @@ def delete(self, query: ASTNode): """ client = self.handler.connect() - # Get the channels list and ids - channels = client.conversations_list(types="public_channel,private_channel")['channels'] - channel_ids = {c['name']: c['id'] for c in channels} # Extract comparison conditions from the query conditions = extract_comparison_conditions(query.where) @@ -281,10 +319,12 @@ def delete(self, query: ASTNode): # Build the filters and parameters for the query for op, arg1, arg2 in conditions: - if arg1 == 'channel': - if arg2 in channel_ids: - params['channel'] = channel_ids[arg2] - else: + if arg1 == 'channel_id': + # Check if the channel exists + try: + self.handler.get_channel(arg2) + params['channel'] = arg2 + except SlackApiError as e: raise ValueError(f"Channel '{arg2}' not found") if arg1 == 'ts': @@ 
-325,6 +365,7 @@ def __init__(self, name=None, **kwargs): super().__init__(name) args = kwargs.get('connection_data', {}) + self.handler_storage = kwargs.get('handler_storage') self.connection_args = {} handler_config = Config().get('slack_handler', {}) for k in ['token', 'app_token']: @@ -337,11 +378,11 @@ def __init__(self, name=None, **kwargs): self.api = None self.is_connected = False - channels = SlackChannelsTable(self) - self._register_table('channels', channels) + channels = SlackMessagesTable(self) + self._register_table('messages', channels) - channel_lists = SlackChannelListsTable(self) - self._register_table('channel_lists', channel_lists) + channel_lists = SlackConversationsTable(self) + self._register_table('conversations', channel_lists) users = SlackUsersTable(self) self._register_table('users', users) @@ -352,11 +393,11 @@ def get_chat_config(self): params = { 'polling': { 'type': 'realtime', - 'table_name': 'channels' + 'table_name': 'messages' }, 'chat_table': { - 'name': 'channels', - 'chat_id_col': 'channel', + 'name': 'messages', + 'chat_id_col': 'channel_id', 'username_col': 'user', 'text_col': 'text', 'time_col': 'thread_ts', @@ -365,13 +406,12 @@ def get_chat_config(self): return params def get_my_user_name(self): - # TODO api = self.connect() - resp = api.users_profile_get() - return resp.data['profile']['bot_id'] + user_info = api.auth_test().data + return user_info['bot_id'] def subscribe(self, stop_event, callback, table_name, **kwargs): - if table_name != 'channels': + if table_name != 'messages': raise RuntimeError(f'Table not supported: {table_name}') self._socket_mode_client = SocketModeClient( @@ -389,28 +429,31 @@ def _process_websocket_message(client: SocketModeClient, request: SocketModeRequ if request.type != 'events_api': return - # ignore duplicated requests + # Ignore duplicated requests if request.retry_attempt is not None and request.retry_attempt > 0: return payload_event = request.payload['event'] + if payload_event['type'] not in ('message', 'app_mention'): + # TODO: Refresh the channels cache return + if 'subtype' in payload_event: - # Don't respond to message_changed, message_deleted, etc. + # Avoid responding to message_changed, message_deleted, etc. return if 'bot_id' in payload_event: - # A bot sent this message. + # Avoid responding to messages from the bot return key = { - 'channel': payload_event['channel'], + 'channel_id': payload_event['channel'], } row = { 'text': payload_event['text'], 'user': payload_event['user'], - 'channel': payload_event['channel'], + 'channel_id': payload_event['channel'], 'created_at': dt.datetime.fromtimestamp(float(payload_event['ts'])).strftime('%Y-%m-%d %H:%M:%S') } @@ -423,7 +466,6 @@ def _process_websocket_message(client: SocketModeClient, request: SocketModeRequ self._socket_mode_client.close() - def create_connection(self): """ Creates a WebClient object to connect to the Slack API token stored in the connection_args attribute. @@ -512,6 +554,88 @@ def call_slack_api(self, method_name: str = None, params: dict = None): result['channels'] = self.convert_channel_data(result['channels']) return [result] + + def get_channel(self, channel_id: str): + """ + Get the channel data by channel id. + + Args: + channel_id: str + The channel id. + + Returns: + dict + The channel data. 
+ """ + client = self.connect() + + try: + response = client.conversations_info(channel=channel_id) + except SlackApiError as e: + logger.error(f"Error getting channel '{channel_id}': {e.response['error']}") + raise ValueError(f"Channel '{channel_id}' not found") + + return response['channel'] + + def get_channels(self, channel_ids: List[str]): + """ + Get the channel data by channel ids. + + Args: + channel_ids: List[str] + The channel ids. + + Returns: + List[dict] + The channel data. + """ + # TODO: Handle rate limiting + channels = [] + for channel_id in channel_ids: + try: + channel = self.get_channel(channel_id) + channels.append(channel) + except SlackApiError: + logger.error(f"Channel '{channel_id}' not found") + raise ValueError(f"Channel '{channel_id}' not found") + + return channels + + def get_limited_channels(self, limit: int = None): + """ + Get the list of channels with a limit. + If the provided limit is greater than 1000, provide no limit to the API call and paginate the results until the limit is reached. + + Args: + limit: int + The limit of the channels to return. + + Returns: + List[dict] + The list of channels. + """ + client = self.connect() + + try: + if limit and limit > 1000: + response = client.conversations_list() + channels = response['channels'] + + while response['response_metadata']['next_cursor']: + response = client.conversations_list(cursor=response['response_metadata']['next_cursor']) + channels.extend(response['channels']) + if len(channels) >= limit: + break + + channels = channels[:limit] + else: + response = client.conversations_list(limit=limit if limit else 1000) + channels = response['channels'] + except SlackApiError as e: + logger.error(f"Error getting channels: {e.response['error']}") + raise ValueError(f"Error getting channels: {e.response['error']}") + + return channels def convert_channel_data(self, channels: List[dict]): """ From 9f9a0c288182fccdaa6e1ea83b83b88edc8b4d9c Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 30 Sep 2024 14:48:32 +0300 Subject: [PATCH 46/51] Fix agent for mistral model (#9753) --- mindsdb/interfaces/agents/callback_handlers.py | 8 ++++++++ .../interfaces/agents/safe_output_parser.py | 2 +- .../skills/custom/text2sql/mindsdb_sql_tool.py | 18 +++++++++++++----- mindsdb/interfaces/skills/sql_agent.py | 7 ++++++- 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/mindsdb/interfaces/agents/callback_handlers.py b/mindsdb/interfaces/agents/callback_handlers.py index ab836a61d27..3bca7e02983 100644 --- a/mindsdb/interfaces/agents/callback_handlers.py +++ b/mindsdb/interfaces/agents/callback_handlers.py @@ -113,10 +113,18 @@ def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any: '''Run on agent action.''' self.logger.debug(f'Running tool {action.tool} with input:') self.logger.debug(action.tool_input) + + stop_block = 'Observation: ' + if stop_block in action.tool_input: + action.tool_input = action.tool_input[: action.tool_input.find(stop_block)] + if action.tool.startswith("sql_db_query"): # Save the generated SQL query self.generated_sql = action.tool_input + # fix for mistral + action.tool = action.tool.replace('\\', '') + def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any: '''Run on agent end.''' self.logger.debug('Agent finished with return values:') diff --git a/mindsdb/interfaces/agents/safe_output_parser.py b/mindsdb/interfaces/agents/safe_output_parser.py index 94d9fde179a..5642669b738 100644 --- a/mindsdb/interfaces/agents/safe_output_parser.py +++ 
b/mindsdb/interfaces/agents/safe_output_parser.py @@ -33,7 +33,7 @@ def parse(self, text: str) -> Union[AgentAction, AgentFinish]: Returns: output (str): Parsed text to an Agent step. ''' - regex = r'Action: (.*?)[\n]*Action Input: ([\s\S]*)' + regex = r'Action: (.*?)[\n]*Action Input:([\s\S]*)' match = re.search(regex, text, re.DOTALL) if match is not None: action = match.group(1) diff --git a/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py b/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py index 1efd2bd2fdd..01614debe7c 100644 --- a/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py +++ b/mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_tool.py @@ -20,11 +20,19 @@ def _clean_query(self, query: str) -> str: cmd = re.sub(r'```(sql)?', '', query) return cmd + def _query_options(self, query): + yield query + if '\\_' in query: + yield query.replace('\\_', '_') + def _run(self, query: str): """Validate the SQL query.""" clean_query = self._clean_query(query) - try: - ast_query = parse_sql(clean_query, dialect='mindsdb') - return "".join(f"valid query: {ast_query.to_string()}") - except Exception as e: - return "".join(f"invalid query, with error: {e}") + for query in self._query_options(clean_query): + try: + ast_query = parse_sql(query, dialect='mindsdb') + return "".join(f"valid query: {ast_query.to_string()}") + except Exception as e: + error = "".join(f"invalid query, with error: {e}") + continue + return error diff --git a/mindsdb/interfaces/skills/sql_agent.py b/mindsdb/interfaces/skills/sql_agent.py index 396c46dea15..72daf40038f 100644 --- a/mindsdb/interfaces/skills/sql_agent.py +++ b/mindsdb/interfaces/skills/sql_agent.py @@ -122,6 +122,8 @@ def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifi tables = [] for table_name in table_names: + if not table_name.strip(): + continue # Some LLMs (e.g. gpt-4o) may include backticks or quotes when invoking tools. 
table_name = table_name.strip(' `"\'\n\r') @@ -286,4 +288,7 @@ def query_safe(self, command: str, fetch: str = "all") -> str: try: return self.query(command, fetch) except Exception as e: - return f"Error: {e}" + msg = f"Error: {e}" + if 'does not exist' in msg and ' relation ' in msg: + msg += '\nAvailable tables: ' + ', '.join(self.get_usable_table_names()) + return msg From f58a71d362cc9c9acd31cd5697efa8f049f46833 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Tue, 1 Oct 2024 02:20:51 -0700 Subject: [PATCH 47/51] feat: add new greptimedb handler (#9071) --- .../all-data-integrations.mdx | 46 +++++++++++++++++ .../data-integrations/greptimedb.mdx | 51 +++++++++++++++++++ docs/integrations/data-overview.mdx | 1 + docs/mint.json | 8 +-- .../handlers/greptimedb_handler/README.md | 42 +++++++++++++++ .../handlers/greptimedb_handler/__about__.py | 9 ++++ .../handlers/greptimedb_handler/__init__.py | 19 +++++++ .../greptimedb_handler/greptimedb_handler.py | 21 ++++++++ .../handlers/greptimedb_handler/icon.svg | 3 ++ .../greptimedb_handler/requirements.txt | 1 + 10 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 docs/integrations/data-integrations/greptimedb.mdx create mode 100644 mindsdb/integrations/handlers/greptimedb_handler/README.md create mode 100644 mindsdb/integrations/handlers/greptimedb_handler/__about__.py create mode 100644 mindsdb/integrations/handlers/greptimedb_handler/__init__.py create mode 100644 mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py create mode 100755 mindsdb/integrations/handlers/greptimedb_handler/icon.svg create mode 100644 mindsdb/integrations/handlers/greptimedb_handler/requirements.txt diff --git a/docs/integrations/data-integrations/all-data-integrations.mdx b/docs/integrations/data-integrations/all-data-integrations.mdx index 92720e19bf1..551de652429 100644 --- a/docs/integrations/data-integrations/all-data-integrations.mdx +++ b/docs/integrations/data-integrations/all-data-integrations.mdx @@ -932,6 +932,52 @@ Check out the Google BigQuery data handler details [here](/data-integrations/goo Check out the Google Sheets data handler details [here](/data-integrations/google-sheets). +## GreptimeDB + + + + ```sql + CREATE DATABASE greptimedb_datasource --- display name for the database + WITH ENGINE = 'greptimedb', --- name of the MindsDB handler + PARAMETERS = { + "host": " ", --- host IP address or URL + "port": , --- port used to make TCP/IP connection + "database": " ", --- database name + "user": " ", --- database user + "password": " ", --- database password + "ssl": , --- optional, the `ssl` parameter value indicates whether SSL is enabled (`True`) or disabled (`False`) + "ssl_ca": { --- optional, SSL Certificate Authority + "path": " " --- either "path" or "url" + }, + "ssl_cert": { --- optional, SSL certificates + "url": " " --- either "path" or "url" + }, + "ssl_key": { --- optional, SSL keys + "path": " " --- either "path" or "url" + } + }; + ``` + + + ```sql + CREATE DATABASE greptimedb_datasource + WITH ENGINE = 'greptimedb', + PARAMETERS = { + "host": "127.0.0.1", + "port": 4002, + "database": "public", + "user": "username", + "password": "password" + }; + ``` + + + + +Check out the GreptimeDB data handler details [here](/data-integrations/greptimedb). 
+ + + ## IBM Db2 diff --git a/docs/integrations/data-integrations/greptimedb.mdx b/docs/integrations/data-integrations/greptimedb.mdx new file mode 100644 index 00000000000..41438d4fdf8 --- /dev/null +++ b/docs/integrations/data-integrations/greptimedb.mdx @@ -0,0 +1,51 @@ +--- +title: GreptimeDB +sidebarTitle: GreptimeDB +--- + +This is the implementation of the GreptimeDB data handler for MindsDB. + +[GreptimeDB](https://greptime.com/) is an open-source, cloud-native time series database features analytical capabilities, scalebility and open protocols support. + +## Implementation + +This handler is implemented by extending the MySQLHandler. + +Connect GreptimeDB to MindsDB by providing the following parameters: + +* `host` is the host name, IP address, or URL. +* `port` is the port used to make TCP/IP connection. +* `database` is the database name. +* `user` is the database user. +* `password` is the database password. + +There are several optional parameters that can be used as well. + +* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). +* `ssl_ca` is the SSL Certificate Authority. +* `ssl_cert` stores SSL certificates. +* `ssl_key` stores SSL keys. + +## Usage + +In order to make use of this handler and connect to the GreptimeDB database in MindsDB, the following syntax can be used: + +```sql +CREATE DATABASE greptimedb_datasource +WITH + engine = 'greptimedb', + parameters = { + "host": "127.0.0.1", + "port": 4002, + "database": "public", + "user": "username", + "password": "password" + }; +``` + +You can use this established connection to query your table as follows. + +```sql +SELECT * +FROM greptimedb_datasource.example_table; +``` diff --git a/docs/integrations/data-overview.mdx b/docs/integrations/data-overview.mdx index 18f2e48c072..5e68c4a3e5a 100644 --- a/docs/integrations/data-overview.mdx +++ b/docs/integrations/data-overview.mdx @@ -78,6 +78,7 @@ Note that MindsDB doesn't store or copy your data. 
Instead, it fetches data dire + diff --git a/docs/mint.json b/docs/mint.json index 00897a3c48b..12628671a27 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -125,6 +125,7 @@ {"source": "/data-integrations/google-bigquery","destination": "/integrations/data-integrations/google-bigquery"}, {"source": "/data-integrations/google-cloud-sql","destination": "/integrations/data-integrations/google-cloud-sql"}, {"source": "/data-integrations/google-sheets","destination": "/integrations/data-integrations/google-sheets"}, + {"source": "/data-integrations/greptimedb","destination": "/integrations/data-integrations/greptimedb"}, {"source": "/data-integrations/ibm-db2","destination": "/integrations/data-integrations/ibm-db2"}, {"source": "/data-integrations/ibm-informix","destination": "/integrations/data-integrations/ibm-informix"}, {"source": "/data-integrations/influxdb","destination": "/integrations/data-integrations/influxdb"}, @@ -286,7 +287,7 @@ } ] }, - + { "group": "Features", "pages": [ @@ -305,7 +306,7 @@ "pages": [ "sql/feature-eng", "sql/feature-importance" - ] + ] }, "mindsdb-gui", "model-types", @@ -575,7 +576,7 @@ "pages": [ "rest/views/create-view", "rest/views/delete-views", - "rest/views/update-view", + "rest/views/update-view", "rest/views/list-views", "rest/views/list-view" ] @@ -957,6 +958,7 @@ "integrations/data-integrations/google-bigquery", "integrations/data-integrations/google-cloud-sql", "integrations/data-integrations/google-sheets", + "integrations/data-integrations/greptimedb", "integrations/data-integrations/ibm-db2", "integrations/data-integrations/ibm-informix", "integrations/data-integrations/influxdb", diff --git a/mindsdb/integrations/handlers/greptimedb_handler/README.md b/mindsdb/integrations/handlers/greptimedb_handler/README.md new file mode 100644 index 00000000000..90c2dea2b68 --- /dev/null +++ b/mindsdb/integrations/handlers/greptimedb_handler/README.md @@ -0,0 +1,42 @@ +## Implementation + +This handler is implemented by extending the MySQLHandler. + +Connect GreptimeDB to MindsDB by providing the following parameters: + +* `host` is the host name, IP address, or URL. +* `port` is the port used to make TCP/IP connection. +* `database` is the database name. +* `user` is the database user. +* `password` is the database password. + +There are several optional parameters that can be used as well. + +* `ssl` is the `ssl` parameter value that indicates whether SSL is enabled (`True`) or disabled (`False`). +* `ssl_ca` is the SSL Certificate Authority. +* `ssl_cert` stores SSL certificates. +* `ssl_key` stores SSL keys. + +## Usage + +In order to make use of this handler and connect to the GreptimeDB database in MindsDB, the following syntax can be used: + +```sql +CREATE DATABASE greptimedb_datasource +WITH + engine = 'greptimedb', + parameters = { + "host": "127.0.0.1", + "port": 4002, + "database": "public", + "user": "username", + "password": "password" + }; +``` + +You can use this established connection to query your table as follows. 
+
+```sql
+SELECT *
+FROM greptimedb_datasource.example_table;
+```
diff --git a/mindsdb/integrations/handlers/greptimedb_handler/__about__.py b/mindsdb/integrations/handlers/greptimedb_handler/__about__.py
new file mode 100644
index 00000000000..1730b33e720
--- /dev/null
+++ b/mindsdb/integrations/handlers/greptimedb_handler/__about__.py
@@ -0,0 +1,9 @@
+__title__ = 'MindsDB GreptimeDB handler'
+__package_name__ = 'mindsdb_greptimedb_handler'
+__version__ = '0.0.1'
+__description__ = "MindsDB handler for GreptimeDB"
+__author__ = 'Ning Sun'
+__github__ = 'https://github.com/mindsdb/mindsdb'
+__pypi__ = 'https://pypi.org/project/mindsdb/'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2022- mindsdb'
diff --git a/mindsdb/integrations/handlers/greptimedb_handler/__init__.py b/mindsdb/integrations/handlers/greptimedb_handler/__init__.py
new file mode 100644
index 00000000000..79ce45d25d6
--- /dev/null
+++ b/mindsdb/integrations/handlers/greptimedb_handler/__init__.py
@@ -0,0 +1,19 @@
+from mindsdb.integrations.libs.const import HANDLER_TYPE
+
+from .__about__ import __version__ as version, __description__ as description
+try:
+    from .greptimedb_handler import GreptimeDBHandler as Handler
+    import_error = None
+except Exception as e:
+    Handler = None
+    import_error = e
+
+title = 'GreptimeDB'
+name = 'greptimedb'
+type = HANDLER_TYPE.DATA
+icon_path = 'icon.svg'
+
+__all__ = [
+    'Handler', 'version', 'name', 'type', 'title', 'description',
+    'import_error', 'icon_path'
+]
diff --git a/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py b/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py
new file mode 100644
index 00000000000..2650537424f
--- /dev/null
+++ b/mindsdb/integrations/handlers/greptimedb_handler/greptimedb_handler.py
@@ -0,0 +1,21 @@
+"""
+This is the GreptimeDB integration handler for MindsDB. It provides the routines
+for interacting with the database.
+
+Because GreptimeDB has built-in MySQL wire protocol support, this handler is simply
+a subclass of MindsDB's MySQL handler.
+"""
+
+from mindsdb.integrations.handlers.mysql_handler import Handler as MySQLHandler
+
+
+class GreptimeDBHandler(MySQLHandler):
+    """
+    This handler handles connection and execution of GreptimeDB statements.
+    It is a subclass of the default MySQL handler.
+    """
+
+    name = 'greptimedb'
+
+    def __init__(self, name, **kwargs):
+        super().__init__(name, **kwargs)
diff --git a/mindsdb/integrations/handlers/greptimedb_handler/icon.svg b/mindsdb/integrations/handlers/greptimedb_handler/icon.svg
new file mode 100755
index 00000000000..1a01a413072
--- /dev/null
+++ b/mindsdb/integrations/handlers/greptimedb_handler/icon.svg
@@ -0,0 +1,3 @@
+
+
+
diff --git a/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt b/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt
new file mode 100644
index 00000000000..ee467569031
--- /dev/null
+++ b/mindsdb/integrations/handlers/greptimedb_handler/requirements.txt
@@ -0,0 +1 @@
+-r mindsdb/integrations/handlers/mysql_handler/requirements.txt

From f29bdd7e5c64439411d3a8f0e531ce1b9bd85de9 Mon Sep 17 00:00:00 2001
From: Minura Punchihewa <49385643+MinuraPunchihewa@users.noreply.github.com>
Date: Tue, 1 Oct 2024 15:40:51 +0530
Subject: [PATCH 48/51] Chat-bot Support for Multiple Tables and Multiple ID
 Columns (MS Teams Chat-bot for Channels) (#9768)

---
 .../ms_graph_api_teams_client.py              | 35 ++++++++++-
 .../ms_teams_handler/ms_teams_handler.py      | 40 +++++++++---
 .../handlers/ms_teams_handler/settings.py     |  1 +
 mindsdb/interfaces/chatbot/chatbot_task.py    |  9 +--
 mindsdb/interfaces/chatbot/memory.py          | 49 +++++++++------
 mindsdb/interfaces/chatbot/polling.py         | 61 +++++++++++--------
 6 files changed, 136 insertions(+), 59 deletions(-)

diff --git a/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py b/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py
index b53514fc583..46ed582f611 100644
--- a/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py
+++ b/mindsdb/integrations/handlers/ms_teams_handler/ms_graph_api_teams_client.py
@@ -72,6 +72,9 @@ def get_channel(self, group_id: Text, channel_id: Text) -> Dict:
         # add the group ID to the channel data
         channel.update({"teamId": group_id})

+        last_message = self.get_last_channel_message(group_id, channel_id)
+        channel["lastMessagePreview_id"] = last_message.get("id")
+
         return channel

     def get_channels(self) -> List[Dict]:
@@ -87,7 +90,13 @@ def get_channels(self) -> List[Dict]:
         channels = []
         for group_id in self._get_group_ids():
             for group_channels in self._fetch_data(f"teams/{group_id}/channels", pagination=False):
-                [group_channel.update({"teamId": group_id}) for group_channel in group_channels]
+                for group_channel in group_channels:
+                    # add the group ID to the channel data
+                    group_channel.update({"teamId": group_id})
+
+                    last_message = self.get_last_channel_message(group_id, group_channel["id"])
+                    group_channel["lastMessagePreview_id"] = last_message.get("id")
+
                 channels.extend(group_channels)

         return channels
@@ -135,6 +144,30 @@ def get_channel_messages(self) -> List[Dict]:
             channel_messages.extend(messages)

         return channel_messages
+
+    def get_last_channel_message(self, group_id: Text, channel_id: Text) -> Dict:
+        """
+        Get the last message in a channel.
+
+        Parameters
+        ----------
+        group_id : Text
+            The ID of the group that the channel belongs to.
+
+        channel_id : Text
+            The ID of the channel.
+
+        Returns
+        -------
+        Dict
+            The last message data.
+ """ + + api_url = self._get_api_url(f"teams/{group_id}/channels/{channel_id}/messages") + # get the last message only + messages = self._get_response_value_unsafe(self._make_request(api_url, params={"$top": 1})) + + return messages[0] if messages else {} def send_channel_message(self, group_id: Text, channel_id: Text, message: Text, subject: Optional[Text] = None) -> None: """ diff --git a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py b/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py index f8369521a4f..7082481f9f1 100644 --- a/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py +++ b/mindsdb/integrations/handlers/ms_teams_handler/ms_teams_handler.py @@ -144,17 +144,37 @@ def get_chat_config(self) -> Dict: params = { 'polling': { 'type': 'message_count', - 'table': 'chats', - 'chat_id_col': 'id', - 'count_col': 'lastMessagePreview_id' }, - 'chat_table': { - 'name': 'chat_messages', - 'chat_id_col': 'chatId', - 'username_col': 'from_user_displayName', - 'text_col': 'body_content', - 'time_col': 'createdDateTime', - } + 'tables': [ + { + 'polling': { + 'table': 'chats', + 'chat_id_col': 'id', + 'count_col': 'lastMessagePreview_id' + }, + 'chat_table': { + 'name': 'chat_messages', + 'chat_id_col': 'chatId', + 'username_col': 'from_user_displayName', + 'text_col': 'body_content', + 'time_col': 'createdDateTime', + } + }, + { + 'polling': { + 'table': 'channels', + 'chat_id_col': ['teamId', 'id'], + 'count_col': 'lastMessagePreview_id' + }, + 'chat_table': { + 'name': 'channel_messages', + 'chat_id_col': ['channelIdentity_teamId', 'channelIdentity_channelId'], + 'username_col': 'from_user_displayName', + 'text_col': 'body_content', + 'time_col': 'createdDateTime', + } + } + ] } return params diff --git a/mindsdb/integrations/handlers/ms_teams_handler/settings.py b/mindsdb/integrations/handlers/ms_teams_handler/settings.py index c905904e6de..24f0e22a0d4 100644 --- a/mindsdb/integrations/handlers/ms_teams_handler/settings.py +++ b/mindsdb/integrations/handlers/ms_teams_handler/settings.py @@ -113,6 +113,7 @@ class MSTeamsHandlerConfig(BaseSettings): "webUrl", "membershipType", "teamId", + "lastMessagePreview_id" ] CHANNEL_MESSAGES_TABLE_COLUMNS: List = [ diff --git a/mindsdb/interfaces/chatbot/chatbot_task.py b/mindsdb/interfaces/chatbot/chatbot_task.py index 2b7552eb618..5264274e114 100644 --- a/mindsdb/interfaces/chatbot/chatbot_task.py +++ b/mindsdb/interfaces/chatbot/chatbot_task.py @@ -53,6 +53,7 @@ def run(self, stop_event): polling = chat_params['polling']['type'] if polling == 'message_count': + chat_params = chat_params['tables'] if 'tables' in chat_params else [chat_params] self.chat_pooling = MessageCountPolling(self, chat_params) self.memory = HandlerMemory(self, chat_params) @@ -71,10 +72,10 @@ def run(self, stop_event): self.chat_pooling.run(stop_event) - def on_message(self, chat_memory, message: ChatBotMessage): + def on_message(self, chat_memory, message: ChatBotMessage, table_name=None): try: - self._on_message(chat_memory, message) + self._on_message(chat_memory, message, table_name) except (SystemExit, KeyboardInterrupt): raise except Exception: @@ -82,7 +83,7 @@ def on_message(self, chat_memory, message: ChatBotMessage): logger.error(error) self.set_error(str(error)) - def _on_message(self, chat_memory, message: ChatBotMessage): + def _on_message(self, chat_memory, message: ChatBotMessage, table_name=None): # add question to history # TODO move it to realtime pooling chat_memory.add_to_history(message) @@ -105,7 +106,7 
@@ def _on_message(self, chat_memory, message: ChatBotMessage): ) # send to chat adapter - self.chat_pooling.send_message(response_message) + self.chat_pooling.send_message(response_message, table_name=table_name) logger.debug(f'>>chatbot {chat_id} out: {response_message.text}') # send to history diff --git a/mindsdb/interfaces/chatbot/memory.py b/mindsdb/interfaces/chatbot/memory.py index f648b41a691..8b79b09d2eb 100644 --- a/mindsdb/interfaces/chatbot/memory.py +++ b/mindsdb/interfaces/chatbot/memory.py @@ -21,14 +21,14 @@ def __init__(self, chat_task, chat_params): self.chat_params = chat_params self.chat_task = chat_task - def get_chat(self, chat_id): - return ChatMemory(self, chat_id) + def get_chat(self, chat_id, table_name=None): + return ChatMemory(self, chat_id, table_name=table_name) - def hide_history(self, chat_id, left_count): + def hide_history(self, chat_id, left_count, table_name=None): ''' set date to start hiding messages ''' - history = self.get_chat_history(chat_id) + history = self.get_chat_history(chat_id, table_name=table_name) if left_count > len(history) - 1: left_count = len(history) - 1 sent_at = history[-left_count].sent_at @@ -62,12 +62,17 @@ def add_to_history(self, chat_id, chat_message): if chat_id in self._cache: del self._cache[chat_id] - def get_chat_history(self, chat_id, cached=True): - if cached and chat_id in self._cache: - history = self._cache[chat_id] + def get_chat_history(self, chat_id, table_name=None, cached=True): + key = (chat_id, table_name) if table_name else chat_id + if cached and key in self._cache: + history = self._cache[key] + else: - history = self._get_chat_history(chat_id) - self._cache[chat_id] = history + if table_name is None: + history = self._get_chat_history(chat_id) + else: + history = self._get_chat_history(chat_id, table_name) + self._cache[key] = history history = self._apply_hiding(chat_id, history) return history @@ -88,25 +93,28 @@ def _add_to_history(self, chat_id, chat_message): # do nothing. 
sent message will be stored by handler db pass - def _get_chat_history(self, chat_id): - t_params = self.chat_params['chat_table'] + def _get_chat_history(self, chat_id, table_name): + t_params = next( + chat_params['chat_table'] for chat_params in self.chat_params if chat_params['chat_table']['name'] == table_name + ) text_col = t_params['text_col'] username_col = t_params['username_col'] time_col = t_params['time_col'] + chat_id_cols = t_params['chat_id_col'] if isinstance(t_params['chat_id_col'], list) else [t_params['chat_id_col']] ast_query = Select( targets=[Identifier(text_col), Identifier(username_col), Identifier(time_col)], from_table=Identifier(t_params['name']), - where=BinaryOperation( + where=[BinaryOperation( op='=', args=[ - Identifier(t_params['chat_id_col']), - Constant(chat_id) + Identifier(chat_id_col), + Constant(chat_id[idx]) ] - ), + ) for idx, chat_id_col in enumerate(chat_id_cols)], order_by=[OrderBy(Identifier(time_col))], limit=Constant(self.MAX_DEPTH), ) @@ -178,14 +186,19 @@ class ChatMemory: ''' interface to work with individual chat ''' - def __init__(self, memory, chat_id): + def __init__(self, memory, chat_id, table_name=None): self.memory = memory self.chat_id = chat_id + self.table_name = table_name self.cached = False def get_history(self, cached=True): - result = self.memory.get_chat_history(self.chat_id, cached=cached and self.cached) + if self.table_name: + result = self.memory.get_chat_history(self.chat_id, self.table_name, cached=cached and self.cached) + else: + result = self.memory.get_chat_history(self.chat_id, cached=cached and self.cached) + self.cached = True return result @@ -202,4 +215,4 @@ def hide_history(self, left_count): ''' set date to start hiding messages ''' - self.memory.hide_history(self.chat_id, left_count) + self.memory.hide_history(self.chat_id, left_count, table_name=self.table_name) diff --git a/mindsdb/interfaces/chatbot/polling.py b/mindsdb/interfaces/chatbot/polling.py index 971332b7a28..8775788591c 100644 --- a/mindsdb/interfaces/chatbot/polling.py +++ b/mindsdb/interfaces/chatbot/polling.py @@ -18,16 +18,20 @@ def __init__(self, chat_task, chat_params): def start(self, stop_event): raise NotImplementedError - def send_message(self, message: ChatBotMessage): - chat_id = message.destination + def send_message(self, message: ChatBotMessage, table_name=None): + chat_id = message.destination if isinstance(message.destination, tuple) else (message.destination,) text = message.text - t_params = self.params["chat_table"] + t_params = self.params["chat_table"] if table_name is None else next( + (t["chat_table"] for t in self.params if t["chat_table"]["name"] == table_name) + ) + chat_id_cols = t_params["chat_id_col"] if isinstance(t_params["chat_id_col"], list) else [t_params["chat_id_col"]] + ast_query = Insert( table=Identifier(t_params["name"]), - columns=[t_params["chat_id_col"], t_params["text_col"]], + columns=[*chat_id_cols, t_params["text_col"]], values=[ - [chat_id, text], + [*chat_id, text], ], ) @@ -44,22 +48,27 @@ def __init__(self, *args, **kwargs): def run(self, stop_event): while True: try: - chat_ids = self.check_message_count() - logger.debug(f"number of chat ids found: {len(chat_ids)}") - for chat_id in chat_ids: - try: - chat_memory = self.chat_task.memory.get_chat(chat_id) - except Exception as e: - logger.error(f"Problem retrieving chat memory: {e}") - - try: - message = self.get_last_message(chat_memory) - except Exception as e: - logger.error(f"Problem getting last message: {e}") - message = None - - if 
message: - self.chat_task.on_message(chat_memory, message) + for chat_params in self.params: + chat_ids = self.check_message_count(chat_params) + logger.debug(f"number of chat ids found: {len(chat_ids)}") + + for chat_id in chat_ids: + try: + chat_memory = self.chat_task.memory.get_chat( + chat_id, + table_name=chat_params["chat_table"]["name"], + ) + except Exception as e: + logger.error(f"Problem retrieving chat memory: {e}") + + try: + message = self.get_last_message(chat_memory) + except Exception as e: + logger.error(f"Problem getting last message: {e}") + message = None + + if message: + self.chat_task.on_message(chat_memory, message, table_name=chat_params["chat_table"]["name"]) except Exception as e: logger.error(e) @@ -82,16 +91,16 @@ def get_last_message(self, chat_memory): return return last_message - def check_message_count(self): - p_params = self.params["polling"] + def check_message_count(self, chat_params): + p_params = chat_params["polling"] chat_ids = [] - id_col = p_params["chat_id_col"] + id_cols = p_params["chat_id_col"] if isinstance(p_params["chat_id_col"], list) else [p_params["chat_id_col"]] msgs_col = p_params["count_col"] # get chats status info ast_query = Select( - targets=[Identifier(id_col), Identifier(msgs_col)], + targets=[*[Identifier(id_col) for id_col in id_cols], Identifier(msgs_col)], from_table=Identifier(p_params["table"]), ) @@ -101,7 +110,7 @@ def check_message_count(self): chats = {} for row in resp.data_frame.to_dict("records"): - chat_id = row[id_col] + chat_id = tuple(row[id_col] for id_col in id_cols) msgs = row[msgs_col] chats[chat_id] = msgs From b56c16d4a08747addc431ef8d98300d03159c732 Mon Sep 17 00:00:00 2001 From: Zoran Pandovski Date: Tue, 1 Oct 2024 13:00:43 +0200 Subject: [PATCH 49/51] Bump version (#9778) --- mindsdb/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindsdb/__about__.py b/mindsdb/__about__.py index 9695eea425c..bd7a2df57f6 100644 --- a/mindsdb/__about__.py +++ b/mindsdb/__about__.py @@ -1,6 +1,6 @@ __title__ = 'MindsDB' __package_name__ = 'mindsdb' -__version__ = '24.9.4.1' +__version__ = '24.10.1.0' __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks" __email__ = "jorge@mindsdb.com" __author__ = 'MindsDB Inc' From 263a2348f7dd52bfcc10277c6c2d6bec52d2996d Mon Sep 17 00:00:00 2001 From: Prajwal Pai <108796209+prajwal-pai77@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:04:49 +0530 Subject: [PATCH 50/51] Update sdk & minor changes to connection of cluster (#9703) --- .../data-integrations/couchbase.mdx | 8 +++++--- .../handlers/couchbase_handler/README.md | 10 ++++++---- .../couchbase_handler/connection_args.py | 6 +++--- .../couchbase_handler/couchbase_handler.py | 18 ++++++++---------- .../couchbase_handler/requirements.txt | 2 +- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/docs/integrations/data-integrations/couchbase.mdx b/docs/integrations/data-integrations/couchbase.mdx index feaa30c578d..f4e8a224c26 100644 --- a/docs/integrations/data-integrations/couchbase.mdx +++ b/docs/integrations/data-integrations/couchbase.mdx @@ -20,14 +20,16 @@ Before proceeding, ensure the following prerequisites are met: This handler is implemented using the `couchbase` library, the Python driver for Couchbase. 
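Because the handler drives the official SDK directly, connectivity problems are easiest to debug with a short standalone script that mirrors the handler's `connect()` logic from this patch. A minimal sketch, assuming a local cluster; the endpoint and credentials are placeholders:

```python
from datetime import timedelta

from couchbase.auth import PasswordAuthenticator
from couchbase.cluster import Cluster
from couchbase.options import ClusterOptions

# Placeholders: substitute your own endpoint and credentials.
auth = PasswordAuthenticator("admin", "password")
options = ClusterOptions(auth)
# The handler applies this profile unconditionally to avoid latency issues
# when connecting to Couchbase over the internet.
options.apply_profile("wan_development")

cluster = Cluster("couchbase://localhost", options)
# Raises UnAmbiguousTimeoutException if the endpoint is unreachable;
# the handler waits 5 seconds, matching its DEFAULT_TIMEOUT_SECONDS.
cluster.wait_until_ready(timedelta(seconds=5))
print("connected:", cluster.connected)
```

If this script connects, the same values should work in the `CREATE DATABASE` statement below.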
The required arguments to establish a connection are as follows:

-* `host`: the host name or IP address of the Couchbase server
+* `connection_string`: the connection string for the endpoint of the Couchbase server
 * `bucket`: the bucket name to use when connecting with the Couchbase server
 * `user`: the user to authenticate with the Couchbase server
 * `password`: the password to authenticate the user with the Couchbase server
 * `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default`

+Note: The connection string expects either the couchbases:// or couchbase:// protocol.
+
-If you are using Couchbase Capella, the `host` should be the connection string for the cluster. When you navigate to your cluster, the connection string can be found under the Connect tab.
+If you are using Couchbase Capella, you can find the `connection_string` under the Connect tab.

 It will also be required to whitelist the machine(s) that will be running MindsDB, and database credentials will need to be created for the user. These steps can also be taken under the Connect tab.

@@ -38,7 +40,7 @@ CREATE DATABASE couchbase_datasource
 WITH
     engine='couchbase',
     parameters={
-        "host": "127.0.0.1",
+        "connection_string": "couchbase://localhost",
         "bucket": "travel-sample",
         "user": "admin",
         "password": "password",
diff --git a/mindsdb/integrations/handlers/couchbase_handler/README.md b/mindsdb/integrations/handlers/couchbase_handler/README.md
index f823cd2f57e..ce83f31b6c5 100644
--- a/mindsdb/integrations/handlers/couchbase_handler/README.md
+++ b/mindsdb/integrations/handlers/couchbase_handler/README.md
@@ -8,13 +8,15 @@ This handler was implemented using the `couchbase` library, the Python driver fo

 The required arguments to establish a connection are:

-* `host`: the host name or IP address of the Couchbase server
-* `bucket`: the bucket name to use when connecting with the Couchbase server
+* `connection_string`: the connection string for the endpoint of the Couchbase server
+* `bucket`: the bucket name to use when connecting with the Couchbase server
 * `user`: the user to authenticate with the Couchbase server
 * `password`: the password to authenticate the user with the Couchbase server
 * `scope`: scopes are a level of data organization within a bucket. If omitted, will default to `_default`

+Note: The connection string expects either the couchbases:// or couchbase:// protocol.
+
-If you are using Couchbase Capella, the `host` should be the connection string for the cluster. When you navigate to your cluster, the connection string can be found under the Connect tab.
+If you are using Couchbase Capella, you can find the `connection_string` under the Connect tab.

 It will also be required to whitelist the machine(s) that will be running MindsDB, and database credentials will need to be created for the user. These steps can also be taken under the Connect tab.
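For Couchbase Capella, the same statement applies with the secure `couchbases://` scheme; a sketch where the endpoint is a placeholder for the value copied from Capella's Connect tab:

```sql
-- Sketch: Capella connection; the endpoint below is a placeholder.
CREATE DATABASE couchbase_capella_datasource
WITH
    engine = 'couchbase',
    parameters = {
        "connection_string": "couchbases://cb.your-instance.cloud.couchbase.com",
        "bucket": "travel-sample",
        "user": "admin",
        "password": "password"
    };
```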
## Usage @@ -26,7 +28,7 @@ CREATE DATABASE couchbase_datasource WITH engine='couchbase', parameters={ - "host": "127.0.0.1", + "connection_string": "couchbase://localhost", "bucket":"travel-sample", "user": "admin", "password": "password", diff --git a/mindsdb/integrations/handlers/couchbase_handler/connection_args.py b/mindsdb/integrations/handlers/couchbase_handler/connection_args.py index 3f9c6fb05b4..acdbfa2a3bc 100644 --- a/mindsdb/integrations/handlers/couchbase_handler/connection_args.py +++ b/mindsdb/integrations/handlers/couchbase_handler/connection_args.py @@ -17,9 +17,9 @@ "type": ARG_TYPE.STR, "description": "The database/bucket name to use when connecting with the Couchbase server.", }, - host={ + connection_string={ "type": ARG_TYPE.STR, - "description": "--your-instance--.dp.cloud.couchbase.com or IP address of the Couchbase server.", + "description": "the Connection string to specify the cluster endpoint.", }, scope={ "type": ARG_TYPE.STR, @@ -27,5 +27,5 @@ }, ) connection_args_example = OrderedDict( - host="127.0.0.1", user="root", password="password", bucket="bucket" + connection_string="couchbase://localhost", user="root", password="password", bucket="bucket" ) diff --git a/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py b/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py index 782f58ef893..da55e5447df 100644 --- a/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py +++ b/mindsdb/integrations/handlers/couchbase_handler/couchbase_handler.py @@ -1,7 +1,6 @@ from datetime import timedelta import pandas as pd -from urllib.parse import urlparse from couchbase.auth import PasswordAuthenticator from couchbase.cluster import Cluster from couchbase.exceptions import UnAmbiguousTimeoutException @@ -27,8 +26,7 @@ class CouchbaseHandler(DatabaseHandler): """ name = "couchbase" - # TODO: Check the timeout value with the sdk default time - DEFAULT_TIMEOUT_SECONDS = 60 + DEFAULT_TIMEOUT_SECONDS = 5 def __init__(self, name, **kwargs): super().__init__(name) @@ -61,17 +59,17 @@ def connect(self): options = ClusterOptions(auth) - host = self.connection_data.get("host") - domain = urlparse(host).hostname - if domain and domain.endswith(".couchbase.com"): - options.apply_profile('wan_development') - endpoint = f"couchbases://{host}" + conn_str = self.connection_data.get("connection_string") + # wan_development is used to avoid latency issues while connecting to Couchbase over the internet + options.apply_profile('wan_development') + # connect to the cluster cluster = Cluster( - endpoint, + conn_str, options, ) try: + # wait until the cluster is ready for use cluster.wait_until_ready(timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS)) self.is_connected = cluster.connected self.cluster = cluster @@ -160,7 +158,7 @@ def query(self, query: ASTNode) -> Response: def get_tables(self) -> Response: """ - Get a list with of collection in database + Get a list of collections in database """ cluster = self.connect() bucket = cluster.bucket(self.bucket_name) diff --git a/mindsdb/integrations/handlers/couchbase_handler/requirements.txt b/mindsdb/integrations/handlers/couchbase_handler/requirements.txt index 32e01b720eb..9a154260511 100644 --- a/mindsdb/integrations/handlers/couchbase_handler/requirements.txt +++ b/mindsdb/integrations/handlers/couchbase_handler/requirements.txt @@ -1 +1 @@ -couchbase==4.0.2 \ No newline at end of file +couchbase==4.3.1 \ No newline at end of file From a79e682f3c387692d0ce6408b338e4f7e16c4668 Mon Sep 17 00:00:00 2001 
From: Andrey Date: Fri, 4 Oct 2024 11:16:26 +0300 Subject: [PATCH 51/51] Mindsdb starting speedup, part 2 (#9599) --- mindsdb/api/http/namespaces/handlers.py | 4 +- mindsdb/integrations/libs/process_cache.py | 8 ++-- mindsdb/interfaces/database/integrations.py | 53 ++++++++++----------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/mindsdb/api/http/namespaces/handlers.py b/mindsdb/api/http/namespaces/handlers.py index 45f826edeef..47b439e54f3 100644 --- a/mindsdb/api/http/namespaces/handlers.py +++ b/mindsdb/api/http/namespaces/handlers.py @@ -50,7 +50,9 @@ class HandlerIcon(Resource): @api_endpoint_metrics('GET', '/handlers/handler/icon') def get(self, handler_name): try: - handler_meta = ca.integration_controller.get_handler_meta(handler_name) + handler_meta = ca.integration_controller.get_handlers_metadata().get(handler_name) + if handler_meta is None: + return http_error(HTTPStatus.NOT_FOUND, 'Icon not found', f'Icon for {handler_name} not found') icon_name = handler_meta['icon']['name'] handler_folder = handler_meta['import']['folder'] mindsdb_path = Path(importlib.util.find_spec('mindsdb').origin).parent diff --git a/mindsdb/integrations/libs/process_cache.py b/mindsdb/integrations/libs/process_cache.py index 11018300b84..2492d980d43 100644 --- a/mindsdb/integrations/libs/process_cache.py +++ b/mindsdb/integrations/libs/process_cache.py @@ -222,11 +222,11 @@ def init(self): is_cloud = config.get('cloud', False) # noqa if config['ml_task_queue']['type'] != 'redis': - lightwood_handler = integration_controller.get_handler_module('lightwood') - if lightwood_handler is not None and lightwood_handler.Handler is not None: - preload_handlers[lightwood_handler.Handler] = 4 if is_cloud else 1 - if is_cloud: + lightwood_handler = integration_controller.get_handler_module('lightwood') + if lightwood_handler is not None and lightwood_handler.Handler is not None: + preload_handlers[lightwood_handler.Handler] = 4 if is_cloud else 1 + huggingface_handler = integration_controller.get_handler_module('huggingface') if huggingface_handler is not None and huggingface_handler.Handler is not None: preload_handlers[huggingface_handler.Handler] = 1 diff --git a/mindsdb/interfaces/database/integrations.py b/mindsdb/interfaces/database/integrations.py index d34696ca6d7..0b8fd2449db 100644 --- a/mindsdb/interfaces/database/integrations.py +++ b/mindsdb/interfaces/database/integrations.py @@ -656,28 +656,6 @@ def _get_handler_meta(self, handler_name): for attr in module_attrs: handler_meta[attr] = getattr(module, attr) - # region icon - if hasattr(module, 'icon_path'): - try: - icon_path = handler_dir.joinpath(module.icon_path) - icon_type = icon_path.name[icon_path.name.rfind('.') + 1:].lower() - - if icon_type == 'svg': - with open(str(icon_path), 'rt') as f: - handler_meta['icon'] = { - 'data': f.read() - } - else: - with open(str(icon_path), 'rb') as f: - handler_meta['icon'] = { - 'data': base64.b64encode(f.read()).decode('utf-8') - } - - handler_meta['icon']['name'] = icon_path.name - handler_meta['icon']['type'] = icon_type - except Exception as e: - logger.error(f'Error reading icon for {handler_folder_name}, {e}!') - # endregion if hasattr(module, 'permanent'): handler_meta['permanent'] = module.permanent @@ -689,6 +667,26 @@ def _get_handler_meta(self, handler_name): return handler_meta + def _get_handler_icon(self, handler_dir, icon_path): + icon = {} + try: + icon_path = handler_dir.joinpath(icon_path) + icon_type = icon_path.name[icon_path.name.rfind('.') + 1:].lower() + + if 
icon_type == 'svg': + with open(str(icon_path), 'rt') as f: + icon['data'] = f.read() + else: + with open(str(icon_path), 'rb') as f: + icon['data'] = base64.b64encode(f.read()).decode('utf-8') + + icon['name'] = icon_path.name + icon['type'] = icon_type + + except Exception as e: + logger.error(f'Error reading icon for {handler_dir}, {e}!') + return icon + def _load_handler_modules(self): mindsdb_path = Path(importlib.util.find_spec('mindsdb').origin).parent handlers_path = mindsdb_path.joinpath('integrations/handlers') @@ -720,15 +718,12 @@ def _load_handler_modules(self): 'name': handler_name, 'permanent': handler_info.get('permanent', False), } + if 'icon_path' in handler_info: + icon = self._get_handler_icon(handler_dir, handler_info['icon_path']) + if icon: + handler_meta['icon'] = icon self.handlers_import_status[handler_name] = handler_meta - # import all handlers in thread - def import_handlers(): - self.get_handlers_import_status() - - thread = threading.Thread(target=import_handlers) - thread.start() - def _get_handler_info(self, handler_dir: Path): init_file = handler_dir / '__init__.py'
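Taken together, the last two hunks replace eager, import-time work (reading every icon and importing every handler module in a background thread) with lightweight discovery plus on-demand loading. The same idea in isolation, as a hedged sketch rather than MindsDB's actual API, caching each icon read so repeated `/handlers/{name}/icon` requests do not hit the disk twice:

```python
import base64
from functools import lru_cache
from pathlib import Path


@lru_cache(maxsize=None)  # each icon is read at most once, on first request
def load_icon(handler_dir: str, icon_name: str) -> dict:
    """Lazily read a handler icon: SVG stays text, other formats are base64-encoded."""
    icon_path = Path(handler_dir) / icon_name
    icon_type = icon_path.suffix.lstrip('.').lower()
    if icon_type == 'svg':
        data = icon_path.read_text()
    else:
        data = base64.b64encode(icon_path.read_bytes()).decode('utf-8')
    return {'name': icon_path.name, 'type': icon_type, 'data': data}
```

Deferring both the file reads and the handler imports to first use is what trims the startup path here.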