From e997b425044aab458d3750866486341fad420c0d Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Thu, 24 Oct 2024 20:04:50 +0800 Subject: [PATCH] DRAFT: miscellaneous updates to HTTP API Reference (#3005) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- api/http_api_reference.md | 164 ++++++++++++++++++++---------------- api/python_api_reference.md | 34 ++++---- 2 files changed, 109 insertions(+), 89 deletions(-) diff --git a/api/http_api_reference.md b/api/http_api_reference.md index 5e44f5448a..8de24150ae 100644 --- a/api/http_api_reference.md +++ b/api/http_api_reference.md @@ -61,10 +61,10 @@ curl --request POST \ - Case-insensitive. - `"avatar"`: (*Body parameter*), `string` - Base64 encoding of the avatar. Defaults to `""`. + Base64 encoding of the avatar. - `"description"`: (*Body parameter*), `string` - A brief description of the dataset to create. Defaults to `""`. + A brief description of the dataset to create. - `"language"`: (*Body parameter*), `string` The language setting of the dataset to create. Available options: @@ -178,8 +178,8 @@ curl --request DELETE \ #### Request parameters -- `"ids"`: (*Body parameter*), `list[string]`, *Required* - The IDs of the datasets to delete. +- `"ids"`: (*Body parameter*), `list[string]` + The IDs of the datasets to delete. If it is not specified, all datasets will be deleted. ### Response @@ -407,10 +407,10 @@ curl --request POST \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) +- `dataset_id`: (*Path parameter*) The ID of the dataset to which the documents will be uploaded. -- `"file"`: (*Body parameter*) - The document to upload. +- `'file'`: (*Body parameter*) + A document to upload. ### Response @@ -469,6 +469,10 @@ curl --request PUT \ #### Request parameters +- `dataset_id`: (*Path parameter*) + The ID of the associated dataset. +- `document_id`: (*Path parameter*) + The ID of the document to update. - `"name"`: (*Body parameter*), `string` - `"chunk_method"`: (*Body parameter*), `string` The parsing method to apply to the document: @@ -538,9 +542,9 @@ curl --request GET \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) - The dataset ID. -- `"documents_id"`: (*Path parameter*) +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `documents_id`: (*Path parameter*) The ID of the document to download. ### Response @@ -580,27 +584,27 @@ Lists documents in a specified dataset. ```bash curl --request GET \ - --url http://{address}/api/v1/dataset/{dataset_id}/info?offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id} \ + --url http://{address}/api/v1/dataset/{dataset_id}/info?keywords={keywords}&offset={offset}&limit={limit}&orderby={orderby}&desc={desc}&id={document_id} \ --header 'Authorization: Bearer {YOUR_API_KEY}' ``` #### Request parameters -- `"dataset_id"`: (*Path parameter*) - The dataset ID. -- `"keywords"`: (*Filter parameter*), `string` +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `keywords`: (*Filter parameter*), `string` The keywords used to match document titles. -- `"offset"`: (*Filter parameter*), `integer` +- `offset`: (*Filter parameter*), `integer` The starting index for the documents to retrieve. Typically used in conjunction with `limit`. Defaults to `1`. -- `"limit"`: (*Filter parameter*), `integer` +- `limit`: (*Filter parameter*), `integer` The maximum number of documents to retrieve. Defaults to `1024`. 
-- `"orderby"`: (*Filter parameter*), `string` +- `orderby`: (*Filter parameter*), `string` The field by which documents should be sorted. Available options: - - `"create_time"` (default) - - `"update_time"` -- `"desc"`: (*Filter parameter*), `boolean` + - `create_time` (default) + - `update_time` +- `desc`: (*Filter parameter*), `boolean` Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`. -- `"id"`: (*Filter parameter*), `string` +- `id`: (*Filter parameter*), `string` The ID of the document to retrieve. ### Response @@ -690,10 +694,10 @@ curl --request DELETE \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) - The dataset ID. +- `dataset_id`: (*Path parameter*) + The associated dataset ID. - `"ids"`: (*Body parameter*), `list[string]` - The IDs of the documents to delete. If not specified, all documents in the dataset will be deleted. + The IDs of the documents to delete. If it is not specified, all documents in the specified dataset will be deleted. ### Response @@ -747,7 +751,7 @@ curl --request POST \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) +- `dataset_id`: (*Path parameter*) The dataset ID. - `"document_ids"`: (*Body parameter*), `list[string]`, *Required* The IDs of the documents to parse. @@ -804,9 +808,9 @@ curl --request DELETE \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) - The dataset ID -- `"document_ids"`: (*Body parameter*) +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `"document_ids"`: (*Body parameter*), `list[string]`, *Required* The IDs of the documents for which the parsing should be stopped. ### Response @@ -862,9 +866,13 @@ curl --request POST \ #### Request parameters +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `document_ids`: (*Path parameter*) + The associated document ID. - `"content"`: (*Body parameter*), `string`, *Required* The text content of the chunk. -- `"important_keywords`(*Body parameter*) +- `"important_keywords`(*Body parameter*), `list[string]` The key terms or phrases to tag with the chunk. ### Response @@ -924,18 +932,18 @@ curl --request GET \ #### Request parameters -- `"dataset_id"`: (*Path parameter*) - The dataset ID. -- `"document_id"`: (*Path parameter*) - The document ID. +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `document_ids`: (*Path parameter*) + The associated document ID. - `"keywords"`(*Filter parameter*), `string` - The keywords used to match chunk content. Defaults to `None` + The keywords used to match chunk content. - `"offset"`(*Filter parameter*), `string` The starting index for the chunks to retrieve. Defaults to `1`. - `"limit"`(*Filter parameter*), `integer` The maximum number of chunks to retrieve. Default: `1024` - `"id"`(*Filter parameter*), `string` - The ID of the chunk to retrieve. Default: `None` + The ID of the chunk to retrieve. ### Response @@ -1025,8 +1033,12 @@ curl --request DELETE \ #### Request parameters -- `"chunk_ids"`: (*Body parameter*) - The IDs of the chunks to delete. If not specified, all chunks of the current document will be deleted. +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `document_ids`: (*Path parameter*) + The associated document ID. +- `"chunk_ids"`: (*Body parameter*), `list[string]` + The IDs of the chunks to delete. If it is not specified, all chunks of the specified document will be deleted. 
### Response @@ -1083,13 +1095,19 @@ curl --request PUT \ #### Request parameters +- `dataset_id`: (*Path parameter*) + The associated dataset ID. +- `document_ids`: (*Path parameter*) + The associated document ID. +- `chunk_id`: (*Path parameter*) + The ID of the chunk to update. - `"content"`: (*Body parameter*), `string` The text content of the chunk. - `"important_keywords"`: (*Body parameter*), `list[string]` A list of key terms or phrases to tag with the chunk. - `"available"`: (*Body parameter*) `boolean` The chunk's availability status in the dataset. Value options: - - `true`: Available + - `true`: Available (default) - `false`: Unavailable ### Response @@ -1157,7 +1175,7 @@ curl --request POST \ #### Request parameter - `"question"`: (*Body parameter*), `string`, *Required* - The user query or query keywords. Defaults to `""`. + The user query or query keywords. - `"dataset_ids"`: (*Body parameter*) `list[string]`, *Required* The IDs of the datasets to search from. - `"document_ids"`: (*Body parameter*), `list[string]` @@ -1300,14 +1318,14 @@ curl --request POST \ - `"name"`: (*Body parameter*), `string`, *Required* The name of the chat assistant. -- `"avatar"`: (*Body parameter*) - Base64 encoding of the avatar. Defaults to `""`. -- `"dataset_ids"`: (*Body parameter*) - The IDs of the associated datasets. Defaults to `[""]`. +- `"avatar"`: (*Body parameter*), `string` + Base64 encoding of the avatar. +- `"dataset_ids"`: (*Body parameter*), `list[string]` + The IDs of the associated datasets. - `"llm"`: (*Body parameter*), `object` - The LLM settings for the chat assistant to create. When the value is `None`, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes: + The LLM settings for the chat assistant to create. If it is not explicitly set, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes: - `"model_name"`, `string` - The chat model name. If it is `None`, the user's default chat model will be returned. + The chat model name. If not set, the user's default chat model will be used. - `"temperature"`: `float` Controls the randomness of the model's predictions. A lower temperature increases the model's confidence in its responses; a higher temperature increases creativity and diversity. Defaults to `0.1`. - `"top_p"`: `float` @@ -1324,10 +1342,10 @@ curl --request POST \ - `"keywords_similarity_weight"`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`. - `"top_n"`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`. - `"variables"`: `object[]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that: - - `"knowledge"` is a reserved variable, which will be replaced with the retrieved chunks. + - `"knowledge"` is a reserved variable, which represents the retrieved chunks. - All the variables in 'System' should be curly bracketed. 
- - The default value is `[{"key": "knowledge", "optional": true}]` - - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. Defaults to `""`. + - The default value is `[{"key": "knowledge", "optional": true}]`. + - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. - `"empty_response"`: `string` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is found, leave this blank. - `"opener"`: `string` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`. - `"show_quote`: `boolean` Indicates whether the source of text should be displayed. Defaults to `true`. @@ -1458,14 +1476,14 @@ curl --request PUT \ The ID of the chat assistant to update. - `"name"`: (*Body parameter*), `string`, *Required* The name of the chat assistant. -- `"avatar"`: (*Body parameter*) - Base64 encoding of the avatar. Defaults to `""`. -- `"dataset_ids"`: (*Body parameter*) - The IDs of the associated datasets. Defaults to `[""]`. +- `"avatar"`: (*Body parameter*), `string` + Base64 encoding of the avatar. +- `"dataset_ids"`: (*Body parameter*), `list[string]` + The IDs of the associated datasets. - `"llm"`: (*Body parameter*), `object` - The LLM settings for the chat assistant to create. When the value is `None`, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes: + The LLM settings for the chat assistant to create. If it is not explicitly set, a dictionary with the following values will be generated as the default. An `llm` object contains the following attributes: - `"model_name"`, `string` - The chat model name. If it is `None`, the user's default chat model will be returned. + The chat model name. If not set, the user's default chat model will be used. - `"temperature"`: `float` Controls the randomness of the model's predictions. A lower temperature increases the model's confidence in its responses; a higher temperature increases creativity and diversity. Defaults to `0.1`. - `"top_p"`: `float` @@ -1482,10 +1500,10 @@ curl --request PUT \ - `"keywords_similarity_weight"`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`. - `"top_n"`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`. - `"variables"`: `object[]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that: - - `"knowledge"` is a reserved variable, which will be replaced with the retrieved chunks. + - `"knowledge"` is a reserved variable, which represents the retrieved chunks. - All the variables in 'System' should be curly bracketed. - The default value is `[{"key": "knowledge", "optional": true}]` - - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. Defaults to `""`. 
+ - `"rerank_model"`: `string` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. - `"empty_response"`: `string` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is found, leave this blank. - `"opener"`: `string` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`. - `"show_quote`: `boolean` Indicates whether the source of text should be displayed. Defaults to `true`. @@ -1547,7 +1565,7 @@ curl --request DELETE \ #### Request parameters - `"ids"`: (*Body parameter*), `list[string]` - The IDs of the chat assistants to delete. If not specified, all chat assistants in the system will be deleted. + The IDs of the chat assistants to delete. If it is not specified, all chat assistants in the system will be deleted. ### Response @@ -1570,7 +1588,7 @@ Failure: --- -## List chats +## List chat assistants **GET** `/api/v1/chat?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={chat_name}&id={chat_id}` @@ -1599,8 +1617,8 @@ curl --request GET \ The number of chat assistants on each page. Defaults to `1024`. - `orderby`: (*Path parameter*), `string` The attribute by which the results are sorted. Available options: - - `"create_time"` (default) - - `"update_time"` + - `create_time` (default) + - `update_time` - `"desc"`: (*Path parameter*), `boolean` Indicates whether the retrieved chat assistants should be sorted in descending order. Defaults to `true`. - `id`: (*Path parameter*), `string` @@ -1804,7 +1822,7 @@ curl --request PUT \ The ID of the associated chat assistant. - `session_id`: (*Path parameter*) The ID of the session to update. -- `"name`: (*Body Parameter), `string` +- `"name"`: (*Body Parameter), `string` The name of the session to update. ### Response @@ -1822,7 +1840,7 @@ Failure: ```json { "code": 102, - "message": "Name can not be empty." + "message": "Name cannot be empty." } ``` @@ -1853,19 +1871,19 @@ curl --request GET \ - `chat_id`: (*Path parameter*) The ID of the associated chat assistant. -- `"page"`: (*Path parameter*), `integer` +- `page`: (*Filter parameter*), `integer` Specifies the page on which the sessions will be displayed. Defaults to `1`. -- `"page_size"`: (*Path parameter*), `integer` +- `page_size`: (*Filter parameter*), `integer` The number of sessions on each page. Defaults to `1024`. -- `"orderby"`: (*Path parameter*), `string` +- `orderby`: (*Filter parameter*), `string` The field by which sessions should be sorted. Available options: - - `"create_time"` (default) - - `"update_time"` -- `"desc"`: (*Path parameter*), `boolean` + - `create_time` (default) + - `update_time` +- `desc`: (*Filter parameter*), `boolean` Indicates whether the retrieved sessions should be sorted in descending order. Defaults to `true`. -- `"name"`: (*Path parameter*) `string` +- `name`: (*Filter parameter*) `string` The name of the chat session to retrieve. -- `"id"`: (*Path parameter*), `string` +- `id`: (*Filter parameter*), `string` The ID of the chat session to retrieve. ### Response @@ -1941,7 +1959,7 @@ curl --request DELETE \ - `chat_id`: (*Path parameter*) The ID of the associated chat assistant. - `"ids"`: (*Body Parameter*), `list[string]` - The IDs of the sessions to delete. If not specified, all sessions associated with the current chat assistant will be deleted. + The IDs of the sessions to delete. 
If it is not specified, all sessions associated with the specified chat assistant will be deleted. ### Response @@ -2002,12 +2020,12 @@ curl --request POST \ The ID of the associated chat assistant. - `"question"`: (*Body Parameter*), `string` *Required* The question to start an AI chat. -- `"stream"`: (*Body Parameter*), `string` +- `"stream"`: (*Body Parameter*), `boolean` Indicates whether to output responses in a streaming way: - `true`: Enable streaming. - `false`: (Default) Disable streaming. - `"session_id"`: (*Body Parameter*) - The ID of session. If not provided, a new session will be generated. + The ID of session. If it is not provided, a new session will be generated. ### Response diff --git a/api/python_api_reference.md b/api/python_api_reference.md index 023a26bed4..dd01d47fe1 100644 --- a/api/python_api_reference.md +++ b/api/python_api_reference.md @@ -105,16 +105,16 @@ dataset = rag_object.create_dataset(name="kb_1") ## Delete datasets ```python -RAGFlow.delete_datasets(ids: list[str]) +RAGFlow.delete_datasets(ids: list[str] = None) ``` -Deletes specified datasets or all datasets in the system. +Deletes datasets by ID. ### Parameters #### ids: `list[str]`, *Required* -The IDs of the datasets to delete. +The IDs of the datasets to delete. Defaults to `None`. If it is not specified, all datasets will be deleted. ### Returns @@ -460,7 +460,7 @@ Deletes documents by ID. #### ids: `list[list]` -The IDs of the documents to delete. Defaults to `None`. If not specified, all documents in the dataset will be deleted. +The IDs of the documents to delete. Defaults to `None`. If it is not specified, all documents in the dataset will be deleted. ### Returns @@ -597,7 +597,7 @@ A `Chunk` object contains the following attributes: - `document_id`: `str` The ID of the associated document. - `available`: `bool` The chunk's availability status in the dataset. Value options: - `False`: Unavailable - - `True`: Available + - `True`: Available (default) ### Examples @@ -673,7 +673,7 @@ Deletes chunks by ID. #### chunk_ids: `list[str]` -The IDs of the chunks to delete. Defaults to `None`. If not specified, all chunks of the current document will be deleted. +The IDs of the chunks to delete. Defaults to `None`. If it is not specified, all chunks of the current document will be deleted. ### Returns @@ -714,7 +714,7 @@ A dictionary representing the attributes to update, with the following keys: - `"important_keywords"`: `list[str]` A list of key terms or phrases to tag with the chunk. - `"available"`: `bool` The chunk's availability status in the dataset. Value options: - `False`: Unavailable - - `True`: Available + - `True`: Available (default) ### Returns @@ -866,7 +866,7 @@ The IDs of the associated datasets. Defaults to `[""]`. The LLM settings for the chat assistant to create. Defaults to `None`. When the value is `None`, a dictionary with the following values will be generated as the default. An `LLM` object contains the following attributes: - `model_name`: `str` - The chat model name. If it is `None`, the user's default chat model will be returned. + The chat model name. If it is `None`, the user's default chat model will be used. - `temperature`: `float` Controls the randomness of the model's predictions. A lower temperature increases the model's confidence in its responses; a higher temperature increases creativity and diversity. Defaults to `0.1`. - `top_p`: `float` @@ -886,10 +886,9 @@ Instructions for the LLM to follow. 
A `Prompt` object contains the following at - `keywords_similarity_weight`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`. - `top_n`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`. - `variables`: `list[dict[]]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that: - - `knowledge` is a reserved variable, which will be replaced with the retrieved chunks. - - All the variables in 'System' should be curly bracketed. - - The default value is `[{"key": "knowledge", "optional": True}]` - + - `knowledge` is a reserved variable, which represents the retrieved chunks. + - All the variables in 'System' should be curly bracketed. + - The default value is `[{"key": "knowledge", "optional": True}]`. - `rerank_model`: `str` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. Defaults to `""`. - `empty_response`: `str` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is found, leave this blank. Defaults to `None`. - `opener`: `str` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`. @@ -947,7 +946,10 @@ A dictionary representing the attributes to update, with the following keys: - `"similarity_threshold"`: `float` RAGFlow uses a hybrid of weighted keyword similarity and vector cosine similarity during retrieval. This argument sets the threshold for similarities between the user query and chunks. If a similarity score falls below this threshold, the corresponding chunk will be excluded from the results. The default value is `0.2`. - `"keywords_similarity_weight"`: `float` This argument sets the weight of keyword similarity in the hybrid similarity score with vector cosine similarity or reranking model similarity. By adjusting this weight, you can control the influence of keyword similarity in relation to other similarity measures. The default value is `0.7`. - `"top_n"`: `int` This argument specifies the number of top chunks with similarity scores above the `similarity_threshold` that are fed to the LLM. The LLM will *only* access these 'top N' chunks. The default value is `8`. - - `"variables"`: `list[dict[]]` If you use dialog APIs, the variables might help you chat with your clients with different strategies. The variables are used to fill in the 'System' part in prompt in order to give LLM a hint. The 'knowledge' is a very special variable which will be filled-in with the retrieved chunks. All the variables in 'System' should be curly bracketed. Defaults to `[{"key": "knowledge", "optional": True}]` + - `"variables"`: `list[dict[]]` This argument lists the variables to use in the 'System' field of **Chat Configurations**. Note that: + - `knowledge` is a reserved variable, which represents the retrieved chunks. + - All the variables in 'System' should be curly bracketed. + - The default value is `[{"key": "knowledge", "optional": True}]`. 
- `"rerank_model"`: `str` If it is not specified, vector cosine similarity will be used; otherwise, reranking score will be used. Defaults to `""`. - `"empty_response"`: `str` If nothing is retrieved in the dataset for the user's question, this will be used as the response. To allow the LLM to improvise when nothing is retrieved, leave this blank. Defaults to `None`. - `"opener"`: `str` The opening greeting for the user. Defaults to `"Hi! I am your assistant, can I help you?"`. @@ -988,7 +990,7 @@ Deletes chat assistants by ID. #### ids: `list[str]` -The IDs of the chat assistants to delete. Defaults to `None`. If not specified, all chat assistants in the system will be deleted. +The IDs of the chat assistants to delete. Defaults to `None`. If it is ot specified, all chat assistants in the system will be deleted. ### Returns @@ -1219,7 +1221,7 @@ Deletes sessions by ID. #### ids: `list[str]` -The IDs of the sessions to delete. Defaults to `None`. If not specified, all sessions associated with the current chat assistant will be deleted. +The IDs of the sessions to delete. Defaults to `None`. If it is not specified, all sessions associated with the current chat assistant will be deleted. ### Returns @@ -1253,7 +1255,7 @@ Asks a question to start a conversation. The question to start an AI chat. -#### stream: `str` +#### stream: `bool` Indicates whether to output responses in a streaming way: