From 22807bbf3e321486a179dd4e897a8590e252fbf1 Mon Sep 17 00:00:00 2001 From: Robert Escriva Date: Tue, 26 Nov 2024 15:25:10 -0800 Subject: [PATCH] [ENH] Use figment configuration for chroma-load This mirrors the compactor and query services (and can even merge with their configs if we so choose). --- Cargo.lock | 1 + rust/load/Cargo.toml | 1 + rust/load/chroma_load.yaml | 4 ++++ rust/load/src/config.rs | 47 ++++++++++++++++++++++++++++++++++++++ rust/load/src/lib.rs | 19 +++++++++++---- rust/worker/src/config.rs | 1 + 6 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 rust/load/chroma_load.yaml create mode 100644 rust/load/src/config.rs diff --git a/Cargo.lock b/Cargo.lock index 817d093b713f..b9fbe6b843bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1304,6 +1304,7 @@ dependencies = [ "axum", "chromadb", "chrono", + "figment", "guacamole", "opentelemetry", "opentelemetry-otlp", diff --git a/rust/load/Cargo.toml b/rust/load/Cargo.toml index d7b4f6b74b25..3dadc39abdaa 100644 --- a/rust/load/Cargo.toml +++ b/rust/load/Cargo.toml @@ -8,6 +8,7 @@ async-trait = "0.1.83" axum = "0.7" chromadb = { git = "https://github.com/rescrv/chromadb-rs", rev = "e364e35c34c660d4e8e862436ea600ddc2f46a1e" } chrono = "0.4.38" +figment = { version = "0.10.12", features = ["env", "yaml", "test"] } guacamole = { version = "0.9", default-features = false } serde.workspace = true diff --git a/rust/load/chroma_load.yaml b/rust/load/chroma_load.yaml new file mode 100644 index 000000000000..e964c9d94643 --- /dev/null +++ b/rust/load/chroma_load.yaml @@ -0,0 +1,4 @@ +load_service: + service_name: chroma-load + otel_endpoint: "http://otel-collector:4317" + port: 3000 diff --git a/rust/load/src/config.rs b/rust/load/src/config.rs new file mode 100644 index 000000000000..f7d160c51072 --- /dev/null +++ b/rust/load/src/config.rs @@ -0,0 +1,47 @@ +use figment::providers::{Env, Format, Yaml}; +use serde::Deserialize; + +const DEFAULT_CONFIG_PATH: &str = "./chroma_config.yaml"; + +#[derive(Deserialize)] +/// Root config for chroma-load service. Can be part of a larger config file. +pub struct RootConfig { + pub load_service: LoadServiceConfig, +} + +impl RootConfig { + pub fn load() -> Self { + Self::load_from_path(DEFAULT_CONFIG_PATH) + } + + // NOTE: Copied from ../worker/src/config.rs. + pub fn load_from_path(path: &str) -> Self { + // Unfortunately, figment doesn't support environment variables with underscores. So we have to map and replace them. + // Excluding our own environment variables, which are prefixed with CHROMA_. + let mut f = figment::Figment::from(Env::prefixed("CHROMA_").map(|k| match k { + k if k == "my_member_id" => k.into(), + k => k.as_str().replace("__", ".").into(), + })); + if std::path::Path::new(path).exists() { + f = figment::Figment::from(Yaml::file(path)).merge(f); + } + // Apply defaults - this seems to be the best way to do it. + // https://github.com/SergioBenitez/Figment/issues/77#issuecomment-1642490298 + // f = f.join(Serialized::default( + // "worker.num_indexing_threads", + // num_cpus::get(), + // )); + let res = f.extract(); + match res { + Ok(config) => config, + Err(e) => panic!("Error loading config: {}", e), + } + } +} + +#[derive(Deserialize)] +pub struct LoadServiceConfig { + pub service_name: String, + pub otel_endpoint: String, + pub port: u16, +} diff --git a/rust/load/src/lib.rs b/rust/load/src/lib.rs index 28e0f2935259..83a3a110c7e6 100644 --- a/rust/load/src/lib.rs +++ b/rust/load/src/lib.rs @@ -18,11 +18,14 @@ use tower_http::trace::TraceLayer; use tracing::Instrument; use uuid::Uuid; +pub mod config; pub mod data_sets; pub mod opentelemetry_config; pub mod rest; pub mod workloads; +const CONFIG_PATH_ENV_VAR: &str = "CONFIG_PATH"; + /////////////////////////////////////////////// Error ////////////////////////////////////////////// #[derive(Debug)] @@ -659,10 +662,14 @@ async fn uninhibit(State(state): State) -> Result { } pub async fn entrypoint() { - opentelemetry_config::init_otel_tracing( - &"chroma-load".to_string(), - &"localhost:4317".to_string(), - ); + let config = match std::env::var(CONFIG_PATH_ENV_VAR) { + Ok(config_path) => config::RootConfig::load_from_path(&config_path), + Err(_) => config::RootConfig::load(), + }; + + let config = config.load_service; + + opentelemetry_config::init_otel_tracing(&config.service_name, &config.otel_endpoint); let load = Arc::new(LoadService::default()); let state = AppState { load: Arc::clone(&load), @@ -690,7 +697,9 @@ pub async fn entrypoint() { }), ) .with_state(state); - let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap(); + let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", config.port)) + .await + .unwrap(); let runner = tokio::task::spawn(async move { load.run().await }); axum::serve(listener, app).await.unwrap(); runner.abort(); diff --git a/rust/worker/src/config.rs b/rust/worker/src/config.rs index 55f3751395ea..79107db3b1d3 100644 --- a/rust/worker/src/config.rs +++ b/rust/worker/src/config.rs @@ -53,6 +53,7 @@ impl RootConfig { /// # Notes /// The environment variables are prefixed with CHROMA_ and are uppercase. /// Values in the envionment variables take precedence over values in the YAML file. + // NOTE: Copied to ../load/src/config.rs. pub(crate) fn load_from_path(path: &str) -> Self { // Unfortunately, figment doesn't support environment variables with underscores. So we have to map and replace them. // Excluding our own environment variables, which are prefixed with CHROMA_.