Skip to content

Commit

Permalink
environment variable approach
Browse files Browse the repository at this point in the history
  • Loading branch information
Edwinhr716 committed Jul 25, 2024
1 parent c27075d commit 9697d16
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 12 deletions.
6 changes: 3 additions & 3 deletions launcher/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ fn spawn_webserver(
max_input_tokens: usize,
max_total_tokens: usize,
max_batch_prefill_tokens: u32,
startup_time: u64,
download_time: u64,
shutdown: Arc<AtomicBool>,
shutdown_receiver: &mpsc::Receiver<()>,
) -> Result<Child, LauncherError> {
Expand Down Expand Up @@ -1200,8 +1200,6 @@ fn spawn_webserver(
format!("{}-0", args.shard_uds_path),
"--tokenizer-name".to_string(),
args.model_id,
"--startup-time".to_string(),
startup_time.to_string(),
];

// Grammar support
Expand Down Expand Up @@ -1278,6 +1276,8 @@ fn spawn_webserver(
envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
}

envs.push(("DOWNLOAD_TIME".into(), download_time.to_string().into()));

let mut webserver = match Command::new("text-generation-router")
.args(router_args)
.envs(envs)
Expand Down
6 changes: 0 additions & 6 deletions router/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ struct Args {
disable_grammar_support: bool,
#[clap(default_value = "4", long, env)]
max_client_batch_size: usize,
#[clap(long, env)]
startup_time: u64,
}

#[derive(Debug, Subcommand)]
Expand Down Expand Up @@ -131,7 +129,6 @@ async fn main() -> Result<(), RouterError> {
disable_grammar_support,
max_client_batch_size,
command,
startup_time,
} = args;

let print_schema_command = match command {
Expand Down Expand Up @@ -381,8 +378,6 @@ async fn main() -> Result<(), RouterError> {
}
};

tracing::info!("start time of the model is {startup_time}");

// Run server
server::run(
master_shard_uds_path,
Expand Down Expand Up @@ -414,7 +409,6 @@ async fn main() -> Result<(), RouterError> {
disable_grammar_support,
max_client_batch_size,
print_schema_command,
startup_time,
)
.await?;
Ok(())
Expand Down
6 changes: 3 additions & 3 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1434,7 +1434,6 @@ pub async fn run(
grammar_support: bool,
max_client_batch_size: usize,
print_schema_command: bool,
start_time: u64,
) -> Result<(), WebServerError> {
// OpenAPI documentation
#[derive(OpenApi)]
Expand Down Expand Up @@ -1514,6 +1513,7 @@ pub async fn run(
)
)]
struct ApiDoc;
let download_time = std::env::var("DOWNLOAD_TIME").unwrap_or("30".to_string()).parse::<u64>().unwrap_or(30);
let length_time = Instant::now();

// Create state
Expand Down Expand Up @@ -1895,11 +1895,11 @@ pub async fn run(
.layer(cors_layer);

tracing::info!("Connected");
let total_time = length_time.elapsed() + Duration::from_secs(start_time);
let total_time = length_time.elapsed() + Duration::from_secs(download_time);
tracing::info!("total time for router to boot up and connect to model server {:?}", length_time.elapsed());
tracing::info!("the total time in secs of boot time is {:?}", total_time);
metrics::gauge!("tgi_model_load_time").set(total_time.as_secs_f64());



if ngrok {
Expand Down

0 comments on commit 9697d16

Please sign in to comment.