diff --git a/examples/usecases/llm_diffusion_serving_app/docker/build_image.sh b/examples/usecases/llm_diffusion_serving_app/docker/build_image.sh
index 1c25fbf96b..80529302de 100755
--- a/examples/usecases/llm_diffusion_serving_app/docker/build_image.sh
+++ b/examples/usecases/llm_diffusion_serving_app/docker/build_image.sh
@@ -20,7 +20,7 @@ echo "ROOT_DIR: $ROOT_DIR"
 
 # Build docker image for the application
 docker_build_cmd="DOCKER_BUILDKIT=1 \
-docker buildx build \
+docker buildx build --load \
 --platform=linux/amd64 \
 --file ${EXAMPLE_DIR}/Dockerfile \
 --build-arg BASE_IMAGE=\"${BASE_IMAGE}\" \
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/client_app.py b/examples/usecases/llm_diffusion_serving_app/docker/client_app.py
index 616bc40522..91cb29369d 100644
--- a/examples/usecases/llm_diffusion_serving_app/docker/client_app.py
+++ b/examples/usecases/llm_diffusion_serving_app/docker/client_app.py
@@ -27,6 +27,15 @@
 st.session_state.gen_captions = st.session_state.get("gen_captions", [])
 st.session_state.llm_prompts = st.session_state.get("llm_prompts", None)
 st.session_state.llm_time = st.session_state.get("llm_time", 0)
+st.session_state.num_images = st.session_state.get("num_images", 2)
+st.session_state.max_new_tokens = st.session_state.get("max_new_tokens", 100)
+
+
+def update_max_tokens():
+    # Update the max_new_tokens input value in session state and UI
+    # The generated prompts are descriptions of roughly 50 tokens each
+    st.session_state.max_new_tokens = 50 * st.session_state.num_images
+
 
 with st.sidebar:
     st.title("Image Generation with Llama, SDXL, torch.compile and OpenVINO")
@@ -76,13 +85,23 @@ def get_model_status(model_name):
     )
 
     # Client App Parameters
-    num_images = st.sidebar.number_input(
-        "Number of images to generate", min_value=1, max_value=8, value=2, step=1
+    # Default values are set via session_state variables for num_images and max_new_tokens
+    st.sidebar.number_input(
+        "Number of images to generate",
+        min_value=1,
+        max_value=8,
+        step=1,
+        key="num_images",
+        on_change=update_max_tokens,
     )
 
     st.subheader("LLM Model parameters")
-    max_new_tokens = st.sidebar.number_input(
-        "max_new_tokens", min_value=30, max_value=250, value=40, step=5
+    st.sidebar.number_input(
+        "max_new_tokens",
+        min_value=100,
+        max_value=1250,
+        step=10,
+        key="max_new_tokens",
     )
 
     temperature = st.sidebar.number_input(
@@ -159,11 +178,19 @@ def sd_response_postprocess(response):
 
 
 def preprocess_llm_input(user_prompt, num_images=2):
-    template = """ Below is an instruction that describes a task. Write a response that appropriately completes the request.
-    Generate {} unique prompts similar to '{}' by changing the context, keeping the core theme intact.
-    Give the output in square brackets seperated by semicolon.
+    template = """ Generate expanded and descriptive prompts for an image generation model based on the user input.
+    Each prompt should build upon the original concept, adding layers of detail and context to create a more vivid and engaging scene for image generation.
+    Format each prompt distinctly within square brackets.
+    Ensure that each prompt is a standalone description that significantly elaborates on the original input as shown in the example below:
+    Example: For the input 'A futuristic cityscape with flying cars,' generate:
+[A futuristic cityscape with sleek, silver flying cars zipping through the sky, set against a backdrop of towering skyscrapers and neon-lit streets.]
+[A futuristic cityscape at dusk, with flying cars of various colors and shapes flying in formation.]
+[A futuristic cityscape at night, with flying cars illuminated by the city's vibrant nightlife.]
+
+    Aim for a tone that is rich in imagination and visual appeal, capturing the essence of the scene with depth and creativity. Do not generate text beyond the specified output format. Do not explain your response.
 
-    ### Response:
+    Generate {} similar detailed prompts for the user's input: {}.
+    Organize the output such that each prompt is within square brackets. Refer to example above.
     """
 
     prompt_template_with_user_input = template.format(num_images, user_prompt)
@@ -206,7 +233,7 @@ def generate_llm_model_response(prompt_template_with_user_input, user_prompt):
         {
             "prompt_template": prompt_template_with_user_input,
             "user_prompt": user_prompt,
-            "max_new_tokens": max_new_tokens,
+            "max_new_tokens": st.session_state.max_new_tokens,
             "temperature": temperature,
             "top_k": top_k,
             "top_p": top_p,
@@ -260,7 +287,7 @@ def generate_llm_model_response(prompt_template_with_user_input, user_prompt):
 )
 
 user_prompt = st.text_input("Enter a prompt for image generation:")
-include_user_prompt = st.checkbox("Include orginal prompt", value=False)
+include_user_prompt = st.checkbox("Include original prompt", value=False)
 
 prompt_container = st.container()
 status_container = st.container()
@@ -287,15 +314,18 @@ def display_prompts():
         llm_start_time = time.time()
         st.session_state.llm_prompts = [user_prompt]
 
-        if num_images > 1:
+        if st.session_state.num_images > 1:
             prompt_template_with_user_input = preprocess_llm_input(
-                user_prompt, num_images
+                user_prompt, st.session_state.num_images
             )
             llm_prompts = generate_llm_model_response(
                 prompt_template_with_user_input, user_prompt
             )
             st.session_state.llm_prompts = postprocess_llm_response(
-                llm_prompts, user_prompt, num_images, include_user_prompt
+                llm_prompts,
+                user_prompt,
+                st.session_state.num_images,
+                include_user_prompt,
             )
 
         st.session_state.llm_time = time.time() - llm_start_time
@@ -306,11 +336,11 @@ def display_prompts():
     prompt_container.write(
         "Enter Image Generation Prompt and Click Generate Prompts !"
     )
-elif len(st.session_state.llm_prompts) < num_images:
+elif len(st.session_state.llm_prompts) < st.session_state.num_images:
     prompt_container.warning(
         f"""Insufficient prompts. Regenerate prompts !
-        Num Images Requested: {num_images}, Prompts Generated: {len(st.session_state.llm_prompts)}
-        {f"Consider increasing the max_new_tokens parameter !" if num_images > 4 else ""}""",
+        Num Images Requested: {st.session_state.num_images}, Prompts Generated: {len(st.session_state.llm_prompts)}
+        {f"Consider increasing the max_new_tokens parameter !" if st.session_state.num_images > 4 else ""}""",
         icon="⚠️",
     )
 else:
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-1.png b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-1.png
index abf041b7b6..ebcf9bb175 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-1.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-1.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-2.png b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-2.png
index b8467c8218..239a23944d 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-2.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-2.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-3.png b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-3.png
index 08c1c14246..aed9bd57cf 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-3.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/client-app-screen-3.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/multi-image-gen-app.gif b/examples/usecases/llm_diffusion_serving_app/docker/img/multi-image-gen-app.gif
index a0a32802af..935cd802be 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/multi-image-gen-app.gif and b/examples/usecases/llm_diffusion_serving_app/docker/img/multi-image-gen-app.gif differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-1.png b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-1.png
index cc0d3c1588..450cffd84b 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-1.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-1.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-2.png b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-2.png
index 68d3139da9..a2f99ce0a7 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-2.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-2.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-3.png b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-3.png
index 309096c864..20c7a43874 100644
Binary files a/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-3.png and b/examples/usecases/llm_diffusion_serving_app/docker/img/server-app-screen-3.png differ
diff --git a/examples/usecases/llm_diffusion_serving_app/docker/llm/llm_handler.py b/examples/usecases/llm_diffusion_serving_app/docker/llm/llm_handler.py
index c0033101a9..d97e99f0b2 100644
--- a/examples/usecases/llm_diffusion_serving_app/docker/llm/llm_handler.py
+++ b/examples/usecases/llm_diffusion_serving_app/docker/llm/llm_handler.py
@@ -27,6 +27,7 @@ def __init__(self):
         self.user_prompt = []
         self.prompt_template = ""
 
+    @timed
     def initialize(self, ctx):
         self.context = ctx
         self.manifest = ctx.manifest
@@ -48,7 +49,7 @@ def initialize(self, ctx):
         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
         self.model = AutoModelForCausalLM.from_pretrained(model_dir)
 
-        # Get backend for model-confil.yaml. Defaults to "openvino"
+        # Get backend from model-config.yaml. Defaults to "openvino"
         compile_options = {}
         pt2_config = ctx.model_yaml_config.get("pt2", {})
         compile_options = {
@@ -115,21 +116,22 @@ def inference(self, input_data):
 
         return generated_text
 
+    @timed
     def postprocess(self, generated_text):
-        logger.info(f"LLM Generated Output: {generated_text}")
-        # Initialize with user prompt
+        # Remove input prompt from generated_text
+        generated_text = generated_text.replace(self.prompt_template, "", 1)
+        # Clean up LLM output
+        generated_text = generated_text.replace("\n", " ").replace("  ", " ").strip()
+
+        logger.info(f"LLM Generated Output without input prompt: {generated_text}")
         prompt_list = [self.user_prompt]
         try:
-            logger.info("Parsing LLM Generated Output to extract prompts within []...")
-            response_match = re.search(r"\[(.*?)\]", generated_text)
-            # Extract the result if match is found
-            if response_match:
-                # Split the extracted string by semicolon and strip any leading/trailing spaces
-                response_list = response_match.group(1)
-                extracted_prompts = [item.strip() for item in response_list.split(";")]
-                prompt_list.extend(extracted_prompts)
-            else:
-                logger.warning("No match found in the generated output text !!!")
+            # Use regular expressions to find strings within square brackets
+            pattern = re.compile(r"\[.*?\]")
+            matches = pattern.findall(generated_text)
+            # Clean up the matches and remove square brackets
+            extracted_prompts = [match.strip("[]").strip() for match in matches]
+            prompt_list.extend(extracted_prompts)
         except Exception as e:
             logger.error(f"An error occurred while parsing the generated text: {e}")
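
For reference, a minimal standalone sketch of the bracket-extraction parsing that the updated postprocess() relies on; the helper name extract_prompts and the sample text are illustrative only, not part of the handler:

import re


def extract_prompts(generated_text):
    # Normalize whitespace the same way the handler does before parsing
    cleaned = generated_text.replace("\n", " ").replace("  ", " ").strip()
    # Grab every non-greedy [...] block, then drop the surrounding brackets
    matches = re.compile(r"\[.*?\]").findall(cleaned)
    return [match.strip("[]").strip() for match in matches]


# Example: two bracketed prompts; any text outside brackets is ignored
print(extract_prompts("intro [A red fox in snow.]\n[A red fox at dusk.] end"))
# -> ['A red fox in snow.', 'A red fox at dusk.']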