
Commit

Merge pull request #6 from rishiraj/generate
Generate
rishiraj authored Nov 6, 2024
2 parents 79711ed + a8a652c commit 7ea6378
Showing 4 changed files with 115 additions and 3 deletions.
32 changes: 31 additions & 1 deletion README.md
@@ -7,14 +7,15 @@
<a href="https://colab.research.google.com/drive/1BIi46kmPQLotG1w9ofTBptlhmnKiKugZ?usp=sharing"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"></a>
</p>

**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **10x real-world speedup** in file downloads and uploads compared to traditional synchronous methods.
**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **10x real-world speedup** in file downloads and uploads compared to traditional synchronous methods, and it enables scalable, parallelized LLM interactions with providers like OpenAI and Google.

## Features 🚀

- **Asynchronous I/O**: Non-blocking network and file operations using `asyncio`, `aiohttp`, and `aiofiles`, boosting throughput for I/O-bound tasks.
- **Concurrent Transfers**: Uses `asyncio.Semaphore` to limit simultaneous tasks, optimizing performance by managing system resources effectively.
- **Fault Tolerance**: Retries failed tasks with exponentially increasing wait times, adding random jitter to prevent network congestion.
- **Chunked Processing**: Files are split into configurable chunks for parallel processing, significantly accelerating uploads/downloads (a minimal sketch of this pattern follows this list).
- **Parallelized LLM Generation**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism.
- **Compatibility**: Supports environments like Jupyter through `nest_asyncio`, enabling reusable `asyncio` event loops for both batch and interactive use.
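
Taken together, these ideas boil down to a fairly standard `asyncio` pattern: limit in-flight requests with a semaphore, split transfers into byte ranges, and retry failures with exponentially growing, jittered delays. The sketch below is a minimal illustration of that pattern, not FireRequests' actual implementation; the function names, chunk size, and retry settings are placeholders.

```python
import asyncio
import random

import aiohttp


async def fetch_chunk(session, semaphore, url, start, end, max_retries=5):
    # The semaphore caps how many chunk requests are in flight at once.
    async with semaphore:
        for attempt in range(max_retries):
            try:
                headers = {"Range": f"bytes={start}-{end}"}
                async with session.get(url, headers=headers) as response:
                    response.raise_for_status()
                    return await response.read()
            except aiohttp.ClientError:
                # Exponential backoff with random jitter before the next retry.
                await asyncio.sleep(2 ** attempt + random.uniform(0, 1))
        raise RuntimeError(f"Chunk {start}-{end} failed after {max_retries} retries")


async def download(url, size, chunk_size=2 * 1024 * 1024, max_concurrent=10):
    # Split the file into byte ranges and fetch them concurrently.
    semaphore = asyncio.Semaphore(max_concurrent)
    async with aiohttp.ClientSession() as session:
        tasks = [
            fetch_chunk(session, semaphore, url, start, min(start + chunk_size, size) - 1)
            for start in range(0, size, chunk_size)
        ]
        chunks = await asyncio.gather(*tasks)
    return b"".join(chunks)
```

FireRequests applies this kind of pattern to both downloads and uploads.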

## Installation 📦
@@ -120,6 +121,35 @@ fr = FireRequests()
fr.compare(url)
```

### Generating Text with LLMs

FireRequests also supports generating responses from LLMs, such as OpenAI's and Google's generative models, in parallel batches.

```python
from firerequests import FireRequests

# Initialize FireRequests
fr = FireRequests()

# Set parameters
provider = "openai"
model = "gpt-4o-mini"
system_prompt = "Provide concise answers."
user_prompts = ["What is AI?", "Explain quantum computing.", "What is Bitcoin?", "Explain neural networks."]
parallel_requests = 2

# Generate responses
responses = fr.generate(
    provider=provider,
    model=model,
    system_prompt=system_prompt,
    user_prompts=user_prompts,
    parallel_requests=parallel_requests
)

print(responses)
```
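
`generate` returns one response string per prompt, in the same order as `user_prompts`. The `"openai"` provider reads the `OPENAI_API_KEY` environment variable and the `"google"` provider reads `GEMINI_API_KEY`, so set the appropriate key before calling it.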

## License 📄

This project is licensed under the Apache License 2.0 - see the [LICENSE](https://github.com/rishiraj/firerequests/blob/main/LICENSE) file for details.
81 changes: 81 additions & 0 deletions firerequests/main.py
@@ -287,5 +287,86 @@ def compare(self, url: str, filename: Optional[str] = None):
        except Exception as e:
            print(f"Error in compare: {e}")

    def call_openai_sync(self, model: str, system_prompt: str, user_prompt: str) -> str:
        # Blocking call to the OpenAI Chat Completions API.
        from openai import OpenAI
        client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]
        )
        return completion.choices[0].message.content

    async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> str:
        # Run the blocking OpenAI call in a worker thread so it can be awaited.
        return await asyncio.to_thread(self.call_openai_sync, model, system_prompt, user_prompt)

    def call_google_sync(self, model: str, system_prompt: str, user_prompt: str) -> str:
        # Blocking call to the Google Generative AI API.
        import google.generativeai as genai
        genai.configure(api_key=os.environ["GEMINI_API_KEY"])

        generation_config = {
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }

        model_instance = genai.GenerativeModel(
            model_name=model,
            generation_config=generation_config,
            system_instruction=system_prompt,
        )

        chat_session = model_instance.start_chat(history=[])
        response = chat_session.send_message(user_prompt)
        return response.text

    async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> str:
        # Run the blocking Google call in a worker thread so it can be awaited.
        return await asyncio.to_thread(self.call_google_sync, model, system_prompt, user_prompt)

    async def generate_batch(
        self, provider: str, model: str, system_prompt: str, user_prompts: List[str]
    ) -> List[str]:
        # Create one request coroutine per prompt and await them concurrently.
        tasks = []
        for user_prompt in user_prompts:
            if provider.lower() == "openai":
                tasks.append(self.call_openai(model, system_prompt, user_prompt))
            elif provider.lower() == "google":
                tasks.append(self.call_google(model, system_prompt, user_prompt))
            else:
                raise ValueError("Unsupported provider. Choose either 'openai' or 'google'.")

        responses = await asyncio.gather(*tasks)
        return responses

    def generate(
        self, provider: str, model: str, system_prompt: str, user_prompts: List[str], parallel_requests: int = 10
    ) -> List[str]:
        """
        Generates responses for the given list of user prompts in parallel batches.

        Args:
            provider (str): The API provider to use, either "openai" or "google".
            model (str): The model to use for generating responses.
            system_prompt (str): The system message prompt to include in each request.
            user_prompts (List[str]): List of user messages for generation.
            parallel_requests (int): Number of parallel requests to make.

        Returns:
            List[str]: List of generated responses corresponding to each user prompt.
        """
        async def generate_all():
            # Process prompts in batches of `parallel_requests`, preserving input order.
            all_responses = []
            for i in range(0, len(user_prompts), parallel_requests):
                batch_prompts = user_prompts[i:i + parallel_requests]
                batch_responses = await self.generate_batch(provider, model, system_prompt, batch_prompts)
                all_responses.extend(batch_responses)
            return all_responses

        return self.loop.run_until_complete(generate_all())


def main():
    fire.Fire(FireRequests)
3 changes: 2 additions & 1 deletion requirements.txt
@@ -3,5 +3,6 @@ aiofiles
requests
nest_asyncio
tqdm
httpx
fire
google-generativeai
openai
2 changes: 1 addition & 1 deletion setup.py
@@ -46,7 +46,7 @@
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
],
python_requires=">=3.9",
python_requires=">=3.8",
entry_points={
"console_scripts": [
"fr=firerequests.main:main",
