From 1b63bb9babb51c6e631dba8524d38d0a30982089 Mon Sep 17 00:00:00 2001 From: Tejas Athalye <142726372+LitZeus@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:51:17 +0530 Subject: [PATCH 1/4] Added TextToTalk - A text to speech project --- Machine_Learning/TextToTalk/LICENSE | 21 ++++++ Machine_Learning/TextToTalk/README.md | 68 +++++++++++++++++++ Machine_Learning/TextToTalk/requirements.txt | 3 + .../TextToTalk/scripts/ExtText.py | 17 +++++ .../TextToTalk/scripts/Pipeline.py | 10 +++ Machine_Learning/TextToTalk/scripts/TTS.py | 17 +++++ Machine_Learning/TextToTalk/scripts/app.py | 24 +++++++ 7 files changed, 160 insertions(+) create mode 100644 Machine_Learning/TextToTalk/LICENSE create mode 100644 Machine_Learning/TextToTalk/README.md create mode 100644 Machine_Learning/TextToTalk/requirements.txt create mode 100644 Machine_Learning/TextToTalk/scripts/ExtText.py create mode 100644 Machine_Learning/TextToTalk/scripts/Pipeline.py create mode 100644 Machine_Learning/TextToTalk/scripts/TTS.py create mode 100644 Machine_Learning/TextToTalk/scripts/app.py diff --git a/Machine_Learning/TextToTalk/LICENSE b/Machine_Learning/TextToTalk/LICENSE new file mode 100644 index 0000000000..b1eddb0a45 --- /dev/null +++ b/Machine_Learning/TextToTalk/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Tejas Athalye + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Machine_Learning/TextToTalk/README.md b/Machine_Learning/TextToTalk/README.md new file mode 100644 index 0000000000..600763b262 --- /dev/null +++ b/Machine_Learning/TextToTalk/README.md @@ -0,0 +1,68 @@ +# TextToTalk: A PDF to MP3 Converter Web App + +This is a Streamlit-based web application that allows users to upload a PDF file and convert its text content to an MP3 audio file. The application uses `pdfminer.six` for extracting text from the PDF and `gtts` (Google Text-to-Speech) for converting text to speech. + +## Features + +- Upload a PDF file using a drag-and-drop interface or a file upload button. +- Select the desired language for text-to-speech conversion. +- Convert the text content of the PDF to an MP3 audio file. +- Play the generated audio file directly on the web app. +- Download the generated audio file. + +## Requirements + +- `Python 3.6 or higher` +- `pdfminer.six` +- `gtts` +- `streamlit` + +## Installation + +1. Clone the repository: + ```sh + git clone https://github.com/yourusername/TextToTalk.git + cd TextToTalk + ``` + +2. Create a virtual environment: + ```sh + python -m venv venv + source venv/bin/activate # On Windows, use `venv\Scripts\activate` + ``` + +3. Install the required packages: + ```sh + pip install -r requirements.txt + ``` + +## Usage + + 1. Run the Streamlit app: + ```sh + cd scripts + streamlit run app.py + ``` + + 2. Open your web browser and go to `http://localhost:8501` to access the app. + + 3. 
Upload a PDF file using the provided upload button or drag and drop the file into the designated area. + + 4. Select the desired language for the text-to-speech conversion. + + 5. The app will extract the text from the uploaded PDF, convert it to speech, and display an audio player for you to listen to the generated MP3 file. + + 6. You can also download the generated MP3 file using the download button. + +## File Structure +- `ExtText.py`: Contains the function for extracting text from the uploaded PDF file using pdfminer.six. +- `TTS.py`: Contains the function for converting text to speech using gtts. +- `Pipeline.py`: Integrates the text extraction and text-to-speech conversion functions into a single pipeline. +- `app.py`: The main Streamlit app that provides the web interface for the PDF to MP3 conversion. + +## Contributing +Contributions are welcome! If you find any bugs or have suggestions for improvements, please open an issue or create a pull request. + +## License +This project is licensed under the MIT License. See the `LICENSE` file for more details. 
+
diff --git a/Machine_Learning/TextToTalk/requirements.txt b/Machine_Learning/TextToTalk/requirements.txt
new file mode 100644
index 0000000000..f2dcccd54e
--- /dev/null
+++ b/Machine_Learning/TextToTalk/requirements.txt
@@ -0,0 +1,3 @@
+pdfminer.six
+gtts
+streamlit
\ No newline at end of file
diff --git a/Machine_Learning/TextToTalk/scripts/ExtText.py b/Machine_Learning/TextToTalk/scripts/ExtText.py
new file mode 100644
index 0000000000..e22aa3e194
--- /dev/null
+++ b/Machine_Learning/TextToTalk/scripts/ExtText.py
@@ -0,0 +1,17 @@
+from io import BytesIO
+from pdfminer.high_level import extract_text_to_fp
+from pdfminer.layout import LAParams
+
+def extract_text(uploaded_file):
+    """Extracts text from the uploaded PDF file using pdfminer.six."""
+    if not uploaded_file:
+        return None
+
+    try:
+        output_string = BytesIO()
+        laparams = LAParams()
+        extract_text_to_fp(uploaded_file, output_string, laparams=laparams)
+        return output_string.getvalue().decode('utf-8')
+    except Exception as e:
+        print(f"Error extracting text: {e}")
+        return None
diff --git a/Machine_Learning/TextToTalk/scripts/Pipeline.py b/Machine_Learning/TextToTalk/scripts/Pipeline.py
new file mode 100644
index 0000000000..5f4f899dba
--- /dev/null
+++ b/Machine_Learning/TextToTalk/scripts/Pipeline.py
@@ -0,0 +1,10 @@
+from ExtText import extract_text
+from TTS import text_to_speech
+
+def pipeline(uploaded_file, lang='en'):
+    """Extracts text and converts it to speech in a pipeline."""
+    extracted_text = extract_text(uploaded_file)
+    if extracted_text:
+        return text_to_speech(extracted_text, lang=lang)
+    else:
+        return None
diff --git a/Machine_Learning/TextToTalk/scripts/TTS.py b/Machine_Learning/TextToTalk/scripts/TTS.py
new file mode 100644
index 0000000000..66b59ca578
--- /dev/null
+++ b/Machine_Learning/TextToTalk/scripts/TTS.py
@@ -0,0 +1,17 @@
+from gtts import gTTS
+from io import BytesIO
+
+def text_to_speech(text, lang='en'):
+    """Converts the extracted text to audio (MP3) using gTTS."""
+    if not text:
+        return None
+
+    try:
+        tts = gTTS(text=text, lang=lang)
+        audio_file = BytesIO()
+        tts.write_to_fp(audio_file)
+        audio_file.seek(0)
+        return audio_file
+    except Exception as e:
+        print(f"Error generating audio: {e}")
+        return None
diff --git a/Machine_Learning/TextToTalk/scripts/app.py b/Machine_Learning/TextToTalk/scripts/app.py
new file mode 100644
index 0000000000..8fbb59a336
--- /dev/null
+++ b/Machine_Learning/TextToTalk/scripts/app.py
@@ -0,0 +1,42 @@
+import streamlit as st
+from Pipeline import pipeline
+
+# Display name -> gTTS language code offered in the language picker.
+LANGUAGES = {
+    "English": "en",
+    "French": "fr",
+    "German": "de",
+    "Hindi": "hi",
+    "Spanish": "es",
+}
+
+def main():
+    """Streamlit app for PDF to MP3 conversion.
+
+    Lets the user upload a PDF and pick a speech language, then runs the
+    extraction/text-to-speech pipeline and offers the resulting MP3 for
+    playback and download.
+    """
+    st.title("PDF to MP3 Converter")
+
+    uploaded_file = st.file_uploader("Choose a PDF file to convert:", type=['pdf'])
+
+    # lang_code must be assigned before the pipeline call below; leaving it
+    # undefined raises NameError as soon as a file is uploaded.
+    language = st.selectbox("Select the language for speech:", list(LANGUAGES))
+    lang_code = LANGUAGES[language]
+
+    if uploaded_file is not None:
+        audio_file = pipeline(uploaded_file, lang=lang_code)
+        if audio_file:
+            st.audio(audio_file, format='audio/mp3')
+            st.download_button(
+                label="Download Audio",
+                data=audio_file,
+                file_name="output.mp3",
+                mime="audio/mp3"
+            )
+        else:
+            st.error("Failed to convert PDF to audio.")
+
+if __name__ == '__main__':
+    main()
From f97b1124aa66129e8d1fbf872ffae85d378b069d Mon Sep 17 00:00:00 2001
From: Tejas Athalye <142726372+LitZeus@users.noreply.github.com>
Date: Tue, 8 Oct 2024 22:09:36 +0530
Subject: [PATCH 2/4] Update README.md

---
 Machine_Learning/TextToTalk/README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Machine_Learning/TextToTalk/README.md b/Machine_Learning/TextToTalk/README.md
index 600763b262..eb50cdebf0 100644
--- a/Machine_Learning/TextToTalk/README.md
+++ b/Machine_Learning/TextToTalk/README.md
@@ -19,9 +19,11 @@ This is a Streamlit-based web application that allows users to upload a PDF file
 
 ## Installation
 
-1. Clone the repository:
+#### First Fork the repository and then follow the steps given below!
+
+1. 
Clone the repository to your local machine: ```sh - git clone https://github.com/yourusername/TextToTalk.git + git clone https://github.com/<your-username>/PyVerse.git cd TextToTalk ``` From 5a2aa4feb00f6910718c6270ca59847bf3deb13b Mon Sep 17 00:00:00 2001 From: Tejas Athalye <142726372+LitZeus@users.noreply.github.com> Date: Tue, 8 Oct 2024 22:10:29 +0530 Subject: [PATCH 3/4] Update README.md From bed6cd8b09bcbf2ddc8f3907503ca2976f704e54 Mon Sep 17 00:00:00 2001 From: Tejas Athalye <142726372+LitZeus@users.noreply.github.com> Date: Tue, 8 Oct 2024 22:13:28 +0530 Subject: [PATCH 4/4] Update README.md --- Machine_Learning/TextToTalk/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Machine_Learning/TextToTalk/README.md b/Machine_Learning/TextToTalk/README.md index eb50cdebf0..852b1c5f18 100644 --- a/Machine_Learning/TextToTalk/README.md +++ b/Machine_Learning/TextToTalk/README.md @@ -24,7 +24,7 @@ This is a Streamlit-based web application that allows users to upload a PDF file 1. Clone the repository to your local machine: ```sh git clone https://github.com/<your-username>/PyVerse.git - cd TextToTalk + cd PyVerse/Machine_Learning/TextToTalk ``` 2. Create a virtual environment: