Skip to content
This repository has been archived by the owner on Oct 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #35 from jina-ai/app-qna-pdf
Browse files Browse the repository at this point in the history
feat(app): simple pdf q&a bot
  • Loading branch information
deepankarm authored Apr 24, 2023
2 parents 82db0ea + 4de4865 commit 787d11a
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 3 deletions.
Binary file added .github/images/pdf_qna_demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 20 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<p align="center">
<b>LangChain Apps on Production with Jina 🚀</b>
<h2 align="center">LangChain Apps on Production with Jina 🚀</h2>
</p>

<p align=center>
Expand All @@ -14,11 +14,10 @@

**langchain-serve** helps you deploy your LangChain apps on Jina AI Cloud in just a matter of seconds. You can now benefit from the scalability and serverless architecture of the cloud without sacrificing the ease and convenience of local development.

> Give us a :star: and tell us what more you'd like to see!
## 🧠 Babyagi-as-a-service

> Give us a :star: and tell us what more you'd like to see!
- Deploy `babyagi` on Jina AI Cloud with one command

```bash
Expand All @@ -34,6 +33,24 @@
![Babyagi-as-a-service Playground](.github/images/babyagi-playground.gif)


## 💬 Simple Question Answer Bot on PDFs

- Deploy `pdf_qna` on Jina AI Cloud with one command

```bash
lc-serve deploy pdf_qna
```

- Integrate `pdf_qna` with external services using our REST API. Try out the integration in a Streamlit playground, launched from your CLI with

```bash
lc-serve playground pdf_qna
```

![pdf_qna Playground](.github/images/pdf_qna_demo.gif)

## :muscle: Features

#### 🎉 Custom Apps to production in 4 simple steps

1. Refactor your code to function(s) that should be served with `@serving` decorator.
Expand Down
68 changes: 68 additions & 0 deletions lcserve/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .flow import (
APP_NAME,
BABYAGI_APP_NAME,
PDF_QNA_APP_NAME,
DEFAULT_TIMEOUT,
deploy_app_on_jcloud,
get_app_status_on_jcloud,
Expand Down Expand Up @@ -89,6 +90,25 @@ async def serve_babyagi_on_jcloud(
)


async def serve_pdf_qna_on_jcloud(
    name: str = PDF_QNA_APP_NAME,
    app_id: str = None,
    version: str = 'latest',
    timeout: int = DEFAULT_TIMEOUT,
    platform: str = None,
    verbose: bool = False,
):
    """Deploy the bundled PDF Q&A app by delegating to ``serve_on_jcloud``.

    Every argument is forwarded unchanged; the only value fixed here is the
    module path of the app, ``lcserve.apps.pdf_qna.app``.
    """
    deploy_kwargs = dict(
        module='lcserve.apps.pdf_qna.app',
        name=name,
        app_id=app_id,
        version=version,
        timeout=timeout,
        platform=platform,
        verbose=verbose,
    )
    await serve_on_jcloud(**deploy_kwargs)


@click.group()
@click.version_option(__version__, '-v', '--version', prog_name='lc-serve')
@click.help_option('-h', '--help')
Expand Down Expand Up @@ -242,6 +262,54 @@ async def babyagi(name, requirements, app_id, version, timeout, platform, verbos
)


@deploy.command(help='Deploy pdf qna on JCloud.')
@click.option(
    '--name',
    type=str,
    default=PDF_QNA_APP_NAME,
    help='Name of the app.',
    show_default=True,
)
@click.option(
    '--app-id',
    type=str,
    default=None,
    help='AppID of the deployed agent to be updated.',
    show_default=True,
)
@click.option(
    '--version',
    type=str,
    default='latest',
    help='Version of serving gateway to be used.',
    show_default=False,
)
@click.option(
    '--timeout',
    type=int,
    default=DEFAULT_TIMEOUT,
    help='Total request timeout in seconds.',
    show_default=True,
)
@click.option(
    '--platform',
    type=str,
    default=None,
    help='Platform of Docker image needed for the deployment is built on.',
    show_default=False,
)
@click.option(
    '--verbose',
    is_flag=True,
    help='Verbose mode.',
    show_default=True,
)
@click.help_option('-h', '--help')
@syncify
async def pdf_qna(name, app_id, version, timeout, platform, verbose):
    """CLI entry point for ``lc-serve deploy pdf-qna``.

    Forwards the parsed options to ``serve_pdf_qna_on_jcloud``. ``--verbose``
    is exposed so the flag that ``serve_pdf_qna_on_jcloud`` already accepts
    can be set from the command line, as the sibling ``babyagi`` command
    allows.
    """
    await serve_pdf_qna_on_jcloud(
        name=name,
        app_id=app_id,
        version=version,
        timeout=timeout,
        platform=platform,
        verbose=verbose,
    )


@serve.command(help='List all deployed apps.')
@click.option(
'--phase',
Expand Down
12 changes: 12 additions & 0 deletions lcserve/apps/pdf_qna/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from lcserve import serving
from typing import Union, List

from langchain import OpenAI
from chain import get_qna_chain, load_pdf_content


@serving
def ask(urls: Union[List[str], str], question: str) -> str:
    """Answer ``question`` using the text of the PDF(s) located at ``urls``.

    The PDF text is loaded first, then a Q&A chain backed by ``OpenAI()`` is
    built and run over that text together with the question.
    """
    document_text = load_pdf_content(urls)
    qna_chain = get_qna_chain(OpenAI())
    answer = qna_chain.run(input_document=document_text, question=question)
    return answer
76 changes: 76 additions & 0 deletions lcserve/apps/pdf_qna/chain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from typing import List, Union

from langchain.prompts.prompt import PromptTemplate
from langchain.chains import AnalyzeDocumentChain
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain


def load_pdf_content(urls: Union[List[str], str]) -> str:
    """Load one or more PDFs and return their text as a single string.

    Args:
        urls: A single PDF location or a list of them. Every entry must end
            with ``.pdf``.

    Returns:
        The ``page_content`` of every chunk produced by
        ``PyPDFLoader.load_and_split`` for every PDF, in input order,
        separated by newlines. An empty list yields an empty string.

    Raises:
        ValueError: If any entry does not end with ``.pdf``.
    """
    if isinstance(urls, str):
        urls = [urls]

    all_content = []
    for url in urls:
        if not url.endswith('.pdf'):
            raise ValueError('Only PDFs are supported')
        chunks = PyPDFLoader(url).load_and_split()
        # Separate chunks with a real newline. The previous '/n' literal
        # (forward slash + n) glued the text together with stray "/n" markers.
        all_content.append('\n'.join(chunk.page_content for chunk in chunks))

    return '\n'.join(all_content)


def get_combine_prompt() -> PromptTemplate:
    """Build the few-shot prompt used to combine extracted parts into an answer.

    The template asks the model to state the language of the question and to
    answer in that language, and to admit when the answer is unknown. It
    carries three worked examples followed by the slots for the real input.

    Returns:
        A ``PromptTemplate`` with the input variables ``summaries`` and
        ``question``.
    """
    # Every example uses the same "FINAL ANSWER:" label as the trailing cue;
    # the second example previously read "FINAL_ANSWER:", which made the
    # few-shot format inconsistent.
    combine_prompt_template = """Given the following extracted parts of a long document and a question, determine the language of the question & create a final answer in the language the question was asked in.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
QUESTION: Which state/country's law governs the interpretation of the contract?
=========
Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights.
Content: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\n\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\n\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\n\n11.9 No Third-Party Beneficiaries.
Content: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur,
=========
LANGUAGE: English
FINAL ANSWER: This Agreement is governed by English law.
QUESTION: Modiji ne kaunse junction kaa naam rename kiya?
=========
Content: Friends, in this very July an interesting endeavour has been undertaken, named Azadi Ki Railgadi Aur Railway Station. The objective of this effort is to make people know the role of Indian Railways in the freedom struggle. There are many such railway stations in the country, which are associated with the history of the freedom movement. You too will be surprised to know about these railway stations. Gomoh Junction in Jharkhand is now officially known as Netaji Subhas Chandra Bose Junction Gomoh. Do you know why? Actually at this very station, Netaji Subhash was successful in dodging British officers by boarding the Kalka Mail. All of you must have heard the name of Kakori Railway Station near Lucknow. The names of bravehearts like Ram Prasad Bismil and Ashfaq Ullah Khan are associated with this station. The brave revolutionaries had displayed their might to the British by looting the treasury of the British being carried by train. Whenever you talk to the people of Tamil Nadu, you will get to know about Vanchi Maniyachchi Junction in Thoothukudi district. This station is named after Tamil freedom fighter Vanchinathan ji. This is the same place where Vanchi, 25 years of age then, had punished one British collector for his actions.
=========
LANGUAGE: HINDI, ENGLISH
FINAL ANSWER: Gomoh junction, jo ki Jarkhand mai hai, abhi Netaji Subash Chandra Bose Junction ke naam se jaana jaega.
QUESTION: President ne Michael Jackson ke baare mai kya bola?
=========
Content: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n\nLast year COVID-19 kept us apart. This year we are finally together again. \n\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n\nWith a duty to one another to the American people to the Constitution. \n\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \n\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n\nHe met the Ukrainian people. \n\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland.
Content: And we won’t stop. \n\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \n\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \n\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \n\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n\nOfficer Mora was 27 years old. \n\nOfficer Rivera was 22. \n\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves.
Content: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \n\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \n\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n\nBut I want you to know that we are going to be okay.
=========
LANGUAGE: HINDI, ENGLISH
FINAL ANSWER: President ne Michael Jackson ko mention nahi kiya.
QUESTION: {question}
=========
{summaries}
=========
LANGUAGE:
FINAL ANSWER:"""

    return PromptTemplate(
        template=combine_prompt_template, input_variables=["summaries", "question"]
    )


def get_qna_chain(llm) -> AnalyzeDocumentChain:
    """Wrap a ``map_reduce`` question-answering chain in an ``AnalyzeDocumentChain``.

    The inner chain is created with ``load_qa_chain`` for the given ``llm``,
    using the prompt from ``get_combine_prompt`` as its combine prompt.
    """
    return AnalyzeDocumentChain(
        combine_docs_chain=load_qa_chain(
            llm,
            chain_type="map_reduce",
            combine_prompt=get_combine_prompt(),
        )
    )
3 changes: 3 additions & 0 deletions lcserve/apps/pdf_qna/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
openai
pypdf
tiktoken
1 change: 1 addition & 0 deletions lcserve/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

APP_NAME = 'langchain'
BABYAGI_APP_NAME = 'babyagi'
PDF_QNA_APP_NAME = 'pdfqna'
DEFAULT_TIMEOUT = 120
ServingGatewayConfigFile = 'servinggateway_config.yml'
JCloudConfigFile = 'jcloud_config.yml'
Expand Down
15 changes: 15 additions & 0 deletions lcserve/playground/pdf_qna/commands.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

```bash
curl -X POST 'http://localhost:8080/ask' \
--header 'Content-Type: application/json' \
--data-raw '{
"urls": [
"https://uiic.co.in/sites/default/files/uploads/downloadcenter/Arogya%20Sanjeevani%20Policy%20CIS_2.pdf",
"https://uiic.co.in/sites/default/files/uploads/downloadcenter/Individual%20Health%20Insurance%20Policy%20CIS_0.pdf"
],
"question": "Inme se kaunse scheme mai waiting period sabse best hai??",
"envs": {
"OPENAI_API_KEY": "sk-***"
}
}'
```
89 changes: 89 additions & 0 deletions lcserve/playground/pdf_qna/playground.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import sys
import requests
import json

import streamlit as st
from pydantic import BaseModel


class Response(BaseModel):
    """Body of the reply from the ``/ask`` endpoint, as parsed by this playground.

    All three fields are required strings; parsing a reply that lacks any of
    them raises a validation error, which ``main`` shows via ``st.error``.
    """

    # Rendered by ``main`` as the answer.
    result: str
    # Not read individually by this script; presumably error text reported by
    # the server — confirm against the serving gateway.
    error: str
    # Not read individually by this script; presumably captured server-side
    # output — confirm against the serving gateway.
    stdout: str


# Make the directory containing this script importable by appending it to
# sys.path.
cwd = os.path.dirname(os.path.realpath(__file__))
sys.path.append(cwd)

# Page-level Streamlit settings.
st.set_page_config(
    page_title='Q&A Bot on PDFs',
    page_icon='⚡',
    layout='wide',
    initial_sidebar_state='auto',
)

# Sidebar: the OpenAI key is collected in a password field and later sent to
# the host inside the request's ``envs`` (see ``main``).
st.sidebar.markdown('## OpenAI Token')
openai_token = st.sidebar.text_input(
    'Enter your OpenAI token:', placeholder='sk-...', type='password'
)

# Base URL of the deployed app; ``main`` appends ``/ask`` to it.
host = st.text_input(
    'Enter the lc-serve host to connect to',
    placeholder='https://pdfqna-1bab3f3291.wolf.jina.ai',
)

# Comma-separated PDF URLs, pre-filled with a sample document.
urls = st.text_area(
    'Type your urls (separated by comma)',
    value='https://uiic.co.in/sites/default/files/uploads/downloadcenter/Arogya%20Sanjeevani%20Policy%20CIS_2.pdf',
    placeholder='https://uiic.co.in/sites/default/files/uploads/downloadcenter/Arogya%20Sanjeevani%20Policy%20CIS_2.pdf',
)

# The question to ask about the PDFs, pre-filled with a sample question.
question = st.text_input(
    'Type your question',
    value='Kya iss scheme mai koi waiting period hai?',
    placeholder='Kya iss scheme mai koi waiting period hai?',
)

# ``main`` only acts when this button has been pressed.
submit = st.button('Submit')


def main():
    """Send the question and PDF URLs to the lc-serve host and render the answer.

    Does nothing until the Submit button has been pressed. Each required
    input (OpenAI token, host, question, URLs) is validated first and reported
    with ``st.error`` when missing. Request, parsing and rendering failures
    are also shown with ``st.error``.
    """
    if not submit:
        return

    if not openai_token:
        st.error('Please enter your OpenAI token')
        return

    if not host:
        st.error('Please enter the lc-serve host')
        return

    if not question:
        st.error('Please enter your question')
        return

    # Users type "a.pdf, b.pdf" or break lines after a comma; strip the
    # surrounding whitespace and drop empty items so each entry is a clean
    # URL that still ends with ".pdf" when the server checks it.
    pdf_urls = [item.strip() for item in urls.split(',') if item.strip()]
    if not pdf_urls:
        st.error('Please enter your urls')
        return

    # Tolerate a host pasted with a trailing slash (avoids ".../​/ask").
    endpoint = host.strip().rstrip('/') + '/ask'
    headers = {'Content-Type': 'application/json'}
    data = {
        'urls': pdf_urls,
        'question': question,
        'envs': {'OPENAI_API_KEY': openai_token},
    }
    with st.spinner(text="Asking chain..."):
        # Broad catch is deliberate: this is the UI boundary, and any failure
        # (connection error, malformed reply) should surface as a message.
        try:
            response = requests.post(endpoint, headers=headers, json=data)
            parsed = Response.parse_raw(response.text)
            st.markdown(f'Answer: **{parsed.result.strip()}**')
            with st.expander('Show stdout'):
                st.write(parsed.json())
        except Exception as exc:
            st.error(exc)
            return


if __name__ == '__main__':
    main()

0 comments on commit 787d11a

Please sign in to comment.