From f0c5dbed014ca177a9c0fe06f2e8dcbb50ce5e55 Mon Sep 17 00:00:00 2001 From: William Kennedy <bill@ardanlabs.com> Date: Thu, 6 Jun 2024 13:33:37 -0400 Subject: [PATCH] saving work --- fern/docs/pages/quick_start.mdx | 290 ++++++++++++++++++ fern/docs/pages/usingllms/basic_prompting.mdx | 279 +++++++++++++++++ 2 files changed, 569 insertions(+) create mode 100644 fern/docs/pages/quick_start.mdx create mode 100644 fern/docs/pages/usingllms/basic_prompting.mdx diff --git a/fern/docs/pages/quick_start.mdx b/fern/docs/pages/quick_start.mdx new file mode 100644 index 0000000..eb3d38d --- /dev/null +++ b/fern/docs/pages/quick_start.mdx @@ -0,0 +1,290 @@ +--- +title: Quick Start +description: Reliable, future proof AI predictions +--- + +Technical teams need to figure out how to integrate the latest Large Language +Models (LLMs), but: + +- You can’t build robust systems with inconsistent, unvalidated outputs; and +- LLM integrations scare corporate lawyers, finance departments, and security +professionals due to hallucinations, cost, lack of compliance (e.g., HIPAA), +leaked IP/PII, and “injection” vulnerabilities. + +Some companies are moving forward anyway by investing tons of engineering time/money +in their own wrappers around LLMs and expensive hosting with OpenAI/Azure. Others +are ignoring these issues and pressing forward with fragile and risky LLM integrations. + +At Prediction Guard, we think that you should get useful output from compliant +AI systems (without crazy implementation/ hosting costs), so our solution lets you: + +1. **De-risk LLM inputs** to remove PII and prompt injections; +2. **Validate and check LLM outputs** to guard against hallucination, toxicity and +inconsistencies; and +3. **Implement private and compliant LLM systems** (HIPAA and self-hosted) that +give your legal counsel warm fuzzy feeling while still delighting your customers +with AI features. + +Sounds pretty great right? 
Follow the steps below to start leveraging
+trustworthy LLMs:
+
+## 1. Get access to Prediction Guard Enterprise
+
+We host and control the latest LLMs for you in our secure and privacy-conserving
+enterprise platform, so you can focus on your prompts and chains. To access the
+hosted LLMs, contact us [here](https://mailchi.mp/predictionguard/getting-started)
+to get an enterprise access token. You will need this access token to continue.
+
+## 2. Start using one of our LLMs!
+
+Suppose you want to prompt an LLM to answer a user query from a chat application.
+You can set up a message thread, which includes a system prompt (that instructs
+the LLM how to behave in responding) as follows:
+
+```
+[
+    {
+        "role": "system",
+        "content": "You are a helpful assistant. Your model is hosted by Prediction Guard, a leading AI company."
+    },
+    {
+        "role": "user",
+        "content": "Where can I access the LLMs in a safe and secure environment?"
+    }
+]
+```
+
+## 3. Download the SDK for your favorite language
+
+You can then use any of our official SDKs or REST API to prompt one of our LLMs!
+
+<CodeBlocks>
+
+<CodeBlock title="Python">
+```python copy
+import json
+import os
+
+from predictionguard import PredictionGuard
+
+# You can set your Prediction Guard API Key as an env variable,
+# or when creating the client object
+os.environ["PREDICTIONGUARD_API_KEY"] = "<your Prediction Guard API Key>"
+
+client = PredictionGuard(
+    api_key="<your Prediction Guard API Key>"
+)
+
+messages = [
+    {
+        "role": "system",
+        "content": "You are a helpful chatbot that helps people learn."
+    },
+    {
+        "role": "user",
+        "content": "What is a good way to learn to code?"
+    }
+]
+
+result = client.chat.completions.create(
+    model="Hermes-2-Pro-Llama-3-8B",
+    messages=messages,
+    max_tokens=100
+)
+
+print(json.dumps(
+    result,
+    sort_keys=True,
+    indent=4,
+    separators=(',', ': ')
+))
+```
+</CodeBlock>
+
+<CodeBlock title="Go">
+```go copy
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+	"time"
+
+	"github.com/predictionguard/go-client"
+)
+
+func main() {
+	if err := run(); err != nil {
+		log.Fatalln(err)
+	}
+}
+
+func run() error {
+	host := "https://api.predictionguard.com"
+	apiKey := os.Getenv("PGKEY")
+
+	logger := func(ctx context.Context, msg string, v ...any) {
+		s := fmt.Sprintf("msg: %s", msg)
+		for i := 0; i < len(v); i = i + 2 {
+			s = s + fmt.Sprintf(", %s: %v", v[i], v[i+1])
+		}
+		log.Println(s)
+	}
+
+	cln := client.New(logger, host, apiKey)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	input := client.ChatInput{
+		Model: client.Models.NeuralChat7B,
+		Messages: []client.ChatInputMessage{
+			{
+				Role:    client.Roles.System,
+				Content: "You are a helpful assistant. 
Your model is hosted by Prediction Guard, a leading AI company.", + }, + { + Role: client.Roles.User, + Content: "Where can I access the LLMs in a safe and secure environment?", + }, + }, + MaxTokens: 1000, + Temperature: 0.1, + TopP: 0.1, + Options: &client.ChatInputOptions{ + Factuality: true, + Toxicity: true, + PII: client.PIIs.Replace, + PIIReplaceMethod: client.ReplaceMethods.Random, + }, + } + + resp, err := cln.Chat(ctx, input) + if err != nil { + return fmt.Errorf("ERROR: %w", err) + } + + fmt.Println(resp.Choices[0].Message.Content) + + return nil +} +```` +</CodeBlock> + +<CodeBlock title="Rust"> +```rust copy +use std::env; + +use pg_rust_client as pg_client; +use pg_client::{client, chat, models}; + +#[tokio::main] +async fn main() { + let pg_env = client::PgEnvironment::from_env().expect("env keys"); + + let clt = client::Client::new(pg_env).expect("client value"); + + let req = chat::Request::<chat::Message>::new(models::Model::NeuralChat7B) + .add_message( + chat::Roles::System, + "You are a helpful assistant. Your model is hosted by Prediction Guard, a leading AI company.".to_string(), + ) + .add_message( + chat::Roles::User, + "Where can I access the LLMs in a safe and secure environment?".to_string(), + ) + .max_tokens(1000) + .temperature(0.8); + + let result = clt + .generate_chat_completion(&req) + .await + .expect("error from generate chat completion"); + + println!("\nchat completion response:\n\n {:?}", result); +} +``` +</CodeBlock> + +<CodeBlock title="NodeJS"> +```js copy +import * as pg from 'predictionguard'; + +const client = new pg.Client('https://api.predictionguard.com', process.env.PGKEY); + +async function Chat() { + const input = { + model: pg.Models.NeuralChat7B, + messages: [ + { + role: pg.Roles.System, + content: 'You are a helpful assistant. 
Your model is hosted by Prediction Guard, a leading AI company.', + }, + { + role: pg.Roles.User, + content: 'Where can I access the LLMs in a safe and secure environment?', + } + ], + maxTokens: 1000, + temperature: 0.1, + topP: 0.1, + options: { + factuality: true, + toxicity: true, + pii: pg.PIIs.Replace, + piiReplaceMethod: pg.ReplaceMethods.Random, + }, + }; + + var [result, err] = await client.Chat(input); + if (err != null) { + console.log('ERROR:' + err.error); + return; + } + + console.log('RESULT:' + result.createdDate() + ': ' + result.model + ': ' + result.choices[0].message.content); +} + +Chat(); +```` +</CodeBlock> + +<CodeBlock title="cURL"> +```bash copy +curl -il -X POST https://api.predictionguard.com/chat/completions \ + -H "x-api-key: ${PGKEY}" \ + -H "Content-Type: application/json" \ + -d '{ \ + "model": "Neural-Chat-7B", \ + "messages": [ \ + { \ + "role": "system", \ + "content": "You are a helpful assistant. Your model is hosted by Prediction Guard, a leading AI company." \ + }, \ + { \ + "role": "user", \ + "content": "Where can I access the LLMs in a safe and secure environment?" \ + } \ + ], \ + "max_tokens": 1000, \ + "temperature": 1.1, \ + "top_p": 0.1, \ + "output": { \ + "factuality": true, \ + "toxicity": true \ + }, \ + "input": { \ + "pii": "replace", \ + "pii_replace_method": "random" \ + } \ +}' +```` +</CodeBlock> + +</CodeBlocks> + +<Callout type="info" emoji="ℹ️"> + Note, you will need to replace `<your api key>` in the above examples with your actual access token. 
+</Callout> diff --git a/fern/docs/pages/usingllms/basic_prompting.mdx b/fern/docs/pages/usingllms/basic_prompting.mdx new file mode 100644 index 0000000..60360be --- /dev/null +++ b/fern/docs/pages/usingllms/basic_prompting.mdx @@ -0,0 +1,279 @@ +--- +title: Basic Prompting +description: Prediction Guard in action +--- + +(Run this example in Google Colab +[here](https://drive.google.com/file/d/1KVcpd3pcy_8TLq44Q5rRIQJrV1U5yIji/view?usp=sharing)) + +**Prompt and AI Engineering** is the emerging developer task of designing and +optimizing prompts (and associated workflows/ infra) for AI models to achieve +specific goals or outcomes. It involves creating high-quality inputs that can +elicit accurate and relevant responses from AI models. The next several examples +will help get you up to speed on common prompt engineering strategies. + +We will use Python to show an example: + +## Dependencies and Imports + +First, you will need to install Prediction Guard into your Python environment. + +```bash copy +$ pip install predictionguard +``` + +Second, import PredictionGuard, setup your API Key, and create the client. + +```python copy +import os +from predictionguard import PredictionGuard + +# Set your Prediction Guard token as an environmental variable. +os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>" + +client = PredictionGuard() +``` + +## Autocomplete + +Because LLMs are configured/ trained to perform the task of text completion, the +most basic kind of prompt that you might provide is an autocomplete prompt. Regardless +of prompt structure, the model function will compute the probabilities of words, +tokens, or characters that might follow in the sequence of words, tokens, or +characters that you provided in the prompt. + +Depending on the desired outcome, the prompt may be a single sentence, a paragraph, +or even an partial story. 
Additionally, the prompt may be open-ended, providing +only a general topic or theme, or it may be more specific, outlining a particular +scenario or plot. + +```python copy +result = client.completions.create( + model="Neural-Chat-7B", + prompt="Daniel Whitenack, a long forgotten wizard from the Lord of the Rings, entered into Mordor to" +) + +print(result['choices'][0]['text']) +``` + +This prompt should result in an output similar to: + +``` + destroy the One Ring. He was a skilled wizard, but he was not as powerful as Gandalf or Saruman. He was a member of the Istari, but he was not as well known as the other two. + +Daniel Whitenack was born in the land of Gondor, in the city of Minas Tirith. He was a descendant of the great wizard Saruman, but he was not as powerful as his ancestor. He was a +``` + +Other examples include the following (note that we can also complete things like +SQL statements): + +```python copy +result = client.completions.create( + model="Neural-Chat-7B", + prompt="Today I inspected the engine mounting equipment. I found a problem in one of the brackets so" +) + +print(result['choices'][0]['text']) + +result = client.completions.create( + model="Nous-Hermes-Llama2-13B", + prompt="""CREATE TABLE llm_queries(id SERIAL PRIMARY KEY, name TEXT NOT NULL, value REAL); +INSERT INTO llm_queries('Daniel Whitenack', 'autocomplete') +SELECT""" +) + +print(result['choices'][0]['text']) +``` + +## Zero-shot prompts + +Autocomplete is a great place to start, but it is only that: a place to start. +Throughout this workshop we will be putting on our prompt engineering hats to do +some impressive things with generative AI. 
As we continue along that path, there +is a general prompt structure that will pop up over and over again: + +``` + Prompt: ++------------------------------------------------------------+ +| | +| +-------------------------------------------------------+ | +| | ----------------------------------------------------- | | Task Descrip./ +| | --------------------------------------- | | Instructions +| +-------------------------------------------------------+ | +| | +| +-------------------------------------------------------+ | Current Input/ +| | ------------- | | Context +| +-------------------------------------------------------+ | +| | +| +----------------------------------------+ | Output +| | -------------------------- | | Indicator +| +----------------------------------------+ | +| | ++------------------------------------------------------------+ +``` + +One of the easiest ways to leverage the above prompt structure is to describe a +task (e.g., sentiment analysis), provide a single piece of data as context, and +then provide a single output indicator. This is called a **zero shot prompt**. +Here is a zero-shot prompt for performing sentiment analysis: + +```python copy +client.completions.create( + model="Nous-Hermes-Llama2-13B", + prompt="""### Instruction: +Respond with a sentiment label for the text included in the below user input. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment. Respond only with one of these labels and no other text. + +### Input: +This tutorial is spectacular. I love it! So wonderful. + +### Response: +""" +)['choices'][0]['text'].strip().split(' ')[0] +``` + +Which should output `POS`. + +**Note** - We are doing some post-processing on the text output (stripping out +extra whitespace and only getting the first word/label), because the model will +just continue generating text in certain cases. We will return to this later on +in the tutorials. 
+ +**Note** - We are using a very specific prompt format (with the `### Instruction:` +etc. markers). This is the alpaca prompt format that is preferred by the +`Nous-Hermes-Llama2-13B` model. Each model might have a different preferred prompt +format, and you can find out more about that [here](../models). + +Another example of zero-shot prompting is the following for question and answer: + +```python copy +client.completions.create( + model="Nous-Hermes-Llama2-13B", + prompt=prompt +)['choices'][0]['text'].split('.')[0].strip() +``` + +## Few shot prompts + +When your task is slightly more complicated or requires a few more leaps in +reasoning to generate an appropriate response, you can turn to few shot prompting +(aka in context learning). In few shot prompting, a small number of gold standard +demonstrations are integrated into the prompt. These demonstrations serve as +example (context, output) pairs for the model, which serve to tune the probable +output on-the-fly to what we ideally want in the output. + +Although not always necessary (as seen above), few shot prompting generally +produces better results than single shot prompting in terms of consistency and +similarity to your ideal outputs. + +Let's reformat our sentiment prompt to include demonstrations: + +```python copy +prompt = """Classify the sentiment of the text. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment. + +Text: That pilot is adorable. +Sentiment: POS + +Text: This was an awful seat. +Sentiment: NEG + +Text: This pilot was brilliant. +Sentiment: POS + +Text: I saw the aircraft. +Sentiment: NEU + +Text: That food was exceptional. +Sentiment: POS + +Text: That was a private aircraft. +Sentiment: NEU + +Text: This is an unhappy pilot. +Sentiment: NEG + +Text: The staff is rough. +Sentiment: NEG + +Text: This staff is Australian. +Sentiment: NEU + +Text: The flight is boring. 
+Sentiment: """ + +result = client.completions.create( + model="Nous-Hermes-Llama2-13B", + prompt=prompt +) + +print(result['choices'][0]['text'].split('\n')[0].strip()) +``` + +This should output `NEG`. + +Another common example of few shot prompting is chat conversations. Although +Prediction Guard has specific functionality to support chat memory and threads. +You can actually use any non-chat-specific model to generate a chat response. +For example: + +```python copy +prompt = """The following is a conversation with an AI assistant in Hinglish. The assistant is helpful, creative, clever, and very friendly. The AI assistant responds in Hinglish. + +Human: Hello +AI: Hello! + +Human: Movie ke baare mein tumhara initial opinion kya hai? Zyada depth mein gaye bina? Tumne like kiya ki nahin? +AI: Overall, mujhe movie achchi lagi. Usme kuh high-profile actors hain, Tum kya kahte ho? + +Human: Tumhe lagta hai ki ek achche movie mein high-profile actors hone chahiye? Ya phir, ek movie ko achcha kahne ke liye high-profile actors hone chahiye? +AI: Zaroori nahin, par help zaroor milta hai. Ek actor jab tak apna kaam theek se nahin karta, tab tak high-profile nahin ban sakta. Tumhe kya lagta hai? + +Human: muje lgta hai ki yeh kewal personal pasand ke bare mai hai meri pasandida american comedy films Will Ferrol or Seth Rogan ke sath kuch bhi samil slapstick films hai muje yakin hai ki ek film thi jisme Lindsay Lohan ko bhot bada bna diya tha bhale hi usne apne surati saalo mai movies mai acting ki thi +AI: """ + +output = client.completions.create( + model="Nous-Hermes-Llama2-13B", + prompt=prompt +)['choices'][0]['text'].split('\n')[0] + +print(output) +``` + +This will output the Hinglish response similar to: + +``` + Wow, tumne mujhe bahut si baatein batayi hai. Mujhe laga tumhe yeh movie achchi lagi ho. Tumne kaha ki Lindsay Lohan bhot hi achchi acting ki hai. Tumne kaha ki tumhe yeh movie achchi lagi hai. Tumne kaha ki tumhe yeh movie achchi lagi hai. 
+```
+
+If you don't speak Hinglish, you can check out the translation using another prompt:
+
+```python copy
+prompt = """### Instruction:
+Respond with an English translation of the following input Hinglish text.
+
+### Input:
+{hinglish}
+
+### Response:
+""".format(hinglish=output)
+
+client.completions.create(
+    model="Nous-Hermes-Llama2-13B",
+    prompt=prompt
+)['choices'][0]['text'].split('import')[0].strip()
+```
+
+Which will output similar to:
+
+```
+Wow, you've told me a lot of things. I thought you'd like this movie. You said Lindsay Lohan did a great job acting. You said you thought this movie was good. You said you thought this movie was good.
+```
+
+## Using The SDKs
+
+You can also try these examples using the other official SDKs:
+
+[Python](/docs/getting-started/sd-ks#pythonclient),
+[Go](/docs/getting-started/sd-ks#goclient),
+[Rust](/docs/getting-started/sd-ks#rustclient),
+[JS](/docs/getting-started/sd-ks#jsclient),
+[HTTP](/api-reference)