From 5fb839f7e078ce94222553d35bf51f2cd3deeaf3 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Sat, 22 Apr 2023 09:45:59 -0400 Subject: [PATCH] Begin building out AIFN library --- .../data_cleaning.md => ai_functions/data.md} | 4 ++-- docs/guide/ai_functions/strings.md | 16 ++++++++++++++++ docs/guide/concepts/ai_functions.md | 2 +- mkdocs.yml | 4 +++- src/marvin/ai_functions/__init__.py | 2 ++ src/marvin/ai_functions/data.py | 9 +-------- src/marvin/ai_functions/strings.py | 8 ++++++++ tests/llm_tests/ai_functions/test_data.py | 9 --------- tests/llm_tests/ai_functions/test_strings.py | 10 ++++++++++ 9 files changed, 43 insertions(+), 21 deletions(-) rename docs/guide/{use_cases/data_cleaning.md => ai_functions/data.md} (99%) create mode 100644 docs/guide/ai_functions/strings.md create mode 100644 src/marvin/ai_functions/strings.py create mode 100644 tests/llm_tests/ai_functions/test_strings.py diff --git a/docs/guide/use_cases/data_cleaning.md b/docs/guide/ai_functions/data.md similarity index 99% rename from docs/guide/use_cases/data_cleaning.md rename to docs/guide/ai_functions/data.md index e6cc5208e..7f2936043 100644 --- a/docs/guide/use_cases/data_cleaning.md +++ b/docs/guide/ai_functions/data.md @@ -1,4 +1,4 @@ -# Data cleaning +# AI Functions for data > "Data cleaning is 80% of data science." > @@ -172,7 +172,7 @@ The Python string method [`.title()`](https://docs.python.org/3/library/stdtypes ```python -from marvin.ai_functions.data import title_case +from marvin.ai_functions.strings import title_case title_case("the european went over to canada, eh?") # The European Went Over to Canada, Eh? diff --git a/docs/guide/ai_functions/strings.md b/docs/guide/ai_functions/strings.md new file mode 100644 index 000000000..23ca57338 --- /dev/null +++ b/docs/guide/ai_functions/strings.md @@ -0,0 +1,16 @@ +# AI Functions for strings + +## Actual title case + +Return a title case string that you would want to use in a title. 
+ +The Python string method [`.title()`](https://docs.python.org/3/library/stdtypes.html#str.title) makes the first letter of every word uppercase and the remaining letters lowercase. This result isn't what you want to use for the title of a piece of writing, generally. `title_case` takes a string and returns a string you can use in a title. + +```python + +from marvin.ai_functions.strings import title_case + +title_case("the european went over to canada, eh?") +# The European Went Over to Canada, Eh? +``` + diff --git a/docs/guide/concepts/ai_functions.md b/docs/guide/concepts/ai_functions.md index ac07be124..649f9debb 100644 --- a/docs/guide/concepts/ai_functions.md +++ b/docs/guide/concepts/ai_functions.md @@ -220,7 +220,7 @@ fix_sentence("he go to mcdonald and buy burg") # "He goes to McDonald's and buys ``` ### Cleaning data -Cleaning data is such an important use case that Marvin has an entire module dedicated to it, including AI functions for categorization, standardization, entity extraction, and context-aware fills for missing values. See [the data cleaning documentation](/guide/use_cases/data_cleaning) for more information. +Cleaning data is such an important use case that Marvin has an entire module dedicated to it, including AI functions for categorization, standardization, entity extraction, and context-aware fills for missing values. See [the data cleaning documentation](/guide/ai_functions/data) for more information. ### Unit testing LLMs One of the difficulties of building an AI library is unit testing it! While it's possible to make LLM outputs deterministic by setting the temperature to zero, a small change to a prompt could result in very different outputs. Therefore, we want a way to assert that an LLM's output is "approximately equal" to an expected value. 
diff --git a/mkdocs.yml b/mkdocs.yml index e7b944c5a..fb479374f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -22,8 +22,10 @@ nav: - Loaders: guide/concepts/loaders_and_documents.md - Infrastructure: guide/concepts/infra.md - Plugins: guide/concepts/plugins.md + - AI Functions: + - Data: guide/ai_functions/data.md + - Strings: guide/ai_functions/strings.md - Use Cases: - - Data cleaning: guide/use_cases/data_cleaning.md - Enforcing LLM output formats: guide/use_cases/enforcing_format.md - Slackbot: guide/use_cases/slackbot.md - Development: diff --git a/src/marvin/ai_functions/__init__.py b/src/marvin/ai_functions/__init__.py index 698ae3264..f5e37001a 100644 --- a/src/marvin/ai_functions/__init__.py +++ b/src/marvin/ai_functions/__init__.py @@ -1 +1,3 @@ from .base import ai_fn + +from . import data, strings diff --git a/src/marvin/ai_functions/data.py b/src/marvin/ai_functions/data.py index ac41033fb..3313ea4ff 100644 --- a/src/marvin/ai_functions/data.py +++ b/src/marvin/ai_functions/data.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING -from marvin import ai_fn +from marvin.ai_functions import ai_fn if TYPE_CHECKING: from pandas import DataFrame @@ -58,10 +58,3 @@ def standardize(data: list[str], format: str) -> list[str]: Given a list of data, standardize the data to the given format. For example, the format could be "phone number", "sentence case", "ISO date", etc. """ - - -@ai_fn -def title_case(input: str) -> str: - """ - Given a string {input} change the case to make it APA style guide title case. - """ diff --git a/src/marvin/ai_functions/strings.py b/src/marvin/ai_functions/strings.py new file mode 100644 index 000000000..94827815f --- /dev/null +++ b/src/marvin/ai_functions/strings.py @@ -0,0 +1,8 @@ +from marvin.ai_functions import ai_fn + + +@ai_fn +def title_case(input: str) -> str: + """ + Given a string {input} change the case to make it APA style guide title case. 
+ """ diff --git a/tests/llm_tests/ai_functions/test_data.py b/tests/llm_tests/ai_functions/test_data.py index 3f5022bbb..372897a62 100644 --- a/tests/llm_tests/ai_functions/test_data.py +++ b/tests/llm_tests/ai_functions/test_data.py @@ -135,12 +135,3 @@ def test_standardize_case(self): format="Proper case", ) assert result == ["Brown Cow", "Small Dog", "Big Cat", "Medium-Sized Bird"] - - -class TestTitleCase: - def test_short_prepositions_not_capitalized(self): - result = data_fns.title_case( - input="let me go to the store", - ) - - assert result == "Let Me Go to the store" diff --git a/tests/llm_tests/ai_functions/test_strings.py b/tests/llm_tests/ai_functions/test_strings.py new file mode 100644 index 000000000..2d6652b8a --- /dev/null +++ b/tests/llm_tests/ai_functions/test_strings.py @@ -0,0 +1,10 @@ +from marvin.ai_functions import strings as string_fns + + +class TestTitleCase: + def test_short_prepositions_not_capitalized(self): + result = string_fns.title_case( + input="let me go to the store", + ) + + assert result == "Let Me Go to the store"