From 80c2d77d18a6b8083afe7934e7b12ec6a020b121 Mon Sep 17 00:00:00 2001 From: Stefan Suwelack Date: Thu, 28 Sep 2023 11:29:38 +0200 Subject: [PATCH] updated use cases --- README.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 1210c15f..a6650607 100644 --- a/README.md +++ b/README.md @@ -32,56 +32,55 @@ spotlight.show(df, dtype={"image": spotlight.Image, "embedding": spotlight.Embed ## 🚀 Start with a use case -Machine learning and engineering teams use Spotlight to understand and communicate on complex unstructured data problems. - -Here are some interactive examples on publicly available datasets: +Machine learning and engineering teams use Spotlight to understand and communicate on complex unstructured data problems. Here are some examples on publicly available datasets along with code snippets (👨‍💻), interactive demos (🕹️) and blog articles (📝): + - + + - - + + - - - - + - + - - + + - + + + @@ -116,7 +115,7 @@ import pandas as pd from renumics import spotlight df = pd.read_csv("https://renumics.com/data/mnist/mnist-tiny.csv") -spotlight.show(df, dtype={"image": spotlight.Image, "embedding": spotlight.Embedding}) +spotlight.show(df, dtype={"image": spotlight.Image}) ``` `pd.read_csv` loads a sample csv file as a pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html). 
@@ -129,11 +128,12 @@ spotlight.show(df, dtype={"image": spotlight.Image, "embedding": spotlight.Embed import datasets from renumics import spotlight -dataset = datasets.load_dataset("renumics/dcase23-task2-enriched", "dev", split="all", streaming=False) -df = dataset.to_pandas() -simple_layout = datasets.load_dataset_builder("renumics/dcase23-task2-enriched", "dev").config.get_layout(config="simple") -spotlight.show(df, dtype={'path': spotlight.Audio, "embeddings_ast-finetuned-audioset-10-10-0.4593": spotlight.Embedding}, layout=simple_layout) +ds = datasets.load_dataset('renumics/emodb-enriched', split='all') +layout= spotlight.layouts.debug_classification(label='gender', prediction='m1_gender_prediction', embedding='m1_embedding', features=['age', 'emotion']) +df = ds.to_pandas() +spotlight.show(df, layout=layout) ``` +Here, the data types are discovered automatically from the dataset and we use a pre-defined layout for model debugging. Custom layouts can be built programmatically or via the UI. > The `datasets[audio]` package can be installed via pip.
ModalityTask Description Link
đŸ–ŧī¸ ImageđŸ–ŧī¸ Image[Classification] Find Issues in Any Image Classification Dataset👨‍đŸ’ģ 📝
👨‍💻 📝 🕹️
Find data issues in the CIFAR-100 image dataset 🕹️
Explore data slices in the CIFAR-100 image dataset🕹️
Fine-tuning image classification models from Bing image search 👨‍💻📝
🔊 Audio[Classification] Find Issues in Any Audio Classification Dataset👨‍💻 📝👨‍💻 📝🕹️
Find data issues in the Common Voice audio dataset🕹️Debug pre-trained gender detection models on the emodb dataset📝 🕹️
Compare gender detection models on the emodb dataset🕹️📝 🕹️
📝 Text[Classification] Find Issues in Any Text Classification Dataset 👨‍💻 📝
📈đŸ–ŧī¸ Mixed[EDA] Explore results from the Formula1 Montreal 2023 GP 🕹ī¸