diff --git a/renumics/spotlight/app_config.py b/renumics/spotlight/app_config.py
index 0e62bf06..15bcf217 100644
--- a/renumics/spotlight/app_config.py
+++ b/renumics/spotlight/app_config.py
@@ -4,9 +4,8 @@
 
 from dataclasses import dataclass
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Any
 
-import pandas as pd
 
 from renumics.spotlight.layout.nodes import Layout
 from renumics.spotlight.analysis.typing import DataIssue
@@ -20,7 +19,7 @@ class AppConfig:
     """
 
     # dataset
-    dataset: Optional[Union[Path, pd.DataFrame]] = None
+    dataset: Any = None
     dtypes: Optional[DTypeMap] = None
     project_root: Optional[Path] = None
 
diff --git a/renumics/spotlight/data_source/data_source.py b/renumics/spotlight/data_source/data_source.py
index 50536a74..a4971312 100644
--- a/renumics/spotlight/data_source/data_source.py
+++ b/renumics/spotlight/data_source/data_source.py
@@ -89,10 +89,11 @@ def check_generation_id(self, generation_id: int) -> None:
         if self.get_generation_id() != generation_id:
             raise GenerationIDMismatch()
 
+    @property
     @abstractmethod
-    def guess_dtypes(self) -> DTypeMap:
+    def semantic_dtypes(self) -> DTypeMap:
         """
-        Guess data source's dtypes.
+        Semantic dtypes for viewer.
         """
 
     @abstractmethod
diff --git a/renumics/spotlight/data_store.py b/renumics/spotlight/data_store.py
index 26a8d3e0..7d70f9bc 100644
--- a/renumics/spotlight/data_store.py
+++ b/renumics/spotlight/data_store.py
@@ -1,7 +1,12 @@
 import hashlib
 import io
-from typing import List, Optional, Set, Union, cast
+import os
+import statistics
+from typing import Any, List, Optional, Set, Union, cast
 import numpy as np
+import filetype
+import trimesh
+import PIL.Image
 
 from renumics.spotlight.cache import external_data_cache
 from renumics.spotlight.data_source import DataSource
@@ -10,13 +15,33 @@
 from renumics.spotlight.io import audio
 from renumics.spotlight.dtypes import (
     CategoryDType,
+    DType,
     DTypeMap,
     is_audio_dtype,
     is_category_dtype,
+    is_file_dtype,
     is_str_dtype,
+    is_mixed_dtype,
+    is_bytes_dtype,
     str_dtype,
+    audio_dtype,
+    image_dtype,
+    video_dtype,
+    mesh_dtype,
+    embedding_dtype,
+    array_dtype,
+    window_dtype,
+    sequence_1d_dtype,
 )
 
+from renumics.spotlight.typing import is_iterable, is_pathtype
+from renumics.spotlight.media.mesh import Mesh
+from renumics.spotlight.media.video import Video
+from renumics.spotlight.media.audio import Audio
+from renumics.spotlight.media.image import Image
+from renumics.spotlight.media.sequence_1d import Sequence1D
+from renumics.spotlight.media.embedding import Embedding
+
 
 class DataStore:
     _data_source: DataSource
@@ -102,7 +127,14 @@ def get_waveform(self, column_name: str, index: int) -> Optional[np.ndarray]:
         return waveform
 
     def _update_dtypes(self) -> None:
-        guessed_dtypes = self._data_source.guess_dtypes()
+        guessed_dtypes = self._data_source.semantic_dtypes.copy()
+
+        # guess missing dtypes from intermediate dtypes
+        for col, dtype in self._data_source.intermediate_dtypes.items():
+            if col not in guessed_dtypes:
+                guessed_dtypes[col] = self._guess_dtype(col)
+
+        # merge guessed semantic dtypes with user dtypes
         dtypes = {
             **guessed_dtypes,
             **{
@@ -111,6 +143,8 @@ def _update_dtypes(self) -> None:
                 if column_name in guessed_dtypes
             },
         }
+
+        # determine categories for _automatic_ CategoryDtypes
         for column_name, dtype in dtypes.items():
             if (
                 is_category_dtype(dtype)
@@ -124,4 +158,91 @@ def _update_dtypes(self) -> None:
                 ]
                 category_names = sorted(cast(Set[str], set(converted_values)))
                 dtypes[column_name] = CategoryDType(category_names)
+
         self._dtypes = dtypes
+
+    def _guess_dtype(self, col: str) -> DType:
+        """Guess the semantic dtype of `col` from (up to) its first 10 values."""
+        source = self._data_source
+        fallback = _intermediate_to_semantic_dtype(source.intermediate_dtypes[col])
+
+        # the most frequent per-value guess wins; a `None` guess means "unknown"
+        samples = source.get_column_values(col, slice(10))
+        guesses = [_guess_value_dtype(sample) for sample in samples]
+
+        try:
+            return statistics.mode(guesses) or fallback
+        except statistics.StatisticsError:
+            return fallback
+
+
+def _intermediate_to_semantic_dtype(intermediate_dtype: DType) -> DType:
+    """Return the semantic dtype used by default for an intermediate dtype."""
+    # file, mixed and bytes dtypes fall back to str; all others are kept
+    falls_back_to_str = (
+        is_file_dtype(intermediate_dtype)
+        or is_mixed_dtype(intermediate_dtype)
+        or is_bytes_dtype(intermediate_dtype)
+    )
+    return str_dtype if falls_back_to_str else intermediate_dtype
+
+
+def _guess_value_dtype(value: Any) -> Optional[DType]:
+    """
+    Infer the dtype of a single value; `None` if nothing can be inferred
+    """
+    # spotlight media classes and known third-party types, first match wins
+    known_types = (
+        (Embedding, embedding_dtype),
+        (Sequence1D, sequence_1d_dtype),
+        (Image, image_dtype),
+        (Audio, audio_dtype),
+        (Video, video_dtype),
+        (Mesh, mesh_dtype),
+        (PIL.Image.Image, image_dtype),
+        (trimesh.Trimesh, mesh_dtype),
+    )
+    for value_type, known_dtype in known_types:
+        if isinstance(value, value_type):
+            return known_dtype
+
+    if isinstance(value, np.ndarray):
+        return _infer_array_dtype(value)
+
+    if isinstance(value, bytes) or (is_pathtype(value) and os.path.isfile(value)):
+        # raw bytes or an existing file: decide by the detected mime group
+        kind = filetype.guess(value)
+        mime_group = None if kind is None else kind.mime.split("/")[0]
+        media_dtypes = {
+            "image": image_dtype,
+            "audio": audio_dtype,
+            "video": video_dtype,
+        }
+        # undetected or other content falls back to str
+        return media_dtypes.get(mime_group, str_dtype)
+
+    if not is_iterable(value):
+        return None
+    try:
+        # iterables convertible to a float array are classified like arrays
+        as_floats = np.asarray(value, dtype=float)
+    except (TypeError, ValueError):
+        return None
+    return _infer_array_dtype(as_floats)
+
+
+def _infer_array_dtype(value: np.ndarray) -> DType:
+    """
+    Infer the dtype of a numpy array from its number of dimensions and shape
+    """
+    shape = value.shape
+    if value.ndim == 1:
+        # exactly two elements are taken as a window, anything else as embedding
+        return window_dtype if len(value) == 2 else embedding_dtype
+    if value.ndim == 2 and 2 in shape:
+        return sequence_1d_dtype
+    if value.ndim == 3 and shape[-1] in (1, 3, 4):
+        # trailing axis of size 1, 3 or 4 - presumably the image channels
+        return image_dtype
+    # every other shape (including 0-d arrays) stays a generic array
+    return array_dtype
diff --git a/renumics/spotlight/dataset/__init__.py b/renumics/spotlight/dataset/__init__.py
index 612bba74..99828ddf 100644
--- a/renumics/spotlight/dataset/__init__.py
+++ b/renumics/spotlight/dataset/__init__.py
@@ -245,7 +245,7 @@ def _user_column_attributes(dtype: spotlight_dtypes.DType) -> Dict[str, Type]:
         if spotlight_dtypes.is_sequence_1d_dtype(dtype):
             attribute_names["x_label"] = str
             attribute_names["y_label"] = str
-        if spotlight_dtypes.is_file_dtype(dtype):
+        if spotlight_dtypes.is_filebased_dtype(dtype):
             attribute_names["lookup"] = dict
             attribute_names["external"] = bool
         if spotlight_dtypes.is_audio_dtype(dtype):
@@ -758,7 +758,7 @@ def from_pandas(
 
                 column = prepare_column(column, dtype)
 
-                if workdir is not None and spotlight_dtypes.is_file_dtype(dtype):
+                if workdir is not None and spotlight_dtypes.is_filebased_dtype(dtype):
                     # For file-based data types, relative paths should be resolved.
                     str_mask = is_string_mask(column)
                     column[str_mask] = column[str_mask].apply(
@@ -777,7 +777,7 @@ def from_pandas(
                 else:
                     values = column.to_numpy()
 
-                if spotlight_dtypes.is_file_dtype(dtype):
+                if spotlight_dtypes.is_filebased_dtype(dtype):
                     attrs["external"] = False  # type: ignore
                     attrs["lookup"] = False  # type: ignore
 
@@ -2435,7 +2435,7 @@ def _append_column(
         elif spotlight_dtypes.is_sequence_1d_dtype(dtype):
             attrs["x_label"] = dtype.x_label
             attrs["y_label"] = dtype.y_label
-        elif spotlight_dtypes.is_file_dtype(dtype):
+        elif spotlight_dtypes.is_filebased_dtype(dtype):
             lookup = attrs.get("lookup", None)
             if is_iterable(lookup) and not isinstance(lookup, dict):
                 # Assume that we can keep all the lookup values in memory.
@@ -3002,7 +3002,7 @@ def _encode_value(
         if self._is_ref_column(column):
             value = cast(RefColumnInputType, value)
             self._assert_valid_value_type(value, dtype, column_name)
-            if spotlight_dtypes.is_file_dtype(dtype) and isinstance(value, str):
+            if spotlight_dtypes.is_filebased_dtype(dtype) and isinstance(value, str):
                 try:
                     return self._find_lookup_ref(value, column)
                 except KeyError:
diff --git a/renumics/spotlight/dtypes/__init__.py b/renumics/spotlight/dtypes/__init__.py
index d7acf3ac..7348097c 100644
--- a/renumics/spotlight/dtypes/__init__.py
+++ b/renumics/spotlight/dtypes/__init__.py
@@ -97,12 +97,14 @@ def __init__(self, x_label: str = "x", y_label: str = "y"):
 ALIASES: Dict[Any, DType] = {}
 
 
-def register_dtype(dtype: DType, aliases: list) -> None:
-    for alias in aliases:
-        assert dtype.name.lower() not in ALIASES
-        ALIASES[dtype.name.lower()] = dtype
-        assert alias not in ALIASES
-        ALIASES[alias] = dtype
+def register_dtype(dtype: DType, aliases: Optional[list] = None) -> None:
+    """Register `dtype` under its lowercased name and under every alias."""
+    extra_keys = [] if aliases is None else aliases
+    # the name is always registered first, also when no aliases are given;
+    # every key may be registered only once
+    for key in (dtype.name.lower(), *extra_keys):
+        assert key not in ALIASES
+        ALIASES[key] = dtype
 
 
 bool_dtype = DType("bool")
@@ -150,9 +152,13 @@ def register_dtype(dtype: DType, aliases: list) -> None:
 video_dtype = DType("Video")
 """Video dtype"""
 register_dtype(video_dtype, [Video])
+
 mixed_dtype = DType("mixed")
 """Unknown or mixed dtype"""
 
+file_dtype = DType("file")
+"""File Dtype (bytes or str(path))"""
+
 
 DTypeMap = Dict[str, DType]
 
@@ -221,9 +227,21 @@ def is_video_dtype(dtype: DType) -> bool:
     return dtype.name == "Video"
 
 
+def is_bytes_dtype(dtype: DType) -> bool:
+    return dtype.name == "bytes"  # dtypes are told apart by name only
+
+
+def is_mixed_dtype(dtype: DType) -> bool:
+    return dtype.name == "mixed"  # "mixed": unknown or mixed content
+
+
 def is_scalar_dtype(dtype: DType) -> bool:
     return dtype.name in ("bool", "int", "float")
 
 
 def is_file_dtype(dtype: DType) -> bool:
-    return dtype.name in ("Audio", "Image", "Video", "Mesh")
+    return dtype.name == "file"  # generic file dtype only, no media dtypes
+
+
+def is_filebased_dtype(dtype: DType) -> bool:
+    return dtype.name in ("Audio", "Image", "Video", "Mesh", "file")  # media + file
diff --git a/renumics/spotlight/io/pandas.py b/renumics/spotlight/io/pandas.py
index 3cba1736..4cf84f9e 100644
--- a/renumics/spotlight/io/pandas.py
+++ b/renumics/spotlight/io/pandas.py
@@ -320,7 +320,7 @@ def prepare_column(column: pd.Series, dtype: dtypes.DType) -> pd.Series:
         str_mask = is_string_mask(column)
         column[str_mask] = column[str_mask].apply(try_literal_eval)
 
-        if dtypes.is_file_dtype(dtype):
+        if dtypes.is_filebased_dtype(dtype):
             dict_mask = column.map(type) == dict
             column[dict_mask] = column[dict_mask].apply(prepare_hugging_face_dict)
 
diff --git a/renumics/spotlight/viewer.py b/renumics/spotlight/viewer.py
index f50d7c52..b7630b17 100644
--- a/renumics/spotlight/viewer.py
+++ b/renumics/spotlight/viewer.py
@@ -139,10 +139,8 @@ def show(
                 project_root = dataset
             else:
                 project_root = dataset.parent
-        elif isinstance(dataset, pd.DataFrame) or dataset is None:
-            project_root = None
         else:
-            raise TypeError("Dataset has invalid type")
+            project_root = None
 
         if folder:
             project_root = Path(folder)
diff --git a/renumics/spotlight_plugins/core/__init__.py b/renumics/spotlight_plugins/core/__init__.py
index 10953077..f741a4dd 100644
--- a/renumics/spotlight_plugins/core/__init__.py
+++ b/renumics/spotlight_plugins/core/__init__.py
@@ -21,7 +21,11 @@ def __register__() -> None:
     """
     register data sources
     """
-    from . import pandas_data_source, hdf5_data_source  # noqa: F401
+    from . import (
+        pandas_data_source,  # noqa: F401
+        hdf5_data_source,  # noqa: F401
+        huggingface_datasource,  # noqa: F401
+    )
 
 
 def __activate__(app: SpotlightApp) -> None:
diff --git a/renumics/spotlight_plugins/core/hdf5_data_source.py b/renumics/spotlight_plugins/core/hdf5_data_source.py
index 677432e7..2acc2678 100644
--- a/renumics/spotlight_plugins/core/hdf5_data_source.py
+++ b/renumics/spotlight_plugins/core/hdf5_data_source.py
@@ -107,12 +107,13 @@ def column_names(self) -> List[str]:
 
     @property
     def intermediate_dtypes(self) -> DTypeMap:
-        return self.guess_dtypes()
+        return self.semantic_dtypes
 
     def __len__(self) -> int:
         return len(self._table)
 
-    def guess_dtypes(self) -> DTypeMap:
+    @property
+    def semantic_dtypes(self) -> DTypeMap:
         return {
             column_name: create_dtype(self._table.get_dtype(column_name))
             for column_name in self.column_names
diff --git a/renumics/spotlight_plugins/core/huggingface_datasource.py b/renumics/spotlight_plugins/core/huggingface_datasource.py
new file mode 100644
index 00000000..49a50f63
--- /dev/null
+++ b/renumics/spotlight_plugins/core/huggingface_datasource.py
@@ -0,0 +1,209 @@
+from typing import List, Optional, Union, cast
+
+import datasets
+import numpy as np
+from renumics.spotlight import dtypes
+
+from renumics.spotlight.data_source import DataSource
+from renumics.spotlight.data_source.decorator import datasource
+from renumics.spotlight.dtypes import (
+    DType,
+    DTypeMap,
+    is_array_dtype,
+    is_embedding_dtype,
+    is_file_dtype,
+    is_float_dtype,
+    is_int_dtype,
+)
+from renumics.spotlight.data_source.data_source import ColumnMetadata
+
+
+_FeatureType = Union[
+    datasets.Value,
+    datasets.ClassLabel,
+    datasets.Sequence,
+    datasets.Array2D,
+    datasets.Array3D,
+    datasets.Array4D,
+    datasets.Array5D,
+    datasets.Audio,
+    datasets.Image,
+    datasets.Translation,
+    dict,
+    list,
+]
+
+
+class UnsupportedFeature(Exception):
+    """Signals a HuggingFace datasets feature that is not supported."""
+
+    def __init__(self, feature: _FeatureType) -> None:
+        # the message names the type of the feature, not the feature itself
+        message = f"Unsupported HuggingFace Feature: {type(feature)}"
+        super().__init__(message)
+
+
+@datasource(datasets.Dataset)
+class HuggingfaceDataSource(DataSource):
+    """
+    Data source serving the columns of a HuggingFace `datasets.Dataset`.
+    """
+
+    _dataset: datasets.Dataset
+    _intermediate_dtypes: DTypeMap
+    _guessed_dtypes: DTypeMap
+
+    def __init__(self, source: datasets.Dataset):
+        super().__init__(source)
+        self._dataset = source
+        features = source.features
+        # intermediate dtype of every column, derived from its feature
+        self._intermediate_dtypes = {
+            col: _get_intermediate_dtype(feat) for col, feat in features.items()
+        }
+        # semantic dtypes are only kept for features with a (truthy) guess
+        guesses = {col: _guess_semantic_dtype(feat) for col, feat in features.items()}
+        self._guessed_dtypes = {col: dtype for col, dtype in guesses.items() if dtype}
+
+    @property
+    def column_names(self) -> List[str]:
+        return self._dataset.column_names
+
+    @property
+    def intermediate_dtypes(self) -> DTypeMap:
+        return self._intermediate_dtypes
+
+    def __len__(self) -> int:
+        return len(self._dataset)
+
+    @property
+    def semantic_dtypes(self) -> DTypeMap:
+        return self._guessed_dtypes
+
+    def get_generation_id(self) -> int:
+        """Return a constant generation id (always 0)."""
+        return 0
+
+    def get_uid(self) -> str:
+        """Use the fingerprint of the dataset as unique id."""
+        return self._dataset._fingerprint
+
+    def get_name(self) -> str:
+        return self._dataset.builder_name
+
+    def get_column_values(
+        self,
+        column_name: str,
+        indices: Union[List[int], np.ndarray, slice] = slice(None),
+    ) -> np.ndarray:
+        """
+        Return the values of `column_name` at `indices` as a numpy array.
+
+        File values are returned as bytes, or as path if no bytes are stored;
+        values without a dedicated conversion as their string representation.
+        """
+        intermediate_dtype = self._intermediate_dtypes[column_name]
+        feature = self._dataset.features[column_name]
+        # NOTE(review): this reads the raw table; presumably an indices mapping
+        # (e.g. after `select` or `shuffle`) is not applied here - confirm.
+        column = self._dataset.data[column_name]
+
+        if isinstance(indices, slice):
+            if indices != slice(None):
+                # slice the lazy range instead of a materialized index list
+                column = column.take(list(range(len(self._dataset))[indices]))
+        else:
+            column = column.take(indices)
+
+        if is_file_dtype(intermediate_dtype):
+            # Audio, Image and bytes/path dicts: prefer the embedded bytes and
+            # fall back to the path, so path-only values are not dropped
+            files = []
+            for value in column:
+                data = value["bytes"].as_py()
+                files.append(value["path"].as_py() if data is None else data)
+            return np.array(files, dtype=object)
+
+        if isinstance(feature, datasets.Sequence) and (
+            is_array_dtype(intermediate_dtype) or is_embedding_dtype(intermediate_dtype)
+        ):
+            return column.to_numpy()
+        if isinstance(feature, (dict, datasets.Sequence, datasets.Translation)):
+            # no dedicated conversion: serve the string representation
+            return np.array([str(value) for value in column])
+
+        return column.to_numpy()
+
+    def get_column_metadata(self, _: str) -> ColumnMetadata:
+        """Report every column as nullable and not editable."""
+        return ColumnMetadata(nullable=True, editable=False)
+
+
+def _guess_semantic_dtype(feature: _FeatureType) -> Optional[DType]:
+    """Guess the viewer dtype; embeddings need fixed-length numeric sequences."""
+    if isinstance(feature, datasets.Audio):
+        return dtypes.audio_dtype
+    if isinstance(feature, datasets.Image):
+        return dtypes.image_dtype
+    if isinstance(feature, datasets.Sequence) and feature.length != -1:
+        if is_array_dtype(_get_intermediate_dtype(feature)):  # int/float items
+            return dtypes.embedding_dtype
+    return None
+
+
+def _get_intermediate_dtype(feature: _FeatureType) -> DType:
+    """
+    Translate a HuggingFace feature into the matching intermediate dtype.
+
+    `datasets.Value` features are matched by their dtype name, all other
+    features by their type; the items of a sequence are resolved recursively.
+
+    Raises `UnsupportedFeature` for unknown feature types and value dtypes.
+    """
+    if isinstance(feature, datasets.Value):
+        hf_dtype = feature.dtype
+        # dtypes identified by their exact name
+        by_name = {
+            "bool": dtypes.bool_dtype,
+            "binary": dtypes.bytes_dtype,
+            "large_binary": dtypes.bytes_dtype,
+            "string": dtypes.str_dtype,
+            "large_string": dtypes.str_dtype,
+        }
+        if hf_dtype in by_name:
+            return by_name[hf_dtype]
+        # dtypes identified by the start of their name
+        by_prefix = {
+            "int": dtypes.int_dtype,
+            "uint": dtypes.int_dtype,
+            "float": dtypes.float_dtype,
+            "time32": dtypes.datetime_dtype,
+            "time64": dtypes.datetime_dtype,
+            "timestamp": dtypes.datetime_dtype,
+            "date32": dtypes.datetime_dtype,
+            "date64": dtypes.datetime_dtype,
+            "duration": dtypes.float_dtype,
+            "decimal": dtypes.float_dtype,
+        }
+        for prefix, dtype in by_prefix.items():
+            if hf_dtype.startswith(prefix):
+                return dtype
+        raise UnsupportedFeature(feature)
+
+    if isinstance(feature, datasets.ClassLabel):
+        return dtypes.CategoryDType(categories=feature.names)
+    if isinstance(feature, (datasets.Audio, datasets.Image)):
+        return dtypes.file_dtype
+    if isinstance(feature, datasets.Sequence):
+        # sequences of ints or floats are arrays, any other sequence is a str
+        inner_dtype = _get_intermediate_dtype(feature.feature)
+        numeric = is_int_dtype(inner_dtype) or is_float_dtype(inner_dtype)
+        return dtypes.array_dtype if numeric else dtypes.str_dtype
+    if isinstance(feature, dict):
+        # a dict of exactly `bytes` and `path` is a file, other dicts are str
+        is_file = len(feature) == 2 and "bytes" in feature and "path" in feature
+        return dtypes.file_dtype if is_file else dtypes.str_dtype
+    if isinstance(feature, datasets.Translation):
+        return dtypes.str_dtype
+
+    raise UnsupportedFeature(feature)
diff --git a/renumics/spotlight_plugins/core/pandas_data_source.py b/renumics/spotlight_plugins/core/pandas_data_source.py
index 4d2bf172..cde0e908 100644
--- a/renumics/spotlight_plugins/core/pandas_data_source.py
+++ b/renumics/spotlight_plugins/core/pandas_data_source.py
@@ -124,7 +124,8 @@ def intermediate_dtypes(self) -> DTypeMap:
     def __len__(self) -> int:
         return len(self._df)
 
-    def guess_dtypes(self) -> DTypeMap:
+    @property
+    def semantic_dtypes(self) -> DTypeMap:
         return {
             str(column_name): infer_dtype(self.df[column_name])
             for column_name in self.df
diff --git a/src/datatypes.ts b/src/datatypes.ts
index 6ac98b87..29ed7bca 100644
--- a/src/datatypes.ts
+++ b/src/datatypes.ts
@@ -35,7 +35,7 @@ export type IntegerDataType = BaseDataType<'int'>;
 export type FloatDataType = BaseDataType<'float'>;
 export type BooleanDataType = BaseDataType<'bool'>;
 export type DateTimeDataType = BaseDataType<'datetime'>;
-export type ArrayDataType = BaseDataType<'array'>;
+export type ArrayDataType = BaseDataType<'array', true>;
 export type WindowDataType = BaseDataType<'Window'>;
 export type StringDataType = BaseDataType<'str', true>;
 export type EmbeddingDataType = BaseDataType<'Embedding', true>;
diff --git a/src/stores/dataset/columnFactory.ts b/src/stores/dataset/columnFactory.ts
index 3ec20678..26018d58 100644
--- a/src/stores/dataset/columnFactory.ts
+++ b/src/stores/dataset/columnFactory.ts
@@ -11,7 +11,6 @@ function makeDatatype(column: Column): DataType {
         case 'float':
         case 'bool':
         case 'Window':
-        case 'array':
         case 'datetime':
             return {
                 kind,
@@ -20,6 +19,7 @@ function makeDatatype(column: Column): DataType {
                 optional: column.optional,
             };
         case 'str':
+        case 'array':
         case 'Embedding':
             return {
                 kind,