diff --git a/graphrag/index/emit/table_emitter.py b/graphrag/index/emit/table_emitter.py index 2e63c50fa..82e37d313 100644 --- a/graphrag/index/emit/table_emitter.py +++ b/graphrag/index/emit/table_emitter.py @@ -10,6 +10,7 @@ class TableEmitter(Protocol): """TableEmitter protocol for emitting tables to a destination.""" + extension: str async def emit(self, name: str, data: pd.DataFrame) -> None: diff --git a/graphrag/utils/storage.py b/graphrag/utils/storage.py index 5863c4644..479d0c0cf 100644 --- a/graphrag/utils/storage.py +++ b/graphrag/utils/storage.py @@ -51,11 +51,15 @@ async def _load_table_from_storage(name: str, storage: PipelineStorage) -> pd.Da case "parquet": return pd.read_parquet(BytesIO(await storage.get(name, as_bytes=True))) case "json": - return pd.read_json(BytesIO(await storage.get(name, as_bytes=True)), lines=True, orient="records") + return pd.read_json( + BytesIO(await storage.get(name, as_bytes=True)), + lines=True, + orient="records", + ) case "csv": return pd.read_csv(BytesIO(await storage.get(name, as_bytes=True))) - case _: - raise ValueError(f"Unknown file extension for {name}") + case _: + raise ValueError(f"Unknown file extension for {name}") except Exception: log.exception("error loading table from storage: %s", name) raise