You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I have checked #657 to validate if my issue is covered by community support
Describe the issue
I ran GraphRAG on two Chinese datasets. The small dataset ran through the whole process, while the large dataset produced the following errors in the log file, causing community report creation to fail:
ERROR error generating community report
Traceback (most recent call last):
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/index/graph/extractors/community_reports/community_reports_extractor.py", line 60, in __call__
await self._llm(
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/json_parsing_llm.py", line 34, in __call__
result = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_token_replacing_llm.py", line 37, in __call__
return await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_history_tracking_llm.py", line 33, in __call__
output = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/caching_llm.py", line 96, in __call__
result = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 177, in __call__
result, start = await execute_with_retry()
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 159, in execute_with_retry
async for attempt in retryer:
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 166, in __anext__
do = await self.iter(retry_state=self._retry_state)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 153, in iter
result = await action(retry_state)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/_utils.py", line 99, in inner
return call(*args, **kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/__init__.py", line 398, in <lambda>
self._add_action_func(lambda rs: rs.outcome.result())
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 165, in execute_with_retry
return await do_attempt(), start
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 147, in do_attempt
return await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/base_llm.py", line 48, in __call__
return await self._invoke_json(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_chat_llm.py", line 88, in _invoke_json
raise RuntimeError(FAILED_TO_CREATE_JSON_ERROR)
RuntimeError: Failed to generate valid JSON output
03:21:23,818 graphrag.index.emit.parquet_table_emitter ERROR Error while emitting parquet table
Traceback (most recent call last):
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/index/emit/parquet_table_emitter.py", line 40, in emit
await self._storage.set(filename, data.to_parquet())
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/util/_decorators.py", line 333, in wrapper
return func(*args, **kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/core/frame.py", line 3113, in to_parquet
return to_parquet(
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/io/parquet.py", line 480, in to_parquet
impl.write(
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/io/parquet.py", line 190, in write
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
File "pyarrow/table.pxi", line 3874, in pyarrow.lib.Table.from_pandas
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 624, in dataframe_to_arrays
arrays[i] = maybe_fut.result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 598, in convert_column
raise e
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 592, in convert_column
result = pa.array(col, type=type, from_pandas=True, safe=safe)
File "pyarrow/array.pxi", line 339, in pyarrow.lib.array
File "pyarrow/array.pxi", line 85, in pyarrow.lib._ndarray_to_array
File "pyarrow/error.pxi", line 91, in pyarrow.lib.check_status
pyarrow.lib.ArrowInvalid: ('cannot mix list and non-list, non-null values', 'Conversion failed for column findings with type object')
03:21:23,826 graphrag.index.reporting.file_workflow_callbacks INFO Error emitting table details=None
Steps to reproduce
No response
GraphRAG Config Used
# Paste your config here
Logs and screenshots
No response
Additional Information
GraphRAG Version:
Operating System:
Python Version:
Related Issues:
The text was updated successfully, but these errors were encountered:
xgl0626
added
the
triage
Default label assignment; indicates a new issue that needs to be reviewed by a maintainer
label
Aug 9, 2024
We have resolved several issues related to text encoding and JSON parsing that are rolled up into version 0.2.2. Please try again with that version and re-open if this is still an issue.
We have resolved several issues related to text encoding and JSON parsing that are rolled up into version 0.2.2. Please try again with that version and re-open if this is still an issue.
I still get this error when I re-run it with version 0.3.0, and I found a similar issue reported in the community issues. I used two documents to build the database.
We have resolved several issues related to text encoding and JSON parsing that are rolled up into version 0.2.2. Please try again with that version and re-open if this is still an issue.
The create_final_community_reports.parquet file disappears. #940
Is there an existing issue for this?
Describe the issue
I ran GraphRAG on two Chinese datasets. The small dataset ran through the whole process, while the large dataset produced the following errors in the log file, causing community report creation to fail:
ERROR error generating community report
Traceback (most recent call last):
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/index/graph/extractors/community_reports/community_reports_extractor.py", line 60, in __call__
await self._llm(
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/json_parsing_llm.py", line 34, in __call__
result = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_token_replacing_llm.py", line 37, in __call__
return await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_history_tracking_llm.py", line 33, in __call__
output = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/caching_llm.py", line 96, in __call__
result = await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 177, in __call__
result, start = await execute_with_retry()
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 159, in execute_with_retry
async for attempt in retryer:
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 166, in __anext__
do = await self.iter(retry_state=self._retry_state)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/asyncio/__init__.py", line 153, in iter
result = await action(retry_state)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/_utils.py", line 99, in inner
return call(*args, **kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/tenacity/__init__.py", line 398, in <lambda>
self._add_action_func(lambda rs: rs.outcome.result())
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 165, in execute_with_retry
return await do_attempt(), start
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/rate_limiting_llm.py", line 147, in do_attempt
return await self._delegate(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/base/base_llm.py", line 48, in __call__
return await self._invoke_json(input, **kwargs)
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/llm/openai/openai_chat_llm.py", line 88, in _invoke_json
raise RuntimeError(FAILED_TO_CREATE_JSON_ERROR)
RuntimeError: Failed to generate valid JSON output
03:21:23,818 graphrag.index.emit.parquet_table_emitter ERROR Error while emitting parquet table
Traceback (most recent call last):
File "/home/notebook/code/group/rag_reearch/graphrag-main/graphrag/index/emit/parquet_table_emitter.py", line 40, in emit
await self._storage.set(filename, data.to_parquet())
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/util/_decorators.py", line 333, in wrapper
return func(*args, **kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/core/frame.py", line 3113, in to_parquet
return to_parquet(
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/io/parquet.py", line 480, in to_parquet
impl.write(
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pandas/io/parquet.py", line 190, in write
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
File "pyarrow/table.pxi", line 3874, in pyarrow.lib.Table.from_pandas
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 624, in dataframe_to_arrays
arrays[i] = maybe_fut.result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/opt/conda/envs/graphrag/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 598, in convert_column
raise e
File "/opt/conda/envs/graphrag/lib/python3.10/site-packages/pyarrow/pandas_compat.py", line 592, in convert_column
result = pa.array(col, type=type, from_pandas=True, safe=safe)
File "pyarrow/array.pxi", line 339, in pyarrow.lib.array
File "pyarrow/array.pxi", line 85, in pyarrow.lib._ndarray_to_array
File "pyarrow/error.pxi", line 91, in pyarrow.lib.check_status
pyarrow.lib.ArrowInvalid: ('cannot mix list and non-list, non-null values', 'Conversion failed for column findings with type object')
03:21:23,826 graphrag.index.reporting.file_workflow_callbacks INFO Error emitting table details=None
Steps to reproduce
No response
GraphRAG Config Used
# Paste your config here
Logs and screenshots
No response
Additional Information
The text was updated successfully, but these errors were encountered: