Getting ZeroDivisionError with Spark #1502
Unanswered
sujithpallath
asked this question in
Q&A
Replies: 0 comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
ydata-profiling with Spark raises `ZeroDivisionError: division by zero`, even though the DataFrame contains records.
I am following this example: https://github.com/ydataai/ydata-profiling/tree/develop/examples/integrations/databricks
ZeroDivisionError Traceback (most recent call last)
in <cell line: 2>()
1 #Export the report as html and display
----> 2 report_html = report.to_html()
3 displayHTML(report_html)
python3.9/site-packages/ydata_profiling/profile_report.py in to_html(self)
468
469 """
--> 470 return self.html
471
472 def to_json(self) -> str:
python3.9/site-packages/ydata_profiling/profile_report.py in html(self)
275 def html(self) -> str:
276 if self._html is None:
--> 277 self._html = self._render_html()
278 return self._html
279
python3.9/site-packages/ydata_profiling/profile_report.py in _render_html(self)
383 from ydata_profiling.report.presentation.flavours import HTMLReport
384
--> 385 report = self.report
386
387 with tqdm(
python3.9/site-packages/ydata_profiling/profile_report.py in report(self)
269 def report(self) -> Root:
270 if self._report is None:
--> 271 self._report = get_report_structure(self.config, self.description_set)
272 return self._report
273
python3.9/site-packages/ydata_profiling/profile_report.py in description_set(self)
251 def description_set(self) -> BaseDescription:
252 if self._description_set is None:
--> 253 self._description_set = describe_df(
254 self.config,
255 self.df,
python3.9/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
72 # Variable-specific
73 pbar.total += len(df.columns)
---> 74 series_description = get_series_descriptions(
75 config, df, summarizer, typeset, pbar
76 )
python3.9/site-packages/multimethod/init.py in call(self, *args, **kwargs)
326 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
327 try:
--> 328 return func(*args, **kwargs)
329 except TypeError as ex:
330 raise DispatchError(f"Function {func.code}") from ex
python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_get_series_descriptions(config, df, summarizer, typeset, pbar)
90 args = [(name, df) for name in df.columns]
91 with multiprocessing.pool.ThreadPool(12) as executor:
---> 92 for i, (column, description) in enumerate(
93 executor.imap_unordered(multiprocess_1d, args)
94 ):
python3.9/multiprocessing/pool.py in next(self, timeout)
868 if success:
869 return value
--> 870 raise value
871
872 next = next # XXX
python3.9/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in multiprocess_1d(args)
86 """
87 column, df = args
---> 88 return column, describe_1d(config, df.select(column), summarizer, typeset)
89
90 args = [(name, df) for name in df.columns]
python3.9/site-packages/multimethod/init.py in call(self, *args, **kwargs)
326 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
327 try:
--> 328 return func(*args, **kwargs)
329 except TypeError as ex:
330 raise DispatchError(f"Function {func.code}") from ex
python3.9/site-packages/ydata_profiling/model/spark/summary_spark.py in spark_describe_1d(config, series, summarizer, typeset)
62 }[dtype]
63
---> 64 return summarizer.summarize(config, series, dtype=vtype)
65
66
python3.9/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
40 object:
41 """
---> 42 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
43 return summary
44
python3.9/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
63
64
python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
python3.9/site-packages/ydata_profiling/model/handler.py in func2(*x)
15 def func(f: Callable, g: Callable) -> Callable:
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)
python3.9/site-packages/multimethod/init.py in call(self, *args, **kwargs)
326 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
327 try:
--> 328 return func(*args, **kwargs)
329 except TypeError as ex:
330 raise DispatchError(f"Function {func.code}") from ex
python3.9/site-packages/ydata_profiling/model/spark/describe_supported_spark.py in describe_supported_spark(config, series, summary)
29 summary["is_unique"] = n_unique == count
30 summary["n_unique"] = n_unique
---> 31 summary["p_unique"] = n_unique / count
32
33 return config, series, summary
ZeroDivisionError: division by zero
Beta Was this translation helpful? Give feedback.
All reactions