Open
Description
Issue
❌ `sns.histplot(df, x=col, hue=y)` throws an error when `df[col]` has mixed data types.
✔️ `sns.histplot(df.astype(str), x=col, hue=y)` works fine.
Background
`pandas` stores any column that holds mixed data types, such as `int` and `str`, as an `object`-dtype column.
- However, if such a column exists, it cannot be drawn as a histogram, seemingly because `int` and `str` values co-exist in it.
- For example, my data has a column like `[0, 1, "a", "b" ... "e"]`.
- That column has `object` dtype, but values like `0` and `1` stay as `int`, and it seems that the conflict between the value types is causing the error.
Traceback
ValueError Traceback (most recent call last)
File lib.pyx:2391, in pandas._libs.lib.maybe_convert_numeric()
ValueError: Unable to parse string "SGAFC590DP"
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[717], [line 5](vscode-notebook-cell:?execution_count=717&line=5)
[3](vscode-notebook-cell:?execution_count=717&line=3) for col in df_spatter.columns:
[4](vscode-notebook-cell:?execution_count=717&line=4) plt.figure(figsize=(10, 10))
----> [5](vscode-notebook-cell:?execution_count=717&line=5) sns.histplot(df_spatter, x=col, hue=y_spatter)
[6](vscode-notebook-cell:?execution_count=717&line=6) plt.xticks(rotation=90)
[7](vscode-notebook-cell:?execution_count=717&line=7) plt.show()
File c:\Users\msi\Desktop\workspace\041_HM_platform\01_src\01_work\proj_hm_process_result\.venv_hm_result\lib\site-packages\seaborn\distributions.py:1416, in histplot(data, x, y, hue, weights, stat, bins, binwidth, binrange, discrete, cumulative, common_bins, common_norm, multiple, element, fill, shrink, kde, kde_kws, line_kws, thresh, pthresh, pmax, cbar, cbar_ax, cbar_kws, palette, hue_order, hue_norm, color, log_scale, legend, ax, **kwargs)
[1405](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1405) estimate_kws = dict(
[1406](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1406) stat=stat,
[1407](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1407) bins=bins,
(...)
[1411](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1411) cumulative=cumulative,
[1412](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1412) )
[1414](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1414) if p.univariate:
-> [1416](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1416) p.plot_univariate_histogram(
[1417](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1417) multiple=multiple,
[1418](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1418) element=element,
[1419](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1419) fill=fill,
[1420](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1420) shrink=shrink,
[1421](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1421) common_norm=common_norm,
[1422](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1422) common_bins=common_bins,
[1423](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1423) kde=kde,
[1424](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1424) kde_kws=kde_kws,
[1425](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1425) color=color,
[1426](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1426) legend=legend,
[1427](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1427) estimate_kws=estimate_kws,
[1428](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1428) line_kws=line_kws,
[1429](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1429) **kwargs,
[1430](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1430) )
[1432](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1432) else:
[1434](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1434) p.plot_bivariate_histogram(
[1435](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1435) common_bins=common_bins,
[1436](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1436) common_norm=common_norm,
(...)
[1446](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1446) **kwargs,
[1447](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:1447) )
File c:\Users\msi\Desktop\workspace\041_HM_platform\01_src\01_work\proj_hm_process_result\.venv_hm_result\lib\site-packages\seaborn\distributions.py:427, in _DistributionPlotter.plot_univariate_histogram(self, multiple, element, fill, common_norm, common_bins, shrink, kde, kde_kws, color, legend, line_kws, estimate_kws, **plot_kws)
[424](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:424) histograms = {}
[426](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:426) # Do pre-compute housekeeping related to multiple groups
--> [427](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:427) all_data = self.comp_data.dropna()
[428](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:428) all_weights = all_data.get("weights", None)
[430](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/distributions.py:430) multiple_histograms = set(self.variables) - {"x", "y"}
File c:\Users\msi\Desktop\workspace\041_HM_platform\01_src\01_work\proj_hm_process_result\.venv_hm_result\lib\site-packages\seaborn\_base.py:1000, in VectorPlotter.comp_data(self)
[995](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:995) if var in self.var_levels:
[996](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:996) # TODO this should happen in some centralized location
[997](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:997) # it is similar to GH2419, but more complicated because
[998](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:998) # supporting `order` in categorical plots is tricky
[999](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:999) orig = orig[orig.isin(self.var_levels[var])]
-> [1000](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:1000) comp = pd.to_numeric(converter.convert_units(orig)).astype(float)
[1001](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:1001) transform = converter.get_transform().transform
[1002](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/seaborn/_base.py:1002) parts.append(pd.Series(transform(comp), orig.index, name=orig.name))
File c:\Users\msi\Desktop\workspace\041_HM_platform\01_src\01_work\proj_hm_process_result\.venv_hm_result\lib\site-packages\pandas\core\tools\numeric.py:232, in to_numeric(arg, errors, downcast, dtype_backend)
[230](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:230) coerce_numeric = errors not in ("ignore", "raise")
[231](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:231) try:
--> [232](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:232) values, new_mask = lib.maybe_convert_numeric( # type: ignore[call-overload]
[233](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:233) values,
[234](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:234) set(),
[235](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:235) coerce_numeric=coerce_numeric,
[236](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:236) convert_to_masked_nullable=dtype_backend is not lib.no_default
[237](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:237) or isinstance(values_dtype, StringDtype)
[238](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:238) and not values_dtype.storage == "pyarrow_numpy",
[239](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:239) )
[240](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:240) except (ValueError, TypeError):
[241](file:///C:/Users/msi/Desktop/workspace/041_HM_platform/01_src/01_work/proj_hm_process_result/.venv_hm_result/lib/site-packages/pandas/core/tools/numeric.py:241) if errors == "raise":
File lib.pyx:2433, in pandas._libs.lib.maybe_convert_numeric()
ValueError: Unable to parse string "SGAFC590DP" at position 8
Metadata
Metadata
Assignees
Labels
No labels