# Source code for isaricanalytics.utils
from __future__ import annotations
__all__ = [
"clean_figure_table",
"strip_html",
"strip_nonstandard_unicode_chars",
]
# -- IMPORTS --
# -- Standard libraries --
import re
import typing
# -- 3rd party libraries --
import pandas
# -- Internal libraries --
[docs]
def strip_html(value: typing.Any) -> str | typing.Any:
""":py:class:`typing.Any` : Strip HTML elements from a value.
Parameters
----------
value : typing.Any
A value.
Returns
-------
str, typing.Any
Either a string stripped of all HTML elements, or the original non-
string value.
"""
if isinstance(value, str):
return re.sub(r"<.*?>", "", value)
return value
[docs]
def strip_nonstandard_unicode_chars(value: typing.Any) -> str | typing.Any:
""":py:class:`typing.Any` : Strip non-standard Unicode characters from a value.
The non-standard Unicode characters of interest are defined within the
function itself, and are currently limited to the "↳" (U+21B3) character,
but may be extended to include other characters.
Parameters
----------
value : typing.Any
A value.
Returns
-------
str, typing.Any
Either a string stripped of all non-standard Unicode characters, or the
original non- string value.
"""
nonstandard_unicode_chars = "↳"
if isinstance(value, str):
return re.sub(rf"[{nonstandard_unicode_chars}]", "", value)
return value