Custom Types API Reference¶

Here's the reference information for all the custom type classes that Scrapling implements, with all their parameters, attributes, and methods.

You can import all of them directly like below:

from scrapling.core.custom_types import TextHandler, TextHandlers, AttributesHandler

scrapling.core.custom_types.TextHandler ¶

Bases: str


              flowchart TD
              scrapling.core.custom_types.TextHandler[TextHandler]

              

              click scrapling.core.custom_types.TextHandler href "" "scrapling.core.custom_types.TextHandler"

Extends standard Python string by adding more functionality

slots `class-attribute` `instance-attribute` ¶

__slots__ = ()

extract `class-attribute` `instance-attribute` ¶

extract = getall

extract_first `class-attribute` `instance-attribute` ¶

extract_first = get

getitem ¶

__getitem__(key)

Source code in scrapling/core/custom_types.py

def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":  # pragma: no cover
    """Index or slice like ``str`` and wrap the result in a ``TextHandler``."""
    return TextHandler(super().__getitem__(key))

split ¶

split(sep=None, maxsplit=-1)

Source code in scrapling/core/custom_types.py

def split(self, sep: str | None = None, maxsplit: SupportsIndex = -1) -> list[Any]:  # pragma: no cover
    """Split like ``str.split`` and return the pieces as a ``TextHandlers`` of ``TextHandler`` items."""
    pieces = super().split(sep, maxsplit)
    return TextHandlers([TextHandler(piece) for piece in pieces])

strip ¶

strip(chars=None)

Source code in scrapling/core/custom_types.py

def strip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.strip``, with the result wrapped in a ``TextHandler``."""
    stripped = super().strip(chars)
    return TextHandler(stripped)

lstrip ¶

lstrip(chars=None)

Source code in scrapling/core/custom_types.py

def lstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.lstrip``, with the result wrapped in a ``TextHandler``."""
    stripped = super().lstrip(chars)
    return TextHandler(stripped)

rstrip ¶

rstrip(chars=None)

Source code in scrapling/core/custom_types.py

def rstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.rstrip``, with the result wrapped in a ``TextHandler``."""
    stripped = super().rstrip(chars)
    return TextHandler(stripped)

capitalize ¶

capitalize()

Source code in scrapling/core/custom_types.py

def capitalize(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.capitalize``, with the result wrapped in a ``TextHandler``."""
    capitalized = super().capitalize()
    return TextHandler(capitalized)

casefold ¶

casefold()

Source code in scrapling/core/custom_types.py

def casefold(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.casefold``, with the result wrapped in a ``TextHandler``."""
    folded = super().casefold()
    return TextHandler(folded)

center ¶

center(width, fillchar=' ')

Source code in scrapling/core/custom_types.py

def center(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.center``, with the result wrapped in a ``TextHandler``."""
    centered = super().center(width, fillchar)
    return TextHandler(centered)

expandtabs ¶

expandtabs(tabsize=8)

Source code in scrapling/core/custom_types.py

def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.expandtabs``, with the result wrapped in a ``TextHandler``."""
    expanded = super().expandtabs(tabsize)
    return TextHandler(expanded)

format ¶

format(*args, **kwargs)

Source code in scrapling/core/custom_types.py

def format(self, *args: object, **kwargs: object) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.format``, with the result wrapped in a ``TextHandler``."""
    formatted = super().format(*args, **kwargs)
    return TextHandler(formatted)

format_map ¶

format_map(mapping)

Source code in scrapling/core/custom_types.py

def format_map(self, mapping) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.format_map``, with the result wrapped in a ``TextHandler``."""
    formatted = super().format_map(mapping)
    return TextHandler(formatted)

join ¶

join(iterable)

Source code in scrapling/core/custom_types.py

def join(self, iterable: Iterable[str]) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.join``, with the result wrapped in a ``TextHandler``."""
    joined = super().join(iterable)
    return TextHandler(joined)

ljust ¶

ljust(width, fillchar=' ')

Source code in scrapling/core/custom_types.py

def ljust(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.ljust``, with the result wrapped in a ``TextHandler``."""
    padded = super().ljust(width, fillchar)
    return TextHandler(padded)

rjust ¶

rjust(width, fillchar=' ')

Source code in scrapling/core/custom_types.py

def rjust(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.rjust``, with the result wrapped in a ``TextHandler``."""
    padded = super().rjust(width, fillchar)
    return TextHandler(padded)

swapcase ¶

swapcase()

Source code in scrapling/core/custom_types.py

def swapcase(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.swapcase``, with the result wrapped in a ``TextHandler``."""
    swapped = super().swapcase()
    return TextHandler(swapped)

title ¶

title()

Source code in scrapling/core/custom_types.py

def title(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.title``, with the result wrapped in a ``TextHandler``."""
    titled = super().title()
    return TextHandler(titled)

translate ¶

translate(table)

Source code in scrapling/core/custom_types.py

def translate(self, table) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.translate``, with the result wrapped in a ``TextHandler``."""
    translated = super().translate(table)
    return TextHandler(translated)

zfill ¶

zfill(width)

Source code in scrapling/core/custom_types.py

def zfill(self, width: SupportsIndex) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Same as ``str.zfill``, with the result wrapped in a ``TextHandler``."""
    filled = super().zfill(width)
    return TextHandler(filled)

replace ¶

replace(old, new, count=-1)

Source code in scrapling/core/custom_types.py

def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Union[str, "TextHandler"]:
    """Same as ``str.replace``, with the result wrapped in a ``TextHandler``."""
    replaced = super().replace(old, new, count)
    return TextHandler(replaced)

upper ¶

upper()

Source code in scrapling/core/custom_types.py

def upper(self) -> Union[str, "TextHandler"]:
    """Same as ``str.upper``, with the result wrapped in a ``TextHandler``."""
    uppercased = super().upper()
    return TextHandler(uppercased)

lower ¶

lower()

Source code in scrapling/core/custom_types.py

def lower(self) -> Union[str, "TextHandler"]:
    """Same as ``str.lower``, with the result wrapped in a ``TextHandler``."""
    lowercased = super().lower()
    return TextHandler(lowercased)

sort ¶

sort(reverse=False)

Return a sorted version of the string

Source code in scrapling/core/custom_types.py

def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]:
    """Return a new instance of this class with the characters in sorted order.

    :param reverse: If True, the characters are sorted in descending order
    """
    ordered_chars = sorted(self, reverse=reverse)
    return self.__class__("".join(ordered_chars))

clean ¶

clean(remove_entities=False)

Return a new version of the string after removing all white spaces and consecutive spaces

Source code in scrapling/core/custom_types.py

def clean(self, remove_entities=False) -> Union[str, "TextHandler"]:
    """Return a cleaned copy of the string.

    The text is translated through ``__CLEANING_TABLE__``, every match of
    ``__CONSECUTIVE_SPACES_REGEX__`` is replaced by a single space, and the result is stripped.

    :param remove_entities: If enabled, the translated text is passed through ``_replace_entities`` first
    """
    text = self.translate(__CLEANING_TABLE__)
    if remove_entities:
        text = _replace_entities(text)
    collapsed = __CONSECUTIVE_SPACES_REGEX__.sub(" ", text)
    return self.__class__(collapsed.strip())

get ¶

get(default=None)

Source code in scrapling/core/custom_types.py

def get(self, default=None):  # pragma: no cover
    """Return this string itself; ``default`` is accepted but never used."""
    return self

getall ¶

getall()

Source code in scrapling/core/custom_types.py

def getall(self):  # pragma: no cover
    """Return this string itself, unchanged."""
    return self

json ¶

json()

Return JSON response if the response is jsonable otherwise throw error

Source code in scrapling/core/custom_types.py

def json(self) -> Dict:
    """Parse the current text as JSON and return the result; invalid JSON raises an error."""
    # orjson has an issue with subclasses of str, so hand it a plain ``str`` copy.
    # Check this out: https://github.com/ijl/orjson/issues/445
    plain_text = str(self)
    return loads(plain_text)

re ¶

re(
    regex,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
    check_match=False,
)

Apply the given regex to the current text and return a list of strings with the matches.

PARAMETER	DESCRIPTION
`regex`	Can be either a compiled regular expression or a string. TYPE: `str \| Pattern`
`replace_entities`	If enabled character entity references are replaced by their corresponding character TYPE: `bool` DEFAULT: `True`
`clean_match`	If enabled, this will ignore all whitespaces and consecutive spaces while matching TYPE: `bool` DEFAULT: `False`
`case_sensitive`	If disabled, function will set the regex to ignore the letters-case while compiling it TYPE: `bool` DEFAULT: `True`
`check_match`	Used to quickly check if this regex matches or not without any operations on the results TYPE: `bool` DEFAULT: `False`

Source code in scrapling/core/custom_types.py

def re(
    self,
    regex: str | Pattern,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
    check_match: bool = False,
) -> Union["TextHandlers", bool]:
    """Apply the given regex to the current text and return a list of strings with the matches.

    :param regex: Can be either a compiled regular expression or a string.
    :param replace_entities: If enabled character entity references are replaced by their corresponding character
    :param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
    :param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
    :param check_match: Used to quickly check if this regex matches or not without any operations on the results

    """
    if isinstance(regex, str):
        if case_sensitive:
            regex = re_compile(regex, UNICODE)
        else:
            regex = re_compile(regex, flags=UNICODE | IGNORECASE)

    input_text = self.clean() if clean_match else self
    results = regex.findall(input_text)
    if check_match:
        return bool(results)

    if all(_is_iterable(res) for res in results):
        results = flatten(results)

    if not replace_entities:
        return TextHandlers([TextHandler(string) for string in results])

    return TextHandlers([TextHandler(_replace_entities(s)) for s in results])

re_first ¶

re_first(
    regex,
    default=None,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Apply the given regex to text and return the first match if found, otherwise return the default value.

PARAMETER	DESCRIPTION
`regex`	Can be either a compiled regular expression or a string. TYPE: `str \| Pattern`
`default`	The default value to be returned if there is no match TYPE: `Any` DEFAULT: `None`
`replace_entities`	If enabled character entity references are replaced by their corresponding character TYPE: `bool` DEFAULT: `True`
`clean_match`	If enabled, this will ignore all whitespaces and consecutive spaces while matching TYPE: `bool` DEFAULT: `False`
`case_sensitive`	If disabled, function will set the regex to ignore the letters-case while compiling it TYPE: `bool` DEFAULT: `True`

Source code in scrapling/core/custom_types.py

def re_first(
    self,
    regex: str | Pattern,
    default: Any = None,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> "TextHandler":
    """Run ``self.re`` with the given options and return its first match, or ``default`` when there is none.

    :param regex: Can be either a compiled regular expression or a string.
    :param default: The value returned when the regex produces no match
    :param replace_entities: If enabled character entity references are replaced by their corresponding character
    :param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
    :param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
    """
    matches = self.re(regex, replace_entities, clean_match=clean_match, case_sensitive=case_sensitive)
    if matches:
        return matches[0]
    return default

scrapling.core.custom_types.TextHandlers ¶

Bases: List[TextHandler]


              flowchart TD
              scrapling.core.custom_types.TextHandlers[TextHandlers]

              

              click scrapling.core.custom_types.TextHandlers href "" "scrapling.core.custom_types.TextHandlers"

The `TextHandlers` class is a subclass of the builtin `List` class that provides a few additional methods.

slots `class-attribute` `instance-attribute` ¶

__slots__ = ()

extract_first `class-attribute` `instance-attribute` ¶

extract_first = get

getall `class-attribute` `instance-attribute` ¶

getall = extract

getitem ¶

__getitem__(pos)

Source code in scrapling/core/custom_types.py

def __getitem__(self, pos: SupportsIndex | slice) -> Union[TextHandler, "TextHandlers"]:
    """Return a ``TextHandlers`` for a slice, or a single ``TextHandler`` for an index."""
    picked = super().__getitem__(pos)
    if not isinstance(pos, slice):
        return TextHandler(cast(TextHandler, picked))
    return TextHandlers(cast(List[TextHandler], picked))

re ¶

re(
    regex,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Call the .re() method for each element in this list and return their results flattened as TextHandlers.

PARAMETER	DESCRIPTION
`regex`	Can be either a compiled regular expression or a string. TYPE: `str \| Pattern`
`replace_entities`	If enabled character entity references are replaced by their corresponding character TYPE: `bool` DEFAULT: `True`
`clean_match`	if enabled, this will ignore all whitespaces and consecutive spaces while matching TYPE: `bool` DEFAULT: `False`
`case_sensitive`	if disabled, the function will set the regex to ignore the letters-case while compiling it TYPE: `bool` DEFAULT: `True`

Source code in scrapling/core/custom_types.py

def re(
    self,
    regex: str | Pattern,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> "TextHandlers":
    """Run ``.re()`` on every element of this list and return all results
    flattened into a single TextHandlers.

    :param regex: Can be either a compiled regular expression or a string.
    :param replace_entities: If enabled character entity references are replaced by their corresponding character
    :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
    :param case_sensitive: if disabled, the function will set the regex to ignore the letters-case while compiling it
    """
    per_item_matches = []
    for item in self:
        per_item_matches.append(item.re(regex, replace_entities, clean_match, case_sensitive))
    return TextHandlers(flatten(per_item_matches))

re_first ¶

re_first(
    regex,
    default=None,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Call the .re() method for each element in this list, in order, and return the first result found, or the default value if nothing matches.

PARAMETER	DESCRIPTION
`regex`	Can be either a compiled regular expression or a string. TYPE: `str \| Pattern`
`default`	The default value to be returned if there is no match TYPE: `Any` DEFAULT: `None`
`replace_entities`	If enabled character entity references are replaced by their corresponding character TYPE: `bool` DEFAULT: `True`
`clean_match`	If enabled, this will ignore all whitespaces and consecutive spaces while matching TYPE: `bool` DEFAULT: `False`
`case_sensitive`	If disabled, function will set the regex to ignore the letters-case while compiling it TYPE: `bool` DEFAULT: `True`

Source code in scrapling/core/custom_types.py

def re_first(
    self,
    regex: str | Pattern,
    default: Any = None,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> TextHandler:  # pragma: no cover
    """Run ``.re()`` on each element of this list in order and return the first
    match found, or the default value when no element produces a match.

    :param regex: Can be either a compiled regular expression or a string.
    :param default: The default value to be returned if there is no match
    :param replace_entities: If enabled character entity references are replaced by their corresponding character
    :param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
    :param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
    """
    # Lazy on purpose: elements after the first one that matches are never searched.
    matches = (
        match
        for item in self
        for match in item.re(regex, replace_entities, clean_match, case_sensitive)
    )
    return next(matches, default)

get ¶

get(default=None)

Returns the first item of the current list

PARAMETER	DESCRIPTION
`default`	the default value to return if the current list is empty DEFAULT: `None`

Source code in scrapling/core/custom_types.py

def get(self, default=None):
    """Return the first item of this list, or ``default`` when the list is empty.

    :param default: the value to return if the current list is empty
    """
    if len(self) == 0:
        return default
    return self[0]

extract ¶

extract()

Source code in scrapling/core/custom_types.py

def extract(self):
    """Return this list itself, unchanged."""
    return self

scrapling.core.custom_types.AttributesHandler ¶

AttributesHandler(mapping=None, **kwargs)

Bases: Mapping[str, _TextHandlerType]


              flowchart TD
              scrapling.core.custom_types.AttributesHandler[AttributesHandler]

              

              click scrapling.core.custom_types.AttributesHandler href "" "scrapling.core.custom_types.AttributesHandler"

A read-only mapping used instead of the standard dictionary for the speed boost; it also adds extra functionality. If a standard dictionary is needed, convert an instance of this class with the `dict` function.

Source code in scrapling/core/custom_types.py

def __init__(self, mapping: Any = None, **kwargs: Any) -> None:
    """Build the read-only attribute store from ``mapping`` and any keyword arguments.

    ``str`` values are wrapped in ``TextHandler``; other values are stored as given.
    Keyword arguments override keys of the same name from ``mapping``.
    """

    def _wrap(value: Any) -> Any:
        # Only plain strings are wrapped; anything else passes through untouched.
        return TextHandler(value) if isinstance(value, str) else value

    collected = {}
    if mapping is not None:
        collected = {name: _wrap(value) for name, value in mapping.items()}

    for name, value in kwargs.items():
        collected[name] = _wrap(value)

    # Fastest read-only mapping type
    self._data: Mapping[str, Any] = MappingProxyType(collected)

slots `class-attribute` `instance-attribute` ¶

__slots__ = ('_data',)

json_string `property` ¶

json_string

Convert current attributes to JSON bytes if the attributes are JSON serializable otherwise throws error

get ¶

get(key, default=None)

Acts like the standard dictionary .get() method

Source code in scrapling/core/custom_types.py

def get(self, key: str, default: Any = None) -> _TextHandlerType:
    """Return the value stored under ``key``, or ``default`` when the key is missing."""
    stored = self._data
    return stored.get(key, default)

search_values ¶

search_values(keyword, partial=False)

Search the current attributes by value and yield an `AttributesHandler` containing each matching item

PARAMETER	DESCRIPTION
`keyword`	The keyword to search for in the attribute values TYPE: `str`
`partial`	If True, the function will search if keyword in each value instead of perfect match TYPE: `bool` DEFAULT: `False`

Source code in scrapling/core/custom_types.py

def search_values(self, keyword: str, partial: bool = False) -> Generator["AttributesHandler", None, None]:
    """Search the current attributes by value, yielding a one-item ``AttributesHandler`` per match
    :param keyword: The keyword to search for in the attribute values
    :param partial: If True, a value matches when it contains the keyword; otherwise it must equal the keyword
    """
    for name, value in self._data.items():
        matched = (keyword in value) if partial else (keyword == value)
        if matched:
            yield AttributesHandler({name: value})

getitem ¶

__getitem__(key)

Source code in scrapling/core/custom_types.py

def __getitem__(self, key: str) -> _TextHandlerType:
    """Return the value stored under ``key``; a missing key raises from the underlying mapping lookup."""
    stored = self._data
    return stored[key]

iter ¶

__iter__()

Source code in scrapling/core/custom_types.py

def __iter__(self):
    """Iterate over the keys of the underlying mapping."""
    stored = self._data
    return iter(stored)

len ¶

__len__()

Source code in scrapling/core/custom_types.py

def __len__(self):
    """Return the number of stored attributes."""
    stored = self._data
    return len(stored)

repr ¶

__repr__()

Source code in scrapling/core/custom_types.py

def __repr__(self):
    """Return ``ClassName(<underlying mapping>)`` for debugging."""
    class_name = self.__class__.__name__
    return f"{class_name}({self._data})"

str ¶

__str__()

Source code in scrapling/core/custom_types.py

def __str__(self):
    """Return the string form of the underlying mapping."""
    stored = self._data
    return str(stored)

contains ¶

__contains__(key)

Source code in scrapling/core/custom_types.py

def __contains__(self, key):
    """Return True when ``key`` is one of the stored attribute names."""
    stored = self._data
    return key in stored

Custom Types API Reference¶

scrapling.core.custom_types.TextHandler ¶

__slots__ class-attribute instance-attribute ¶

extract class-attribute instance-attribute ¶

extract_first class-attribute instance-attribute ¶

__getitem__ ¶

split ¶

strip ¶

lstrip ¶

rstrip ¶

capitalize ¶

casefold ¶

center ¶

expandtabs ¶

format ¶

format_map ¶

join ¶

ljust ¶

rjust ¶

swapcase ¶

title ¶

translate ¶

zfill ¶

replace ¶

upper ¶

lower ¶

sort ¶

clean ¶

get ¶

getall ¶

json ¶

re ¶

re_first ¶

scrapling.core.custom_types.TextHandlers ¶

__slots__ class-attribute instance-attribute ¶

extract_first class-attribute instance-attribute ¶

getall class-attribute instance-attribute ¶

__getitem__ ¶

re ¶

re_first ¶

get ¶

extract ¶

scrapling.core.custom_types.AttributesHandler ¶

__slots__ class-attribute instance-attribute ¶

json_string property ¶

get ¶

search_values ¶

__getitem__ ¶

__iter__ ¶

__len__ ¶

__repr__ ¶

__str__ ¶

__contains__ ¶

slots `class-attribute` `instance-attribute` ¶

extract `class-attribute` `instance-attribute` ¶

extract_first `class-attribute` `instance-attribute` ¶

getitem ¶

slots `class-attribute` `instance-attribute` ¶

extract_first `class-attribute` `instance-attribute` ¶

getall `class-attribute` `instance-attribute` ¶

getitem ¶

slots `class-attribute` `instance-attribute` ¶

json_string `property` ¶

getitem ¶

iter ¶

len ¶

repr ¶

str ¶

contains ¶