Skip to content

Custom Types API Reference

Here's the reference information for all the custom type classes that Scrapling implements, including their parameters, attributes, and methods.

You can import all of them directly like below:

from scrapling.core.custom_types import TextHandler, TextHandlers, AttributesHandler

scrapling.core.custom_types.TextHandler

Bases: str


              flowchart TD
              scrapling.core.custom_types.TextHandler[TextHandler]

              

              click scrapling.core.custom_types.TextHandler href "" "scrapling.core.custom_types.TextHandler"
            

Extends standard Python string by adding more functionality

__slots__ class-attribute instance-attribute

__slots__ = ()

extract class-attribute instance-attribute

extract = getall

extract_first class-attribute instance-attribute

extract_first = get

__getitem__

__getitem__(key)
Source code in scrapling/core/custom_types.py
def __getitem__(self, key: SupportsIndex | slice) -> "TextHandler":  # pragma: no cover
    """Index or slice the text like a regular string, returning the piece as a ``TextHandler``."""
    selected = super().__getitem__(key)
    return TextHandler(selected)

split

split(sep=None, maxsplit=-1)
Source code in scrapling/core/custom_types.py
def split(self, sep: str | None = None, maxsplit: SupportsIndex = -1) -> list[Any]:  # pragma: no cover
    """Split the text with the parent class's ``split`` and return the pieces as a ``TextHandlers`` of ``TextHandler`` items."""
    pieces = super().split(sep, maxsplit)
    return TextHandlers([TextHandler(piece) for piece in pieces])

strip

strip(chars=None)
Source code in scrapling/core/custom_types.py
def strip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``strip`` with ``chars`` and wrap the result in a ``TextHandler``."""
    stripped = super().strip(chars)
    return TextHandler(stripped)

lstrip

lstrip(chars=None)
Source code in scrapling/core/custom_types.py
def lstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``lstrip`` with ``chars`` and wrap the result in a ``TextHandler``."""
    stripped = super().lstrip(chars)
    return TextHandler(stripped)

rstrip

rstrip(chars=None)
Source code in scrapling/core/custom_types.py
def rstrip(self, chars: str | None = None) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``rstrip`` with ``chars`` and wrap the result in a ``TextHandler``."""
    stripped = super().rstrip(chars)
    return TextHandler(stripped)

capitalize

capitalize()
Source code in scrapling/core/custom_types.py
def capitalize(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``capitalize`` and wrap the result in a ``TextHandler``."""
    capitalized = super().capitalize()
    return TextHandler(capitalized)

casefold

casefold()
Source code in scrapling/core/custom_types.py
def casefold(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``casefold`` and wrap the result in a ``TextHandler``."""
    folded = super().casefold()
    return TextHandler(folded)

center

center(width, fillchar=' ')
Source code in scrapling/core/custom_types.py
def center(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``center`` with ``width`` and ``fillchar`` and wrap the result in a ``TextHandler``."""
    centered = super().center(width, fillchar)
    return TextHandler(centered)

expandtabs

expandtabs(tabsize=8)
Source code in scrapling/core/custom_types.py
def expandtabs(self, tabsize: SupportsIndex = 8) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``expandtabs`` with ``tabsize`` and wrap the result in a ``TextHandler``."""
    expanded = super().expandtabs(tabsize)
    return TextHandler(expanded)

format

format(*args, **kwargs)
Source code in scrapling/core/custom_types.py
def format(self, *args: object, **kwargs: object) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``format`` with the given arguments and wrap the result in a ``TextHandler``."""
    formatted = super().format(*args, **kwargs)
    return TextHandler(formatted)

format_map

format_map(mapping)
Source code in scrapling/core/custom_types.py
def format_map(self, mapping) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``format_map`` with ``mapping`` and wrap the result in a ``TextHandler``."""
    formatted = super().format_map(mapping)
    return TextHandler(formatted)

join

join(iterable)
Source code in scrapling/core/custom_types.py
def join(self, iterable: Iterable[str]) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``join`` over ``iterable`` and wrap the result in a ``TextHandler``."""
    joined = super().join(iterable)
    return TextHandler(joined)

ljust

ljust(width, fillchar=' ')
Source code in scrapling/core/custom_types.py
def ljust(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``ljust`` with ``width`` and ``fillchar`` and wrap the result in a ``TextHandler``."""
    padded = super().ljust(width, fillchar)
    return TextHandler(padded)

rjust

rjust(width, fillchar=' ')
Source code in scrapling/core/custom_types.py
def rjust(self, width: SupportsIndex, fillchar: str = " ") -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``rjust`` with ``width`` and ``fillchar`` and wrap the result in a ``TextHandler``."""
    padded = super().rjust(width, fillchar)
    return TextHandler(padded)

swapcase

swapcase()
Source code in scrapling/core/custom_types.py
def swapcase(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``swapcase`` and wrap the result in a ``TextHandler``."""
    swapped = super().swapcase()
    return TextHandler(swapped)

title

title()
Source code in scrapling/core/custom_types.py
def title(self) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``title`` and wrap the result in a ``TextHandler``."""
    titled = super().title()
    return TextHandler(titled)

translate

translate(table)
Source code in scrapling/core/custom_types.py
def translate(self, table) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``translate`` with ``table`` and wrap the result in a ``TextHandler``."""
    translated = super().translate(table)
    return TextHandler(translated)

zfill

zfill(width)
Source code in scrapling/core/custom_types.py
def zfill(self, width: SupportsIndex) -> Union[str, "TextHandler"]:  # pragma: no cover
    """Run the parent class's ``zfill`` with ``width`` and wrap the result in a ``TextHandler``."""
    padded = super().zfill(width)
    return TextHandler(padded)

replace

replace(old, new, count=-1)
Source code in scrapling/core/custom_types.py
def replace(self, old: str, new: str, count: SupportsIndex = -1) -> Union[str, "TextHandler"]:
    """Run the parent class's ``replace`` with ``old``, ``new`` and ``count`` and wrap the result in a ``TextHandler``."""
    replaced = super().replace(old, new, count)
    return TextHandler(replaced)

upper

upper()
Source code in scrapling/core/custom_types.py
def upper(self) -> Union[str, "TextHandler"]:
    """Run the parent class's ``upper`` and wrap the result in a ``TextHandler``."""
    uppercased = super().upper()
    return TextHandler(uppercased)

lower

lower()
Source code in scrapling/core/custom_types.py
def lower(self) -> Union[str, "TextHandler"]:
    """Run the parent class's ``lower`` and wrap the result in a ``TextHandler``."""
    lowercased = super().lower()
    return TextHandler(lowercased)

sort

sort(reverse=False)

Return a sorted version of the string

Source code in scrapling/core/custom_types.py
def sort(self, reverse: bool = False) -> Union[str, "TextHandler"]:
    """Return a new instance of the same class with this text's characters in sorted order.

    :param reverse: If True, the characters are sorted in descending order
    """
    ordered_chars = sorted(self, reverse=reverse)
    rebuilt = "".join(ordered_chars)
    return self.__class__(rebuilt)

clean

clean(remove_entities=False)

Return a new version of the string after removing all white spaces and consecutive spaces

Source code in scrapling/core/custom_types.py
def clean(self, remove_entities=False) -> Union[str, "TextHandler"]:
    """Return a cleaned copy of the text as a new instance of the same class.

    The text is passed through ``__CLEANING_TABLE__``, optionally through
    ``_replace_entities``, then every match of ``__CONSECUTIVE_SPACES_REGEX__``
    is replaced by a single space and the result is stripped.

    :param remove_entities: If True, the translated text is also passed through ``_replace_entities``
    """
    cleaned = self.translate(__CLEANING_TABLE__)
    if remove_entities:
        cleaned = _replace_entities(cleaned)
    collapsed = __CONSECUTIVE_SPACES_REGEX__.sub(" ", cleaned)
    return self.__class__(collapsed.strip())

get

get(default=None)
Source code in scrapling/core/custom_types.py
def get(self, default=None):  # pragma: no cover
    """Return this text itself; ``default`` is accepted but never used."""
    return self

getall

getall()
Source code in scrapling/core/custom_types.py
def getall(self):  # pragma: no cover
    """Return this text itself, unchanged."""
    return self

json

json()

Return the parsed JSON if the text is valid JSON; otherwise raise an error

Source code in scrapling/core/custom_types.py
def json(self) -> Dict:
    """Parse this text as JSON and return the result; ``loads`` raises if the text is not valid JSON."""
    # orjson has an issue with subclasses of str (https://github.com/ijl/orjson/issues/445),
    # so hand the parser a plain ``str`` copy instead of ``self``.
    plain_text = str(self)
    return loads(plain_text)

re

re(
    regex,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
    check_match=False,
)

Apply the given regex to the current text and return a list of strings with the matches.

PARAMETER DESCRIPTION
regex

Can be either a compiled regular expression or a string.

TYPE: str | Pattern

replace_entities

If enabled character entity references are replaced by their corresponding character

TYPE: bool DEFAULT: True

clean_match

If enabled, this will ignore all whitespaces and consecutive spaces while matching

TYPE: bool DEFAULT: False

case_sensitive

If disabled, function will set the regex to ignore the letters-case while compiling it

TYPE: bool DEFAULT: True

check_match

Used to quickly check if this regex matches or not without any operations on the results

TYPE: bool DEFAULT: False

Source code in scrapling/core/custom_types.py
def re(
    self,
    regex: str | Pattern,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
    check_match: bool = False,
) -> Union["TextHandlers", bool]:
    """Apply the given regex to the current text and return a list of strings with the matches.

    :param regex: Can be either a compiled regular expression or a string.
    :param replace_entities: If enabled character entity references are replaced by their corresponding character
    :param clean_match: If enabled, this will ignore all whitespaces and consecutive spaces while matching
    :param case_sensitive: If disabled, function will set the regex to ignore the letters-case while compiling it
    :param check_match: Used to quickly check if this regex matches or not without any operations on the results

    """
    if isinstance(regex, str):
        if case_sensitive:
            regex = re_compile(regex, UNICODE)
        else:
            regex = re_compile(regex, flags=UNICODE | IGNORECASE)

    input_text = self.clean() if clean_match else self
    results = regex.findall(input_text)
    if check_match:
        return bool(results)

    if all(_is_iterable(res) for res in results):
        results = flatten(results)

    if not replace_entities:
        return TextHandlers([TextHandler(string) for string in results])

    return TextHandlers([TextHandler(_replace_entities(s)) for s in results])

re_first

re_first(
    regex,
    default=None,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Apply the given regex to text and return the first match if found, otherwise return the default value.

PARAMETER DESCRIPTION
regex

Can be either a compiled regular expression or a string.

TYPE: str | Pattern

default

The default value to be returned if there is no match

TYPE: Any DEFAULT: None

replace_entities

If enabled character entity references are replaced by their corresponding character

TYPE: bool DEFAULT: True

clean_match

If enabled, this will ignore all whitespaces and consecutive spaces while matching

TYPE: bool DEFAULT: False

case_sensitive

If disabled, function will set the regex to ignore the letters-case while compiling it

TYPE: bool DEFAULT: True

Source code in scrapling/core/custom_types.py
def re_first(
    self,
    regex: str | Pattern,
    default: Any = None,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> "TextHandler":
    """Run ``self.re`` with the given options and return its first match, or ``default`` when there is none.

    :param regex: Can be either a compiled regular expression or a string.
    :param default: The default value to be returned if there is no match
    :param replace_entities: Forwarded to ``self.re``
    :param clean_match: Forwarded to ``self.re``
    :param case_sensitive: Forwarded to ``self.re``

    """
    matches = self.re(
        regex,
        replace_entities,
        clean_match=clean_match,
        case_sensitive=case_sensitive,
    )
    if not matches:
        return default
    return matches[0]

scrapling.core.custom_types.TextHandlers

Bases: List[TextHandler]


              flowchart TD
              scrapling.core.custom_types.TextHandlers[TextHandlers]

              

              click scrapling.core.custom_types.TextHandlers href "" "scrapling.core.custom_types.TextHandlers"
            

The TextHandlers class is a subclass of the builtin list class that provides a few additional methods.

__slots__ class-attribute instance-attribute

__slots__ = ()

extract_first class-attribute instance-attribute

extract_first = get

getall class-attribute instance-attribute

getall = extract

__getitem__

__getitem__(pos)
Source code in scrapling/core/custom_types.py
def __getitem__(self, pos: SupportsIndex | slice) -> Union[TextHandler, "TextHandlers"]:
    """Return a ``TextHandlers`` for a slice, or a single ``TextHandler`` for any other index."""
    picked = super().__getitem__(pos)
    if not isinstance(pos, slice):
        return TextHandler(cast(TextHandler, picked))
    return TextHandlers(cast(List[TextHandler], picked))

re

re(
    regex,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Call the .re() method for each element in this list and return their results flattened as TextHandlers.

PARAMETER DESCRIPTION
regex

Can be either a compiled regular expression or a string.

TYPE: str | Pattern

replace_entities

If enabled character entity references are replaced by their corresponding character

TYPE: bool DEFAULT: True

clean_match

if enabled, this will ignore all whitespaces and consecutive spaces while matching

TYPE: bool DEFAULT: False

case_sensitive

if disabled, the function will set the regex to ignore the letters-case while compiling it

TYPE: bool DEFAULT: True

Source code in scrapling/core/custom_types.py
def re(
    self,
    regex: str | Pattern,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> "TextHandlers":
    """Call ``.re()`` on every element of this list and return all the results
    flattened into a single TextHandlers.

    :param regex: Can be either a compiled regular expression or a string.
    :param replace_entities: Forwarded to each element's ``.re()``
    :param clean_match: Forwarded to each element's ``.re()``
    :param case_sensitive: Forwarded to each element's ``.re()``
    """
    per_element = []
    for handler in self:
        per_element.append(handler.re(regex, replace_entities, clean_match, case_sensitive))
    return TextHandlers(flatten(per_element))

re_first

re_first(
    regex,
    default=None,
    replace_entities=True,
    clean_match=False,
    case_sensitive=True,
)

Call the .re_first() method for each element in this list and return the first result or the default value otherwise.

PARAMETER DESCRIPTION
regex

Can be either a compiled regular expression or a string.

TYPE: str | Pattern

default

The default value to be returned if there is no match

TYPE: Any DEFAULT: None

replace_entities

If enabled character entity references are replaced by their corresponding character

TYPE: bool DEFAULT: True

clean_match

If enabled, this will ignore all whitespaces and consecutive spaces while matching

TYPE: bool DEFAULT: False

case_sensitive

If disabled, function will set the regex to ignore the letters-case while compiling it

TYPE: bool DEFAULT: True

Source code in scrapling/core/custom_types.py
def re_first(
    self,
    regex: str | Pattern,
    default: Any = None,
    replace_entities: bool = True,
    clean_match: bool = False,
    case_sensitive: bool = True,
) -> TextHandler:  # pragma: no cover
    """Call ``.re()`` on each element in order and return the first match any of them
    produces, or the default value when no element matches.

    :param regex: Can be either a compiled regular expression or a string.
    :param default: The default value to be returned if there is no match
    :param replace_entities: Forwarded to each element's ``.re()``
    :param clean_match: Forwarded to each element's ``.re()``
    :param case_sensitive: Forwarded to each element's ``.re()``
    """
    no_match = object()  # sentinel, so a falsy first match is still returned
    for handler in self:
        matches = handler.re(regex, replace_entities, clean_match, case_sensitive)
        first = next(iter(matches), no_match)
        if first is not no_match:
            return first
    return default

get

get(default=None)

Returns the first item of the current list

PARAMETER DESCRIPTION
default

the default value to return if the current list is empty

DEFAULT: None

Source code in scrapling/core/custom_types.py
def get(self, default=None):
    """Return the first item of this list, or ``default`` when the list has no items.

    :param default: the default value to return if the current list is empty
    """
    if len(self) == 0:
        return default
    return self[0]

extract

extract()
Source code in scrapling/core/custom_types.py
def extract(self):
    """Return this list itself, unchanged."""
    return self

scrapling.core.custom_types.AttributesHandler

AttributesHandler(mapping=None, **kwargs)

Bases: Mapping[str, _TextHandlerType]


              flowchart TD
              scrapling.core.custom_types.AttributesHandler[AttributesHandler]

              

              click scrapling.core.custom_types.AttributesHandler href "" "scrapling.core.custom_types.AttributesHandler"
            

A read-only mapping used instead of the standard dictionary for the speed boost; it also adds extra functionality. If a standard dictionary is needed, convert an instance of this class with the dict function.

Source code in scrapling/core/custom_types.py
def __init__(self, mapping: Any = None, **kwargs: Any) -> None:
    """Build the read-only attribute store from ``mapping`` and/or keyword arguments.

    String values are wrapped in ``TextHandler``; every other value is stored
    unchanged. Keyword arguments are applied after ``mapping``, so they win on
    duplicate keys.

    :param mapping: An object exposing ``.items()``, or None for no initial items
    :param kwargs: Extra key/value pairs to add
    """

    def _wrap(value):
        # Only plain strings get the TextHandler treatment
        return TextHandler(value) if isinstance(value, str) else value

    attributes = {}
    if mapping is not None:
        for name, value in mapping.items():
            attributes[name] = _wrap(value)

    for name, value in kwargs.items():
        attributes[name] = _wrap(value)

    # Fastest read-only mapping type
    self._data: Mapping[str, Any] = MappingProxyType(attributes)

__slots__ class-attribute instance-attribute

__slots__ = ('_data',)

json_string property

json_string

Convert the current attributes to JSON bytes if they are JSON serializable; otherwise raise an error

get

get(key, default=None)

Acts like the standard dictionary .get() method

Source code in scrapling/core/custom_types.py
def get(self, key: str, default: Any = None) -> _TextHandlerType:
    """Return the value stored under ``key``, or ``default`` if the key is absent (like ``dict.get``)."""
    attributes = self._data
    return attributes.get(key, default)

search_values

search_values(keyword, partial=False)

Search the current attributes by value and yield an AttributesHandler containing each matching item

PARAMETER DESCRIPTION
keyword

The keyword to search for in the attribute values

TYPE: str

partial

If True, the function checks whether the keyword is contained in each value instead of requiring an exact match

TYPE: bool DEFAULT: False

Source code in scrapling/core/custom_types.py
def search_values(self, keyword: str, partial: bool = False) -> Generator["AttributesHandler", None, None]:
    """Search the current attributes by value, yielding a single-item ``AttributesHandler`` per match.

    :param keyword: The keyword to search for in the attribute values
    :param partial: If True, a value matches when ``keyword in value``; otherwise it must equal the keyword
    """
    if partial:
        def is_match(candidate):
            return keyword in candidate
    else:
        def is_match(candidate):
            return keyword == candidate

    for name, candidate in self._data.items():
        if is_match(candidate):
            yield AttributesHandler({name: candidate})

__getitem__

__getitem__(key)
Source code in scrapling/core/custom_types.py
def __getitem__(self, key: str) -> _TextHandlerType:
    """Return the value stored under ``key`` by indexing the underlying ``_data`` mapping."""
    attributes = self._data
    return attributes[key]

__iter__

__iter__()
Source code in scrapling/core/custom_types.py
def __iter__(self):
    """Iterate over the underlying ``_data`` mapping."""
    attributes = self._data
    return iter(attributes)

__len__

__len__()
Source code in scrapling/core/custom_types.py
def __len__(self):
    """Return the length of the underlying ``_data`` mapping."""
    attributes = self._data
    return len(attributes)

__repr__

__repr__()
Source code in scrapling/core/custom_types.py
def __repr__(self):
    """Return ``ClassName(<data>)``, using the instance's class name and its ``_data`` mapping."""
    class_name = self.__class__.__name__
    return f"{class_name}({self._data})"

__str__

__str__()
Source code in scrapling/core/custom_types.py
def __str__(self):
    """Return the string form of the underlying ``_data`` mapping."""
    attributes = self._data
    return str(attributes)

__contains__

__contains__(key)
Source code in scrapling/core/custom_types.py
def __contains__(self, key):
    """Report whether ``key`` is in the underlying ``_data`` mapping."""
    attributes = self._data
    return key in attributes