Skip to content

Spider Classes

Here's the reference information for the spider framework classes' parameters, attributes, and methods.

You can import them directly like below:

from scrapling.spiders import Spider, Request, CrawlResult, SessionManager, Response

scrapling.spiders.Spider

Spider(crawldir=None, interval=300.0)

Bases: ABC


              flowchart TD
              scrapling.spiders.Spider[Spider]

              

              click scrapling.spiders.Spider href "" "scrapling.spiders.Spider"
            

An abstract base class for creating web spiders.

Check the documentation website for more information.

Initialize the spider.

PARAMETER DESCRIPTION
crawldir

Directory for checkpoint files. If provided, enables pause/resume.

TYPE: Optional[Union[str, Path, AsyncPath]] DEFAULT: None

interval

Seconds between periodic checkpoint saves (default 5 minutes).

TYPE: float DEFAULT: 300.0

Source code in scrapling/spiders/spider.py
def __init__(self, crawldir: Optional[Union[str, Path, AsyncPath]] = None, interval: float = 300.0):
    """Initialize the spider.

    Builds a dedicated logger for the spider (log counter, console handler and an optional file handler),
    stores the checkpoint settings, and registers the sessions through `configure_sessions()`.

    :param crawldir: Directory for checkpoint files. If provided, enables pause/resume.
    :param interval: Seconds between periodic checkpoint saves (default 5 minutes).
    :raises ValueError: If the spider class does not define a `name`.
    :raises SessionConfigurationError: If `configure_sessions()` raises, or finishes without adding any session.
    """
    # The name is required up front because the logger name below is derived from it
    if self.name is None:
        raise ValueError(f"{self.__class__.__name__} must have a name.")

    self.logger = logging.getLogger(f"scrapling.spiders.{self.name}")
    self.logger.setLevel(self.logging_level)
    # Start from a clean handler list (presumably so re-creating a spider with the same name
    # does not duplicate output -- `logging.getLogger` returns the same logger object per name)
    self.logger.handlers.clear()
    self.logger.propagate = False  # Don't propagate to parent 'scrapling' logger

    # `logging_format` carries a `{spider_name}` placeholder that is filled in before
    # the result is handed to `logging.Formatter`
    formatter = logging.Formatter(
        fmt=self.logging_format.format(spider_name=self.name), datefmt=self.logging_date_format
    )

    # Add a log counter handler to track log counts by level
    self._log_counter = LogCounterHandler()
    self.logger.addHandler(self._log_counter)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    self.logger.addHandler(console_handler)

    # File logging is optional; the parent directory of the log file is created on demand
    if self.log_file:
        Path(self.log_file).parent.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(self.log_file)
        file_handler.setFormatter(formatter)
        self.logger.addHandler(file_handler)

    # Any falsy `crawldir` (None or an empty string) is stored as None
    self.crawldir: Optional[Path] = Path(crawldir) if crawldir else None
    self._interval = interval
    # Only set while a crawl is running; `pause()` checks it
    self._engine: Optional[CrawlerEngine] = None
    self._original_sigint_handler: Any = None

    self._session_manager = SessionManager()

    # Wrap any failure raised by the (possibly user-overridden) hook so the error names the spider class
    try:
        self.configure_sessions(self._session_manager)
    except Exception as e:
        raise SessionConfigurationError(f"Error in {self.__class__.__name__}.configure_sessions(): {e}") from e

    # At least one session must have been registered by the hook
    if len(self._session_manager) == 0:
        raise SessionConfigurationError(f"{self.__class__.__name__}.configure_sessions() did not add any sessions")

    self.logger.info("Spider initialized")

name class-attribute instance-attribute

name = None

start_urls class-attribute instance-attribute

start_urls = []

allowed_domains class-attribute instance-attribute

allowed_domains = set()

robots_txt_obey class-attribute instance-attribute

robots_txt_obey = False

development_mode class-attribute instance-attribute

development_mode = False

development_cache_dir class-attribute instance-attribute

development_cache_dir = None

concurrent_requests class-attribute instance-attribute

concurrent_requests = 4

concurrent_requests_per_domain class-attribute instance-attribute

concurrent_requests_per_domain = 0

download_delay class-attribute instance-attribute

download_delay = 0.0

max_blocked_retries class-attribute instance-attribute

max_blocked_retries = 3

fp_include_kwargs class-attribute instance-attribute

fp_include_kwargs = False

fp_keep_fragments class-attribute instance-attribute

fp_keep_fragments = False

fp_include_headers class-attribute instance-attribute

fp_include_headers = False

logging_level class-attribute instance-attribute

logging_level = logging.DEBUG

logging_format class-attribute instance-attribute

logging_format = "[%(asctime)s]:({spider_name}) %(levelname)s: %(message)s"

logging_date_format class-attribute instance-attribute

logging_date_format = '%Y-%m-%d %H:%M:%S'

log_file class-attribute instance-attribute

log_file = None

logger instance-attribute

logger = logging.getLogger(f"scrapling.spiders.{self.name}")

crawldir instance-attribute

crawldir = Path(crawldir) if crawldir else None

stats property

stats

Access current crawl stats (works during streaming).

start_requests async

start_requests()

Generate initial requests to start the crawl.

By default, this generates Request objects for each URL in start_urls using the session manager's default session and parse() as callback.

Override this method for more control over initial requests (e.g., to add custom headers, use different callbacks, etc.)

Source code in scrapling/spiders/spider.py
async def start_requests(self) -> AsyncGenerator[Request, None]:
    """Yield the requests that seed the crawl.

    The default implementation emits one `Request` per entry of `start_urls`, bound to the
    session manager's default session. No callback is passed explicitly, so the request's
    callback stays unset (documented as falling back to `parse()`).

    Override this method when the initial requests need more control
    (custom headers, different callbacks, etc.)

    :raises RuntimeError: If `start_urls` is empty.
    """
    seeds = self.start_urls
    if not seeds:
        raise RuntimeError(
            "Spider has no starting point, either set `start_urls` or override `start_requests` function."
        )

    for seed_url in seeds:
        # The default session ID is looked up per request, exactly when the request is created
        yield Request(seed_url, sid=self._session_manager.default_session_id)

parse abstractmethod async

parse(response)

Default callback for processing responses

Source code in scrapling/spiders/spider.py
@abstractmethod
async def parse(self, response: "Response") -> AsyncGenerator[Dict[str, Any] | Request | None, None]:
    """Default response callback; concrete spiders have to provide their own implementation."""
    spider_class = self.__class__.__name__
    raise NotImplementedError(f"{spider_class} must implement parse() method")
    yield  # Never reached: its presence is what makes this function an async generator

on_start async

on_start(resuming=False)

Called before crawling starts. Override for setup logic.

PARAMETER DESCRIPTION
resuming

True when the spider is resuming from a checkpoint; it is passed so that your override can act on it.

TYPE: bool DEFAULT: False

Source code in scrapling/spiders/spider.py
async def on_start(self, resuming: bool = False) -> None:
    """Hook that runs before crawling begins. Override it to add setup logic.

    The default implementation only writes a debug message.

    :param resuming: True when the spider is resuming from a checkpoint; available for overrides to use.
    """
    message = "Resuming spider from checkpoint" if resuming else "Starting spider"
    self.logger.debug(message)

on_close async

on_close()

Called after crawling finishes. Override for cleanup logic.

Source code in scrapling/spiders/spider.py
async def on_close(self) -> None:
    """Hook that runs once crawling has finished. Override it to add cleanup logic.

    The default implementation only writes a debug message.
    """
    spider_logger = self.logger
    spider_logger.debug("Spider closed")

on_error async

on_error(request, error)

Handle request errors for all spider requests.

Override for custom error handling.

Source code in scrapling/spiders/spider.py
async def on_error(self, request: Request, error: Exception) -> None:
    """Error hook shared by all requests of the spider.

    The default implementation does nothing; override it for custom error handling.

    :param request: The request the error belongs to.
    :param error: The exception that was raised.
    """

on_scraped_item async

on_scraped_item(item)

A hook to be overridden by users to do some processing on scraped items, return None to drop the item silently.

Source code in scrapling/spiders/spider.py
async def on_scraped_item(self, item: Dict[str, Any]) -> Dict[str, Any] | None:
    """Item hook meant to be overridden for post-processing scraped items.

    Return the (possibly modified) item to keep it, or `None` to drop it silently.
    The default implementation hands the item back untouched.
    """
    return item

is_blocked async

is_blocked(response)

Check if the response is blocked. Users should override this for custom detection logic.

Source code in scrapling/spiders/spider.py
async def is_blocked(self, response: "Response") -> bool:
    """Tell whether a response counts as blocked. Override this for custom detection logic.

    The default check only looks at the status code: the response is blocked
    when `response.status` is one of `BLOCKED_CODES`.
    """
    return response.status in BLOCKED_CODES

retry_blocked_request async

retry_blocked_request(request, response)

Users should override this to prepare the blocked request before retrying, if needed.

Source code in scrapling/spiders/spider.py
async def retry_blocked_request(self, request: Request, response: "Response") -> Request:
    """Hook for adjusting a blocked request before it is retried. Override it if needed.

    The default implementation returns the request unchanged.

    :param request: The request that got blocked.
    :param response: The blocked response that was received for it.
    :return: The request to retry.
    """
    return request

__repr__

__repr__()

String representation of the spider.

Source code in scrapling/spiders/spider.py
def __repr__(self) -> str:
    """Return the spider as `<ClassName 'spider name'>`."""
    class_name = self.__class__.__name__
    return f"<{class_name} '{self.name}'>"

configure_sessions

configure_sessions(manager)

Configure sessions for this spider.

Override this method to add custom sessions. The default implementation registers a single FetcherSession under the ID "default".

The first session added becomes the default for start_requests() unless specified otherwise.

PARAMETER DESCRIPTION
manager

SessionManager to configure

TYPE: SessionManager

Source code in scrapling/spiders/spider.py
def configure_sessions(self, manager: SessionManager) -> None:
    """Register the sessions this spider will use.

    Override this method to add custom sessions.
    The default implementation registers one `FetcherSession` under the ID "default".

    The first session added becomes the default for `start_requests()` unless specified otherwise.

    :param manager: SessionManager to configure
    """
    # Imported here rather than at module level, as in the original implementation
    from scrapling.fetchers import FetcherSession

    default_session = FetcherSession()
    manager.add("default", default_session)

pause

pause()

Request graceful shutdown of the crawling process.

Source code in scrapling/spiders/spider.py
def pause(self):
    """Request graceful shutdown of the crawling process.

    :raises RuntimeError: If no crawl is currently running (no engine is attached).
    """
    if not self._engine:
        raise RuntimeError("No active crawl to stop")
    self._engine.request_pause()

start

start(use_uvloop=False, **backend_options)

Run the spider and return results.

This is the main entry point for running a spider. Handles async execution internally via anyio.

Pressing Ctrl+C will initiate graceful shutdown (waits for active tasks to complete). Pressing Ctrl+C a second time will force immediate stop.

If crawldir is set, a checkpoint will also be saved on graceful shutdown, allowing you to resume the crawl later by running the spider again.

PARAMETER DESCRIPTION
use_uvloop

Whether to use the faster uvloop/winloop event loop implementation, if available.

TYPE: bool DEFAULT: False

backend_options

Asyncio backend options to be used with anyio.run

TYPE: Any DEFAULT: {}

Source code in scrapling/spiders/spider.py
def start(self, use_uvloop: bool = False, **backend_options: Any) -> CrawlResult:
    """Run the spider to completion and return its results.

    This is the main, blocking entry point: the async crawl is driven internally through
    `anyio.run` on the asyncio backend.

    The first Ctrl+C starts a graceful shutdown (active tasks are allowed to finish);
    a second Ctrl+C forces an immediate stop.

    When crawldir is set, a checkpoint is also saved on graceful shutdown,
    so running the spider again resumes the crawl.

    :param use_uvloop: Whether to use the faster uvloop/winloop event loop implementation, if available.
    :param backend_options: Asyncio backend options to be used with `anyio.run`
    :return: The result of the crawl.
    """
    # `**backend_options` is always a dict, so it can be extended in place
    if use_uvloop:
        backend_options["use_uvloop"] = True

    # The SIGINT handler is swapped in for the run and always restored afterwards
    self._setup_signal_handler()
    try:
        return anyio.run(self.__run, backend="asyncio", backend_options=backend_options)
    finally:
        self._restore_signal_handler()

stream async

stream()

Stream items as they're scraped. Ideal for long-running spiders or building applications on top of the spiders.

Must be called from an async context. Yields items one by one as they are scraped. Access spider.stats during iteration for real-time statistics.

Note: SIGINT handling for pause/resume is not available in stream mode.

Source code in scrapling/spiders/spider.py
async def stream(self) -> AsyncGenerator[Dict[str, Any], None]:
    """Stream items as they're scraped. Ideal for long-running spiders or building applications on top of the spiders.

    Must be called from an async context. Yields items one by one as they are scraped.
    Access `spider.stats` during iteration for real-time statistics.

    Note: SIGINT handling for pause/resume is not available in stream mode.
    """
    # Install this spider's logger for the duration of the stream; the returned token is what
    # `reset_logger()` needs to undo it (NOTE(review): presumably context-variable based -- confirm)
    token = set_logger(self.logger)
    try:
        # A new engine is created per stream and kept on the instance while the stream is active
        self._engine = CrawlerEngine(self, self._session_manager, self.crawldir, self._interval)
        async for item in self._engine:
            yield item
    finally:
        # Cleanup runs whether the engine is exhausted, an error occurs, or the generator is closed early
        self._engine = None
        reset_logger(token)
        if self.log_file:
            # Close the file handlers; they are closed but stay attached to the logger
            for handler in self.logger.handlers:
                if isinstance(handler, logging.FileHandler):
                    handler.close()

scrapling.spiders.Request

Request(
    url,
    sid="",
    callback=None,
    priority=0,
    dont_filter=False,
    meta=None,
    _retry_count=0,
    **kwargs
)
Source code in scrapling/spiders/request.py
def __init__(
    self,
    url: str,
    sid: str = "",
    callback: Callable[[Response], AsyncGenerator[Union[Dict[str, Any], "Request", None], None]] | None = None,
    priority: int = 0,
    dont_filter: bool = False,
    meta: dict[str, Any] | None = None,
    _retry_count: int = 0,
    **kwargs: Any,
) -> None:
    """Create a request.

    :param url: The URL to fetch.
    :param sid: ID of the session that should fetch the request; empty means the default session.
    :param callback: Async generator function that receives the response, or None.
    :param priority: Value the ordering operators (`<`, `>`) compare requests by.
    :param dont_filter: Stored as given (NOTE(review): presumably skips duplicate filtering -- confirm against the engine).
    :param meta: Metadata attached to the request; a falsy value is replaced by a new empty dict.
    :param _retry_count: Internal retry counter.
    :param kwargs: Extra keyword arguments, kept as the session arguments of this request.
    """
    # Session arguments and metadata: a falsy value becomes a fresh empty dict
    self._session_kwargs = kwargs or {}
    self.meta: dict[str, Any] = meta or {}

    # Plain values are stored exactly as passed
    self.url: str = url
    self.sid: str = sid
    self.callback = callback
    self.priority: int = priority
    self.dont_filter: bool = dont_filter
    self._retry_count: int = _retry_count

    # Fingerprint cache; filled by `update_fingerprint()`
    self._fp: Optional[bytes] = None

url instance-attribute

url = url

sid instance-attribute

sid = sid

callback instance-attribute

callback = callback

priority instance-attribute

priority = priority

dont_filter instance-attribute

dont_filter = dont_filter

meta instance-attribute

meta = meta if meta else {}

domain cached property

domain

copy

copy()

Create a copy of this request.

Source code in scrapling/spiders/request.py
def copy(self) -> "Request":
    """Return a new `Request` with the same settings as this one.

    The `meta` dict is shallow-copied, so adding keys to the copy's metadata does not
    affect the original. The cached fingerprint is not carried over.
    """
    meta_copy = self.meta.copy()
    session_kwargs = self._session_kwargs
    return Request(
        url=self.url,
        sid=self.sid,
        callback=self.callback,
        priority=self.priority,
        dont_filter=self.dont_filter,
        meta=meta_copy,
        _retry_count=self._retry_count,
        **session_kwargs,
    )

update_fingerprint

update_fingerprint(
    include_kwargs=False,
    include_headers=False,
    keep_fragments=False,
)

Generate a unique fingerprint for deduplication.

Caches the result in self._fp after first computation.

Source code in scrapling/spiders/request.py
def update_fingerprint(
    self,
    include_kwargs: bool = False,
    include_headers: bool = False,
    keep_fragments: bool = False,
) -> bytes:
    """Generate a unique fingerprint for deduplication.

    The fingerprint is the SHA-1 digest of the session ID, the request body, the HTTP method and the
    canonicalized URL, optionally extended with the remaining session keyword arguments and the headers.

    Caches the result in self._fp after first computation: once a fingerprint exists, later calls
    return it as-is, regardless of the arguments passed.

    :param include_kwargs: Also hash the session keyword arguments other than `data` and `json`.
    :param include_headers: Also hash the request headers (`headers`, falling back to `extra_headers`).
    :param keep_fragments: Forwarded to `canonicalize_url` (presumably keeps the URL fragment when True).
    :return: The fingerprint digest.
    """
    if self._fp is not None:
        return self._fp

    # The body comes from `data` when it is set, otherwise from `json`
    post_data = self._session_kwargs.get("data", {})
    body = b""
    if post_data:
        if isinstance(post_data, dict | list | tuple):
            body = urlencode(post_data).encode()
        elif isinstance(post_data, str):
            body = post_data.encode()
        elif isinstance(post_data, BytesIO):
            body = post_data.getvalue()
        elif isinstance(post_data, bytes):
            body = post_data
    else:
        post_data = self._session_kwargs.get("json", {})
        body = orjson.dumps(post_data) if post_data else b""

    data: Dict[str, str | Tuple] = {
        "sid": self.sid,
        "body": body.hex(),
        "method": self._session_kwargs.get("method", "GET"),
        "url": canonicalize_url(self.url, keep_fragments=keep_fragments),
    }

    if include_kwargs:
        # `data`/`json` are already covered by the body above
        filtered_kwargs = {
            key.lower(): _stable_value_repr(value)
            for key, value in self._session_kwargs.items()
            if key.lower() not in ("data", "json")
        }
        data["kwargs"] = tuple(sorted(filtered_kwargs.items()))

    if include_headers:
        headers = self._session_kwargs.get("headers") or self._session_kwargs.get("extra_headers") or {}
        processed_headers = {}
        # Some header normalization
        for key, value in headers.items():
            processed_headers[_convert_to_bytes(key.lower()).hex()] = _convert_to_bytes(value).hex()
        # Sorted like the kwargs above, so the same headers passed in a different order
        # produce the same fingerprint
        data["headers"] = tuple(sorted(processed_headers.items()))

    fp = hashlib.sha1(orjson.dumps(data, option=orjson.OPT_SORT_KEYS), usedforsecurity=False).digest()
    self._fp = fp
    return fp

__repr__

__repr__()
Source code in scrapling/spiders/request.py
def __repr__(self) -> str:
    """Return `<Request(url) priority=N callback=name>`; the callback shows as "None" when it has no usable name."""
    name = getattr(self.callback, "__name__", None)
    if not name:
        name = "None"
    return f"<Request({self.url}) priority={self.priority} callback={name}>"

__str__

__str__()
Source code in scrapling/spiders/request.py
def __str__(self) -> str:
    """A request's string form is simply its URL."""
    return self.url

__lt__

__lt__(other)

Compare requests by priority

Source code in scrapling/spiders/request.py
def __lt__(self, other: object) -> bool:
    """Order requests by priority; anything that is not a `Request` is left to Python's fallback."""
    if isinstance(other, Request):
        return self.priority < other.priority
    return NotImplemented

__gt__

__gt__(other)

Compare requests by priority

Source code in scrapling/spiders/request.py
def __gt__(self, other: object) -> bool:
    """Order requests by priority; anything that is not a `Request` is left to Python's fallback."""
    if isinstance(other, Request):
        return self.priority > other.priority
    return NotImplemented

__eq__

__eq__(other)

Requests are equal if they have the same fingerprint.

Source code in scrapling/spiders/request.py
def __eq__(self, other: object) -> bool:
    """Two requests are equal when their fingerprints match.

    :raises RuntimeError: If either request has no fingerprint yet (see `update_fingerprint()`).
    """
    if not isinstance(other, Request):
        return NotImplemented
    if self._fp is not None and other._fp is not None:
        return self._fp == other._fp
    raise RuntimeError("Cannot compare requests before generating their fingerprints!")

__getstate__

__getstate__()

Prepare state for pickling - store callback as name string for pickle compatibility.

Source code in scrapling/spiders/request.py
def __getstate__(self) -> dict[str, Any]:
    """Build the picklable state: the callable is replaced by its name so the request can be pickled.

    The instance itself is left untouched; only the returned copy has `callback` cleared.
    """
    callback = self.callback
    callback_name = None if callback is None else getattr(callback, "__name__", None)

    state = dict(self.__dict__)
    # The callable itself is never pickled, only its name
    state.update(_callback_name=callback_name, callback=None)
    return state

__setstate__

__setstate__(state)

Restore state from pickle - callback restored later via _restore_callback().

Source code in scrapling/spiders/request.py
def __setstate__(self, state: dict[str, Any]) -> None:
    """Restore the pickled state; the callback itself is re-attached later via _restore_callback().

    The callback name is taken out of `state` and kept on `_callback_name`.
    """
    callback_name = state.pop("_callback_name", None)
    self._callback_name: str | None = callback_name
    self.__dict__.update(state)

Result Classes

scrapling.spiders.result.CrawlResult dataclass

CrawlResult(stats, items, paused=False)

Complete result from a spider run.

stats instance-attribute

stats

items instance-attribute

items

paused class-attribute instance-attribute

paused = False

completed property

completed

True if the crawl completed normally (not paused).

__len__

__len__()
Source code in scrapling/spiders/result.py
def __len__(self) -> int:
    """Number of scraped items in the result."""
    scraped = self.items
    return len(scraped)

__iter__

__iter__()
Source code in scrapling/spiders/result.py
def __iter__(self) -> Iterator[dict[str, Any]]:
    """Iterate over the scraped items."""
    scraped = self.items
    return iter(scraped)

scrapling.spiders.result.CrawlStats dataclass

CrawlStats(
    requests_count=0,
    concurrent_requests=0,
    concurrent_requests_per_domain=0,
    failed_requests_count=0,
    offsite_requests_count=0,
    robots_disallowed_count=0,
    cache_hits=0,
    cache_misses=0,
    response_bytes=0,
    items_scraped=0,
    items_dropped=0,
    start_time=0.0,
    end_time=0.0,
    download_delay=0.0,
    blocked_requests_count=0,
    custom_stats=dict(),
    response_status_count=dict(),
    domains_response_bytes=dict(),
    sessions_requests_count=dict(),
    proxies=list(),
    log_levels_counter=dict(),
)

Statistics for a crawl run.

requests_count class-attribute instance-attribute

requests_count = 0

concurrent_requests class-attribute instance-attribute

concurrent_requests = 0

concurrent_requests_per_domain class-attribute instance-attribute

concurrent_requests_per_domain = 0

failed_requests_count class-attribute instance-attribute

failed_requests_count = 0

offsite_requests_count class-attribute instance-attribute

offsite_requests_count = 0

robots_disallowed_count class-attribute instance-attribute

robots_disallowed_count = 0

cache_hits class-attribute instance-attribute

cache_hits = 0

cache_misses class-attribute instance-attribute

cache_misses = 0

response_bytes class-attribute instance-attribute

response_bytes = 0

items_scraped class-attribute instance-attribute

items_scraped = 0

items_dropped class-attribute instance-attribute

items_dropped = 0

start_time class-attribute instance-attribute

start_time = 0.0

end_time class-attribute instance-attribute

end_time = 0.0

download_delay class-attribute instance-attribute

download_delay = 0.0

blocked_requests_count class-attribute instance-attribute

blocked_requests_count = 0

custom_stats class-attribute instance-attribute

custom_stats = field(default_factory=dict)

response_status_count class-attribute instance-attribute

response_status_count = field(default_factory=dict)

domains_response_bytes class-attribute instance-attribute

domains_response_bytes = field(default_factory=dict)

sessions_requests_count class-attribute instance-attribute

sessions_requests_count = field(default_factory=dict)

proxies class-attribute instance-attribute

proxies = field(default_factory=list)

log_levels_counter class-attribute instance-attribute

log_levels_counter = field(default_factory=dict)

elapsed_seconds property

elapsed_seconds

requests_per_second property

requests_per_second

increment_status

increment_status(status)
Source code in scrapling/spiders/result.py
def increment_status(self, status: int) -> None:
    """Count one more response with the given status code, stored under the key `status_<code>`."""
    key = f"status_{status}"
    counts = self.response_status_count
    counts[key] = counts.get(key, 0) + 1

increment_response_bytes

increment_response_bytes(domain, count)
Source code in scrapling/spiders/result.py
def increment_response_bytes(self, domain: str, count: int) -> None:
    """Add `count` bytes to the overall total and to the running total of `domain`."""
    self.response_bytes += count
    per_domain = self.domains_response_bytes
    per_domain[domain] = per_domain.get(domain, 0) + count

increment_requests_count

increment_requests_count(sid)
Source code in scrapling/spiders/result.py
def increment_requests_count(self, sid: str) -> None:
    """Count one more request, both overall and for the session `sid`."""
    self.requests_count += 1
    per_session = self.sessions_requests_count
    per_session[sid] = per_session.get(sid, 0) + 1

to_dict

to_dict()
Source code in scrapling/spiders/result.py
def to_dict(self) -> dict[str, Any]:
    """Return the statistics as a plain dictionary.

    Timing and rate values (`elapsed_seconds`, `download_delay`, `requests_per_second`) are rounded
    to two decimals; the log level counters are exported under the key `log_count`.
    Container values are passed through as-is, not copied.
    """
    # Derived/rounded values first, so the mapping below is a plain field listing
    elapsed = round(self.elapsed_seconds, 2)
    delay = round(self.download_delay, 2)
    rate = round(self.requests_per_second, 2)

    return {
        # Items and timing
        "items_scraped": self.items_scraped,
        "items_dropped": self.items_dropped,
        "elapsed_seconds": elapsed,
        "download_delay": delay,
        # Request counters
        "concurrent_requests": self.concurrent_requests,
        "concurrent_requests_per_domain": self.concurrent_requests_per_domain,
        "requests_count": self.requests_count,
        "requests_per_second": rate,
        "sessions_requests_count": self.sessions_requests_count,
        "failed_requests_count": self.failed_requests_count,
        "offsite_requests_count": self.offsite_requests_count,
        "robots_disallowed_count": self.robots_disallowed_count,
        "cache_hits": self.cache_hits,
        "cache_misses": self.cache_misses,
        "blocked_requests_count": self.blocked_requests_count,
        # Response details
        "response_status_count": self.response_status_count,
        "response_bytes": self.response_bytes,
        "domains_response_bytes": self.domains_response_bytes,
        # Everything else
        "proxies": self.proxies,
        "custom_stats": self.custom_stats,
        "log_count": self.log_levels_counter,
    }

scrapling.spiders.result.ItemList

Bases: list


              flowchart TD
              scrapling.spiders.result.ItemList[ItemList]

              

              click scrapling.spiders.result.ItemList href "" "scrapling.spiders.result.ItemList"
            

A list of scraped items with export capabilities.

to_json

to_json(path, *, indent=False)

Export items to a JSON file.

PARAMETER DESCRIPTION
path

Path to the output file

TYPE: Union[str, Path]

indent

Pretty-print with 2-space indentation (slightly slower)

TYPE: bool DEFAULT: False

Source code in scrapling/spiders/result.py
def to_json(self, path: Union[str, Path], *, indent: bool = False):
    """Write all items to `path` as a single JSON array.

    Missing parent directories are created first.

    :param path: Path to the output file
    :param indent: Pretty-print with 2-space indentation (slightly slower)
    """
    options = (orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_INDENT_2) if indent else orjson.OPT_SERIALIZE_NUMPY

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = orjson.dumps(list(self), option=options)
    target.write_bytes(payload)
    log.info("Saved %d items to %s", len(self), path)

to_jsonl

to_jsonl(path)

Export items as JSON Lines (one JSON object per line).

PARAMETER DESCRIPTION
path

Path to the output file

TYPE: Union[str, Path]

Source code in scrapling/spiders/result.py
def to_jsonl(self, path: Union[str, Path]):
    """Write the items to `path` in JSON Lines format: one JSON document per line.

    Missing parent directories are created first.

    :param path: Path to the output file
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as stream:
        for record in self:
            stream.write(orjson.dumps(record, option=orjson.OPT_SERIALIZE_NUMPY) + b"\n")
    log.info("Saved %d items to %s", len(self), path)

Session Management

scrapling.spiders.session.SessionManager

SessionManager()

Manages pre-configured session instances.

Source code in scrapling/spiders/session.py
def __init__(self) -> None:
    """Create an empty manager: no sessions, no default session, not started."""
    # Registered sessions by ID, plus the IDs of those flagged as lazy
    self._sessions: dict[str, Session] = {}
    self._lazy_sessions: Set[str] = set()
    # Guards the on-demand start of lazy sessions in `fetch()`
    self._lazy_lock = Lock()

    self._default_session_id: str | None = None
    self._started: bool = False

default_session_id property

default_session_id

session_ids property

session_ids

add

add(session_id, session, *, default=False, lazy=False)

Register a session instance.

PARAMETER DESCRIPTION
session_id

Name to reference this session in requests

TYPE: str

session

Your pre-configured session instance

TYPE: Session

default

If True, this becomes the default session

TYPE: bool DEFAULT: False

lazy

If True, the session will be started only when a request uses its ID.

TYPE: bool DEFAULT: False

Source code in scrapling/spiders/session.py
def add(self, session_id: str, session: Session, *, default: bool = False, lazy: bool = False) -> "SessionManager":
    """Register a session instance under `session_id`.

    The first session registered becomes the default automatically; a later one only
    takes over when `default` is True.

    :param session_id: Name to reference this session in requests
    :param session: Your pre-configured session instance
    :param default: If True, this becomes the default session
    :param lazy: If True, the session will be started only when a request uses its ID.
    :return: The manager itself, so calls can be chained.
    :raises ValueError: If `session_id` is already registered.
    """
    registry = self._sessions
    if session_id in registry:
        raise ValueError(f"Session '{session_id}' already registered")
    registry[session_id] = session

    if self._default_session_id is None or default:
        self._default_session_id = session_id
    if lazy:
        self._lazy_sessions.add(session_id)
    return self

remove

remove(session_id)

Removes a session.

PARAMETER DESCRIPTION
session_id

ID of session to remove

TYPE: str

Source code in scrapling/spiders/session.py
def remove(self, session_id: str) -> None:
    """Remove a session, discarding it.

    Same as `pop()`, except that the removed session is not returned.

    :param session_id: ID of session to remove
    """
    self.pop(session_id)

pop

pop(session_id)

Removes and returns a session.

PARAMETER DESCRIPTION
session_id

ID of session to remove

TYPE: str

Source code in scrapling/spiders/session.py
def pop(self, session_id: str) -> Session:
    """Remove and return a session.

    If the removed session was the default one, the first session that is still registered
    becomes the new default, or `None` when no session is left.

    :param session_id: ID of session to remove
    :return: The session that was registered under `session_id`.
    :raises KeyError: If no session is registered under `session_id`.
    """
    if session_id not in self._sessions:
        raise KeyError(f"Session '{session_id}' not found")

    session = self._sessions.pop(session_id)
    # `discard` is a no-op for sessions that were not registered as lazy
    self._lazy_sessions.discard(session_id)

    # Decide by ID only: the truthiness of the session object must not influence the bookkeeping,
    # otherwise a falsy session would leave the default pointing at a removed ID
    if self._default_session_id == session_id:
        self._default_session_id = next(iter(self._sessions), None)

    return session

get

get(session_id)
Source code in scrapling/spiders/session.py
def get(self, session_id: str) -> Session:
    """Return the session registered under `session_id`.

    :param session_id: ID of the session to look up
    :raises KeyError: If the ID is unknown; the message lists the registered IDs.
    """
    registry = self._sessions
    if session_id in registry:
        return registry[session_id]

    available = ", ".join(registry)
    raise KeyError(f"Session '{session_id}' not found. Available: {available}")

start async

start()

Start all sessions that aren't already alive.

Source code in scrapling/spiders/session.py
async def start(self) -> None:
    """Start every registered session that is neither lazy nor already alive.

    Calling it again after a successful start is a no-op.
    """
    if self._started:
        return

    for sid, session in self._sessions.items():
        # Lazy sessions are started on first use by `fetch()`; alive ones need nothing
        if sid in self._lazy_sessions or session._is_alive:
            continue
        await session.__aenter__()

    self._started = True

close async

close()

Close all registered sessions.

Source code in scrapling/spiders/session.py
async def close(self) -> None:
    """Close the registered sessions and mark the manager as not started.

    Lazy sessions that are not alive are skipped; every other session is exited.
    """
    for sid, session in self._sessions.items():
        idle_lazy = sid in self._lazy_sessions and not session._is_alive
        if not idle_lazy:
            _ = await session.__aexit__(None, None, None)

    self._started = False

fetch async

fetch(request)
Source code in scrapling/spiders/session.py
async def fetch(self, request: Request) -> Response:
    """Fetch a request through the session it refers to and return the response.

    The session is chosen by `request.sid`, falling back to the default session when the
    request has no session ID. Lazy sessions are started here on first use.

    :param request: The request to fetch; its stored session arguments are forwarded to the session.
    :return: The response, with `response.request` set and the request's meta merged into `response.meta`.
    :raises KeyError: From `get()`, if the session ID is not registered.
    :raises TypeError: If a `FetcherSession` holds a client that is not an async one.
    :raises RuntimeError: If the session that was looked up is falsy.
    """
    sid = request.sid if request.sid else self.default_session_id
    session = self.get(sid)

    if session:
        # Start a lazy session on first use. The liveness check is repeated after the lock is
        # acquired so that concurrent fetches do not enter the same session twice.
        if sid in self._lazy_sessions and not session._is_alive:
            async with self._lazy_lock:
                if not session._is_alive:
                    await session.__aenter__()

        if isinstance(session, FetcherSession):
            client = session._client

            if isinstance(client, _ASyncSessionLogic):
                # Work on a copy so that popping `method` does not alter the request itself
                kwargs = request._session_kwargs.copy()
                method = cast(SUPPORTED_HTTP_METHODS, kwargs.pop("method", "GET"))
                response = await client._make_request(
                    method=method,
                    url=request.url,
                    **kwargs,
                )
            else:
                # Sync session or other types - shouldn't happen in async context
                raise TypeError(f"Session type {type(client)} not supported for async fetch")
        else:
            # Every other session type is expected to expose an async `fetch(url=..., **kwargs)`
            response = await session.fetch(url=request.url, **request._session_kwargs)

        response.request = request
        # Merge request meta into response meta (response meta takes priority)
        response.meta = {**request.meta, **response.meta}
        return response
    raise RuntimeError("No session found with the request session id")

__aenter__ async

__aenter__()
Source code in scrapling/spiders/session.py
async def __aenter__(self) -> "SessionManager":
    """Enter the async context: start the sessions and return the manager itself."""
    await self.start()
    return self

__aexit__ async

__aexit__(*exc)
Source code in scrapling/spiders/session.py
async def __aexit__(self, *exc) -> None:
    """Leave the async context: close the sessions. Exception details are ignored and never suppressed."""
    await self.close()

__contains__

__contains__(session_id)

Check if a session ID is registered.

Source code in scrapling/spiders/session.py
def __contains__(self, session_id: str) -> bool:
    """Support `session_id in manager`: True when a session is registered under that ID."""
    registry = self._sessions
    return session_id in registry

__len__

__len__()

Number of registered sessions.

Source code in scrapling/spiders/session.py
def __len__(self) -> int:
    """Support `len(manager)`: how many sessions are registered."""
    registry = self._sessions
    return len(registry)