Skip to main content

Documentation Index

Fetch the complete documentation index at: https://mintlify.com/Amaculus/screaming-frog-api/llms.txt

Use this file to discover all available pages before exploring further.

ConfigPatches provides a fluent Python interface for constructing patch payloads that modify .seospiderconfig files. Patches can set arbitrary config keys and add custom extractions, custom search filters, and custom JavaScript extractors. The classes and the write_seospider_config helper function are all importable from the top-level screamingfrog package:
from screamingfrog import ConfigPatches, CustomSearch, CustomJavaScript, write_seospider_config

ConfigPatches

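The central object for accumulating config changes. Every method that modifies the patch set returns self so calls can be chained; the serializers .to_dict() and .to_json() return the accumulated payload instead.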
from screamingfrog import ConfigPatches, CustomSearch, CustomJavaScript

patches = ConfigPatches()
patches.set("mCrawlConfig.mRenderingMode", "JAVASCRIPT")
patches.add_custom_search(CustomSearch(name="Filter 1", query=".*", data_type="REGEX"))
patches.add_custom_javascript(
    CustomJavaScript(name="Extractor 1", javascript="return document.title;")
)

patch_json = patches.to_json()
print(patch_json)

.set(path, value)

Set a single config key using dot-notation.
patches = ConfigPatches()
patches.set("mCrawlConfig.mRenderingMode", "JAVASCRIPT")
patches.set("mCrawlConfig.mMaxUrls", 5000)
| Parameter | Type | Description |
| --- | --- | --- |
| `path` | `str` | Dot-separated config key (e.g. `"mCrawlConfig.mMaxUrls"`). |
| `value` | `Any` | The value to assign. |

Returns self.

.add_extraction(name, selector, selector_type, extract_mode, attribute)

Add a custom extraction rule.
patches.add_extraction(
    name="H1 Text",
    selector="//h1",
    selector_type="XPATH",
    extract_mode="TEXT",
)

# Extract an attribute value
patches.add_extraction(
    name="OG Image",
    selector="//meta[@property='og:image']",
    selector_type="XPATH",
    extract_mode="ATTRIBUTE",
    attribute="content",
)
| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| `name` | `str` | required | Extraction rule label. |
| `selector` | `str` | required | XPath or CSS selector expression. |
| `selector_type` | `str` | `"XPATH"` | `"XPATH"` or `"CSS"`. |
| `extract_mode` | `str` | `"TEXT"` | `"TEXT"` or `"ATTRIBUTE"`. |
| `attribute` | `str \| None` | `None` | Attribute name to extract when `extract_mode="ATTRIBUTE"`. |

Returns self.

.remove_extraction(name) / .clear_extractions()

Remove a named extraction or clear all extractions.
patches.remove_extraction("H1 Text")
patches.clear_extractions()
Both return self.

.add_custom_search(rule)

Add a CustomSearch rule. See CustomSearch below for constructor details.
patches.add_custom_search(
    CustomSearch(name="Filter 1", query=".*", data_type="REGEX")
)
Returns self.

.remove_custom_search(name) / .clear_custom_searches()

Remove a named custom search or clear all custom searches.
patches.remove_custom_search("Filter 1")
patches.clear_custom_searches()
Both return self.

.add_custom_javascript(rule)

Add a CustomJavaScript rule. See CustomJavaScript below.
patches.add_custom_javascript(
    CustomJavaScript(name="Extractor 1", javascript="return document.title;")
)
Returns self.

.remove_custom_javascript(name) / .clear_custom_javascript()

Remove a named JavaScript rule or clear all JavaScript rules.
patches.remove_custom_javascript("Extractor 1")
patches.clear_custom_javascript()
Both return self.

.to_dict() / .to_json(indent=2)

Serialize the accumulated patches.
patches = (
    ConfigPatches()
    .set("mCrawlConfig.mMaxUrls", 5000)
    .add_custom_search(CustomSearch(name="Filter 1", query="error", mode="CONTAINS"))
)

print(patches.to_dict())
# {
#   'mCrawlConfig.mMaxUrls': 5000,
#   'custom_searches': [{'op': 'add', 'name': 'Filter 1', 'query': 'error', ...}]
# }

print(patches.to_json(indent=2))
| Method | Return type | Description |
| --- | --- | --- |
| `.to_dict()` | `dict[str, Any]` | Returns the patch payload as a Python dictionary. |
| `.to_json(indent=2)` | `str` | Returns the patch payload as a JSON string. |

CustomSearch

Defines a single custom search filter rule.
from screamingfrog import CustomSearch

# Regex search across HTML source
rule = CustomSearch(
    name="Filter 1",
    query=".*",
    data_type="REGEX",
)

# Case-sensitive text match scoped to a specific element via XPath
rule = CustomSearch(
    name="Noindex Check",
    query="noindex",
    mode="CONTAINS",
    data_type="TEXT",
    scope="HTML",
    case_sensitive=True,
    xpath="//meta[@name='robots']",
)

Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| `name` | `str` | required | Display name for the custom search rule. |
| `query` | `str` | required | The search string or pattern. |
| `mode` | `str` | `"CONTAINS"` | Match mode: `"CONTAINS"`, `"DOES_NOT_CONTAIN"`, `"BEGINS_WITH"`, `"ENDS_WITH"`, `"EQUALS"`, or `"REGEX"`. |
| `data_type` | `str` | `"TEXT"` | Data type: `"TEXT"` or `"REGEX"`. |
| `scope` | `str` | `"HTML"` | Scope of the search: `"HTML"`, `"TEXT"`, or `"URL"`. |
| `case_sensitive` | `bool` | `False` | Whether the match is case-sensitive. |
| `xpath` | `str \| None` | `None` | Restrict the search to an XPath-selected element. |

CustomJavaScript

Defines a JavaScript extraction rule that runs in the page context during rendering.
from screamingfrog import CustomJavaScript

rule = CustomJavaScript(
    name="Extractor 1",
    javascript="return document.title;",
)

# Custom timeout and content-type scope
rule = CustomJavaScript(
    name="Schema Count",
    javascript="return document.querySelectorAll('script[type=\"application/ld+json\"]').length;",
    type="EXTRACTION",
    timeout_secs=15,
    content_types="text/html",
)

Parameters

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| `name` | `str` | required | Display name for the JavaScript rule. |
| `javascript` | `str` | required | JavaScript snippet to execute. Must return a value. |
| `type` | `str` | `"EXTRACTION"` | Rule type. `"EXTRACTION"` is the standard extraction mode. |
| `timeout_secs` | `int` | `10` | Execution timeout in seconds. |
| `content_types` | `str` | `"text/html"` | MIME types the rule applies to. |

write_seospider_config

Apply a ConfigPatches object to a template .seospiderconfig file and write the result to a new file.
from screamingfrog import ConfigPatches, write_seospider_config

patches = ConfigPatches().set("mCrawlConfig.mMaxUrls", 5000)

write_seospider_config(
    "base.seospiderconfig",
    "alpha.seospiderconfig",
    patches,
)

Parameters

| Parameter | Type | Description |
| --- | --- | --- |
| `template_config` | `str \| Path` | Path to the source .seospiderconfig file to use as a base. |
| `output_config` | `str \| Path` | Path for the patched output config file. |
| `patches` | `ConfigPatches \| Mapping[str, Any]` | Patches to apply. Accepts a ConfigPatches instance or a plain dict in the same format as `.to_dict()`. |
| `sf_path` | `str \| None` | Optional path to the Screaming Frog installation, passed to the underlying SFConfig loader. |

Returns the output file path as a Path.
write_seospider_config requires the sf-config-builder package. Install it with:
pip install sf-config-builder
If it is not installed, calling this function raises RuntimeError.

Worked examples

from screamingfrog import ConfigPatches, write_seospider_config

patches = (
    ConfigPatches()
    .set("mCrawlConfig.mRenderingMode", "JAVASCRIPT")
    .set("mCrawlConfig.mMaxUrls", 10000)
)

write_seospider_config("base.seospiderconfig", "js-crawl.seospiderconfig", patches)