Skip to content

Processed DOM

conatus.utils.browser.dom.processed

Processed DOM type.

🐴 Our DOM workhorse: As explained on our Browser concept page, ProcessedDOM is the class you will want to use if you want to build actions that leverage the DOM in its entirety.

  • The other workhorse is DOMNode, which is the class you will want to use if you want to build actions that leverage individual nodes in the DOM.
  • This class is not the raw representation of the DOM that we get from Chrome, but a cleaned-up version of it.
  • Or to put it more visually: How the DOM is processed

Two ways to get a ProcessedDOM object

You can get a ProcessedDOM object either from a Page object or from a ChromeDOM object:

from conatus.utils.browser.dom.processed import ProcessedDOM

# Example 1: From a Page object
# from conatus.utils.browser import Browser
# url = "https://example.com"
# browser = Browser()
# browser.goto(url)
# page = browser.page
# processed_dom = ProcessedDOM.from_page(page)

# Example 2: From a ChromeDOM object
from conatus.utils.browser.dom.fixtures import example_chrome_dom_inputtypes
inputtypes_dom = example_chrome_dom_inputtypes()
width = 1100
processed_dom = ProcessedDOM.from_chrome_dom(inputtypes_dom, width)
assert processed_dom.page_title == "Input Type Sandbox"

Additional references

  • API: Chrome DOM: The Chrome DOM classes, which this class is derived from.
  • API: DOM nodes: The DOMNode class, which you will want to use in conjunction with this class.

ProcessedDOM

Bases: BaseModel

Processed DOM type.

ATTRIBUTE DESCRIPTION
nodes

Nodes in the DOM.

TYPE: list[DOMNode]

root_node

Root node of the DOM.

TYPE: DOMNode

input_elements

Input elements in the DOM.

TYPE: dict[int, DOMNode]

clickable_elements

Clickable elements in the DOM.

TYPE: dict[int, DOMNode]

elements_count

Number of elements that are either clickable or inputable in the DOM.

TYPE: int

page_title

Title of the web page.

TYPE: str

page_url

URL of the web page.

TYPE: str

scroll_position

Scroll position of the page.

TYPE: tuple[int, int]

get_interactive_elements staticmethod

get_interactive_elements(
    nodes: list[DOMNode], scroll_position: tuple[int, int]
) -> tuple[dict[int, DOMNode], dict[int, DOMNode], int]

Get interactive elements from a list of DOM nodes.

Interactive elements are elements that can be clicked or inputted into. Under the hood, we call a recursive function to find these elements.

from conatus.utils.browser.dom.fixtures import (
    example_chrome_dom_inputtypes
)
from conatus.utils.browser.dom.processed import ProcessedDOM

inputtypes_dom = example_chrome_dom_inputtypes()
width = 1100
nodes = inputtypes_dom.process_nodes(width)
device_pixel_ratio = inputtypes_dom.get_device_pixel_ratio(width)
scroll_position = (
    int(inputtypes_dom.document.scroll_offset_x / device_pixel_ratio),
    int(inputtypes_dom.document.scroll_offset_y / device_pixel_ratio),
)
input_elements, clickable_elements, elements_count = (
    ProcessedDOM.get_interactive_elements(nodes, scroll_position)
)
assert len(input_elements) == 2
assert len(clickable_elements) == 6
assert elements_count == 6
PARAMETER DESCRIPTION
nodes

The list of DOM nodes.

TYPE: list[DOMNode]

scroll_position

The scroll position of the page.

TYPE: tuple[int, int]

RETURNS DESCRIPTION
dict[int, DOMNode]

input_elements: Input elements in the DOM.

dict[int, DOMNode]

clickable_elements: Clickable elements in the DOM.

int

count: Count of the elements.

RAISES DESCRIPTION
ValueError

If there are no bounds in the root node (which probably indicates a bug on Chrome's side).

Source code in conatus/utils/browser/dom/processed.py
@staticmethod
def get_interactive_elements(
    nodes: list[DOMNode],
    scroll_position: tuple[int, int],
) -> tuple[dict[int, DOMNode], dict[int, DOMNode], int]:
    """Get interactive elements from a list of DOM nodes.

    Interactive elements are elements that can be clicked or inputted into.
    Under the hood, we call a recursive function to find these elements.

    ```python
    from conatus.utils.browser.dom.fixtures import (
        example_chrome_dom_inputtypes
    )
    from conatus.utils.browser.dom.processed import ProcessedDOM

    inputtypes_dom = example_chrome_dom_inputtypes()
    width = 1100
    nodes = inputtypes_dom.process_nodes(width)
    device_pixel_ratio = inputtypes_dom.get_device_pixel_ratio(width)
    scroll_position = (
        int(inputtypes_dom.document.scroll_offset_x / device_pixel_ratio),
        int(inputtypes_dom.document.scroll_offset_y / device_pixel_ratio),
    )
    input_elements, clickable_elements, elements_count = (
        ProcessedDOM.get_interactive_elements(nodes, scroll_position)
    )
    assert len(input_elements) == 2
    assert len(clickable_elements) == 6
    assert elements_count == 6
    ```

    Args:
        nodes: The list of DOM nodes.
        scroll_position: The scroll position of the page.

    Returns:
        `input_elements`: Input elements in the DOM.
        `clickable_elements`: Clickable elements in the DOM.
        `count`: Count of the elements.

    Raises:
        ValueError: If there are no bounds in the root node (which
            probably indicates a bug on Chrome's side.)
    """
    root = nodes[0]
    if root.bounds is None:
        err = "Root node bounds is None."
        raise ValueError(err)
    logger.debug(f"Root bounds: {root.bounds}")
    screen_bounds = (
        root.bounds[0] + scroll_position[0],
        root.bounds[1] + scroll_position[1],
        root.bounds[2] + scroll_position[0],
        root.bounds[3] + scroll_position[1],
    )
    logger.debug(f"Screen bounds: {screen_bounds}")
    return ProcessedDOM._rec_find_interactive_elements(root, screen_bounds)

from_chrome_dom staticmethod

from_chrome_dom(
    chrome_dom: ChromeDOM, width: float
) -> ProcessedDOM

Create a ProcessedDOM object from a ChromeDOM object.

from conatus.utils.browser.dom.fixtures import (
    example_chrome_dom_inputtypes
)
from conatus.utils.browser.dom.processed import ProcessedDOM

inputtypes_dom = example_chrome_dom_inputtypes()
inputtypes_dom_width = 1100
processed_dom = ProcessedDOM.from_chrome_dom(
    inputtypes_dom, inputtypes_dom_width
)
assert processed_dom.page_title == "Input Type Sandbox"
assert len(processed_dom.input_elements) == 2
assert len(processed_dom.clickable_elements) == 6
assert processed_dom.elements_count == 6
PARAMETER DESCRIPTION
chrome_dom

The ChromeDOM object.

TYPE: ChromeDOM

width

The width of the page. We need it to calculate the bounds of the nodes.

TYPE: float

RETURNS DESCRIPTION
ProcessedDOM

The ProcessedDOM object.

Source code in conatus/utils/browser/dom/processed.py
@staticmethod
def from_chrome_dom(chrome_dom: ChromeDOM, width: float) -> "ProcessedDOM":
    """Build a `ProcessedDOM` out of a `ChromeDOM` object.

    ```python
    from conatus.utils.browser.dom.fixtures import (
        example_chrome_dom_inputtypes
    )
    from conatus.utils.browser.dom.processed import ProcessedDOM

    inputtypes_dom = example_chrome_dom_inputtypes()
    inputtypes_dom_width = 1100
    processed_dom = ProcessedDOM.from_chrome_dom(
        inputtypes_dom, inputtypes_dom_width
    )
    assert processed_dom.page_title == "Input Type Sandbox"
    assert len(processed_dom.input_elements) == 2
    assert len(processed_dom.clickable_elements) == 6
    assert processed_dom.elements_count == 6
    ```

    Args:
        chrome_dom: The `ChromeDOM` object to convert.
        width: The width of the page. It is forwarded to the node
            processing step and to the device pixel ratio computation.

    Returns:
        The resulting `ProcessedDOM` object.
    """
    title = chrome_dom.page_title
    url = chrome_dom.page_url

    # The first processed node is taken as the root of the DOM.
    processed_nodes = chrome_dom.process_nodes(width)
    root = processed_nodes[0]

    # Scale the document scroll offsets by the device pixel ratio and
    # truncate them to integers.
    ratio = chrome_dom.get_device_pixel_ratio(width)
    scroll_x = int(chrome_dom.document.scroll_offset_x / ratio)
    scroll_y = int(chrome_dom.document.scroll_offset_y / ratio)
    scroll = (scroll_x, scroll_y)

    inputs, clickables, count = ProcessedDOM.get_interactive_elements(
        processed_nodes, scroll
    )
    return ProcessedDOM(
        nodes=processed_nodes,
        root_node=root,
        input_elements=inputs,
        clickable_elements=clickables,
        page_title=title,
        page_url=url,
        elements_count=count,
        scroll_position=scroll,
    )

from_page_async async classmethod

from_page_async(page: PWPage | Page) -> ProcessedDOM

Create a ProcessedDOM from a Page (either Playwright or Conatus).

Note: The expected type for a Playwright page is playwright.async_api._generated.Page.

PARAMETER DESCRIPTION
page

The Page object (either a Playwright Page or a Conatus Page).

TYPE: PWPage | Page

RETURNS DESCRIPTION
ProcessedDOM

The ProcessedDOM object.

Source code in conatus/utils/browser/dom/processed.py
@classmethod
async def from_page_async(cls, page: "PWPage | Page") -> "ProcessedDOM":
    """Asynchronously build a `ProcessedDOM` from a Playwright or Conatus page.

    _Note_: The expected type for a Playwright page is
    `playwright.async_api._generated.Page`.

    Args:
        page: The Page object (either a Playwright Page or a Conatus Page).

    Returns:
        The `ProcessedDOM` object.
    """
    # A Conatus page exposes its underlying Playwright page as `pw_page`.
    if isinstance(page, PWPage):
        pw_page = page
    else:
        pw_page = page.pw_page

    # Ask the browser for the screen width; fall back to 1000 when the
    # evaluated value is not numeric.
    measured = await pw_page.evaluate("window.screen.width")  # pyright: ignore[reportAny]
    if isinstance(measured, float | int):
        width = measured
    else:
        width = 1000

    chrome_dom = await ChromeDOM.from_page_async(pw_page)
    return ProcessedDOM.from_chrome_dom(chrome_dom, width)

from_page classmethod

from_page(page: PWPage | Page) -> ProcessedDOM

Create a ProcessedDOM from a Page (either Playwright or Conatus).

from conatus.utils.browser import Browser
from conatus.utils.browser.dom.processed import ProcessedDOM

url = "https://example.com"

browser = Browser()
browser.goto(url)
page = browser.page
processed_dom = ProcessedDOM.from_page(page)
PARAMETER DESCRIPTION
page

The Page object (either a Playwright Page or a Conatus Page).

TYPE: PWPage | Page

RETURNS DESCRIPTION
ProcessedDOM

The ProcessedDOM object.

Source code in conatus/utils/browser/dom/processed.py
@classmethod
def from_page(cls, page: "PWPage | Page") -> "ProcessedDOM":
    """Synchronously build a `ProcessedDOM` from a Playwright or Conatus page.

    This runs `from_page_async` to completion on the event loop attached
    to the underlying Playwright page.

    ```python
    from conatus.utils.browser import Browser
    from conatus.utils.browser.dom.processed import ProcessedDOM

    url = "https://example.com"

    browser = Browser()
    browser.goto(url)
    page = browser.page
    processed_dom = ProcessedDOM.from_page(page)
    ```

    Args:
        page: The Page object (either a Playwright Page or a Conatus Page).

    Returns:
        The `ProcessedDOM` object.
    """
    # The loop is read from the Playwright page's private `_loop` attribute;
    # for a Conatus page, the Playwright page is reached through `pw_page`.
    if isinstance(page, PWPage):
        event_loop = page._loop  # noqa: SLF001 # pyright: ignore[reportPrivateUsage, reportAny]
    else:
        event_loop = page.pw_page._loop  # noqa: SLF001 # pyright: ignore[reportPrivateUsage, reportAny]
    pending = cls.from_page_async(page)
    return run_async(pending, loop=event_loop)  # pyright: ignore[reportAny]