Octopus
Extract Data Tool
The browser_extract tool selects elements from the current page using a CSS selector and returns their content as text, HTML, or a structured JSON array — ready for the agent to reason over.
Tool Schema
{
"name": "browser_extract",
"description": "Extract content from the current page using a CSS selector. Returns matched elements as text, HTML, or structured JSON. Always call browser_navigate first to load the target page.",
"inputSchema": {
"type": "object",
"properties": {
"selector": {
"type": "string",
"description": "CSS selector to match elements (e.g. 'h1', '.price', '[data-testid=value]')"
},
"format": {
"type": "string",
"enum": ["text", "html", "json"],
"default": "text",
"description": "Output format. 'text': inner text only. 'html': full element HTML. 'json': array of {text, html, attributes} objects."
},
"limit": {
"type": "integer",
"default": 10,
"description": "Maximum number of matching elements to return."
},
"wait_for_selector": {
"type": "boolean",
"default": false,
"description": "If true, waits up to the configured TimeoutMs for an element matching the selector to become visible before extracting."
}
},
"required": ["selector"]
}
}
Output Formats
// format: "text" — simplest, cheapest for the LLM to process
{
"count": 3,
"selector": ".product-price",
"results": ["$29.99", "$49.99", "$19.99"]
}
// format: "json" — full element details
{
"count": 2,
"selector": "table.data-table tr",
"results": [
{
"text": "AAPL $189.30 +2.34",
"html": "<tr><td>AAPL</td><td>$189.30</td><td>+2.34</td></tr>",
"attributes": {}
},
{
"text": "MSFT $415.22 -1.05",
"html": "<tr><td>MSFT</td><td>$415.22</td><td>-1.05</td></tr>",
"attributes": {}
}
]
}
Handler Implementation
/// <summary>
/// Handles a <c>browser_extract</c> tool call: queries the current page for elements
/// matching a CSS selector and serializes up to <c>limit</c> of them as JSON.
/// </summary>
/// <param name="input">
/// Tool arguments. <c>selector</c> is required; <c>format</c> ("text", "html" or "json",
/// default "text"), <c>limit</c> (default 10) and <c>wait_for_selector</c> (default false)
/// are optional.
/// </param>
/// <param name="session">Browser session that supplies the page to query.</param>
/// <param name="ct">Token passed to the session and checked between elements.</param>
/// <returns>
/// A JSON string of the form <c>{ "count": n, "selector": "...", "results": [...] }</c>.
/// For "text" and "html" each result is a string; for "json" each result is an object
/// with <c>text</c>, <c>html</c> and <c>attributes</c>.
/// </returns>
/// <exception cref="KeyNotFoundException"><c>selector</c> is missing from <paramref name="input"/>.</exception>
/// <exception cref="ArgumentException"><c>selector</c> is null, empty or whitespace.</exception>
/// <exception cref="OperationCanceledException"><paramref name="ct"/> was cancelled during extraction.</exception>
public async Task<string> HandleExtractAsync(
    JsonElement input, IBrowserSession session, CancellationToken ct)
{
    // Reads the element's own markup including its tag. InnerHTMLAsync would drop the
    // matched element's wrapper (e.g. the <tr> itself), contradicting the documented
    // "full element HTML" output.
    const string outerHtmlExpression = "node => node.outerHTML";

    var selector = input.GetProperty("selector").GetString();
    if (string.IsNullOrWhiteSpace(selector))
    {
        throw new ArgumentException(
            "'selector' must be a non-empty CSS selector.", nameof(input));
    }

    // A JSON null for "format" falls back to the default, same as an absent property.
    var format = input.TryGetProperty("format", out var f) ? f.GetString() ?? "text" : "text";
    var limit = input.TryGetProperty("limit", out var l) ? l.GetInt32() : 10;
    var waitForSelector = input.TryGetProperty("wait_for_selector", out var w) && w.GetBoolean();

    var page = await session.GetPageAsync(ct);

    if (waitForSelector)
    {
        // NOTE(review): Playwright is expected to throw on timeout here; the exception
        // is intentionally left to propagate to the caller - confirm that is desired.
        await page.WaitForSelectorAsync(selector, new PageWaitForSelectorOptions
        {
            State = WaitForSelectorState.Visible,
            Timeout = _config.TimeoutMs
        });
    }

    var elements = await page.QuerySelectorAllAsync(selector);
    var results = new List<object>();

    // Take() yields nothing for a zero or negative limit, so no extra clamping is needed.
    foreach (var el in elements.Take(limit))
    {
        ct.ThrowIfCancellationRequested();

        if (format == "html")
        {
            results.Add(await el.EvaluateAsync<string>(outerHtmlExpression));
        }
        else if (format == "json")
        {
            results.Add(new
            {
                text = await el.InnerTextAsync(),
                html = await el.EvaluateAsync<string>(outerHtmlExpression),
                attributes = await GetAttributesAsync(el)
            });
        }
        else
        {
            // "text" and any unrecognized format both return inner text.
            results.Add(await el.InnerTextAsync());
        }
    }

    return JsonSerializer.Serialize(new
    {
        count = results.Count,
        selector,
        results
    });
}
Selector Tips
| Pattern | Example | Use |
|---|---|---|
| Tag name | h1, table | Extract all elements of a type |
| CSS class | .product-price | Elements with a specific class |
| Data attribute | [data-field="regularMarketPrice"] | Data-annotated elements (most reliable) |
| ID | #main-content | Single unique element |
| Nested | table.results tbody tr td:nth-child(2) | Specific cells in a table |