Portal Community

Tool Schema

{
  "name": "browser_extract",
  "description": "Extract content from the current page using a CSS selector. Returns matched elements as text, HTML, or structured JSON. Always call browser_navigate first to load the target page.",
  "inputSchema": {
    "type": "object",
    "properties": {
      "selector": {
        "type": "string",
        "description": "CSS selector to match elements (e.g. 'h1', '.price', '[data-testid=value]')"
      },
      "format": {
        "type": "string",
        "enum": ["text", "html", "json"],
        "default": "text",
        "description": "Output format. 'text': inner text only. 'html': full element HTML. 'json': array of {text, html, attributes} objects."
      },
      "limit": {
        "type": "integer",
        "default": 10,
        "description": "Maximum number of matching elements to return."
      },
      "wait_for_selector": {
        "type": "boolean",
        "default": false,
        "description": "If true, waits up to the configured timeout (TimeoutMs) for an element matching the selector to become visible before extracting."
      }
    },
    "required": ["selector"]
  }
}

Output Formats

// format: "text" — simplest, cheapest for the LLM to process
{
  "count":    3,
  "selector": ".product-price",
  "results":  ["$29.99", "$49.99", "$19.99"]
}

// format: "json" — full element details
{
  "count":    2,
  "selector": "table.data-table tr",
  "results":  [
    {
      "text":       "AAPL  $189.30  +2.34",
      "html":       "<tr><td>AAPL</td><td>$189.30</td><td>+2.34</td></tr>",
      "attributes": {}
    },
    {
      "text":       "MSFT  $415.22  -1.05",
      "html":       "<tr><td>MSFT</td><td>$415.22</td><td>-1.05</td></tr>",
      "attributes": {}
    }
  ]
}

Handler Implementation

/// <summary>
/// Handles a <c>browser_extract</c> tool call: queries the session's page for elements
/// matching a CSS selector and returns them serialized as a JSON string.
/// </summary>
/// <param name="input">
/// Tool arguments. Reads <c>selector</c> (required) and the optional <c>format</c>
/// (defaults to "text"), <c>limit</c> (defaults to 10) and <c>wait_for_selector</c>
/// (defaults to false).
/// </param>
/// <param name="session">Browser session that supplies the page to query.</param>
/// <param name="ct">
/// Cancellation token; passed to <c>GetPageAsync</c> and checked before each element is read.
/// </param>
/// <returns>
/// A JSON object with <c>count</c> (number of returned elements), <c>selector</c> (echoed
/// back) and <c>results</c> (one entry per element, shaped according to <c>format</c>).
/// </returns>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="ct"/> is cancelled while elements are being read.
/// </exception>
public async Task<string> HandleExtractAsync(
    JsonElement input, IBrowserSession session, CancellationToken ct)
{
    // The documented output for "html"/"json" shows the matched element's own tag
    // (e.g. "<tr>...</tr>"), which is outerHTML; InnerHTMLAsync would drop that wrapper.
    const string outerHtmlScript = "el => el.outerHTML";

    var selector        = input.GetProperty("selector").GetString()!;
    var format          = input.TryGetProperty("format", out var f) ? f.GetString()! : "text";
    var limit           = input.TryGetProperty("limit", out var l) ? l.GetInt32() : 10;
    var waitForSelector = input.TryGetProperty("wait_for_selector", out var w) && w.GetBoolean();

    var page = await session.GetPageAsync(ct);

    if (waitForSelector)
    {
        await page.WaitForSelectorAsync(selector, new PageWaitForSelectorOptions
        {
            State   = WaitForSelectorState.Visible,
            Timeout = _config.TimeoutMs
        });
    }

    var elements = await page.QuerySelectorAllAsync(selector);
    var results  = new List<object>();

    foreach (var el in elements.Take(limit))
    {
        // The per-element browser calls below take no token, so honor cancellation here.
        ct.ThrowIfCancellationRequested();

        switch (format)
        {
            case "html":
                results.Add(await el.EvaluateAsync<string>(outerHtmlScript));
                break;

            case "json":
                results.Add(new
                {
                    text       = await el.InnerTextAsync(),
                    html       = await el.EvaluateAsync<string>(outerHtmlScript),
                    attributes = await GetAttributesAsync(el)
                });
                break;

            default:
                // "text" and any unrecognized format value both yield inner text.
                results.Add(await el.InnerTextAsync());
                break;
        }
    }

    return JsonSerializer.Serialize(new
    {
        count    = results.Count,
        selector,
        results
    });
}

Selector Tips

| Pattern | Example | Use |
| --- | --- | --- |
| Tag name | `h1`, `table` | Extract all elements of a type |
| CSS class | `.product-price` | Elements with a specific class |
| Data attribute | `[data-field="regularMarketPrice"]` | Data-annotated elements (most reliable) |
| ID | `#main-content` | Single unique element |
| Nested | `table.results tbody tr td:nth-child(2)` | Specific cells in a table |