Skip to content

Commit ab25ab1

Browse files
feat: [feat]: add ignoreSelectors to extract()
1 parent 7572751 commit ab25ab1

5 files changed

Lines changed: 77 additions & 7 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 8
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/browserbase/stagehand-dbbff1a35360850898f7d60588e257faeac145a73cfcae634cfeb1b70109b6af.yml
3-
openapi_spec_hash: 28c4b734a5309067c39bb4c4b709b9ab
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/browserbase/stagehand-7182c741edd5e22cda9bd855d31ca7e60a97a409222bb887edf87b9ce15dd493.yml
3+
openapi_spec_hash: 174581867a9191c491b22855b64c4f19
44
config_hash: a962ae71493deb11a1c903256fb25386

stagehand-java-core/src/main/kotlin/com/browserbase/api/models/sessions/SessionExtractParams.kt

Lines changed: 67 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ import com.browserbase.api.core.JsonMissing
1111
import com.browserbase.api.core.JsonValue
1212
import com.browserbase.api.core.Params
1313
import com.browserbase.api.core.allMaxBy
14+
import com.browserbase.api.core.checkKnown
1415
import com.browserbase.api.core.getOrThrow
1516
import com.browserbase.api.core.http.Headers
1617
import com.browserbase.api.core.http.QueryParams
@@ -632,6 +633,7 @@ private constructor(
632633
class Options
633634
@JsonCreator(mode = JsonCreator.Mode.DISABLED)
634635
private constructor(
636+
private val ignoreSelectors: JsonField<List<String>>,
635637
private val model: JsonField<Model>,
636638
private val selector: JsonField<String>,
637639
private val timeout: JsonField<Double>,
@@ -640,12 +642,24 @@ private constructor(
640642

641643
@JsonCreator
642644
private constructor(
645+
@JsonProperty("ignoreSelectors")
646+
@ExcludeMissing
647+
ignoreSelectors: JsonField<List<String>> = JsonMissing.of(),
643648
@JsonProperty("model") @ExcludeMissing model: JsonField<Model> = JsonMissing.of(),
644649
@JsonProperty("selector")
645650
@ExcludeMissing
646651
selector: JsonField<String> = JsonMissing.of(),
647652
@JsonProperty("timeout") @ExcludeMissing timeout: JsonField<Double> = JsonMissing.of(),
648-
) : this(model, selector, timeout, mutableMapOf())
653+
) : this(ignoreSelectors, model, selector, timeout, mutableMapOf())
654+
655+
/**
656+
* Selectors for elements and subtrees that should be excluded from extraction
657+
*
658+
* @throws StagehandInvalidDataException if the JSON field has an unexpected type (e.g. if
659+
* the server responded with an unexpected value).
660+
*/
661+
fun ignoreSelectors(): Optional<List<String>> =
662+
ignoreSelectors.getOptional("ignoreSelectors")
649663

650664
/**
651665
* Model configuration object or model name string (e.g., 'openai/gpt-5-nano')
@@ -671,6 +685,16 @@ private constructor(
671685
*/
672686
fun timeout(): Optional<Double> = timeout.getOptional("timeout")
673687

688+
/**
689+
* Returns the raw JSON value of [ignoreSelectors].
690+
*
691+
* Unlike [ignoreSelectors], this method doesn't throw if the JSON field has an unexpected
692+
* type.
693+
*/
694+
@JsonProperty("ignoreSelectors")
695+
@ExcludeMissing
696+
fun _ignoreSelectors(): JsonField<List<String>> = ignoreSelectors
697+
674698
/**
675699
* Returns the raw JSON value of [model].
676700
*
@@ -713,19 +737,48 @@ private constructor(
713737
/** A builder for [Options]. */
714738
class Builder internal constructor() {
715739

740+
private var ignoreSelectors: JsonField<MutableList<String>>? = null
716741
private var model: JsonField<Model> = JsonMissing.of()
717742
private var selector: JsonField<String> = JsonMissing.of()
718743
private var timeout: JsonField<Double> = JsonMissing.of()
719744
private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
720745

721746
@JvmSynthetic
722747
internal fun from(options: Options) = apply {
748+
ignoreSelectors = options.ignoreSelectors.map { it.toMutableList() }
723749
model = options.model
724750
selector = options.selector
725751
timeout = options.timeout
726752
additionalProperties = options.additionalProperties.toMutableMap()
727753
}
728754

755+
/** Selectors for elements and subtrees that should be excluded from extraction */
756+
fun ignoreSelectors(ignoreSelectors: List<String>) =
757+
ignoreSelectors(JsonField.of(ignoreSelectors))
758+
759+
/**
760+
* Sets [Builder.ignoreSelectors] to an arbitrary JSON value.
761+
*
762+
* You should usually call [Builder.ignoreSelectors] with a well-typed `List<String>`
763+
* value instead. This method is primarily for setting the field to an undocumented or
764+
* not yet supported value.
765+
*/
766+
fun ignoreSelectors(ignoreSelectors: JsonField<List<String>>) = apply {
767+
this.ignoreSelectors = ignoreSelectors.map { it.toMutableList() }
768+
}
769+
770+
/**
771+
* Adds a single [String] to [ignoreSelectors].
772+
*
773+
* @throws IllegalStateException if the field was previously set to a non-list.
774+
*/
775+
fun addIgnoreSelector(ignoreSelector: String) = apply {
776+
ignoreSelectors =
777+
(ignoreSelectors ?: JsonField.of(mutableListOf())).also {
778+
checkKnown("ignoreSelectors", it).add(ignoreSelector)
779+
}
780+
}
781+
729782
/** Model configuration object or model name string (e.g., 'openai/gpt-5-nano') */
730783
fun model(model: Model) = model(JsonField.of(model))
731784

@@ -793,7 +846,13 @@ private constructor(
793846
* Further updates to this [Builder] will not mutate the returned instance.
794847
*/
795848
fun build(): Options =
796-
Options(model, selector, timeout, additionalProperties.toMutableMap())
849+
Options(
850+
(ignoreSelectors ?: JsonMissing.of()).map { it.toImmutable() },
851+
model,
852+
selector,
853+
timeout,
854+
additionalProperties.toMutableMap(),
855+
)
797856
}
798857

799858
private var validated: Boolean = false
@@ -812,6 +871,7 @@ private constructor(
812871
return@apply
813872
}
814873

874+
ignoreSelectors()
815875
model().ifPresent { it.validate() }
816876
selector()
817877
timeout()
@@ -834,7 +894,8 @@ private constructor(
834894
*/
835895
@JvmSynthetic
836896
internal fun validity(): Int =
837-
(model.asKnown().getOrNull()?.validity() ?: 0) +
897+
(ignoreSelectors.asKnown().getOrNull()?.size ?: 0) +
898+
(model.asKnown().getOrNull()?.validity() ?: 0) +
838899
(if (selector.asKnown().isPresent) 1 else 0) +
839900
(if (timeout.asKnown().isPresent) 1 else 0)
840901

@@ -1056,20 +1117,21 @@ private constructor(
10561117
}
10571118

10581119
return other is Options &&
1120+
ignoreSelectors == other.ignoreSelectors &&
10591121
model == other.model &&
10601122
selector == other.selector &&
10611123
timeout == other.timeout &&
10621124
additionalProperties == other.additionalProperties
10631125
}
10641126

10651127
private val hashCode: Int by lazy {
1066-
Objects.hash(model, selector, timeout, additionalProperties)
1128+
Objects.hash(ignoreSelectors, model, selector, timeout, additionalProperties)
10671129
}
10681130

10691131
override fun hashCode(): Int = hashCode
10701132

10711133
override fun toString() =
1072-
"Options{model=$model, selector=$selector, timeout=$timeout, additionalProperties=$additionalProperties}"
1134+
"Options{ignoreSelectors=$ignoreSelectors, model=$model, selector=$selector, timeout=$timeout, additionalProperties=$additionalProperties}"
10731135
}
10741136

10751137
/** JSON Schema defining the structure of data to extract */

stagehand-java-core/src/test/kotlin/com/browserbase/api/models/sessions/SessionExtractParamsTest.kt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ internal class SessionExtractParamsTest {
1818
.instruction("Extract all product names and prices from the page")
1919
.options(
2020
SessionExtractParams.Options.builder()
21+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
2122
.model(
2223
ModelConfig.builder()
2324
.modelName("openai/gpt-5.4-mini")
@@ -63,6 +64,7 @@ internal class SessionExtractParamsTest {
6364
.instruction("Extract all product names and prices from the page")
6465
.options(
6566
SessionExtractParams.Options.builder()
67+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
6668
.model(
6769
ModelConfig.builder()
6870
.modelName("openai/gpt-5.4-mini")
@@ -112,6 +114,7 @@ internal class SessionExtractParamsTest {
112114
.instruction("Extract all product names and prices from the page")
113115
.options(
114116
SessionExtractParams.Options.builder()
117+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
115118
.model(
116119
ModelConfig.builder()
117120
.modelName("openai/gpt-5.4-mini")
@@ -144,6 +147,7 @@ internal class SessionExtractParamsTest {
144147
assertThat(body.options())
145148
.contains(
146149
SessionExtractParams.Options.builder()
150+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
147151
.model(
148152
ModelConfig.builder()
149153
.modelName("openai/gpt-5.4-mini")

stagehand-java-core/src/test/kotlin/com/browserbase/api/services/async/SessionServiceAsyncTest.kt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,7 @@ internal class SessionServiceAsyncTest {
317317
.instruction("Extract all product names and prices from the page")
318318
.options(
319319
SessionExtractParams.Options.builder()
320+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
320321
.model(
321322
ModelConfig.builder()
322323
.modelName("openai/gpt-5.4-mini")
@@ -366,6 +367,7 @@ internal class SessionServiceAsyncTest {
366367
.instruction("Extract all product names and prices from the page")
367368
.options(
368369
SessionExtractParams.Options.builder()
370+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
369371
.model(
370372
ModelConfig.builder()
371373
.modelName("openai/gpt-5.4-mini")

stagehand-java-core/src/test/kotlin/com/browserbase/api/services/blocking/SessionServiceTest.kt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,7 @@ internal class SessionServiceTest {
314314
.instruction("Extract all product names and prices from the page")
315315
.options(
316316
SessionExtractParams.Options.builder()
317+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
317318
.model(
318319
ModelConfig.builder()
319320
.modelName("openai/gpt-5.4-mini")
@@ -362,6 +363,7 @@ internal class SessionServiceTest {
362363
.instruction("Extract all product names and prices from the page")
363364
.options(
364365
SessionExtractParams.Options.builder()
366+
.ignoreSelectors(listOf("nav", ".cookie-banner", "#sidebar-ads"))
365367
.model(
366368
ModelConfig.builder()
367369
.modelName("openai/gpt-5.4-mini")

0 commit comments

Comments
 (0)