8484 * ACL is not checked for proxy requests: the proxy is a global transport function, not a document
8585 * operation. Access control is enforced by the target endpoint.
8686 * <p>
87- * This filter intentionally does <em>not</em> proxy requests from clients that explicitly accept
88- * (X)HTML. Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline
89- * (SPARQL DESCRIBE + XSLT) for every browser-originated proxy request would cause unbounded resource
90- * exhaustion — a connection-pool and CPU amplification attack vector. Instead, requests whose
91- * {@code Accept} header contains a non-wildcard {@code text/html} or {@code application/xhtml+xml}
92- * type fall through to the downstream handler, which serves the LDH application shell; the
93- * client-side Saxon-JS layer then issues a second, RDF-typed request that <em>does</em> hit this
94- * filter and is handled cheaply. Pure API clients that send only {@code *}{@code /*} (e.g. curl)
95- * reach the proxy because they do not list an explicit HTML type.
87+ * This filter rejects with {@link jakarta.ws.rs.NotAcceptableException} any request for which
88+ * content negotiation selects no variant or an (X)HTML variant. Rendering arbitrary external URIs as (X)HTML
89+ * through the full server-side pipeline (SPARQL DESCRIBE + XSLT) for every browser-originated
90+ * proxy request would cause unbounded resource exhaustion — a connection-pool and CPU amplification
91+ * attack vector. Browsers receive the LDH application shell from the downstream handler instead;
92+ * the client-side Saxon-JS layer then issues a second, RDF-typed request that hits this filter and
93+ * is proxied cheaply.
9694 *
9795 * @author Martynas Jusevičius {@literal <martynas@atomgraph.com>}
9896 */
@@ -118,7 +116,6 @@ public void filter(ContainerRequestContext requestContext) throws IOException
118116 URI targetURI = targetOpt .get ();
119117
120118 // negotiate the response format from RDF/SPARQL writable types
121- // skip filter if (X)HTML is the selected variant - we don't offer it for proxied responses
122119 List <MediaType > writableTypes = new ArrayList <>(getMediaTypes ().getWritable (Model .class ));
123120 writableTypes .addAll (getMediaTypes ().getWritable (ResultSet .class ));
124121 List <Variant > variants = com .atomgraph .core .model .impl .Response .getVariants (
@@ -127,15 +124,16 @@ public void filter(ContainerRequestContext requestContext) throws IOException
127124 new ArrayList <>());
128125
129126 Variant variant = getRequest ().selectVariant (variants );
130- if (variant == null )
127+ // (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
128+ // (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
129+ if (variant == null ||
130+ variant .getMediaType ().isCompatible (MediaType .TEXT_HTML_TYPE ) ||
131+ variant .getMediaType ().isCompatible (MediaType .APPLICATION_XHTML_XML_TYPE ))
131132 {
132133 if (log .isTraceEnabled ()) log .trace ("Requested Variant {} is not on the list of acceptable Response Variants: {}" , variant , variants );
133134 throw new NotAcceptableException ();
134135 }
135136
136- if (variant .getMediaType ().isCompatible (MediaType .TEXT_HTML_TYPE ) ||
137- variant .getMediaType ().isCompatible (MediaType .APPLICATION_XHTML_XML_TYPE )) return ;
138-
139137 // strip #fragment (servers do not receive fragment identifiers)
140138 if (targetURI .getFragment () != null )
141139 {
@@ -379,8 +377,7 @@ public Optional<Ontology> getOntology()
379377 }
380378
381379 /**
382- * Returns the media types registry.
383- * Core MediaTypes do not include (X)HTML types, which is what we want here.
380+ * Returns the media types registry used for content negotiation and outbound {@code Accept} headers.
384381 *
385382 * @return media types
386383 */
0 commit comments