8484 * ACL is not checked for proxy requests: the proxy is a global transport function, not a document
8585 * operation. Access control is enforced by the target endpoint.
8686 * <p>
87- * This filter rejects with {@link jakarta.ws.rs.NotAcceptableException} any request for which
88- * content negotiation selects an (X)HTML variant. Rendering arbitrary external URIs as (X)HTML
89- * through the full server-side pipeline (SPARQL DESCRIBE + XSLT) for every browser-originated
90- * proxy request would cause unbounded resource exhaustion — a connection-pool and CPU amplification
91- * attack vector. Browsers receive the LDH application shell from the downstream handler instead;
92- * the client-side Saxon-JS layer then issues a second, RDF-typed request that hits this filter and
93- * is proxied cheaply.
87+ * This filter does <em>not</em> proxy requests from clients that explicitly accept (X)HTML.
88+ * Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline
89+ * (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector.
90+ * When the {@code Accept} header contains a non-wildcard {@code text/html} or
91+ * {@code application/xhtml+xml} type, the filter returns immediately so the downstream handler
92+ * serves the LDH application shell; the client-side Saxon-JS layer then issues a second, RDF-typed
93+ * request that hits this filter and is proxied cheaply. Pure API clients that send only
94+ * {@code *}{@code /*} (e.g. curl) reach the proxy because they do not list an explicit HTML type.
9495 *
9596 * @author Martynas Jusevičius {@literal <martynas@atomgraph.com>}
9697 */
@@ -115,20 +116,30 @@ public void filter(ContainerRequestContext requestContext) throws IOException
115116
116117 URI targetURI = targetOpt .get ();
117118
118- // negotiate the response format from RDF/SPARQL writable types
119+ // do not proxy requests from clients that explicitly accept (X)HTML — they expect the app
120+ // shell, which the downstream handler serves. Browsers list text/html as a non-wildcard type;
121+ // pure API clients (curl etc.) send only */* and must reach the proxy.
122+ // (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
123+ // (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
124+ boolean clientAcceptsHtml = requestContext .getAcceptableMediaTypes ().stream ()
125+ .anyMatch (mt -> !mt .isWildcardType () && !mt .isWildcardSubtype () &&
126+ (mt .isCompatible (MediaType .TEXT_HTML_TYPE ) ||
127+ mt .isCompatible (MediaType .APPLICATION_XHTML_XML_TYPE )));
128+ if (clientAcceptsHtml ) return ;
129+
130+ // negotiate the response format from RDF/SPARQL writable types only
131+ // (client.MediaTypes prepends HTML/XHTML; strip them so selectVariant cannot pick them)
119132 List <MediaType > writableTypes = new ArrayList <>(getMediaTypes ().getWritable (Model .class ));
120133 writableTypes .addAll (getMediaTypes ().getWritable (ResultSet .class ));
134+ writableTypes .removeIf (mt -> mt .isCompatible (MediaType .TEXT_HTML_TYPE ) ||
135+ mt .isCompatible (MediaType .APPLICATION_XHTML_XML_TYPE ));
121136 List <Variant > variants = com .atomgraph .core .model .impl .Response .getVariants (
122137 writableTypes ,
123138 getSystem ().getSupportedLanguages (),
124139 new ArrayList <>());
125-
140+
126141 Variant variant = getRequest ().selectVariant (variants );
127- // (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
128- // (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
129- if (variant == null ||
130- variant .getMediaType ().isCompatible (MediaType .TEXT_HTML_TYPE ) ||
131- variant .getMediaType ().isCompatible (MediaType .APPLICATION_XHTML_XML_TYPE ))
142+ if (variant == null )
132143 {
133144 if (log .isTraceEnabled ()) log .trace ("Requested Variant {} is not on the list of acceptable Response Variants: {}" , variant , variants );
134145 throw new NotAcceptableException ();
0 commit comments