Skip to content

Commit 3064f2a

Browse files
committed
(X)HTML response fix in the proxy filter
1 parent 1e9cd8a commit 3064f2a

2 files changed

Lines changed: 14 additions & 16 deletions

File tree

http-tests/proxy/GET-proxied-external-502.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ add-agent-to-group.sh \
1919

2020
curl -k -w "%{http_code}\n" -o /dev/null -s \
2121
-G \
22+
-H "Accept: application/n-triples" \
2223
-E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
2324
--data-urlencode "uri=http://f1d2d4cf-90bb-4f5b-ae4b-921e584b6edd.org" \
2425
"$END_USER_BASE_URL" \

src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,13 @@
8484
* ACL is not checked for proxy requests: the proxy is a global transport function, not a document
8585
* operation. Access control is enforced by the target endpoint.
8686
* <p>
87-
* This filter intentionally does <em>not</em> proxy requests from clients that explicitly accept
88-
* (X)HTML. Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline
89-
* (SPARQL DESCRIBE + XSLT) for every browser-originated proxy request would cause unbounded resource
90-
* exhaustion — a connection-pool and CPU amplification attack vector. Instead, requests whose
91-
* {@code Accept} header contains a non-wildcard {@code text/html} or {@code application/xhtml+xml}
92-
* type fall through to the downstream handler, which serves the LDH application shell; the
93-
* client-side Saxon-JS layer then issues a second, RDF-typed request that <em>does</em> hit this
94-
* filter and is handled cheaply. Pure API clients that send only {@code *}{@code /*} (e.g. curl)
95-
* reach the proxy because they do not list an explicit HTML type.
87+
* This filter rejects with {@link jakarta.ws.rs.NotAcceptableException} any request for which
88+
* content negotiation selects no acceptable variant or an (X)HTML variant. Rendering arbitrary external URIs as (X)HTML
89+
* through the full server-side pipeline (SPARQL DESCRIBE + XSLT) for every browser-originated
90+
* proxy request would cause unbounded resource exhaustion — a connection-pool and CPU amplification
91+
* attack vector. Browsers receive the LDH application shell from the downstream handler instead;
92+
* the client-side Saxon-JS layer then issues a second, RDF-typed request that hits this filter and
93+
* is proxied cheaply.
9694
*
9795
* @author Martynas Jusevičius {@literal <martynas@atomgraph.com>}
9896
*/
@@ -118,7 +116,6 @@ public void filter(ContainerRequestContext requestContext) throws IOException
118116
URI targetURI = targetOpt.get();
119117

120118
// negotiate the response format from RDF/SPARQL writable types
121-
// skip filter if (X)HTML is the selected variant - we don't offer it for proxied responses
122119
List<MediaType> writableTypes = new ArrayList<>(getMediaTypes().getWritable(Model.class));
123120
writableTypes.addAll(getMediaTypes().getWritable(ResultSet.class));
124121
List<Variant> variants = com.atomgraph.core.model.impl.Response.getVariants(
@@ -127,15 +124,16 @@ public void filter(ContainerRequestContext requestContext) throws IOException
127124
new ArrayList<>());
128125

129126
Variant variant = getRequest().selectVariant(variants);
130-
if (variant == null)
127+
// (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
128+
// (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
129+
if (variant == null ||
130+
variant.getMediaType().isCompatible(MediaType.TEXT_HTML_TYPE) ||
131+
variant.getMediaType().isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE))
131132
{
132133
if (log.isTraceEnabled()) log.trace("Requested Variant {} is not on the list of acceptable Response Variants: {}", variant, variants);
133134
throw new NotAcceptableException();
134135
}
135136

136-
if (variant.getMediaType().isCompatible(MediaType.TEXT_HTML_TYPE) ||
137-
variant.getMediaType().isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE)) return;
138-
139137
// strip #fragment (servers do not receive fragment identifiers)
140138
if (targetURI.getFragment() != null)
141139
{
@@ -379,8 +377,7 @@ public Optional<Ontology> getOntology()
379377
}
380378

381379
/**
382-
* Returns the media types registry.
383-
* Core MediaTypes do not include (X)HTML types, which is what we want here.
380+
* Returns the media types registry used for content negotiation and outbound {@code Accept} headers.
384381
*
385382
* @return media types
386383
*/

0 commit comments

Comments
 (0)