Skip to content

Commit 8e7c071

Browse files
namedgraph and claude committed
Fix ProxyRequestFilter HTML bypass: check Accept header explicitly
Replace the selectVariant==null bypass with an explicit check for non-wildcard text/html or application/xhtml+xml in the Accept header. Browsers list these types explicitly (q=1.0) and get bypassed to the app shell; API clients that send only */* reach the proxy.

The old approach (Core MediaTypes, selectVariant==null) failed for browsers because their */*;q=0.8 wildcard matched RDF variants, causing the proxy to return RDF instead of the (X)HTML app shell.

Add testHtmlAcceptBypassesProxy to cover the bypass path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c7d04ab commit 8e7c071

File tree

2 files changed

+40
-11
lines changed

2 files changed

+40
-11
lines changed

src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,15 @@
8383
* ACL is not checked for proxy requests: the proxy is a global transport function, not a document
8484
* operation. Access control is enforced by the target endpoint.
8585
* <p>
86-
* This filter intentionally does <em>not</em> proxy (X)HTML responses. Rendering arbitrary external
87-
* URIs as (X)HTML through the full server-side pipeline (SPARQL DESCRIBE + XSLT) for every
88-
* browser-originated proxy request would cause unbounded resource exhaustion — a connection-pool and
89-
* CPU amplification attack vector. Instead, HTML-only requests fall through to the downstream handler,
90-
* which serves the LDH application shell; the client-side Saxon-JS layer then issues a second,
91-
* RDF-typed request that <em>does</em> hit this filter and is handled cheaply. The bypass is
92-
* implemented by building the candidate variant list from Core's {@link MediaTypes} (RDF/SPARQL types
93-
* only, no HTML) and treating a {@code null} result from {@link Request#selectVariant} as the signal
94-
* to skip proxying.
86+
* This filter intentionally does <em>not</em> proxy requests from clients that explicitly accept
87+
* (X)HTML. Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline
88+
* (SPARQL DESCRIBE + XSLT) for every browser-originated proxy request would cause unbounded resource
89+
* exhaustion — a connection-pool and CPU amplification attack vector. Instead, requests whose
90+
* {@code Accept} header contains a non-wildcard {@code text/html} or {@code application/xhtml+xml}
91+
* type fall through to the downstream handler, which serves the LDH application shell; the
92+
* client-side Saxon-JS layer then issues a second, RDF-typed request that <em>does</em> hit this
93+
* filter and is handled cheaply. Pure API clients that send only {@code *}{@code /*} (e.g. curl)
94+
* reach the proxy because they do not list an explicit HTML type.
9595
*
9696
* @author Martynas Jusevičius {@literal <martynas@atomgraph.com>}
9797
*/
@@ -115,8 +115,18 @@ public void filter(ContainerRequestContext requestContext) throws IOException
115115

116116
URI targetURI = targetOpt.get();
117117

118-
// do not proxy requests that don't accept any RDF/SPARQL type — let the downstream handler serve the response.
119-
// Core MediaTypes contains only RDF/SPARQL types so selectVariant returns null for HTML-only Accept headers.
118+
// do not proxy requests from clients that explicitly accept (X)HTML — they expect the app shell,
119+
// which the downstream handler serves. Browsers list text/html as a non-wildcard type; pure API
120+
// clients (curl etc.) send only */* and must reach the proxy.
121+
// Defending against resource exhaustion: proxying + full server-side XSLT rendering for arbitrary
122+
// external URIs on every browser request would amplify CPU and connection-pool load unboundedly.
123+
boolean clientAcceptsHtml = requestContext.getAcceptableMediaTypes().stream()
124+
.anyMatch(mt -> !mt.isWildcardType() && !mt.isWildcardSubtype() &&
125+
(mt.isCompatible(MediaType.TEXT_HTML_TYPE) ||
126+
mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE)));
127+
if (clientAcceptsHtml) return;
128+
129+
// negotiate the response format from RDF/SPARQL writable types
120130
List<MediaType> writableTypes = new ArrayList<>(getMediaTypes().getWritable(Model.class));
121131
writableTypes.addAll(getMediaTypes().getWritable(ResultSet.class));
122132
List<Variant> variants = com.atomgraph.core.model.impl.Response.getVariants(

src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949

5050
import static org.mockito.ArgumentMatchers.any;
5151
import static org.mockito.ArgumentMatchers.anyString;
52+
import static org.mockito.Mockito.verify;
53+
import static org.mockito.Mockito.never;
5254
import static org.mockito.Mockito.when;
5355

5456
/**
@@ -94,6 +96,23 @@ public void setUp()
9496
filter.ontology = () -> Optional.empty();
9597
}
9698

99+
/**
100+
* When the client explicitly accepts (X)HTML, the filter must bypass proxying entirely and let
101+
* the downstream handler serve the app shell — regardless of the target URI.
102+
*/
103+
@Test
104+
public void testHtmlAcceptBypassesProxy() throws IOException
105+
{
106+
MultivaluedHashMap<String, String> params = new MultivaluedHashMap<>();
107+
params.putSingle("uri", EXTERNAL_URI.toString());
108+
when(uriInfo.getQueryParameters()).thenReturn(params);
109+
when(requestContext.getAcceptableMediaTypes()).thenReturn(List.of(MediaType.TEXT_HTML_TYPE));
110+
111+
filter.filter(requestContext);
112+
113+
verify(requestContext, never()).abortWith(any(Response.class));
114+
}
115+
97116
/**
98117
* When the proxy is disabled, a {@code ?uri=} pointing to an unregistered external URL must be blocked.
99118
*/

0 commit comments

Comments
 (0)