Skip to content

Commit a47bc84

Browse files
committed
Restored (X)HTML workaround in the proxy filter
1 parent ca0d4a4 commit a47bc84

3 files changed

Lines changed: 45 additions & 54 deletions

File tree

http-tests/proxy/GET-proxied-external-xhtml-406.sh

Lines changed: 0 additions & 26 deletions
This file was deleted.

src/main/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilter.java

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,14 @@
8484
* ACL is not checked for proxy requests: the proxy is a global transport function, not a document
8585
* operation. Access control is enforced by the target endpoint.
8686
* <p>
87-
* This filter rejects with {@link jakarta.ws.rs.NotAcceptableException} any request for which
88-
* content negotiation selects an (X)HTML variant. Rendering arbitrary external URIs as (X)HTML
89-
* through the full server-side pipeline (SPARQL DESCRIBE + XSLT) for every browser-originated
90-
* proxy request would cause unbounded resource exhaustion — a connection-pool and CPU amplification
91-
* attack vector. Browsers receive the LDH application shell from the downstream handler instead;
92-
* the client-side Saxon-JS layer then issues a second, RDF-typed request that hits this filter and
93-
* is proxied cheaply.
87+
* This filter does <em>not</em> proxy requests from clients that explicitly accept (X)HTML.
88+
* Rendering arbitrary external URIs as (X)HTML through the full server-side pipeline
89+
* (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector.
90+
* When the {@code Accept} header contains a non-wildcard {@code text/html} or
91+
* {@code application/xhtml+xml} type, the filter returns immediately so the downstream handler
92+
* serves the LDH application shell; the client-side Saxon-JS layer then issues a second, RDF-typed
93+
* request that hits this filter and is proxied cheaply. Pure API clients that send only
94+
* {@code *}{@code /*} (e.g. curl) reach the proxy because they do not list an explicit HTML type.
9495
*
9596
* @author Martynas Jusevičius {@literal <martynas@atomgraph.com>}
9697
*/
@@ -115,20 +116,30 @@ public void filter(ContainerRequestContext requestContext) throws IOException
115116

116117
URI targetURI = targetOpt.get();
117118

118-
// negotiate the response format from RDF/SPARQL writable types
119+
// do not proxy requests from clients that explicitly accept (X)HTML — they expect the app
120+
// shell, which the downstream handler serves. Browsers list text/html as a non-wildcard type;
121+
// pure API clients (curl etc.) send only */* and must reach the proxy.
122+
// (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
123+
// (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
124+
boolean clientAcceptsHtml = requestContext.getAcceptableMediaTypes().stream()
125+
.anyMatch(mt -> !mt.isWildcardType() && !mt.isWildcardSubtype() &&
126+
(mt.isCompatible(MediaType.TEXT_HTML_TYPE) ||
127+
mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE)));
128+
if (clientAcceptsHtml) return;
129+
130+
// negotiate the response format from RDF/SPARQL writable types only
131+
// (client.MediaTypes prepends HTML/XHTML; strip them so selectVariant cannot pick them)
119132
List<MediaType> writableTypes = new ArrayList<>(getMediaTypes().getWritable(Model.class));
120133
writableTypes.addAll(getMediaTypes().getWritable(ResultSet.class));
134+
writableTypes.removeIf(mt -> mt.isCompatible(MediaType.TEXT_HTML_TYPE) ||
135+
mt.isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE));
121136
List<Variant> variants = com.atomgraph.core.model.impl.Response.getVariants(
122137
writableTypes,
123138
getSystem().getSupportedLanguages(),
124139
new ArrayList<>());
125-
140+
126141
Variant variant = getRequest().selectVariant(variants);
127-
// (X)HTML is not offered for proxied documents — rendering external RDF as HTML server-side
128-
// (SPARQL DESCRIBE + XSLT) is expensive and creates a resource-exhaustion attack vector
129-
if (variant == null ||
130-
variant.getMediaType().isCompatible(MediaType.TEXT_HTML_TYPE) ||
131-
variant.getMediaType().isCompatible(MediaType.APPLICATION_XHTML_XML_TYPE))
142+
if (variant == null)
132143
{
133144
if (log.isTraceEnabled()) log.trace("Requested Variant {} is not on the list of acceptable Response Variants: {}", variant, variants);
134145
throw new NotAcceptableException();

src/test/java/com/atomgraph/linkeddatahub/server/filter/request/ProxyRequestFilterTest.java

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.IOException;
2727
import java.net.URI;
2828
import java.util.Collections;
29+
import java.util.List;
2930
import java.util.Locale;
3031
import org.junit.Before;
3132
import org.junit.Test;
@@ -64,41 +65,46 @@ public void setUp()
6465
@Test
6566
public void testNonProxyRequestSkipsFilter() throws IOException
6667
{
67-
// getProperty returns null by default; resolveTargetURI returns empty → filter exits immediately
6868
filter.filter(requestContext);
6969
verify(request, never()).selectVariant(anyList());
7070
verify(requestContext, never()).abortWith(any());
7171
}
7272

73-
/** No acceptable RDF/SPARQL variant — filter must throw 406. */
74-
@Test(expected = NotAcceptableException.class)
75-
public void testNullVariantThrowsNotAcceptable() throws IOException
73+
/** Client explicitly accepts text/html — filter must return early (app shell). */
74+
@Test
75+
public void testHtmlAcceptReturnsEarly() throws IOException
7676
{
7777
when(requestContext.getProperty(AC.uri.getURI()))
7878
.thenReturn(URI.create("http://example.org/resource"));
79-
when(request.selectVariant(anyList())).thenReturn(null);
79+
when(requestContext.getAcceptableMediaTypes())
80+
.thenReturn(List.of(MediaType.TEXT_HTML_TYPE));
8081
filter.filter(requestContext);
82+
verify(request, never()).selectVariant(anyList());
83+
verify(requestContext, never()).abortWith(any());
8184
}
8285

83-
/** text/html selected as best variant — filter must throw 406. */
84-
@Test(expected = NotAcceptableException.class)
85-
public void testHtmlVariantThrowsNotAcceptable() throws IOException
86+
/** Client explicitly accepts application/xhtml+xml — filter must return early (app shell). */
87+
@Test
88+
public void testXhtmlAcceptReturnsEarly() throws IOException
8689
{
8790
when(requestContext.getProperty(AC.uri.getURI()))
8891
.thenReturn(URI.create("http://example.org/resource"));
89-
when(request.selectVariant(anyList()))
90-
.thenReturn(new Variant(MediaType.TEXT_HTML_TYPE, (Locale) null, null));
92+
when(requestContext.getAcceptableMediaTypes())
93+
.thenReturn(List.of(MediaType.APPLICATION_XHTML_XML_TYPE));
9194
filter.filter(requestContext);
95+
verify(request, never()).selectVariant(anyList());
96+
verify(requestContext, never()).abortWith(any());
9297
}
9398

94-
/** application/xhtml+xml selected as best variant — filter must throw 406. */
99+
/** No acceptable RDF/SPARQL variant — filter must throw 406. */
95100
@Test(expected = NotAcceptableException.class)
96-
public void testXhtmlVariantThrowsNotAcceptable() throws IOException
101+
public void testNullVariantThrowsNotAcceptable() throws IOException
97102
{
98103
when(requestContext.getProperty(AC.uri.getURI()))
99104
.thenReturn(URI.create("http://example.org/resource"));
100-
when(request.selectVariant(anyList()))
101-
.thenReturn(new Variant(MediaType.APPLICATION_XHTML_XML_TYPE, (Locale) null, null));
105+
when(requestContext.getAcceptableMediaTypes())
106+
.thenReturn(List.of(MediaType.WILDCARD_TYPE));
107+
when(request.selectVariant(anyList())).thenReturn(null);
102108
filter.filter(requestContext);
103109
}
104110

0 commit comments

Comments
 (0)