Skip to content

Commit 05cffcf

Browse files
kasnderclaude
andcommitted
Fix DoH crash and improve resilience
- Fix NetworkOnMainThreadException when disabling DoH by moving connection pool eviction to a background thread - Add circuit breaker to skip DoH for 60s after 10 consecutive failures, avoiding timeout latency on every query when the endpoint is down - Validate DoH responses are at least 12 bytes (minimum DNS header) - Don't retry on 4xx client errors (only server errors are worth retrying) - Use null-safe comparison for endpoint in getInstance() Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9f15d42 commit 05cffcf

2 files changed

Lines changed: 52 additions & 29 deletions

File tree

app/src/main/java/net/kollnig/missioncontrol/dns/DnsOverHttpsClient.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import net.kollnig.missioncontrol.BuildConfig;
2222

2323
import java.io.IOException;
24+
import java.util.Objects;
2425
import java.util.concurrent.TimeUnit;
2526

2627
import okhttp3.ConnectionPool;
@@ -70,7 +71,7 @@ public static synchronized DnsOverHttpsClient getInstance(Context context) {
7071
* Get instance with specific endpoint (used by DnsProxyServer).
7172
*/
7273
public static synchronized DnsOverHttpsClient getInstance(String endpoint) {
73-
if (instance == null || !instance.endpoint.equals(endpoint)) {
74+
if (instance == null || !Objects.equals(instance.endpoint, endpoint)) {
7475
if (instance != null) {
7576
instance.shutdown();
7677
}
@@ -89,14 +90,13 @@ public static synchronized void resetInstance() {
8990
/**
9091
* Shutdown the OkHttpClient and release resources.
9192
* Call this when DoH is disabled to prevent idle connections from draining
92-
* battery.
93+
* battery. Runs on a background thread to avoid NetworkOnMainThreadException
94+
* when closing sockets.
9395
*/
9496
public void shutdown() {
9597
Log.i(TAG, "Shutting down DoH client");
9698
client.dispatcher().cancelAll();
97-
client.connectionPool().evictAll();
98-
// Note: We don't call executorService().shutdown() because it would
99-
// make the client unusable if a new instance isn't created in time
99+
new Thread(() -> client.connectionPool().evictAll(), "DoH-shutdown").start();
100100
}
101101

102102
/**
@@ -137,6 +137,7 @@ public byte[] resolve(@NonNull byte[] dnsQuery) {
137137
try (Response response = client.newCall(request).execute()) {
138138
if (!response.isSuccessful()) {
139139
Log.w(TAG, "DoH request failed with code: " + response.code());
140+
if (response.code() < 500) return null; // Don't retry client errors
140141
continue;
141142
}
142143

@@ -147,6 +148,10 @@ public byte[] resolve(@NonNull byte[] dnsQuery) {
147148
}
148149

149150
byte[] dnsResponse = responseBody.bytes();
151+
if (dnsResponse.length < 12) {
152+
Log.w(TAG, "DoH response too short: " + dnsResponse.length + " bytes");
153+
continue;
154+
}
150155
Log.d(TAG, "DoH response received: " + dnsResponse.length + " bytes");
151156
return dnsResponse;
152157
}

app/src/main/java/net/kollnig/missioncontrol/dns/DnsProxyServer.java

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ public class DnsProxyServer {
5555
private ServerSocket tcpServerSocket;
5656
private ExecutorService executor;
5757
private final AtomicInteger dohFailures = new AtomicInteger(0);
58+
private static final int CIRCUIT_BREAKER_THRESHOLD = 10;
59+
private static final long CIRCUIT_BREAKER_COOLDOWN_MS = 60_000;
60+
private volatile long circuitOpenUntil = 0;
5861

5962
private DnsProxyServer(Context context) {
6063
this.context = context.getApplicationContext();
@@ -221,32 +224,38 @@ private void runServer() {
221224
*/
222225
private void handleQuery(byte[] queryData, InetAddress clientAddress, int clientPort) {
223226
try {
224-
// Get the DoH client
225227
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
226-
String endpoint = prefs.getString("doh_endpoint", BuildConfig.DEFAULT_DOH_ENDPOINT);
227-
DnsOverHttpsClient dohClient = DnsOverHttpsClient.getInstance(endpoint);
228-
229-
// Forward the query via DoH (the query is already in DNS wire format)
230-
byte[] responseData = dohClient.resolve(queryData);
228+
byte[] responseData = null;
229+
230+
// Circuit breaker: skip DoH if we recently had too many failures
231+
boolean circuitOpen = System.currentTimeMillis() < circuitOpenUntil;
232+
if (!circuitOpen) {
233+
String endpoint = prefs.getString("doh_endpoint", BuildConfig.DEFAULT_DOH_ENDPOINT);
234+
DnsOverHttpsClient dohClient = DnsOverHttpsClient.getInstance(endpoint);
235+
responseData = dohClient.resolve(queryData);
236+
}
231237

232238
if (responseData != null) {
233239
dohFailures.set(0);
234-
// Send response back to client
240+
circuitOpenUntil = 0;
235241
DatagramSocket socket = serverSocket;
236242
if (socket == null || socket.isClosed()) return;
237243
DatagramPacket response = new DatagramPacket(
238244
responseData, responseData.length, clientAddress, clientPort);
239245
socket.send(response);
240246
Log.d(TAG, "DoH query successful, response sent to " + clientAddress + ":" + clientPort);
241247
} else {
242-
int failures = dohFailures.incrementAndGet();
243-
Log.w(TAG, "DoH query returned null response, failures=" + failures);
244-
245-
if (failures >= 10) {
246-
if (eu.faircode.netguard.Util.isInternetWorking(context)) {
247-
Log.w(TAG, "DoH server unreachable even though internet is working, showing notification");
248-
eu.faircode.netguard.ServiceSinkhole.dohError(context);
249-
dohFailures.set(0); // Reset to wait for the next 10 failures
248+
if (!circuitOpen) {
249+
int failures = dohFailures.incrementAndGet();
250+
Log.w(TAG, "DoH query returned null response, failures=" + failures);
251+
252+
if (failures >= CIRCUIT_BREAKER_THRESHOLD) {
253+
circuitOpenUntil = System.currentTimeMillis() + CIRCUIT_BREAKER_COOLDOWN_MS;
254+
dohFailures.set(0);
255+
if (eu.faircode.netguard.Util.isInternetWorking(context)) {
256+
Log.w(TAG, "DoH circuit breaker tripped, skipping DoH for 60s");
257+
eu.faircode.netguard.ServiceSinkhole.dohError(context);
258+
}
250259
}
251260
}
252261

@@ -335,26 +344,35 @@ private void handleTcpConnection(Socket client) {
335344
// Let's copy the resolution logic here for safety.
336345

337346
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
338-
String endpoint = prefs.getString("doh_endpoint", BuildConfig.DEFAULT_DOH_ENDPOINT);
339-
DnsOverHttpsClient dohClient = DnsOverHttpsClient.getInstance(endpoint);
347+
byte[] responseData = null;
340348

341-
byte[] responseData = dohClient.resolve(queryData);
349+
boolean circuitOpen = System.currentTimeMillis() < circuitOpenUntil;
350+
if (!circuitOpen) {
351+
String endpoint = prefs.getString("doh_endpoint", BuildConfig.DEFAULT_DOH_ENDPOINT);
352+
DnsOverHttpsClient dohClient = DnsOverHttpsClient.getInstance(endpoint);
353+
responseData = dohClient.resolve(queryData);
354+
}
342355

343356
if (responseData != null) {
344357
dohFailures.set(0);
345-
// Write response with 2-byte length prefix
358+
circuitOpenUntil = 0;
346359
out.writeShort(responseData.length);
347360
out.write(responseData);
348361
out.flush();
349362
Log.d(TAG, "DoH TCP query successful");
350363
} else {
351-
int failures = dohFailures.incrementAndGet();
352-
if (failures >= 10 && eu.faircode.netguard.Util.isInternetWorking(context)) {
353-
eu.faircode.netguard.ServiceSinkhole.dohError(context);
354-
dohFailures.set(0);
364+
if (!circuitOpen) {
365+
int failures = dohFailures.incrementAndGet();
366+
if (failures >= CIRCUIT_BREAKER_THRESHOLD) {
367+
circuitOpenUntil = System.currentTimeMillis() + CIRCUIT_BREAKER_COOLDOWN_MS;
368+
dohFailures.set(0);
369+
if (eu.faircode.netguard.Util.isInternetWorking(context)) {
370+
Log.w(TAG, "DoH circuit breaker tripped (TCP), skipping DoH for 60s");
371+
eu.faircode.netguard.ServiceSinkhole.dohError(context);
372+
}
373+
}
355374
}
356375

357-
// Fallback to standard DNS if enabled
358376
boolean dnsFallback = prefs.getBoolean("doh_dns_fallback", false);
359377
if (dnsFallback) {
360378
byte[] fallbackResponse = resolveViaStandardDns(queryData);

0 commit comments

Comments
 (0)