Skip to content

Commit 0fc325f

Browse files
authored
downstream reverse tunnel: added handshake metrics (#42593)
<!-- !!!ATTENTION!!! If you are fixing *any* crash or *any* potential security issue, *do not* open a pull request in this repo. Please report the issue via emailing envoy-security@googlegroups.com where the issue will be triaged appropriately. Thank you in advance for helping to keep Envoy secure. !!!ATTENTION!!! For an explanation of how to fill out the fields, please see the relevant section in [PULL_REQUESTS.md](https://github.com/envoyproxy/envoy/blob/main/PULL_REQUESTS.md) !!!ATTENTION!!! Please check the [use of generative AI policy](https://github.com/envoyproxy/envoy/blob/main/CONTRIBUTING.md?plain=1#L41). You may use generative AI only if you fully understand the code. You need to disclose this usage in the PR description to ensure transparency. -->
Commit Message: Added reverse tunnel downstream handshake metrics.
Additional Description:
1. Reverse tunnel downstream handshake metrics are recorded at the worker level only; they are not aggregated across worker threads.
2. Stats are captured only if the enable_detailed_stats flag is enabled on the downstream reverse tunnel extension.
Risk Level: Low
Testing:
1. When the handshake fails:
```
# TYPE envoy_downstream_reverse_connection_handshake counter
envoy_downstream_reverse_connection_handshake{worker="worker_0",cluster="upstream-cluster",result="failed",failure_reason="http.403"} 3
envoy_downstream_reverse_connection_handshake{worker="worker_1",cluster="upstream-cluster",result="failed",failure_reason="http.403"} 3
envoy_downstream_reverse_connection_handshake{worker="worker_2",cluster="upstream-cluster",result="failed",failure_reason="http.403"} 3
envoy_downstream_reverse_connection_handshake{worker="worker_3",cluster="upstream-cluster",result="failed",failure_reason="http.403"} 3
```
2. When the handshake passes:
```
# TYPE envoy_downstream_reverse_connection_handshake counter
envoy_downstream_reverse_connection_handshake{worker="worker_0",cluster="upstream-cluster",result="success"} 1
envoy_downstream_reverse_connection_handshake{worker="worker_1",cluster="upstream-cluster",result="success"} 1
envoy_downstream_reverse_connection_handshake{worker="worker_2",cluster="upstream-cluster",result="success"} 1
envoy_downstream_reverse_connection_handshake{worker="worker_3",cluster="upstream-cluster",result="success"} 1
```
Docs Changes: None
Release Notes: None
Platform Specific Features:
[Optional Runtime guard:]
[Optional Fixes #Issue]
[Optional Fixes commit #PR or SHA]
[Optional Deprecated:]
[Optional [API Considerations](https://github.com/envoyproxy/envoy/blob/main/api/review_checklist.md):]
---------
Signed-off-by: Krishna Sharma <krishnagpl2001@gmail.com>
1 parent 6bb85ba commit 0fc325f

File tree

5 files changed

+330
-11
lines changed

5 files changed

+330
-11
lines changed

source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/rc_connection_wrapper.cc

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "source/common/network/connection_socket_impl.h"
1212
#include "source/extensions/bootstrap/reverse_tunnel/common/reverse_connection_utility.h"
1313
#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_connection_io_handle.h"
14+
#include "source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_tunnel_initiator_extension.h"
1415

1516
namespace Envoy {
1617
namespace Extensions {
@@ -143,7 +144,7 @@ std::string RCConnectionWrapper::connect(const std::string& src_tenant_id,
143144
const Http::Status encode_status = request_encoder.encodeHeaders(*headers, true);
144145
if (!encode_status.ok()) {
145146
ENVOY_LOG(error, "RCConnectionWrapper: encodeHeaders failed: {}", encode_status.message());
146-
onHandshakeFailure("HTTP handshake encode failed");
147+
onHandshakeFailure(HandshakeFailureReason::encodeError());
147148
}
148149

149150
return connection_->connectionInfoProvider().localAddress()->asString();
@@ -156,7 +157,7 @@ void RCConnectionWrapper::decodeHeaders(Http::ResponseHeaderMapPtr&& headers, bo
156157
onHandshakeSuccess();
157158
} else {
158159
ENVOY_LOG(error, "Received non-200 HTTP response: {}", status);
159-
onHandshakeFailure(absl::StrCat("HTTP handshake failed with status ", status));
160+
onHandshakeFailure(HandshakeFailureReason::httpStatusError(absl::StrCat(status)));
160161
}
161162
}
162163

@@ -169,15 +170,36 @@ void RCConnectionWrapper::dispatchHttp1(Buffer::Instance& buffer) {
169170
}
170171
}
171172

173+
// Fetches the downstream extension from parent_. The handshake callbacks in this file use it to
// record stats and null-check the result before dereferencing it.
ReverseTunnelInitiatorExtension* RCConnectionWrapper::getDownstreamExtension() const {
  auto* const extension = parent_.getDownstreamExtension();
  return extension;
}
176+
172177
void RCConnectionWrapper::onHandshakeSuccess() {
173178
std::string message = "reverse connection accepted";
174179
ENVOY_LOG(debug, "handshake succeeded: {}", message);
180+
181+
// Track handshake success stats.
182+
auto* extension = getDownstreamExtension();
183+
if (extension) {
184+
extension->incrementHandshakeStats(cluster_name_, true, "");
185+
}
186+
175187
parent_.onConnectionDone(message, this, false);
176188
}
177189

178-
void RCConnectionWrapper::onHandshakeFailure(const std::string& message) {
179-
ENVOY_LOG(debug, "handshake failed: {}", message);
180-
parent_.onConnectionDone(message, this, false);
190+
void RCConnectionWrapper::onHandshakeFailure(const HandshakeFailureReason& reason) {
191+
const std::string error_message = reason.getDetailedName();
192+
const std::string stats_failure_reason = reason.getNameForStats();
193+
194+
ENVOY_LOG(trace, "handshake failed: {}", error_message);
195+
196+
// Track handshake failure stats.
197+
auto* extension = getDownstreamExtension();
198+
if (extension) {
199+
extension->incrementHandshakeStats(cluster_name_, false, stats_failure_reason);
200+
}
201+
202+
parent_.onConnectionDone(error_message, this, false);
181203
}
182204

183205
void RCConnectionWrapper::shutdown() {

source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/rc_connection_wrapper.h

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,77 @@
1515
#include "source/common/http/response_decoder_impl_base.h"
1616
#include "source/common/network/filter_impl.h"
1717

18+
#include "absl/strings/str_cat.h"
19+
#include "absl/strings/string_view.h"
20+
#include "absl/types/optional.h"
21+
1822
namespace Envoy {
1923
namespace Extensions {
2024
namespace Bootstrap {
2125
namespace ReverseConnection {
2226

23-
// Forward declaration.
27+
// Forward declarations.
2428
class ReverseConnectionIOHandle;
29+
class ReverseTunnelInitiatorExtension;
30+
31+
/**
32+
* Class representing handshake failure with type and context.
33+
* Provides methods to generate detailed error messages and stat names.
34+
*/
35+
class HandshakeFailureReason {
36+
public:
37+
enum class Type {
38+
HttpStatusError, // HTTP response with non-200 status code
39+
EncodeError, // HTTP request encoding failed
40+
};
41+
42+
/**
43+
* Create a handshake failure reason for HTTP status errors.
44+
* @param status_code the HTTP status code received
45+
*/
46+
static HandshakeFailureReason httpStatusError(absl::string_view status_code) {
47+
return {Type::HttpStatusError, status_code};
48+
}
49+
50+
/**
51+
* Create a handshake failure reason for encoding errors.
52+
*/
53+
static HandshakeFailureReason encodeError() { return {Type::EncodeError, ""}; }
54+
55+
/**
56+
* Get a detailed human-readable error message.
57+
* @return detailed error message string
58+
*/
59+
std::string getDetailedName() const {
60+
switch (type_) {
61+
case Type::HttpStatusError:
62+
return absl::StrCat("HTTP handshake failed with status ", context_);
63+
case Type::EncodeError:
64+
return "HTTP handshake encode failed";
65+
}
66+
return "Unknown handshake failure";
67+
}
68+
69+
/**
70+
* Get the stat name suffix for this failure.
71+
* @return stat name suffix (e.g., "http.401", "encode_error")
72+
*/
73+
std::string getNameForStats() const {
74+
switch (type_) {
75+
case Type::HttpStatusError:
76+
return absl::StrCat("http.", context_);
77+
case Type::EncodeError:
78+
return "encode_error";
79+
}
80+
return "unknown";
81+
}
82+
83+
private:
84+
HandshakeFailureReason(Type type, absl::string_view context) : type_(type), context_(context) {}
85+
86+
Type type_;
87+
std::string context_;
88+
};
2589

2690
/**
2791
* Simple read filter for handling reverse connection handshake responses.
@@ -120,9 +184,9 @@ class RCConnectionWrapper : public Network::ConnectionCallbacks,
120184

121185
/**
122186
* Handle handshake failure.
123-
* @param message error message
187+
* @param reason the failure reason with type and context
124188
*/
125-
void onHandshakeFailure(const std::string& message);
189+
void onHandshakeFailure(const HandshakeFailureReason& reason);
126190

127191
/**
128192
* Perform graceful shutdown of the connection.
@@ -151,6 +215,12 @@ class RCConnectionWrapper : public Network::ConnectionCallbacks,
151215
bool handshake_completed_{false};
152216
bool shutdown_called_{false};
153217

218+
/**
219+
* Get the downstream extension for accessing stats.
220+
* @return pointer to ReverseTunnelInitiatorExtension
221+
*/
222+
ReverseTunnelInitiatorExtension* getDownstreamExtension() const;
223+
154224
public:
155225
// Dispatch incoming bytes to HTTP/1 codec.
156226
void dispatchHttp1(Buffer::Instance& buffer);

source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_tunnel_initiator_extension.cc

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "source/common/common/logger.h"
99
#include "source/common/stats/symbol_table.h"
10+
#include "source/common/stats/utility.h"
1011

1112
namespace Envoy {
1213
namespace Extensions {
@@ -281,6 +282,75 @@ absl::flat_hash_map<std::string, uint64_t> ReverseTunnelInitiatorExtension::getP
281282
return stats_map;
282283
}
283284

285+
void ReverseTunnelInitiatorExtension::incrementHandshakeStats(const std::string& cluster_id,
286+
bool success,
287+
const std::string& failure_reason) {
288+
// Check if detailed stats are enabled via configuration flag.
289+
if (!enable_detailed_stats_) {
290+
return;
291+
}
292+
293+
auto& stats_store = context_.scope();
294+
295+
// Get dispatcher name (worker name).
296+
std::string dispatcher_name = "main_thread"; // Default for main thread
297+
auto* local_registry = getLocalRegistry();
298+
if (local_registry) {
299+
// Dispatcher name is of the form "worker_x" where x is the worker index.
300+
dispatcher_name = local_registry->dispatcher().name();
301+
}
302+
303+
// Base stat name: <stat_prefix>.handshake
304+
// Labels: worker=<worker_name>, cluster=<cluster_id>, result=<success|failed>,
305+
// failure_reason=<failure_reason> (only for failures)
306+
std::string base_stat_name = fmt::format("{}.handshake", stat_prefix_);
307+
Stats::StatNameManagedStorage stat_storage(base_stat_name, stats_store.symbolTable());
308+
309+
// Create storage for all tag keys and values - must be kept alive for the entire function.
310+
Stats::StatNameManagedStorage worker_key_storage("worker", stats_store.symbolTable());
311+
Stats::StatNameManagedStorage worker_value_storage(dispatcher_name, stats_store.symbolTable());
312+
Stats::StatNameManagedStorage cluster_key_storage("cluster", stats_store.symbolTable());
313+
Stats::StatNameManagedStorage cluster_value_storage(cluster_id, stats_store.symbolTable());
314+
std::string result_value = success ? "success" : "failed";
315+
Stats::StatNameManagedStorage result_key_storage("result", stats_store.symbolTable());
316+
Stats::StatNameManagedStorage result_value_storage(result_value, stats_store.symbolTable());
317+
Stats::StatNameManagedStorage failure_reason_key_storage("failure_reason",
318+
stats_store.symbolTable());
319+
Stats::StatNameManagedStorage failure_reason_value_storage(failure_reason,
320+
stats_store.symbolTable());
321+
322+
// Now create tags vector using the stored StatNames.
323+
Stats::StatNameTagVector tags;
324+
325+
// Add worker tag.
326+
tags.push_back({worker_key_storage.statName(), worker_value_storage.statName()});
327+
328+
// Add cluster tag.
329+
if (!cluster_id.empty()) {
330+
tags.push_back({cluster_key_storage.statName(), cluster_value_storage.statName()});
331+
}
332+
333+
// Add result tag.
334+
tags.push_back({result_key_storage.statName(), result_value_storage.statName()});
335+
336+
// Add failure_reason tag for failures.
337+
if (!success && !failure_reason.empty()) {
338+
tags.push_back(
339+
{failure_reason_key_storage.statName(), failure_reason_value_storage.statName()});
340+
}
341+
342+
// Get or create the counter with tags and increment it.
343+
// The third parameter takes the tags vector (StatNameTagVectorOptConstRef).
344+
auto& handshake_counter =
345+
Stats::Utility::counterFromStatNames(stats_store, {stat_storage.statName()}, tags);
346+
handshake_counter.inc();
347+
348+
ENVOY_LOG(trace,
349+
"reverse_tunnel: incremented handshake stat {} with tags worker={}, cluster={}, "
350+
"result={}, failure_reason={}",
351+
base_stat_name, dispatcher_name, cluster_id, result_value, failure_reason);
352+
}
353+
284354
} // namespace ReverseConnection
285355
} // namespace Bootstrap
286356
} // namespace Extensions

source/extensions/bootstrap/reverse_tunnel/downstream_socket_interface/reverse_tunnel_initiator_extension.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,16 @@ class ReverseTunnelInitiatorExtension : public Server::BootstrapExtension,
9292
*/
9393
Stats::Scope& getStatsScope() const { return context_.scope(); }
9494

95+
/**
96+
* Increment handshake stats for reverse tunnel connections (per-worker only).
97+
* Only tracks stats if enable_detailed_stats flag is true.
98+
* @param cluster_id the cluster identifier for the connection
99+
* @param success true for successful handshake, false for failure
100+
* @param failure_reason optional failure reason (e.g., "encode_error", "http.401", "http.500")
101+
*/
102+
void incrementHandshakeStats(const std::string& cluster_id, bool success,
103+
const std::string& failure_reason = "");
104+
95105
/**
96106
* Test-only method to set the thread local slot for testing purposes.
97107
* This allows tests to inject a custom thread local registry and is used

0 commit comments

Comments
 (0)