Skip to content

Commit 05d2a64

Browse files
author
lexeyo
committed
feat postgres: new error fallback strategy in postgres connections auto-limiter
Implements a new error fallback strategy for ConnlimitWatchdog. It can be enabled via the pg-connlimit-watchdog-new-fallback userver experiment. commit_hash:ad81a29c7e4008046c24e098af4709c34d9ed164
1 parent 6fb4c09 commit 05d2a64

File tree

6 files changed

+52
-4
lines changed

6 files changed

+52
-4
lines changed

core/include/userver/utils/impl/userver_experiments.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ extern UserverExperiment kServerSelectionTimeoutExperiment;
5959
extern UserverExperiment kPgCcExperiment;
6060
extern UserverExperiment kYdbDeadlinePropagationExperiment;
6161
extern UserverExperiment kWaitAllCheckedUpgradeExperiment;
62+
// Gates the new ConnlimitWatchdog error fallback: when enabled, DoStep() halves the previous
// connlimit (bounded below by min_fallback_connections_) instead of assigning kFallbackConnlimit.
extern UserverExperiment kPgConnlimitWatchdogFallbackExperiment;
6263

6364
} // namespace utils::impl
6465

core/src/utils/impl/userver_experiments.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ UserverExperiment kServerSelectionTimeoutExperiment{"mongo-server-selection-time
9595
UserverExperiment kPgCcExperiment{"pg-cc"};
9696
UserverExperiment kYdbDeadlinePropagationExperiment{"ydb-deadline-propagation"};
9797
UserverExperiment kWaitAllCheckedUpgradeExperiment{"wait-all-checked-upgrade"};
98+
// Experiment constructed with the name "pg-connlimit-watchdog-new-fallback";
// ConnlimitWatchdog::DoStep() checks IsEnabled() on it to choose between the old and the new error fallback.
UserverExperiment kPgConnlimitWatchdogFallbackExperiment{"pg-connlimit-watchdog-new-fallback"};
9899

99100
} // namespace utils::impl
100101

postgresql/src/storages/postgres/connlimit_watchdog.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
#include <algorithm>
12
#include <storages/postgres/connlimit_watchdog.hpp>
23

34
#include <storages/postgres/detail/cluster_impl.hpp>
45
#include <userver/utils/from_string.hpp>
6+
#include <userver/utils/impl/userver_experiments.hpp>
57

68
USERVER_NAMESPACE_BEGIN
79

@@ -39,6 +41,7 @@ ConnlimitWatchdog::ConnlimitWatchdog(
3941
detail::ClusterImpl& cluster,
4042
testsuite::TestsuiteTasks& testsuite_tasks,
4143
int shard_number,
44+
std::size_t min_fallback_connections,
4245
std::function<void()> on_new_connlimit,
4346
std::string host_name
4447
)
@@ -47,6 +50,7 @@ ConnlimitWatchdog::ConnlimitWatchdog(
4750
on_new_connlimit_(std::move(on_new_connlimit)),
4851
testsuite_tasks_(testsuite_tasks),
4952
shard_number_(shard_number),
53+
min_fallback_connections_(std::max(min_fallback_connections, kDefaultPoolMinSize)),
5054
host_name_(std::move(host_name))
5155
{}
5256

@@ -158,7 +162,13 @@ void ConnlimitWatchdog::DoStep(
158162
* connlimit value. The period with "too low max_connections" should be
159163
* relatively small.
160164
*/
161-
connlimit_ = kFallbackConnlimit;
165+
if (!USERVER_NAMESPACE::utils::impl::kPgConnlimitWatchdogFallbackExperiment.IsEnabled()) {
166+
connlimit_ = kFallbackConnlimit;
167+
} else {
168+
const auto previous_connlimit = connlimit_.load();
169+
connlimit_ = std::max(previous_connlimit / 2, min_fallback_connections_);
170+
steps_with_errors_ = 0;
171+
}
162172
}
163173
}
164174

postgresql/src/storages/postgres/connlimit_watchdog.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class ConnlimitWatchdog final {
2323
detail::ClusterImpl& cluster,
2424
testsuite::TestsuiteTasks& testsuite_tasks,
2525
int shard_number,
26+
std::size_t min_fallback_connections,
2627
std::function<void()> on_new_connlimit,
2728
std::string host_name = hostinfo::blocking::GetRealHostName()
2829
);
@@ -55,6 +56,7 @@ class ConnlimitWatchdog final {
5556
int steps_with_errors_{0};
5657
USERVER_NAMESPACE::utils::PeriodicTask periodic_;
5758
int shard_number_;
59+
std::size_t min_fallback_connections_;
5860
std::string host_name_;
5961
};
6062

postgresql/src/storages/postgres/detail/cluster_impl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,13 @@ ClusterImpl::ClusterImpl(
100100
ei_settings_(ei_settings),
101101
metrics_(std::move(metrics)),
102102
rr_host_idx_(0),
103-
connlimit_watchdog_(*this, testsuite_tasks, shard_number, [this]() { OnConnlimitChanged(); })
103+
connlimit_watchdog_(
104+
*this,
105+
testsuite_tasks,
106+
shard_number,
107+
cluster_settings.pool_settings.min_size,
108+
[this]() { OnConnlimitChanged(); }
109+
)
104110
{
105111
CreateTopology(dsns);
106112

postgresql/src/storages/postgres/tests/connlimit_watchdog_pgtest.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ pg::Transaction GetTransaction(pgd::ClusterImpl& cluster) {
7070

7171
// Number of connections subtracted from the total of 100 below.
// NOTE(review): presumably the share the database keeps in reserve in the testsuite setup — confirm.
constexpr size_t kReservedConn = 5;
7272
// Connlimit the FallbackConnlimit test expects after one successful step of a single watchdog.
constexpr size_t kTestsuiteConnlimit = 100 - kReservedConn;
73+
// Passed to ConnlimitWatchdog as min_fallback_connections by MakeConnlimitWatchdog().
constexpr size_t kFallbackConnlimit = 17;
74+
// The FallbackConnlimit test performs kMaxStepsWithError + 1 failing steps before it expects the limit to halve.
constexpr size_t kMaxStepsWithError = 3;
7375

7476
enum class MigrationVersion { kV1 = 0, kV2 = 1, kCount };
7577

@@ -93,20 +95,26 @@ class Watchdog : public PostgreSQLBase {
9395

9496
// Creates a watchdog via MakeConnlimitWatchdog() (default host name), runs a single StepV1()
// and returns the connlimit the watchdog reports afterwards.
std::size_t DoStepV1() {
9597
// This watchdog uses the native host, like the watchdog in ClusterImpl.
96-
pg::ConnlimitWatchdog connlimit_watchdog_v1{cluster_, testsuite_tasks_, kShardNumber, [] {}};
98+
auto connlimit_watchdog_v1 = MakeConnlimitWatchdog();
9799
connlimit_watchdog_v1.StepV1();
98100
return connlimit_watchdog_v1.GetConnlimit();
99101
}
100102

101103
// Creates a watchdog constructed with the host name "host2", runs a single StepV2()
// and returns the connlimit the watchdog reports afterwards.
std::size_t DoStepV2() {
102104
// Use a different host name to emulate a different host.
103-
pg::ConnlimitWatchdog connlimit_watchdog_v2{cluster_, testsuite_tasks_, kShardNumber, [] {}, "host2"};
105+
auto connlimit_watchdog_v2 = MakeConnlimitWatchdog("host2");
104106
connlimit_watchdog_v2.StepV2();
105107
return connlimit_watchdog_v2.GetConnlimit();
106108
}
107109

110+
// Builds a ConnlimitWatchdog for the fixture's cluster and testsuite tasks, with kFallbackConnlimit as
// min_fallback_connections and a no-op on_new_connlimit callback.
// host_name defaults to hostinfo::blocking::GetRealHostName(); tests pass another name to emulate another host.
// NOTE(review): host_name is forwarded as an lvalue and therefore copied; std::move(host_name) would avoid the copy.
pg::ConnlimitWatchdog MakeConnlimitWatchdog(std::string host_name = hostinfo::blocking::GetRealHostName()) {
111+
return pg::ConnlimitWatchdog{cluster_, testsuite_tasks_, kShardNumber, kFallbackConnlimit, [] {}, host_name};
112+
}
113+
108114
// Gives test bodies access to the fixture's cluster.
pgd::ClusterImpl& GetCluster() { return cluster_; }
109115

116+
// Switches kPgConnlimitWatchdogFallbackExperiment on through scope_ for the tests of this fixture.
// NOTE(review): the base class SetUp() is not invoked here — confirm PostgreSQLBase has no setup of its own.
void SetUp() override { scope_.Set(utils::impl::kPgConnlimitWatchdogFallbackExperiment, true); }
117+
110118
private:
111119
void ClearTable() {
112120
auto t = GetTransaction(cluster_);
@@ -116,6 +124,7 @@ class Watchdog : public PostgreSQLBase {
116124

117125
testsuite::TestsuiteTasks testsuite_tasks_{true};
118126
pgd::ClusterImpl cluster_;
127+
utils::impl::UserverExperimentsScope scope_;
119128
};
120129

121130
UTEST_F(Watchdog, Basic) {
@@ -215,4 +224,23 @@ UTEST_F(Watchdog, MultiUsersWithV2) {
215224
EXPECT_EQ(kTestsuiteConnlimit / 2, DoStepV2());
216225
}
217226

227+
// Checks the new error fallback: after the cluster is pointed at an unavailable DSN, the connlimit is
// expected to halve after every kMaxStepsWithError + 1 failing steps and to end up at kFallbackConnlimit.
UTEST_F(Watchdog, FallbackConnlimit) {
228+
auto expected_connlimit = kTestsuiteConnlimit;
229+
auto watchdog = MakeConnlimitWatchdog();
230+
// Do a single step with a working connection
231+
watchdog.StepV2();
232+
// Switch the cluster to a non-working connection
233+
GetCluster().SetDsnList({GetUnavailableDsn()});
234+
235+
// Each round: the limit must still equal expected_connlimit before each of the
// kMaxStepsWithError + 1 steps; the next round then expects half of it (integer division).
// NOTE(review): presumably the watchdog applies the fallback only once more than kMaxStepsWithError
// steps have failed — the threshold itself is not visible here, confirm against DoStep().
while (expected_connlimit >= kFallbackConnlimit) {
236+
for (size_t i = 0; i <= kMaxStepsWithError; ++i) {
237+
ASSERT_EQ(expected_connlimit, watchdog.GetConnlimit());
238+
watchdog.StepV2();
239+
}
240+
expected_connlimit /= 2;
241+
}
242+
243+
// Halving must stop at the floor handed to the watchdog.
// NOTE(review): assumes kFallbackConnlimit is not below kDefaultPoolMinSize, because the watchdog
// keeps the larger of the two as its floor.
ASSERT_EQ(kFallbackConnlimit, watchdog.GetConnlimit());
244+
}
245+
218246
USERVER_NAMESPACE_END

0 commit comments

Comments
 (0)