Skip to content

Commit d439b7b

Browse files
Tnirppsrobot-piglet
authored andcommitted
feat s3api: Add URL encoding for S3 object keys
# S3 URL Encoding Fix ## Problem S3 API methods (`GetObject`, `PutObject`, etc.) failed with object keys containing special characters (spaces, Cyrillic, etc.) because they weren't being URL-encoded. `Bad URL, curl error: malformed input, url: https://mybucket.s3-some-site.awsornot.com/path/to/object with : spaces` ## Solution Added two functions for proper S3 object key encoding, compatible with AWS SDK behavior: - **`UrlEncodePathSegment()`** - encodes a single path segment, preserving RFC 3986 unreserved chars (`-`, `_`, `.`, `~`) and S3-safe chars (`$`, `&`, `,`, `:`, `=`, `@`) - **`EncodeS3Key()`** - encodes S3 object key, preserving `/` as path separators Implementation follows AWS SDK approach: [aws-sdk-cpp/URI.cpp](https://github.com/aws/aws-sdk-cpp/blob/master/aws-cpp-sdk-core/source/http/URI.cpp#L54-L60) **Examples:** ```cpp EncodeS3Key("folder/file with spaces.txt") // → "folder/file%20with%20spaces.txt" EncodeS3Key("folder/файл.txt") // → "folder/%D1%84%D0%B0%D0%B9%D0%BB.txt" ``` ## Changes - **universal/http/url.hpp, url.cpp**: New encoding functions - **universal/http/url_test.cpp**: new tests - **libraries/s3api/s3api_methods.cpp**: Apply `EncodeS3Key()` to all S3 methods Tests: протестировано CI --- Pull Request resolved: #1164 commit_hash:04d0362e7803510b3923ebcfa43f9284b635a57c
1 parent 4cfaf5f commit d439b7b

File tree

4 files changed

+211
-18
lines changed

4 files changed

+211
-18
lines changed

libraries/s3api/src/s3api/s3api_methods.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Request PutObject(
6363
Request req;
6464
req.method = clients::http::HttpMethod::kPut;
6565
req.bucket = bucket;
66-
req.req = path;
66+
req.req = http::EncodeS3Key(path);
6767

6868
req.headers[USERVER_NAMESPACE::http::headers::kContentLength] = std::to_string(data.size());
6969
req.headers[USERVER_NAMESPACE::http::headers::kContentType] = content_type;
@@ -80,15 +80,15 @@ Request DeleteObject(std::string_view bucket, std::string_view path) {
8080
Request req;
8181
req.method = clients::http::HttpMethod::kDelete;
8282
req.bucket = bucket;
83-
req.req = path;
83+
req.req = http::EncodeS3Key(path);
8484
return req;
8585
}
8686

8787
Request GetObject(std::string_view bucket, std::string_view path, std::optional<std::string_view> version) {
8888
Request req;
8989
req.method = clients::http::HttpMethod::kGet;
9090
req.bucket = bucket;
91-
req.req = path;
91+
req.req = http::EncodeS3Key(path);
9292

9393
if (version) {
9494
req.req += "?" + USERVER_NAMESPACE::http::MakeQuery({{"versionId", *version}});
@@ -101,7 +101,7 @@ Request GetObjectHead(std::string_view bucket, std::string_view path) {
101101
Request req;
102102
req.method = clients::http::HttpMethod::kHead;
103103
req.bucket = bucket;
104-
req.req = path;
104+
req.req = http::EncodeS3Key(path);
105105
return req;
106106
}
107107

@@ -152,9 +152,9 @@ Request CopyObject(
152152
Request req;
153153
req.method = clients::http::HttpMethod::kPut;
154154
req.bucket = dest_bucket;
155-
req.req = dest_key;
155+
req.req = http::EncodeS3Key(dest_key);
156156

157-
req.headers[headers::kAmzCopySource] = fmt::format("/{}/{}", source_bucket, source_key);
157+
req.headers[headers::kAmzCopySource] = fmt::format("/{}/{}", source_bucket, http::EncodeS3Key(source_key));
158158
req.headers[USERVER_NAMESPACE::http::headers::kContentType] = content_type;
159159

160160
return req;
@@ -167,7 +167,7 @@ Request CreateInternalApiRequest(
167167
Request result;
168168
result.method = clients::http::HttpMethod::kPost;
169169
result.bucket = bucket;
170-
result.req = fmt::format("{}?uploads", request.key);
170+
result.req = fmt::format("{}?uploads", http::EncodeS3Key(request.key));
171171

172172
if (request.content_type) {
173173
result.headers[http::headers::kContentType] = *request.content_type;
@@ -195,7 +195,11 @@ Request CreateInternalApiRequest(
195195
Request result;
196196
result.method = clients::http::HttpMethod::kDelete;
197197
result.bucket = bucket;
198-
result.req = fmt::format("{}?{}", request.key, http::MakeQuery({{query_args::kUploadId, request.upload_id}}));
198+
result.req = fmt::format(
199+
"{}?{}",
200+
http::EncodeS3Key(request.key),
201+
http::MakeQuery({{query_args::kUploadId, request.upload_id}})
202+
);
199203
return result;
200204
}
201205

@@ -206,7 +210,11 @@ Request CreateInternalApiRequest(
206210
Request result;
207211
result.method = clients::http::HttpMethod::kPost;
208212
result.bucket = bucket;
209-
result.req = fmt::format("{}?{}", request.key, http::MakeQuery({{query_args::kUploadId, request.upload_id}}));
213+
result.req = fmt::format(
214+
"{}?{}",
215+
http::EncodeS3Key(request.key),
216+
http::MakeQuery({{query_args::kUploadId, request.upload_id}})
217+
);
210218

211219
pugi::xml_document doc;
212220
auto multipart_upload_node = doc.append_child("CompleteMultipartUpload");
@@ -243,7 +251,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
243251
result.bucket = bucket;
244252
result.req = fmt::format(
245253
"{}?{}",
246-
request.key,
254+
http::EncodeS3Key(request.key),
247255
http::MakeQuery(
248256
{{query_args::kUploadId, request.upload_id}, {query_args::kPartNumber, std::to_string(request.part_number)}}
249257
)
@@ -256,7 +264,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
256264
Request result;
257265
result.method = clients::http::HttpMethod::kGet;
258266
result.bucket = bucket;
259-
result.req = request.key;
267+
result.req = http::EncodeS3Key(request.key);
260268

261269
http::Args params{{std::string{query_args::kUploadId}, request.upload_id}};
262270

@@ -268,7 +276,7 @@ Request CreateInternalApiRequest(const std::string& bucket, const multipart_uplo
268276
params.emplace(query_args::kPartNumberMarker, std::to_string(part_number_marker));
269277
}
270278

271-
result.req = fmt::format("{}?{}", request.key, http::MakeQuery(params));
279+
result.req = fmt::format("{}?{}", http::EncodeS3Key(request.key), http::MakeQuery(params));
272280

273281
return result;
274282
}

universal/include/userver/http/url.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,29 @@ struct DecomposedUrlView {
3838
/// @endcode
3939
std::string UrlEncode(std::string_view input_string);
4040

41+
/// @brief Encode a URL path segment (for use in S3 and similar APIs)
42+
/// @param input_string String to encode
43+
/// @returns URL-encoded string where special characters are encoded as %XX sequences,
44+
/// but path-safe characters (-, _, ., ~, $, &, ,, :, =, @) are kept unescaped
45+
/// @note This is less aggressive than UrlEncode and is suitable for encoding path segments
46+
/// where you want to preserve readability of certain special characters
47+
/// @code
48+
/// auto encoded = UrlEncodePathSegment("file-name_with spaces.txt");
49+
/// // Returns: "file-name_with%20spaces.txt"
50+
/// @endcode
51+
std::string UrlEncodePathSegment(std::string_view input_string);
52+
53+
/// @brief Encode an S3 object key for use in URL path
54+
/// @param key S3 object key (may contain '/' as part of the key name)
55+
/// @returns URL-encoded path where each segment is encoded but '/' separators are preserved
56+
/// @note S3 object keys can contain '/' which should be preserved as path separators,
57+
/// while other special characters should be encoded
58+
/// @code
59+
/// auto encoded = EncodeS3Key("folder/file with spaces.txt");
60+
/// // Returns: "folder/file%20with%20spaces.txt"
61+
/// @endcode
62+
std::string EncodeS3Key(std::string_view key);
63+
4164
using Args = std::unordered_map<std::string, std::string, utils::StrCaseHash>;
4265
using MultiArgs = std::multimap<std::string, std::string>;
4366
using PathArgs = std::unordered_map<std::string, std::string>;

universal/src/http/url.cpp

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,41 @@ constexpr std::string_view kSchemaSeparator = "://";
1818
constexpr char kQuerySeparator = '?';
1919
constexpr char kFragmentSeparator = '#';
2020

21+
// RFC 3986 unreserved characters plus some additional characters that are safe
22+
// in path segments for S3 and similar APIs
23+
bool IsPathSafeChar(unsigned char c) {
24+
// Alphanumeric characters are always safe
25+
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
26+
return true;
27+
}
28+
// RFC 3986 unreserved: - _ . ~
29+
// Additional path-safe characters for S3 compatibility: $ & , : = @
30+
switch (c) {
31+
case '-':
32+
case '_':
33+
case '.':
34+
case '~':
35+
case '$':
36+
case '&':
37+
case ',':
38+
case ':':
39+
case '=':
40+
case '@':
41+
return true;
42+
default:
43+
return false;
44+
}
45+
}
46+
47+
void EncodeByte(unsigned char symbol, std::string& result) {
48+
std::array<char, 3> bytes = {'%', 0, 0};
49+
bytes[1] = (symbol & 0xF0) / 16;
50+
bytes[1] += (bytes[1] > 9) ? 'A' - 10 : '0';
51+
bytes[2] = symbol & 0x0F;
52+
bytes[2] += (bytes[2] > 9) ? 'A' - 10 : '0';
53+
result.append(bytes.data(), bytes.size());
54+
}
55+
2156
void UrlEncodeTo(std::string_view input_string, std::string& result) {
2257
for (const char symbol : input_string) {
2358
if (isalnum(symbol)) {
@@ -37,17 +72,22 @@ void UrlEncodeTo(std::string_view input_string, std::string& result) {
3772
result.append(1, symbol);
3873
break;
3974
default:
40-
std::array<char, 3> bytes = {'%', 0, 0};
41-
bytes[1] = (symbol & 0xF0) / 16;
42-
bytes[1] += (bytes[1] > 9) ? 'A' - 10 : '0';
43-
bytes[2] = symbol & 0x0F;
44-
bytes[2] += (bytes[2] > 9) ? 'A' - 10 : '0';
45-
result.append(bytes.data(), bytes.size());
75+
EncodeByte(static_cast<unsigned char>(symbol), result);
4676
break;
4777
}
4878
}
4979
}
5080

81+
void UrlEncodePathSegmentTo(std::string_view input_string, std::string& result) {
82+
for (const unsigned char symbol : input_string) {
83+
if (IsPathSafeChar(symbol)) {
84+
result.append(1, symbol);
85+
} else {
86+
EncodeByte(symbol, result);
87+
}
88+
}
89+
}
90+
5191
} // namespace
5292

5393
std::string UrlEncode(std::string_view input_string) {
@@ -58,6 +98,34 @@ std::string UrlEncode(std::string_view input_string) {
5898
return result;
5999
}
60100

101+
std::string UrlEncodePathSegment(std::string_view input_string) {
102+
std::string result;
103+
result.reserve(3 * input_string.size());
104+
105+
UrlEncodePathSegmentTo(input_string, result);
106+
return result;
107+
}
108+
109+
std::string EncodeS3Key(std::string_view key) {
110+
std::string result;
111+
result.reserve(key.size());
112+
113+
size_t start = 0;
114+
while (start < key.size()) {
115+
size_t slash_pos = key.find('/', start);
116+
if (slash_pos == std::string::npos) {
117+
UrlEncodePathSegmentTo(key.substr(start), result);
118+
break;
119+
} else {
120+
UrlEncodePathSegmentTo(key.substr(start, slash_pos - start), result);
121+
result.push_back('/');
122+
start = slash_pos + 1;
123+
}
124+
}
125+
126+
return result;
127+
}
128+
61129
std::string UrlDecode(std::string_view range) { return impl::UrlDecode(utils::impl::InternalTag{}, range); }
62130

63131
namespace {

universal/src/http/url_test.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
USERVER_NAMESPACE_BEGIN
99

10+
using http::EncodeS3Key;
1011
using http::UrlEncode;
12+
using http::UrlEncodePathSegment;
1113

1214
namespace {
1315

@@ -22,6 +24,98 @@ TEST(UrlEncode, Latin) {
2224
EXPECT_EQ(str, UrlEncode(str));
2325
}
2426

27+
TEST(UrlEncodePathSegment, Empty) { EXPECT_EQ("", UrlEncodePathSegment("")); }
28+
29+
TEST(UrlEncodePathSegment, Latin) {
30+
constexpr std::string_view str = "SomeText1234567890";
31+
EXPECT_EQ(str, UrlEncodePathSegment(str));
32+
}
33+
34+
TEST(UrlEncodePathSegment, UnreservedChars) {
35+
// RFC 3986 unreserved: - _ . ~
36+
constexpr std::string_view str = "file-name_test.txt~backup";
37+
EXPECT_EQ(str, UrlEncodePathSegment(str));
38+
}
39+
40+
TEST(UrlEncodePathSegment, PathSafeSpecialChars) {
41+
// Additional path-safe: $ & , : = @
42+
constexpr std::string_view str = "price$100&tax:50=total@rate";
43+
EXPECT_EQ(str, UrlEncodePathSegment(str));
44+
}
45+
46+
TEST(UrlEncodePathSegment, SpacesAndSpecial) {
47+
constexpr std::string_view str = "file with spaces.txt";
48+
EXPECT_EQ("file%20with%20spaces.txt", UrlEncodePathSegment(str));
49+
}
50+
51+
TEST(UrlEncodePathSegment, SlashShouldNotBeEncoded) {
52+
// Slash should be encoded in path segment context
53+
constexpr std::string_view str = "folder/file";
54+
EXPECT_EQ("folder%2Ffile", UrlEncodePathSegment(str));
55+
}
56+
57+
TEST(UrlEncodePathSegment, QueryChars) {
58+
// ? and # should be encoded
59+
constexpr std::string_view str = "file?query#fragment";
60+
EXPECT_EQ("file%3Fquery%23fragment", UrlEncodePathSegment(str));
61+
}
62+
63+
TEST(EncodeS3Key, Empty) { EXPECT_EQ("", EncodeS3Key("")); }
64+
65+
TEST(EncodeS3Key, SimpleKey) {
66+
constexpr std::string_view key = "simple-key.txt";
67+
EXPECT_EQ(key, EncodeS3Key(key));
68+
}
69+
70+
TEST(EncodeS3Key, WithSpaces) {
71+
constexpr std::string_view key = "file with spaces.txt";
72+
EXPECT_EQ("file%20with%20spaces.txt", EncodeS3Key(key));
73+
}
74+
75+
TEST(EncodeS3Key, WithSlashes) {
76+
constexpr std::string_view key = "folder/subfolder/file.txt";
77+
EXPECT_EQ("folder/subfolder/file.txt", EncodeS3Key(key));
78+
}
79+
80+
TEST(EncodeS3Key, WithSlashesAndSpaces) {
81+
constexpr std::string_view key = "folder/file with spaces.txt";
82+
EXPECT_EQ("folder/file%20with%20spaces.txt", EncodeS3Key(key));
83+
}
84+
85+
TEST(EncodeS3Key, ComplexKey) {
86+
constexpr std::string_view key = "path/to/my file (copy).txt";
87+
// Parentheses are encoded as they're not in our path-safe set (RFC 3986 unreserved + S3-safe)
88+
EXPECT_EQ("path/to/my%20file%20%28copy%29.txt", EncodeS3Key(key));
89+
}
90+
91+
TEST(EncodeS3Key, SpecialCharsInSegments) {
92+
// Path-safe chars should be preserved, others encoded
93+
constexpr std::string_view key = "folder/file-name_with.dots~and$symbols&commas,colons:equals=at@sign";
94+
EXPECT_EQ("folder/file-name_with.dots~and$symbols&commas,colons:equals=at@sign", EncodeS3Key(key));
95+
}
96+
97+
TEST(EncodeS3Key, LeadingSlash) {
98+
// Leading slash should be preserved as S3 path separator
99+
constexpr std::string_view key = "/leading/slash.txt";
100+
EXPECT_EQ("/leading/slash.txt", EncodeS3Key(key));
101+
}
102+
103+
TEST(EncodeS3Key, MultipleSpacesInPath) {
104+
constexpr std::string_view key = "folder with spaces/file with spaces.txt";
105+
EXPECT_EQ("folder%20with%20spaces/file%20with%20spaces.txt", EncodeS3Key(key));
106+
}
107+
108+
TEST(EncodeS3Key, UnicodeCharacters) {
109+
// Unicode characters should be percent-encoded
110+
// UTF-8 encoding of Cyrillic 'ф' (U+0444) is D1 84, 'а' (U+0430) is D0 B0, 'й' (U+0439) is D0 B9
111+
constexpr std::string_view key = "folder/файл.txt";
112+
auto result = EncodeS3Key(key);
113+
// Check that Cyrillic characters are percent-encoded
114+
EXPECT_EQ(result, "folder/%D1%84%D0%B0%D0%B9%D0%BB.txt");
115+
EXPECT_TRUE(result.find("folder/") != std::string::npos);
116+
EXPECT_TRUE(result.find(".txt") != std::string::npos);
117+
}
118+
25119
TEST(UrlEncode, Special) {
26120
constexpr std::string_view str = "Text with spaces,?&=";
27121
EXPECT_EQ("Text%20with%20spaces%2C%3F%26%3D", UrlEncode(str));

0 commit comments

Comments
 (0)