Skip to content

Commit 63cc846

Browse files
Merge pull request #998 from pkwarren:pkw/string-ext-quote-reverse
PiperOrigin-RevId: 899240904
2 parents 664c31b + 6ca2c4d commit 63cc846

File tree

5 files changed

+209
-4
lines changed

5 files changed

+209
-4
lines changed

conformance/src/test/java/dev/cel/conformance/BUILD.bazel

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,7 @@ _TESTS_TO_SKIP_LEGACY = [
119119

120120
# Skip until fixed.
121121
"fields/qualified_identifier_resolution/map_value_repeat_key_heterogeneous",
122-
# TODO: Add strings.format and strings.quote.
123-
"string_ext/quote",
122+
# TODO: Add strings.format.
124123
"string_ext/format",
125124
"string_ext/format_errors",
126125

@@ -148,8 +147,7 @@ _TESTS_TO_SKIP_LEGACY = [
148147
]
149148

150149
_TESTS_TO_SKIP_PLANNER = [
151-
# TODO: Add strings.format and strings.quote.
152-
"string_ext/quote",
150+
# TODO: Add strings.format.
153151
"string_ext/format",
154152
"string_ext/format_errors",
155153

extensions/src/main/java/dev/cel/extensions/CelStringExtensions.java

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,17 @@ public enum Function {
137137
SimpleType.STRING,
138138
SimpleType.STRING)),
139139
CelFunctionBinding.from("string_lower_ascii", String.class, Ascii::toLowerCase)),
140+
QUOTE(
141+
CelFunctionDecl.newFunctionDeclaration(
142+
"strings.quote",
143+
CelOverloadDecl.newGlobalOverload(
144+
"strings_quote",
145+
"Takes the given string and makes it safe to print (without any formatting"
146+
+ " due to escape sequences). If any invalid UTF-8 characters are"
147+
+ " encountered, they are replaced with \\uFFFD.",
148+
SimpleType.STRING,
149+
ImmutableList.of(SimpleType.STRING))),
150+
CelFunctionBinding.from("strings_quote", String.class, CelStringExtensions::quote)),
140151
REPLACE(
141152
CelFunctionDecl.newFunctionDeclaration(
142153
"replace",
@@ -164,6 +175,16 @@ public enum Function {
164175
"string_replace_string_string_int",
165176
ImmutableList.of(String.class, String.class, String.class, Long.class),
166177
CelStringExtensions::replace)),
178+
REVERSE(
179+
CelFunctionDecl.newFunctionDeclaration(
180+
"reverse",
181+
CelOverloadDecl.newMemberOverload(
182+
"string_reverse",
183+
"Returns a new string whose characters are the same as the target string,"
184+
+ " only formatted in reverse order.",
185+
SimpleType.STRING,
186+
SimpleType.STRING)),
187+
CelFunctionBinding.from("string_reverse", String.class, CelStringExtensions::reverse)),
167188
SPLIT(
168189
CelFunctionDecl.newFunctionDeclaration(
169190
"split",
@@ -449,6 +470,64 @@ private static Long lastIndexOf(CelCodePointArray str, CelCodePointArray substr,
449470
return -1L;
450471
}
451472

473+
private static String quote(String s) {
474+
StringBuilder sb = new StringBuilder(s.length() + 2);
475+
sb.append('"');
476+
for (int i = 0; i < s.length(); ) {
477+
int codePoint = s.codePointAt(i);
478+
if (isMalformedUtf16(s, i, codePoint)) {
479+
sb.append('\uFFFD');
480+
i++;
481+
continue;
482+
}
483+
switch (codePoint) {
484+
case '\u0007':
485+
sb.append("\\a");
486+
break;
487+
case '\b':
488+
sb.append("\\b");
489+
break;
490+
case '\f':
491+
sb.append("\\f");
492+
break;
493+
case '\n':
494+
sb.append("\\n");
495+
break;
496+
case '\r':
497+
sb.append("\\r");
498+
break;
499+
case '\t':
500+
sb.append("\\t");
501+
break;
502+
case '\u000B':
503+
sb.append("\\v");
504+
break;
505+
case '\\':
506+
sb.append("\\\\");
507+
break;
508+
case '"':
509+
sb.append("\\\"");
510+
break;
511+
default:
512+
sb.appendCodePoint(codePoint);
513+
break;
514+
}
515+
i += Character.charCount(codePoint);
516+
}
517+
sb.append('"');
518+
return sb.toString();
519+
}
520+
521+
private static boolean isMalformedUtf16(String s, int index, int codePoint) {
522+
char currentChar = s.charAt(index);
523+
if (Character.isLowSurrogate(currentChar)) {
524+
return true;
525+
}
526+
// Check for unpaired high surrogate
527+
return Character.isHighSurrogate(currentChar)
528+
&& (index + 1 >= s.length() || !Character.isLowSurrogate(s.charAt(index + 1)));
529+
}
530+
452531
private static String replaceAll(Object[] objects) {
453532
return replace((String) objects[0], (String) objects[1], (String) objects[2], -1);
454533
}
@@ -504,6 +583,10 @@ private static String replace(String text, String searchString, String replaceme
504583
return sb.append(textCpa.slice(start, textCpa.length())).toString();
505584
}
506585

586+
private static String reverse(String s) {
587+
return new StringBuilder(s).reverse().toString();
588+
}
589+
507590
private static List<String> split(String str, String separator) {
508591
return split(str, separator, Integer.MAX_VALUE);
509592
}

extensions/src/main/java/dev/cel/extensions/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,19 @@ Examples:
474474
'TacoCat'.lowerAscii() // returns 'tacocat'
475475
'TacoCÆt Xii'.lowerAscii() // returns 'tacocÆt xii'
476476

477+
### Quote
478+
479+
Takes the given string and makes it safe to print (without any formatting due
480+
to escape sequences).
481+
If any invalid UTF-8 characters are encountered, they are replaced with \uFFFD.
482+
483+
strings.quote(<string>)
484+
485+
Examples:
486+
487+
strings.quote('single-quote with "double quote"') // returns '"single-quote with \"double quote\""'
488+
strings.quote("two escape sequences \a\n") // returns '"two escape sequences \\a\\n"'
489+
477490
### Replace
478491

479492
Returns a new string based on the target, which replaces the occurrences of a
@@ -493,6 +506,20 @@ Examples:
493506
'hello hello'.replace('he', 'we', 1) // returns 'wello hello'
494507
'hello hello'.replace('he', 'we', 0) // returns 'hello hello'
495508

509+
### Reverse
510+
511+
Returns a new string whose characters are the same as the target string, only
512+
formatted in reverse order.
513+
This function relies on converting strings to Unicode code point arrays in
514+
order to reverse.
515+
516+
<string>.reverse() -> <string>
517+
518+
Examples:
519+
520+
'gums'.reverse() // returns 'smug'
521+
'John Smith'.reverse() // returns 'htimS nhoJ'
522+
496523
### Split
497524

498525
Returns a mutable list of strings split from the input by the given separator. The

extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ public void getAllFunctionNames() {
168168
"join",
169169
"lastIndexOf",
170170
"lowerAscii",
171+
"strings.quote",
171172
"replace",
172173
"split",
173174
"substring",

extensions/src/test/java/dev/cel/extensions/CelStringExtensionsTest.java

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ public void library() {
7070
"lastIndexOf",
7171
"lowerAscii",
7272
"replace",
73+
"reverse",
7374
"split",
75+
"strings.quote",
7476
"substring",
7577
"trim",
7678
"upperAscii");
@@ -1467,6 +1469,100 @@ public void stringExtension_functionSubset_success() throws Exception {
14671469
assertThat(evaluatedResult).isEqualTo(true);
14681470
}
14691471

1472+
@Test
1473+
@TestParameters("{string: 'abcd', expectedResult: 'dcba'}")
1474+
@TestParameters("{string: '', expectedResult: ''}")
1475+
@TestParameters("{string: 'a', expectedResult: 'a'}")
1476+
@TestParameters("{string: 'hello world', expectedResult: 'dlrow olleh'}")
1477+
@TestParameters("{string: 'ab가cd', expectedResult: 'dc가ba'}")
1478+
public void reverse_success(String string, String expectedResult) throws Exception {
1479+
CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst();
1480+
CelRuntime.Program program = RUNTIME.createProgram(ast);
1481+
1482+
Object evaluatedResult = program.eval(ImmutableMap.of("s", string));
1483+
1484+
assertThat(evaluatedResult).isEqualTo(expectedResult);
1485+
}
1486+
1487+
@Test
1488+
@TestParameters("{string: '😁😑😦', expectedResult: '😦😑😁'}")
1489+
@TestParameters(
1490+
"{string: '\u180e\u200b\u200c\u200d\u2060\ufeff', expectedResult:"
1491+
+ " '\ufeff\u2060\u200d\u200c\u200b\u180e'}")
1492+
public void reverse_unicode(String string, String expectedResult) throws Exception {
1493+
CelAbstractSyntaxTree ast = COMPILER.compile("s.reverse()").getAst();
1494+
CelRuntime.Program program = RUNTIME.createProgram(ast);
1495+
1496+
Object evaluatedResult = program.eval(ImmutableMap.of("s", string));
1497+
1498+
assertThat(evaluatedResult).isEqualTo(expectedResult);
1499+
}
1500+
1501+
@Test
1502+
@TestParameters("{string: 'hello', expectedResult: '\"hello\"'}")
1503+
@TestParameters("{string: '', expectedResult: '\"\"'}")
1504+
@TestParameters(
1505+
"{string: 'contains \\\"quotes\\\"', expectedResult: '\"contains \\\\\\\"quotes\\\\\\\"\"'}")
1506+
@TestParameters("{string: 'ends with \\\\', expectedResult: '\"ends with \\\\\\\\\"'}")
1507+
@TestParameters("{string: '\\\\ starts with', expectedResult: '\"\\\\\\\\ starts with\"'}")
1508+
public void quote_success(String string, String expectedResult) throws Exception {
1509+
CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst();
1510+
CelRuntime.Program program = RUNTIME.createProgram(ast);
1511+
1512+
Object evaluatedResult = program.eval(ImmutableMap.of("s", string));
1513+
1514+
assertThat(evaluatedResult).isEqualTo(expectedResult);
1515+
}
1516+
1517+
@Test
1518+
public void quote_singleWithDoubleQuotes() throws Exception {
1519+
String expr = "strings.quote('single-quote with \"double quote\"')";
1520+
String expected = "\"\\\"single-quote with \\\\\\\"double quote\\\\\\\"\\\"\"";
1521+
CelAbstractSyntaxTree ast = COMPILER.compile(expr + " == " + expected).getAst();
1522+
CelRuntime.Program program = RUNTIME.createProgram(ast);
1523+
1524+
Object evaluatedResult = program.eval();
1525+
1526+
assertThat(evaluatedResult).isEqualTo(true);
1527+
}
1528+
1529+
@Test
1530+
public void quote_escapesSpecialCharacters() throws Exception {
1531+
CelAbstractSyntaxTree ast = COMPILER.compile("strings.quote(s)").getAst();
1532+
CelRuntime.Program program = RUNTIME.createProgram(ast);
1533+
1534+
Object evaluatedResult =
1535+
program.eval(
1536+
ImmutableMap.of("s", "\u0007bell\u000Bvtab\bback\ffeed\rret\nline\ttab\\slash 가 😁"));
1537+
1538+
assertThat(evaluatedResult)
1539+
.isEqualTo("\"\\abell\\vvtab\\bback\\ffeed\\rret\\nline\\ttab\\\\slash 가 😁\"");
1540+
}
1541+
1542+
@Test
1543+
public void quote_escapesMalformed_endWithHighSurrogate() throws Exception {
1544+
CelRuntime.Program program =
1545+
RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst());
1546+
assertThat(program.eval(ImmutableMap.of("s", "end with high surrogate \uD83D")))
1547+
.isEqualTo("\"end with high surrogate \uFFFD\"");
1548+
}
1549+
1550+
@Test
1551+
public void quote_escapesMalformed_unpairedHighSurrogate() throws Exception {
1552+
CelRuntime.Program program =
1553+
RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst());
1554+
assertThat(program.eval(ImmutableMap.of("s", "bad pair \uD83DA")))
1555+
.isEqualTo("\"bad pair \uFFFDA\"");
1556+
}
1557+
1558+
@Test
1559+
public void quote_escapesMalformed_unpairedLowSurrogate() throws Exception {
1560+
CelRuntime.Program program =
1561+
RUNTIME.createProgram(COMPILER.compile("strings.quote(s)").getAst());
1562+
assertThat(program.eval(ImmutableMap.of("s", "bad pair \uDC00A")))
1563+
.isEqualTo("\"bad pair \uFFFDA\"");
1564+
}
1565+
14701566
@Test
14711567
public void stringExtension_compileUnallowedFunction_throws() {
14721568
CelCompiler celCompiler =

0 commit comments

Comments
 (0)