2121 */
2222package com .github .packageurl .internal ;
2323
24+ import static java .lang .Byte .toUnsignedInt ;
25+
2426import com .github .packageurl .ValidationException ;
25- import java .nio .ByteBuffer ;
2627import java .nio .charset .StandardCharsets ;
27- import java .util .stream .IntStream ;
2828
2929/**
3030 * String utility for validation and encoding.
@@ -35,6 +35,24 @@ public final class StringUtil {
3535
/** ASCII percent sign; marks the start of a percent-encoded {@code %XX} triplet. */
private static final byte PERCENT_CHAR = '%';

/**
 * Lookup table indexed by ASCII code (0-127). An entry is {@code true} when the
 * character belongs to the unreserved RFC 3986 set: digits, ASCII letters,
 * {@code '-'}, {@code '.'}, {@code '_'} and {@code '~'}.
 */
private static final boolean[] UNRESERVED_CHARS = new boolean[128];

static {
    // Entries default to false; only the unreserved characters are switched on.
    for (char c = '0'; c <= '9'; c++) {
        UNRESERVED_CHARS[c] = true;
    }
    for (char c = 'A'; c <= 'Z'; c++) {
        UNRESERVED_CHARS[c] = true;
    }
    for (char c = 'a'; c <= 'z'; c++) {
        UNRESERVED_CHARS[c] = true;
    }
    UNRESERVED_CHARS['-'] = true;
    UNRESERVED_CHARS['.'] = true;
    UNRESERVED_CHARS['_'] = true;
    UNRESERVED_CHARS['~'] = true;
}
55+
/**
 * Not instantiable: this class only exposes static helpers.
 *
 * @throws AssertionError always
 */
private StringUtil() {
    throw new AssertionError("Cannot instantiate StringUtil");
}
@@ -48,21 +66,16 @@ private StringUtil() {
4866 * @since 2.0.0
4967 */
5068 public static String toLowerCase (String s ) {
51- if (s == null ) {
52- return null ;
53- }
54-
5569 int pos = indexOfFirstUpperCaseChar (s );
5670
5771 if (pos == -1 ) {
5872 return s ;
5973 }
6074
6175 char [] chars = s .toCharArray ();
62- int length = chars .length ;
6376
64- for (int i = pos ; i < length ; i ++) {
65- chars [i ] = (char ) toLowerCase (chars [i ]);
77+ for (int length = chars . length ; pos < length ; pos ++) {
78+ chars [pos ] = (char ) toLowerCase (chars [pos ]);
6679 }
6780
6881 return new String (chars );
@@ -77,26 +90,22 @@ public static String toLowerCase(String s) {
7790 * @since 2.0.0
7891 */
7992 public static String percentDecode (final String source ) {
80- if (source == null || source . isEmpty () ) {
93+ if (source . indexOf ( PERCENT_CHAR ) == - 1 ) {
8194 return source ;
8295 }
8396
8497 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
85- int i = indexOfFirstPercentChar (bytes );
86-
87- if (i == -1 ) {
88- return source ;
89- }
9098
99+ int readPos = indexOfFirstPercentChar (bytes );
100+ int writePos = readPos ;
91101 int length = bytes .length ;
92- int writePos = i ;
93- while (i < length ) {
94- byte b = bytes [i ];
102+ while (readPos < length ) {
103+ byte b = bytes [readPos ];
95104 if (b == PERCENT_CHAR ) {
96- bytes [writePos ++] = percentDecode (bytes , i ++);
97- i += 2 ;
105+ bytes [writePos ++] = percentDecode (bytes , readPos ++);
106+ readPos += 2 ;
98107 } else {
99- bytes [writePos ++] = bytes [i ++];
108+ bytes [writePos ++] = bytes [readPos ++];
100109 }
101110 }
102111
@@ -112,34 +121,29 @@ public static String percentDecode(final String source) {
112121 * @since 2.0.0
113122 */
114123 public static String percentEncode (final String source ) {
115- if (source == null || source .isEmpty ()) {
116- return source ;
117- }
118- byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
119- int start = indexOfFirstNonAsciiChar (bytes );
120- if (start == -1 ) {
124+ if (!shouldEncode (source )) {
121125 return source ;
122126 }
123- int length = bytes .length ;
124- ByteBuffer buffer = ByteBuffer .allocate (start + ((length - start ) * 3 ));
125- if (start != 0 ) {
126- buffer .put (bytes , 0 , start );
127- }
128127
129- for (int i = start ; i < length ; i ++) {
130- byte b = bytes [i ];
131- if (shouldEncode (b )) {
132- byte b1 = (byte ) Character .toUpperCase (Character .forDigit ((b >> 4 ) & 0xF , 16 ));
133- byte b2 = (byte ) Character .toUpperCase (Character .forDigit (b & 0xF , 16 ));
134- buffer .put (PERCENT_CHAR );
135- buffer .put (b1 );
136- buffer .put (b2 );
128+ byte [] src = source .getBytes (StandardCharsets .UTF_8 );
129+ byte [] dest = new byte [3 * src .length ];
130+
131+ int writePos = 0 ;
132+ for (byte b : src ) {
133+ if (shouldEncode (toUnsignedInt (b ))) {
134+ dest [writePos ++] = PERCENT_CHAR ;
135+ dest [writePos ++] = toHexDigit (b >> 4 );
136+ dest [writePos ++] = toHexDigit (b );
137137 } else {
138- buffer . put ( b ) ;
138+ dest [ writePos ++] = b ;
139139 }
140140 }
141141
142- return new String (buffer .array (), 0 , buffer .position (), StandardCharsets .UTF_8 );
142+ return new String (dest , 0 , writePos , StandardCharsets .UTF_8 );
143+ }
144+
145+ private static byte toHexDigit (int b ) {
146+ return (byte ) Character .toUpperCase (Character .forDigit (b & 0xF , 16 ));
143147 }
144148
145149 /**
@@ -178,14 +182,34 @@ public static boolean isValidCharForKey(int c) {
178182 return (isAlphaNumeric (c ) || c == '.' || c == '_' || c == '-' );
179183 }
180184
185+ /**
186+ * Returns {@code true} if the character is in the unreserved RFC 3986 set.
187+ * <p>
188+ * <strong>Warning</strong>: Profiling shows that the performance of {@link #percentEncode} relies heavily on this method.
189+ * Modify with care.
190+ * </p>
191+ * @param c non-negative integer.
192+ */
181193 private static boolean isUnreserved (int c ) {
182- return ( isValidCharForKey ( c ) || c == '~' ) ;
194+ return c < 128 && UNRESERVED_CHARS [ c ] ;
183195 }
184196
197+ /**
198+ * @param c non-negative integer
199+ */
185200 private static boolean shouldEncode (int c ) {
186201 return !isUnreserved (c );
187202 }
188203
204+ private static boolean shouldEncode (String s ) {
205+ for (int i = 0 , length = s .length (); i < length ; i ++) {
206+ if (shouldEncode (s .charAt (i ))) {
207+ return true ;
208+ }
209+ }
210+ return false ;
211+ }
212+
189213 private static boolean isAlpha (int c ) {
190214 return (isLowerCase (c ) || isUpperCase (c ));
191215 }
@@ -195,7 +219,7 @@ private static boolean isAlphaNumeric(int c) {
195219 }
196220
197221 private static boolean isUpperCase (int c ) {
198- return ( c >= 'A' && c <= 'Z' ) ;
222+ return 'A' <= c && c <= 'Z' ;
199223 }
200224
201225 private static boolean isLowerCase (int c ) {
@@ -207,34 +231,21 @@ private static int toLowerCase(int c) {
207231 }
208232
209233 private static int indexOfFirstUpperCaseChar (String s ) {
210- int length = s .length ();
211-
212- for (int i = 0 ; i < length ; i ++) {
234+ for (int i = 0 , length = s .length (); i < length ; i ++) {
213235 if (isUpperCase (s .charAt (i ))) {
214236 return i ;
215237 }
216238 }
217-
218239 return -1 ;
219240 }
220241
221- private static int indexOfFirstNonAsciiChar (byte [] bytes ) {
222- int length = bytes .length ;
223- int start = -1 ;
224- for (int i = 0 ; i < length ; i ++) {
225- if (shouldEncode (bytes [i ])) {
226- start = i ;
227- break ;
242+ private static int indexOfFirstPercentChar (final byte [] bytes ) {
243+ for (int i = 0 , length = bytes .length ; i < length ; i ++) {
244+ if (bytes [i ] == PERCENT_CHAR ) {
245+ return i ;
228246 }
229247 }
230- return start ;
231- }
232-
233- private static int indexOfFirstPercentChar (final byte [] bytes ) {
234- return IntStream .range (0 , bytes .length )
235- .filter (i -> bytes [i ] == PERCENT_CHAR )
236- .findFirst ()
237- .orElse (-1 );
248+ return -1 ;
238249 }
239250
240251 private static byte percentDecode (final byte [] bytes , final int start ) {
0 commit comments