diff --git a/README.md b/README.md index daca475..2d8e866 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A simple library to encode/decode URL parameters. This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com). -The Kotlin version can be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder). +A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder). For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) because of its improper naming. It is actually intended to encode HTML form @@ -22,6 +22,7 @@ also addresses this issue. UrlEncoder.encode("a test &"); // -> "a%20test%20%26" UrlEncoder.encode("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"); // -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~" UrlEncoder.encode("%#okékÉȢ smile!😁"); // -> "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81" +UrlEncoder.encode("?test=a test", "?="); // -> "?test=a%20test" UrlEncoder.decode("a%20test%20%26"); // -> "a test &" UrlEncoder.decode("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"); // -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~" diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ae04661..070cb70 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index 
76aade4..af10245 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -16,26 +16,147 @@ import java.util.BitSet; * @since 1.0 */ public class UrlEncoder { + static final BitSet UNRESERVED_URI_CHARS; + private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); + + static { + // see https://www.rfc-editor.org/rfc/rfc3986#page-13 + var unreserved = new BitSet('~' + 1); + unreserved.set('-'); + unreserved.set('.'); + for (int c = '0'; c <= '9'; ++c) unreserved.set(c); + for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c); + unreserved.set('_'); + for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c); + unreserved.set('~'); + UNRESERVED_URI_CHARS = unreserved; + } + + private static void appendUrlEncodedByte(StringBuilder out, int ch) { + out.append("%"); + appendUrlEncodedDigit(out, ch >> 4); + appendUrlEncodedDigit(out, ch); + } + + private static void appendUrlEncodedDigit(StringBuilder out, int digit) { + out.append(HEX_DIGITS[digit & 0x0F]); + } + + /** + * Transforms a provided String URL into a new string, + * containing decoded URL characters in the UTF-8 encoding. + * + * @param source The string URL that has to be decoded + * @return The decoded String object. 
+ * @see #encode(String, String) + * @since 1.0 + */ + public static String decode(String source) { + if (source == null || source.isBlank()) { + return source; + } + + var length = source.length(); + StringBuilder out = null; + char ch; + byte[] bytes_buffer = null; + var bytes_pos = 0; + for (var i = 0; i < length; ) { + ch = source.charAt(i); + + if (ch == '%') { + if (out == null) { + out = new StringBuilder(length); + out.append(source, 0, i); + } + + if (bytes_buffer == null) { + // the remaining characters divided by the length + // of the encoding format %xx, is the maximum number of + // bytes that can be extracted + bytes_buffer = new byte[(length - i) / 3]; + bytes_pos = 0; + } + + i += 1; + if (length < i + 2) { + throw new IllegalArgumentException("Illegal escape sequence"); + } + try { + var v = Integer.parseInt(source, i, i + 2, 16); + if (v < 0 || v > 0xFF) { + throw new IllegalArgumentException("Illegal escape value"); + } + + bytes_buffer[bytes_pos++] = (byte) v; + + i += 2; + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage()); + } + } else { + if (bytes_buffer != null) { + out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8)); + + bytes_buffer = null; + bytes_pos = 0; + } + + if (out != null) { + out.append(ch); + } + + i += 1; + } + } + + if (out == null) { + return source; + } + + if (bytes_buffer != null) { + out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8)); + } + + return out.toString(); + } + /** * Transforms a provided String object into a new string, * containing only valid URL characters in the UTF-8 encoding. * * @param source The string that has to be transformed into a valid URL * string. + * @param allow Additional characters to allow. * @return The encoded String object. 
+ * @see #decode(String) * @since 1.0 */ - public static String encode(String source) { - if (source == null) { - return null; + public static String encode(String source, char... allow) { + return encode(source, new String(allow)); + } + + /** + * Transforms a provided String object into a new string, + * containing only valid URL characters in the UTF-8 encoding. + * + * @param source The string that has to be transformed into a valid URL + * string. + * @param allow Additional characters to leave unencoded; may be {@code null} or empty. + * @return The encoded String object. + * @see #decode(String) + * @since 1.0 + */ + public static String encode(String source, String allow) { + if (source == null || source.isEmpty()) { + return source; } StringBuilder out = null; char ch; for (var i = 0; i < source.length(); ) { ch = source.charAt(i); - if (isUnreservedUriChar(ch)) { + if (isUnreservedUriChar(ch) || (allow != null && allow.indexOf(ch) != -1)) { if (out != null) { out.append(ch); } @@ -73,115 +194,8 @@ public class UrlEncoder { return out.toString(); } - static final BitSet UNRESERVED_URI_CHARS; - - static { - // see https://www.rfc-editor.org/rfc/rfc3986#page-13 - var unreserved = new BitSet('~' + 1); - unreserved.set('-'); - unreserved.set('.'); - for (int c = '0'; c <= '9'; ++c) unreserved.set(c); - for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c); - unreserved.set('_'); - for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c); - unreserved.set('~'); - UNRESERVED_URI_CHARS = unreserved; - } - // see https://www.rfc-editor.org/rfc/rfc3986#page-13 private static boolean isUnreservedUriChar(char ch) { - if (ch > '~') return false; - return UNRESERVED_URI_CHARS.get(ch); - } - - private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); - - private static void appendUrlEncodedDigit(StringBuilder out, int digit) { - out.append(HEX_DIGITS[digit & 0x0F]); - } - - private static void appendUrlEncodedByte(StringBuilder out, int ch) { - out.append("%"); - appendUrlEncodedDigit(out, ch >> 4); - 
appendUrlEncodedDigit(out, ch); - } - - /** - * Transforms a provided String URL into a new string, - * containing decoded URL characters in the UTF-8 encoding. - * - * @param source The string URL that has to be decoded - * @return The decoded String object. - * @see #encode(String) - * @since 1.0 - */ - public static String decode(String source) { - if (source == null) { - return source; - } - - var length = source.length(); - StringBuilder out = null; - char ch; - byte[] bytes_buffer = null; - var bytes_pos = 0; - for (var i = 0; i < length; ) { - ch = source.charAt(i); - - if (ch == '%') { - if (out == null) { - out = new StringBuilder(source.length()); - out.append(source, 0, i); - } - - if (bytes_buffer == null) { - // the remaining characters divided by the length - // of the encoding format %xx, is the maximum number of - // bytes that can be extracted - bytes_buffer = new byte[(length - i) / 3]; - bytes_pos = 0; - } - - i += 1; - if (length < i + 2) { - throw new IllegalArgumentException("Illegal escape sequence"); - } - try { - var v = Integer.parseInt(source, i, i + 2, 16); - if (v < 0 || v > 0xFF) { - throw new IllegalArgumentException("Illegal escape value"); - } - - bytes_buffer[bytes_pos++] = (byte) v; - - i += 2; - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Illegal characters in escape sequence" + e.getMessage()); - } - } else { - if (bytes_buffer != null) { - out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8)); - - bytes_buffer = null; - bytes_pos = 0; - } - - if (out != null) { - out.append(ch); - } - - i += 1; - } - } - - if (out == null) { - return source; - } - - if (bytes_buffer != null) { - out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8)); - } - - return out.toString(); + return ch <= '~' && UNRESERVED_URI_CHARS.get(ch); } } diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java index 
3dc5fd2..56775af 100644 --- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java +++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java @@ -6,48 +6,41 @@ package com.uwyn.urlencoder; import org.junit.jupiter.api.Test; +import java.util.Map; + import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertTrue; class UrlEncoderTest { - @Test - public void testEncodeURL() { - assertNull(UrlEncoder.encode(null)); - assertEquals("a%20test%20%26", UrlEncoder.encode("a test &")); - String valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"; - assertSame(valid, UrlEncoder.encode(valid)); - assertEquals("%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D", UrlEncoder.encode("!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=")); - assertEquals("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81", UrlEncoder.encode("%#okékÉȢ smile!😁")); - } + private String[] invalid = {"sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1"}; + private String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"; + private Map<String, String> validMap = Map.of( + "a test &", "a%20test%20%26", + "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", + "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D", + "%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81" + ); @Test public void testDecodeURL() { assertNull(UrlEncoder.decode(null)); - assertEquals("a test &", UrlEncoder.decode("a%20test%20%26")); - String valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"; - assertSame(valid, UrlEncoder.decode(valid)); - assertEquals("!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", UrlEncoder.decode("%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D")); - assertEquals("%#okékÉȢ smile!😁", 
UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81")); + assertSame(same, UrlEncoder.decode(same)); + validMap.forEach((expected, source) -> assertEquals(expected, UrlEncoder.decode(source))); - try { - UrlEncoder.decode("sdkjfh%"); - fail(); - } catch (Exception e) { - assertTrue(e instanceof IllegalArgumentException); - } - - try { - UrlEncoder.decode("sdkjfh%6"); - fail(); - } catch (Exception e) { - assertTrue(e instanceof IllegalArgumentException); - } - - try { - UrlEncoder.decode("sdkjfh%xx"); - fail(); - } catch (Exception e) { - assertTrue(e instanceof IllegalArgumentException); + for (String i : invalid) { + assertThrows(IllegalArgumentException.class, () -> UrlEncoder.decode(i)); } } + + @Test + public void testEncodeURL() { + assertNull(UrlEncoder.encode(null)); + assertTrue(UrlEncoder.encode("").isEmpty()); + assertSame(same, UrlEncoder.encode(same)); + assertSame(same, UrlEncoder.encode(same, "")); + validMap.forEach((source, expected) -> assertEquals(expected, UrlEncoder.encode(source))); + + assertEquals("?test=a%20test", UrlEncoder.encode("?test=a test", "?=")); + assertEquals("?test=a%20test", UrlEncoder.encode("?test=a test", '?', '=')); + assertEquals("aaa", UrlEncoder.encode("aaa", 'a')); + } }