diff --git a/README.md b/README.md index c5d2660..84f4da9 100644 --- a/README.md +++ b/README.md @@ -8,28 +8,21 @@ # URL Encoder for Java -A simple defensive library to encode/decode URL components. +A simple library to encode/decode URL parameters. This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com). A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder). -The rules are determined by combining the unreserved character set from -[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the -percent-encode set from -[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set). +For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) +to encode URLs because of its misleading name. It is actually intended to encode HTML form +parameters, not URLs, causing the wrong escape sequences to be used. -Both specs above support percent decoding of two hexadecimal digits to a -binary octet, however their unreserved set of characters differs and -`application/x-www-form-urlencoded` adds conversion of space to `+`, -that has the potential to be misunderstood. +Additionally, `java.net.URLEncoder` allocates memory even when no encoding is +necessary, significantly impacting performance. This library has a negligible +performance impact when a specified string doesn't need to be encoded. -This class encodes with rules that will be decoded correctly in either case. - -Additionally, this library allocates no memory when encoding isn't needed and -does the work in a single pass without multiple loops. Both of these -optimizations have a significantly beneficial impact on performance of encoding -compared to other solutions like the standard `URLEncoder` in the JDK or -`UriUtils` in Spring. 
+Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) +also addresses the same issues. ## Examples (TL;DR) @@ -64,7 +57,7 @@ You have two options: The usage is as follows: ``` -Encode and decode URL components defensively. +Encode and decode URL parameters. -e encode (default) -d decode ``` diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts index 87a3442..04e2a21 100644 --- a/lib/build.gradle.kts +++ b/lib/build.gradle.kts @@ -13,7 +13,7 @@ plugins { } group = "com.uwyn" -version = "1.2.0" +version = "1.1.0" val mavenName = "UrlEncoder" val javaMainClass = "$group.${rootProject.name}.$mavenName" @@ -112,7 +112,7 @@ publishing { from(components["java"]) pom { name.set(mavenName) - description.set("A simple defensive library to encode/decode URL components") + description.set("A simple library to encode/decode URL parameters") url.set("https://github.com/gbevin/urlencoder") licenses { license { diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index 0c53377..7a2de08 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -8,19 +8,9 @@ import java.nio.charset.StandardCharsets; import java.util.*; /** - * Most defensive approach to URL encoding and decoding. + * URL encoding and decoding. *

- * Rules determined by combining the unreserved character set from - * RFC 3986 with - * the percent-encode set from - * application/x-www-form-urlencoded. - *

- * Both specs above support percent decoding of two hexadecimal digits to a - * binary octet, however their unreserved set of characters differs and - * {@code application/x-www-form-urlencoded} adds conversion of space to +, - * which has the potential to be misunderstood. - *

- * This class encodes with rules that will be decoded correctly in either case. + * Rules determined by RFC 3986. * * @author Geert Bevin (gbevin[remove] at uwyn dot com) * @author Erik C. Thauvin (erik@thauvin.net) @@ -32,14 +22,14 @@ public final class UrlEncoder { static { // see https://www.rfc-editor.org/rfc/rfc3986#page-13 - // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set - var unreserved = new BitSet('z' + 1); + var unreserved = new BitSet('~' + 1); unreserved.set('-'); unreserved.set('.'); for (int c = '0'; c <= '9'; ++c) unreserved.set(c); for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c); unreserved.set('_'); for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c); + unreserved.set('~'); UNRESERVED_URI_CHARS = unreserved; } @@ -225,9 +215,8 @@ public final class UrlEncoder { } // see https://www.rfc-editor.org/rfc/rfc3986#page-13 - // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set private static boolean isUnreservedUriChar(char ch) { - return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch); + return ch <= '~' && UNRESERVED_URI_CHARS.get(ch); } static class MainResult { @@ -264,7 +253,7 @@ public final class UrlEncoder { if (!valid_arguments) { return new MainResult("Usage : java -jar urlencoder-*.jar [-ed] text" + System.lineSeparator() + - "Encode and decode URL components defensively." + System.lineSeparator() + + "Encode and decode URL parameters." 
+ System.lineSeparator() + " -e encode (default)" + System.lineSeparator() + " -d decode", 1); } diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java index 99cf385..ecf6624 100644 --- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java +++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java @@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.params.provider.Arguments.arguments; class UrlEncoderTest { - private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_."; + private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"; private static Stream invalid() { return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1"); @@ -27,7 +27,7 @@ class UrlEncoderTest { arguments("a test &", "a%20test%20%26"), arguments( "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", - "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D" + "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D" ), arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"), arguments(