diff --git a/README.md b/README.md index 84f4da9..6da4af9 100644 --- a/README.md +++ b/README.md @@ -8,21 +8,27 @@ # URL Encoder for Java -A simple library to encode/decode URL parameters. +A simple defensive library to encode/decode URL components. This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com). A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder). -For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) -because of its improper naming. It is actually intended to encode HTML form -parameters, not URLs, causing the wrong escape sequences to be used. +The rules are determined by combining the unreserved character set from +[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the +percent-encode set from +[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set). -Additionally, `java.net.URLEncoder` allocates memory even when no encoding is -necessary, significantly impacting performance. This library has a negligible -performance impact when a specified string doesn't need to be encoded. +Both specs above support percent decoding of two hexadecimal digits to a +binary octet; however, their unreserved character sets differ, and +`application/x-www-form-urlencoded` adds conversion of space to `+`, +which has the potential to be misunderstood. -Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) -also addresses the same issues. +This class encodes with rules that will be decoded correctly in either case. 
+ +Additionally, this library allocates no memory when encoding isn't needed and +does the work in a single pass without multiple loops. Both of these +optimizations significantly improve encoding performance +compared to other solutions like the standard URLEncoder in the JDK. ## Examples (TL;DR) @@ -57,7 +63,7 @@ You have two options: The usage is as follows: ``` -Encode and decode URL parameters. +Encode and decode URL components defensively. -e encode (default) -d decode ``` diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index 7a2de08..8e736f3 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -8,9 +8,19 @@ import java.nio.charset.StandardCharsets; import java.util.*; /** - * URL encoding and decoding. + * Most defensive approach to URL encoding and decoding. *
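- * Rules determined by RFC 3986. + * Rules determined by combining the unreserved character set from + * <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with + * the percent-encode set from + * <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>. + *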
+ * Both specs above support percent decoding of two hexadecimal digits to a + * binary octet; however, their unreserved character sets differ, and + * {@code application/x-www-form-urlencoded} adds conversion of space to {@code +}, + * which has the potential to be misunderstood. + *
+ * This class encodes with rules that will be decoded correctly in either case.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
@@ -22,14 +32,13 @@ public final class UrlEncoder {
static {
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
- var unreserved = new BitSet('~' + 1);
+ var unreserved = new BitSet('z' + 1);
unreserved.set('-');
unreserved.set('.');
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
unreserved.set('_');
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
- unreserved.set('~');
UNRESERVED_URI_CHARS = unreserved;
}
@@ -216,7 +225,7 @@ public final class UrlEncoder {
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
private static boolean isUnreservedUriChar(char ch) {
- return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
+ return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
}
static class MainResult {
diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
index ecf6624..99cf385 100644
--- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
+++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
@@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
class UrlEncoderTest {
- private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~";
+ private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.";
private static Stream