Compare commits

..

8 commits

4 changed files with 38 additions and 20 deletions

View file

@ -8,21 +8,28 @@
# URL Encoder for Java # URL Encoder for Java
A simple library to encode/decode URL parameters. A simple defensive library to encode/decode URL components.
This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com). This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com).
A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder). A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder).
For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html) The rules are determined by combining the unreserved character set from
because of its improper naming. It is actually intended to encode HTML form [RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the
parameters, not URLs, causing the wrong escape sequences to be used. percent-encode set from
[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
Additionally, `java.net.URLEncoder` allocates memory even when no encoding is Both specs above support percent decoding of two hexadecimal digits to a
necessary, significantly impacting performance. This library has a negligible binary octet; however, their unreserved set of characters differs and
performance impact when a specified string doesn't need to be encoded. `application/x-www-form-urlencoded` adds conversion of space to `+`,
which has the potential to be misunderstood.
Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String)) This class encodes with rules that will be decoded correctly in either case.
also addresses the same issues.
Additionally, this library allocates no memory when encoding isn't needed and
does the work in a single pass without multiple loops. Both of these
optimizations have a significantly beneficial impact on performance of encoding
compared to other solutions like the standard `URLEncoder` in the JDK or
`UriUtils` in Spring.
## Examples (TL;DR) ## Examples (TL;DR)
@ -57,7 +64,7 @@ You have two options:
The usage is as follows: The usage is as follows:
``` ```
Encode and decode URL parameters. Encode and decode URL components defensively.
-e encode (default) -e encode (default)
-d decode -d decode
``` ```

View file

@ -13,7 +13,7 @@ plugins {
} }
group = "com.uwyn" group = "com.uwyn"
version = "1.1.0" version = "1.2.0"
val mavenName = "UrlEncoder" val mavenName = "UrlEncoder"
val javaMainClass = "$group.${rootProject.name}.$mavenName" val javaMainClass = "$group.${rootProject.name}.$mavenName"
@ -112,7 +112,7 @@ publishing {
from(components["java"]) from(components["java"])
pom { pom {
name.set(mavenName) name.set(mavenName)
description.set("A simple library to encode/decode URL parameters") description.set("A simple defensive library to encode/decode URL components")
url.set("https://github.com/gbevin/urlencoder") url.set("https://github.com/gbevin/urlencoder")
licenses { licenses {
license { license {

View file

@ -8,9 +8,19 @@ import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
/** /**
* URL encoding and decoding. * Most defensive approach to URL encoding and decoding.
* <p> * <p>
* Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>. * Rules determined by combining the unreserved character set from
* <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with
* the percent-encode set from
* <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>.
* <p>
* Both specs above support percent decoding of two hexadecimal digits to a
* binary octet; however, their unreserved set of characters differs and
* {@code application/x-www-form-urlencoded} adds conversion of space to +,
* which has the potential to be misunderstood.
* <p>
* This class encodes with rules that will be decoded correctly in either case.
* *
* @author Geert Bevin (gbevin[remove] at uwyn dot com) * @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net) * @author Erik C. Thauvin (erik@thauvin.net)
@ -22,14 +32,14 @@ public final class UrlEncoder {
static { static {
// see https://www.rfc-editor.org/rfc/rfc3986#page-13 // see https://www.rfc-editor.org/rfc/rfc3986#page-13
var unreserved = new BitSet('~' + 1); // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
var unreserved = new BitSet('z' + 1);
unreserved.set('-'); unreserved.set('-');
unreserved.set('.'); unreserved.set('.');
for (int c = '0'; c <= '9'; ++c) unreserved.set(c); for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c); for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
unreserved.set('_'); unreserved.set('_');
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c); for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
unreserved.set('~');
UNRESERVED_URI_CHARS = unreserved; UNRESERVED_URI_CHARS = unreserved;
} }
@ -215,8 +225,9 @@ public final class UrlEncoder {
} }
// see https://www.rfc-editor.org/rfc/rfc3986#page-13 // see https://www.rfc-editor.org/rfc/rfc3986#page-13
// and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private static boolean isUnreservedUriChar(char ch) { private static boolean isUnreservedUriChar(char ch) {
return ch <= '~' && UNRESERVED_URI_CHARS.get(ch); return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
} }
static class MainResult { static class MainResult {
@ -253,7 +264,7 @@ public final class UrlEncoder {
if (!valid_arguments) { if (!valid_arguments) {
return new MainResult("Usage : java -jar urlencoder-*.jar [-ed] text" + System.lineSeparator() + return new MainResult("Usage : java -jar urlencoder-*.jar [-ed] text" + System.lineSeparator() +
"Encode and decode URL parameters." + System.lineSeparator() + "Encode and decode URL components defensively." + System.lineSeparator() +
" -e encode (default)" + System.lineSeparator() + " -e encode (default)" + System.lineSeparator() +
" -d decode", 1); " -d decode", 1);
} }

View file

@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments; import static org.junit.jupiter.params.provider.Arguments.arguments;
class UrlEncoderTest { class UrlEncoderTest {
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~"; private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.";
private static Stream<String> invalid() { private static Stream<String> invalid() {
return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1"); return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1");
@ -27,7 +27,7 @@ class UrlEncoderTest {
arguments("a test &", "a%20test%20%26"), arguments("a test &", "a%20test%20%26"),
arguments( arguments(
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=", "!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D" "%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
), ),
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"), arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
arguments( arguments(