mirror of
https://github.com/gbevin/urlencoder.git
synced 2025-04-24 23:07:12 -07:00
Made the encoding even more defensive.
This commit is contained in:
parent
320be3b3a1
commit
b12f3eafd7
3 changed files with 32 additions and 17 deletions
26
README.md
26
README.md
|
@ -8,21 +8,27 @@
|
||||||
|
|
||||||
# URL Encoder for Java
|
# URL Encoder for Java
|
||||||
|
|
||||||
A simple library to encode/decode URL parameters.
|
A simple defensive library to encode/decode URL components.
|
||||||
|
|
||||||
This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com).
|
This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com).
|
||||||
A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder).
|
A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder).
|
||||||
|
|
||||||
For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html)
|
The rules are determined by combining the unreserved character set from
|
||||||
because of its improper naming. It is actually intended to encode HTML form
|
[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the
|
||||||
parameters, not URLs, causing the wrong escape sequences to be used.
|
percent-encode set from
|
||||||
|
[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
|
||||||
|
|
||||||
Additionally, `java.net.URLEncoder` allocates memory even when no encoding is
|
Both specs above support percent decoding of two hexadecimal digits to a
|
||||||
necessary, significantly impacting performance. This library has a negligible
|
binary octet; however, their unreserved sets of characters differ, and
|
||||||
performance impact when a specified string doesn't need to be encoded.
|
`application/x-www-form-urlencoded` adds conversion of space to `+`,
|
||||||
|
which has the potential to be misunderstood.
|
||||||
|
|
||||||
Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String))
|
This class encodes with rules that will be decoded correctly in either case.
|
||||||
also addresses the same issues.
|
|
||||||
|
Additionally, this library allocates no memory when encoding isn't needed and
|
||||||
|
does the work in a single pass without multiple loops. Both of these
|
||||||
|
optimizations have a significantly beneficial impact on performance of encoding
|
||||||
|
compared to other solutions like the standard URLEncoder in the JDK.
|
||||||
|
|
||||||
## Examples (TL;DR)
|
## Examples (TL;DR)
|
||||||
|
|
||||||
|
@ -57,7 +63,7 @@ You have two options:
|
||||||
The usage is as follows:
|
The usage is as follows:
|
||||||
|
|
||||||
```
|
```
|
||||||
Encode and decode URL parameters.
|
Encode and decode URL components defensively.
|
||||||
-e encode (default)
|
-e encode (default)
|
||||||
-d decode
|
-d decode
|
||||||
```
|
```
|
||||||
|
|
|
@ -8,9 +8,19 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* URL encoding and decoding.
|
* Most defensive approach to URL encoding and decoding.
|
||||||
* <p>
|
* <p>
|
||||||
* Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>.
|
* Rules determined by combining the unreserved character set from
|
||||||
|
* <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with
|
||||||
|
* the percent-encode set from
|
||||||
|
* <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>.
|
||||||
|
* <p>
|
||||||
|
* Both specs above support percent decoding of two hexadecimal digits to a
|
||||||
|
* binary octet; however, their unreserved sets of characters differ, and
|
||||||
|
* {@code application/x-www-form-urlencoded} adds conversion of space to +,
|
||||||
|
* which has the potential to be misunderstood.
|
||||||
|
* <p>
|
||||||
|
* This class encodes with rules that will be decoded correctly in either case.
|
||||||
*
|
*
|
||||||
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
|
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
|
||||||
* @author Erik C. Thauvin (erik@thauvin.net)
|
* @author Erik C. Thauvin (erik@thauvin.net)
|
||||||
|
@ -22,14 +32,13 @@ public final class UrlEncoder {
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||||
var unreserved = new BitSet('~' + 1);
|
var unreserved = new BitSet('z' + 1);
|
||||||
unreserved.set('-');
|
unreserved.set('-');
|
||||||
unreserved.set('.');
|
unreserved.set('.');
|
||||||
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
|
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
|
||||||
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
|
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
|
||||||
unreserved.set('_');
|
unreserved.set('_');
|
||||||
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
|
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
|
||||||
unreserved.set('~');
|
|
||||||
UNRESERVED_URI_CHARS = unreserved;
|
UNRESERVED_URI_CHARS = unreserved;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,7 +225,7 @@ public final class UrlEncoder {
|
||||||
|
|
||||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||||
private static boolean isUnreservedUriChar(char ch) {
|
private static boolean isUnreservedUriChar(char ch) {
|
||||||
return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
|
return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static class MainResult {
|
static class MainResult {
|
||||||
|
|
|
@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||||
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
||||||
|
|
||||||
class UrlEncoderTest {
|
class UrlEncoderTest {
|
||||||
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~";
|
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.";
|
||||||
|
|
||||||
private static Stream<String> invalid() {
|
private static Stream<String> invalid() {
|
||||||
return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1");
|
return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1");
|
||||||
|
@ -27,7 +27,7 @@ class UrlEncoderTest {
|
||||||
arguments("a test &", "a%20test%20%26"),
|
arguments("a test &", "a%20test%20%26"),
|
||||||
arguments(
|
arguments(
|
||||||
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
||||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
|
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
|
||||||
),
|
),
|
||||||
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
||||||
arguments(
|
arguments(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue