mirror of
https://github.com/gbevin/urlencoder.git
synced 2025-04-24 23:07:12 -07:00
Made the encoding even more defensive.
This commit is contained in:
parent
320be3b3a1
commit
b12f3eafd7
3 changed files with 32 additions and 17 deletions
26
README.md
26
README.md
|
@ -8,21 +8,27 @@
|
|||
|
||||
# URL Encoder for Java
|
||||
|
||||
A simple library to encode/decode URL parameters.
|
||||
A simple defensive library to encode/decode URL components.
|
||||
|
||||
This library was extracted from the [RIFE2 Web Application Framework](https://rife2.com).
|
||||
A Kotlin version can also be found at [https://github.com/ethauvin/urlencoder](https://github.com/ethauvin/urlencoder).
|
||||
|
||||
For decades, we've been using [java.net.URLEncoder](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/net/URLEncoder.html)
|
||||
because of its improper naming. It is actually intended to encode HTML form
|
||||
parameters, not URLs, causing the wrong escape sequences to be used.
|
||||
The rules are determined by combining the unreserved character set from
|
||||
[RFC 3986](https://www.rfc-editor.org/rfc/rfc3986#page-13) with the
|
||||
percent-encode set from
|
||||
[application/x-www-form-urlencoded](https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set).
|
||||
|
||||
Additionally, `java.net.URLEncoder` allocates memory even when no encoding is
|
||||
necessary, significantly impacting performance. This library has a negligible
|
||||
performance impact when a specified string doesn't need to be encoded.
|
||||
Both specs above support percent decoding of two hexadecimal digits to a
|
||||
binary octet; however, their unreserved sets of characters differ and
|
||||
`application/x-www-form-urlencoded` adds conversion of space to `+`,
|
||||
which has the potential to be misunderstood.
|
||||
|
||||
Android's [Uri.encode](https://developer.android.com/reference/android/net/Uri#encode(java.lang.String,%20java.lang.String))
|
||||
also addresses the same issues.
|
||||
This class encodes with rules that will be decoded correctly in either case.
|
||||
|
||||
Additionally, this library allocates no memory when encoding isn't needed and
|
||||
does the work in a single pass without multiple loops. Both of these
|
||||
optimizations have a significantly beneficial impact on performance of encoding
|
||||
compared to other solutions like the standard URLEncoder in the JDK.
|
||||
|
||||
## Examples (TL;DR)
|
||||
|
||||
|
@ -57,7 +63,7 @@ You have two options:
|
|||
The usage is as follows:
|
||||
|
||||
```
|
||||
Encode and decode URL parameters.
|
||||
Encode and decode URL components defensively.
|
||||
-e encode (default)
|
||||
-d decode
|
||||
```
|
||||
|
|
|
@ -8,9 +8,19 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* URL encoding and decoding.
|
||||
* Most defensive approach to URL encoding and decoding.
|
||||
* <p>
|
||||
* Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>.
|
||||
* Rules determined by combining the unreserved character set from
|
||||
* <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with
|
||||
* the percent-encode set from
|
||||
* <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>.
|
||||
* <p>
|
||||
* Both specs above support percent decoding of two hexadecimal digits to a
|
||||
* binary octet; however, their unreserved sets of characters differ and
|
||||
* {@code application/x-www-form-urlencoded} adds conversion of space to +,
|
||||
* which has the potential to be misunderstood.
|
||||
* <p>
|
||||
* This class encodes with rules that will be decoded correctly in either case.
|
||||
*
|
||||
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
|
||||
* @author Erik C. Thauvin (erik@thauvin.net)
|
||||
|
@ -22,14 +32,13 @@ public final class UrlEncoder {
|
|||
|
||||
static {
|
||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||
var unreserved = new BitSet('~' + 1);
|
||||
var unreserved = new BitSet('z' + 1);
|
||||
unreserved.set('-');
|
||||
unreserved.set('.');
|
||||
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
|
||||
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
|
||||
unreserved.set('_');
|
||||
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
|
||||
unreserved.set('~');
|
||||
UNRESERVED_URI_CHARS = unreserved;
|
||||
}
|
||||
|
||||
|
@ -216,7 +225,7 @@ public final class UrlEncoder {
|
|||
|
||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||
private static boolean isUnreservedUriChar(char ch) {
|
||||
return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
|
||||
return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
|
||||
}
|
||||
|
||||
static class MainResult {
|
||||
|
|
|
@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
|||
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
||||
|
||||
class UrlEncoderTest {
|
||||
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~";
|
||||
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.";
|
||||
|
||||
private static Stream<String> invalid() {
|
||||
return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1");
|
||||
|
@ -27,7 +27,7 @@ class UrlEncoderTest {
|
|||
arguments("a test &", "a%20test%20%26"),
|
||||
arguments(
|
||||
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
|
||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
|
||||
),
|
||||
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
||||
arguments(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue