mirror of
https://github.com/gbevin/urlencoder.git
synced 2025-04-25 07:17:11 -07:00
Made the encoding even more defensive.
This commit is contained in:
parent
320be3b3a1
commit
b12f3eafd7
3 changed files with 32 additions and 17 deletions
|
@ -8,9 +8,19 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* URL encoding and decoding.
|
||||
* Most defensive approach to URL encoding and decoding.
|
||||
* <p>
|
||||
* Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>.
|
||||
* Rules determined by combining the unreserved character set from
|
||||
* <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with
|
||||
* the percent-encode set from
|
||||
* <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>.
|
||||
* <p>
|
||||
* Both specs above support percent decoding of two hexadecimal digits to a
|
||||
* binary octet; however, their unreserved sets of characters differ and
|
||||
* {@code application/x-www-form-urlencoded} adds conversion of space to {@code +},
|
||||
* which has the potential to be misunderstood.
|
||||
* <p>
|
||||
* This class encodes with rules that will be decoded correctly in either case.
|
||||
*
|
||||
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
|
||||
* @author Erik C. Thauvin (erik@thauvin.net)
|
||||
|
@ -22,14 +32,13 @@ public final class UrlEncoder {
|
|||
|
||||
static {
|
||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||
var unreserved = new BitSet('~' + 1);
|
||||
var unreserved = new BitSet('z' + 1);
|
||||
unreserved.set('-');
|
||||
unreserved.set('.');
|
||||
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
|
||||
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
|
||||
unreserved.set('_');
|
||||
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
|
||||
unreserved.set('~');
|
||||
UNRESERVED_URI_CHARS = unreserved;
|
||||
}
|
||||
|
||||
|
@ -216,7 +225,7 @@ public final class UrlEncoder {
|
|||
|
||||
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
|
||||
private static boolean isUnreservedUriChar(char ch) {
|
||||
return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
|
||||
return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
|
||||
}
|
||||
|
||||
static class MainResult {
|
||||
|
|
|
@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
|||
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
||||
|
||||
class UrlEncoderTest {
|
||||
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~";
|
||||
private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.";
|
||||
|
||||
private static Stream<String> invalid() {
|
||||
return Stream.of("sdkjfh%", "sdkjfh%6", "sdkjfh%xx", "sdfjfh%-1");
|
||||
|
@ -27,7 +27,7 @@ class UrlEncoderTest {
|
|||
arguments("a test &", "a%20test%20%26"),
|
||||
arguments(
|
||||
"!abcdefghijklmnopqrstuvwxyz%%ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~=",
|
||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~%3D"
|
||||
"%21abcdefghijklmnopqrstuvwxyz%25%25ABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.%7E%3D"
|
||||
),
|
||||
arguments("%#okékÉȢ smile!😁", "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"),
|
||||
arguments(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue