From b8322bfe9f06d84a37dd2414cd8f27a463fdb899 Mon Sep 17 00:00:00 2001 From: Geert Bevin Date: Fri, 6 Jan 2023 09:55:42 -0500 Subject: [PATCH 1/4] Optimization to spaceToPlus support, tests cleanups. --- .../java/com/uwyn/urlencoder/UrlEncoder.java | 37 +++++++++---------- .../com/uwyn/urlencoder/UrlEncoderTest.java | 24 ++++++------ 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index c5aff0a..984349f 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -214,27 +214,26 @@ public final class UrlEncoder { out = new StringBuilder(source.length()); out.append(source, 0, i); } - if (spaceToPlus && ch == ' ') { - out.append('+'); - i += 1; - } else { - var cp = source.codePointAt(i); - if (cp < 0x80) { + var cp = source.codePointAt(i); + if (cp < 0x80) { + if (spaceToPlus && ch == ' ') { + out.append('+'); + } else { appendUrlEncodedByte(out, cp); - i += 1; - } else if (Character.isBmpCodePoint(cp)) { - for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) { - appendUrlEncodedByte(out, b); - } - i += 1; - } else if (Character.isSupplementaryCodePoint(cp)) { - var high = Character.highSurrogate(cp); - var low = Character.lowSurrogate(cp); - for (var b : new String(new char[]{high, low}).getBytes(StandardCharsets.UTF_8)) { - appendUrlEncodedByte(out, b); - } - i += 2; } + i += 1; + } else if (Character.isBmpCodePoint(cp)) { + for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) { + appendUrlEncodedByte(out, b); + } + i += 1; + } else if (Character.isSupplementaryCodePoint(cp)) { + var high = Character.highSurrogate(cp); + var low = Character.lowSurrogate(cp); + for (var b : new String(new char[]{high, low}).getBytes(StandardCharsets.UTF_8)) { + appendUrlEncodedByte(out, b); + } + i += 2; } } } diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java index 62e1db5..1a7d25d 100644 --- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java +++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java @@ -104,7 +104,7 @@ class UrlEncoderTest { @ParameterizedTest(name = "processMain(-d {1}) should be {0}") @MethodSource("validMap") void testMainDecode(String expected, String source) { - var result = UrlEncoder.processMain(new String[]{"-d", source}); + var result = UrlEncoder.processMain("-d", source); assertEquals(expected, result.output); assertEquals(0, result.status, "processMain(-d " + source + ").status"); } @@ -112,7 +112,7 @@ class UrlEncoderTest { @ParameterizedTest(name = "processMain(-e {0})") @MethodSource("validMap") void testMainEncode(String source, String expected) { - var result = UrlEncoder.processMain(new String[]{source}); + var result = UrlEncoder.processMain(source); assertEquals(expected, result.output); assertEquals(0, result.status, "processMain(-e " + source + ").status"); } @@ -120,20 +120,20 @@ class UrlEncoderTest { @ParameterizedTest(name = "processMain(-d {0})") @MethodSource("invalid") void testMainEncodeWithExceptions(String source) { - assertThrows(IllegalArgumentException.class, () -> UrlEncoder.processMain(new String[]{"-d", source}), source); + assertThrows(IllegalArgumentException.class, () -> UrlEncoder.processMain("-d", source), source); } @Test void testMainTooManyArgs() { - assertTrue(UrlEncoder.processMain(new String[]{"foo", "bar", "test"}).output.contains("Usage :"), "too many args"); + assertTrue(UrlEncoder.processMain("foo", "bar", "test").output.contains("Usage :"), "too many args"); } @Test void testMainWithEmptyArgs() { - assertTrue(UrlEncoder.processMain(new String[]{" ", " "}).output.contains("Usage :"), "processMain(' ', ' ')"); - assertTrue(UrlEncoder.processMain(new String[]{"foo", " "}).output.contains("Usage :"), "processMain('foo', ' ')"); - assertTrue(UrlEncoder.processMain(new String[]{" ", "foo"}).output.contains("Usage :"), "processMain(' ', 'foo')"); - assertTrue(UrlEncoder.processMain(new String[]{"-d ", ""}).output.contains("Usage :"), "processMain('-d', '')"); + assertTrue(UrlEncoder.processMain(" ", " ").output.contains("Usage :"), "processMain(' ', ' ')"); + assertTrue(UrlEncoder.processMain("foo", " ").output.contains("Usage :"), "processMain('foo', ' ')"); + assertTrue(UrlEncoder.processMain(" ", "foo").output.contains("Usage :"), "processMain(' ', 'foo')"); + assertTrue(UrlEncoder.processMain("-d ", "").output.contains("Usage :"), "processMain('-d', '')"); assertEquals("%20", UrlEncoder.processMain(new String[]{"-e", " "}).output, "processMain('-e', ' ')"); assertEquals(" ", UrlEncoder.processMain(new String[]{"-d", " "}).output, "processMain('-d', ' ')"); } @@ -141,7 +141,7 @@ class UrlEncoderTest { @ParameterizedTest @ValueSource(strings = {"", "-d", "-e"}) void testMainWithInvalidArgs(String arg) { - var result = UrlEncoder.processMain(new String[]{arg}); + var result = UrlEncoder.processMain(arg); assertTrue(result.output.contains("Usage :"), "processMain('" + arg + "')"); assertEquals(1, result.status, "processMain('" + arg + "').status"); } @@ -149,14 +149,14 @@ class UrlEncoderTest { @ParameterizedTest(name = "processMain(-e {0})") @MethodSource("validMap") void testMainWithOption(String source, String expected) { - var result = UrlEncoder.processMain(new String[]{"-e", source}); + var result = UrlEncoder.processMain("-e", source); assertEquals(expected, result.output); assertEquals(0, result.status, "processMain(-e " + source + ").status"); } @Test void testMainWithUnknownOptions() { - assertTrue(UrlEncoder.processMain(new String[]{"-p"}).output.contains("Usage :"), "processMain(-p)"); - assertTrue(UrlEncoder.processMain(new String[]{"-"}).output.contains("Usage :"), "processMain(-)"); + assertTrue(UrlEncoder.processMain("-p").output.contains("Usage :"), "processMain(-p)"); + assertTrue(UrlEncoder.processMain("-").output.contains("Usage :"), "processMain(-)"); } } \ No newline at end of file From 844db85cbe8e5f5504c505acc2b7b02519614c76 Mon Sep 17 00:00:00 2001 From: Geert Bevin Date: Fri, 6 Jan 2023 13:58:41 -0500 Subject: [PATCH 2/4] Added support for plusToSpace decoding --- .../java/com/uwyn/urlencoder/UrlEncoder.java | 38 ++++++++++++++----- .../com/uwyn/urlencoder/UrlEncoderTest.java | 7 ++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index 984349f..27b9a2f 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -69,6 +69,20 @@ public final class UrlEncoder { * @since 1.0 */ public static String decode(String source) { + return decode(source, false); + } + + /** + * Transforms a provided String URL into a new string, + * containing decoded URL characters in the UTF-8 encoding. + * + * @param source The string URL that has to be decoded + * @param plusToSpace Convert any {@code +} to space. + * @return The decoded String object. + * @see #encode(String, String) + * @since 1.0 + */ + public static String decode(String source, boolean plusToSpace) { if (source == null || source.isEmpty()) { return source; } @@ -83,10 +97,7 @@ public final class UrlEncoder { ch = source.charAt(i); if (ch == '%') { - if (out == null) { - out = new StringBuilder(length); - out.append(source, 0, i); - } + out = startConstructingIfNeeded(out, source, i); if (bytes_buffer == null) { // the remaining characters divided by the length @@ -119,7 +130,10 @@ public final class UrlEncoder { bytes_pos = 0; } - if (out != null) { + if (plusToSpace && ch == '+') { + out = startConstructingIfNeeded(out, source, i); + out.append(" "); + } else if (out != null) { out.append(ch); } @@ -138,6 +152,14 @@ public final class UrlEncoder { return out.toString(); } + private static StringBuilder startConstructingIfNeeded(StringBuilder out, String source, int currentSourcePosition) { + if (out == null) { + out = new StringBuilder(source.length()); + out.append(source, 0, currentSourcePosition); + } + return out; + } + /** * Transforms a provided String object into a new string, * containing only valid URL characters in the UTF-8 encoding. @@ -210,10 +232,8 @@ public final class UrlEncoder { } i += 1; } else { - if (out == null) { - out = new StringBuilder(source.length()); - out.append(source, 0, i); - } + out = startConstructingIfNeeded(out, source, i); + var cp = source.codePointAt(i); if (cp < 0x80) { if (spaceToPlus && ch == ' ') { diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java index 1a7d25d..be7408d 100644 --- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java +++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java @@ -101,6 +101,13 @@ class UrlEncoderTest { assertEquals("foo bar", UrlEncoder.encode("foo bar", " ", true)); } + @Test + void testDecodePlusToSpace() { + assertEquals("foo bar", UrlEncoder.decode("foo+bar", true)); + assertEquals("foo bar foo", UrlEncoder.decode("foo+bar++foo", true)); + assertEquals("foo bar foo", UrlEncoder.decode("foo+%20bar%20+foo", true)); + } + @ParameterizedTest(name = "processMain(-d {1}) should be {0}") @MethodSource("validMap") void testMainDecode(String expected, String source) { From df4bf1a4c0c159dc7cd9aeb28d0423673cdaa5da Mon Sep 17 00:00:00 2001 From: "Erik C. Thauvin" Date: Fri, 6 Jan 2023 11:11:48 -0800 Subject: [PATCH 3/4] Added plusToSpace example --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 761675f..461d4f6 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,12 @@ compared to other solutions like the standard `URLEncoder` in the JDK or ```java UrlEncoder.encode("a test &"); // -> "a%20test%20%26" UrlEncoder.encode("%#okékÉȢ smile!😁"); // -> "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81" -UrlEncoder.encode("?test=a test", "?="); // -> ?test=a%20test -UrlEncoder.encode("foo bar", true); // -> foo+bar (encode space to +) +UrlEncoder.encode("?test=a test", "?="); // -> "?test=a%20test" +UrlEncoder.encode("foo bar", true); // -> "foo+bar" (encode space to +) UrlEncoder.decode("a%20test%20%26"); // -> "a test &" UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"); // -> "%#okékÉȢ smile!😁" +UrlEncoder.decode("foot+bar", true); // -> "foo bar" (decode + to space) ``` ## Gradle, Maven, etc. From f075688dddfc8264fc5207b58d5866391405ee4d Mon Sep 17 00:00:00 2001 From: Geert Bevin Date: Fri, 6 Jan 2023 14:28:30 -0500 Subject: [PATCH 4/4] Typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 461d4f6..c44050d 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ UrlEncoder.encode("foo bar", true); // -> "foo+bar" (encode space to +) UrlEncoder.decode("a%20test%20%26"); // -> "a test &" UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"); // -> "%#okékÉȢ smile!😁" -UrlEncoder.decode("foot+bar", true); // -> "foo bar" (decode + to space) +UrlEncoder.decode("foo+bar", true); // -> "foo bar" (decode + to space) ``` ## Gradle, Maven, etc.