diff --git a/README.md b/README.md index 2e7679d..c2e7fe3 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ compared to other solutions like the standard `URLEncoder` in the JDK or UrlEncoder.encode("a test &"); // -> "a%20test%20%26" UrlEncoder.encode("%#okékÉȢ smile!😁"); // -> "%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81" UrlEncoder.encode("?test=a test", "?="); // -> ?test=a%20test +UrlEncoder.encode("foo bar", true); // -> foo+bar UrlEncoder.decode("a%20test%20%26"); // -> "a test &" UrlEncoder.decode("%25%23ok%C3%A9k%C3%89%C8%A2%20smile%21%F0%9F%98%81"); // -> "%#okékÉȢ smile!😁" diff --git a/lib/build.gradle.kts b/lib/build.gradle.kts index c2145cf..da6085c 100644 --- a/lib/build.gradle.kts +++ b/lib/build.gradle.kts @@ -75,7 +75,7 @@ tasks { if (project.properties["testsBadgeApiKey"] != null) { val apiKey = project.properties["testsBadgeApiKey"] - val response: java.net.http.HttpResponse = HttpClient.newHttpClient() + val response: HttpResponse = HttpClient.newHttpClient() .send( HttpRequest.newBuilder() .uri( diff --git a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java index 0c53377..c5aff0a 100644 --- a/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java +++ b/lib/src/main/java/com/uwyn/urlencoder/UrlEncoder.java @@ -5,7 +5,9 @@ package com.uwyn.urlencoder; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; /** * Most defensive approach to URL encoding and decoding. @@ -43,6 +45,10 @@ public final class UrlEncoder { UNRESERVED_URI_CHARS = unreserved; } + private UrlEncoder() { + // no-op + } + private static void appendUrlEncodedByte(StringBuilder out, int ch) { out.append("%"); appendUrlEncodedDigit(out, ch >> 4); @@ -53,10 +59,6 @@ public final class UrlEncoder { out.append(HEX_DIGITS[digit & 0x0F]); } - private UrlEncoder() { - // no-op - } - /** * Transforms a provided String URL into a new string, * containing decoded URL characters in the UTF-8 encoding. @@ -67,7 +69,7 @@ public final class UrlEncoder { * @since 1.0 */ public static String decode(String source) { - if (source == null || source.isBlank()) { + if (source == null || source.isEmpty()) { return source; } @@ -107,7 +109,7 @@ public final class UrlEncoder { i += 2; } catch (NumberFormatException e) { - throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage()); + throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage(), e); } } else { if (bytes_buffer != null) { @@ -147,22 +149,7 @@ public final class UrlEncoder { * @since 1.0 */ public static String encode(String source) { - return encode(source, (String) null); - } - - /** - * Transforms a provided String object into a new string, - * containing only valid URL characters in the UTF-8 encoding. - * - * @param source The string that has to be transformed into a valid URL - * string. - * @param allow Additional characters to allow. - * @return The encoded String object. - * @see #decode(String) - * @since 1.0 - */ - public static String encode(String source, char... allow) { - return encode(source, new String(allow)); + return encode(source, null, false); } /** @@ -177,6 +164,37 @@ public final class UrlEncoder { * @since 1.0 */ public static String encode(String source, String allow) { + return encode(source, allow, false); + } + + /** + * Transforms a provided String object into a new string, + * containing only valid URL characters in the UTF-8 encoding. + * + * @param source The string that has to be transformed into a valid URL + * string. + * @param spaceToPlus Convert any space to {@code +}. + * @return The encoded String object. + * @see #decode(String) + * @since 1.0 + */ + public static String encode(String source, boolean spaceToPlus) { + return encode(source, null, spaceToPlus); + } + + /** + * Transforms a provided String object into a new string, + * containing only valid URL characters in the UTF-8 encoding. + * + * @param source The string that has to be transformed into a valid URL + * string. + * @param allow Additional characters to allow. + * @param spaceToPlus Convert any space to {@code +}. + * @return The encoded String object. + * @see #decode(String) + * @since 1.0 + */ + public static String encode(String source, String allow, boolean spaceToPlus) { if (source == null || source.isEmpty()) { return source; } @@ -196,23 +214,27 @@ public final class UrlEncoder { out = new StringBuilder(source.length()); out.append(source, 0, i); } - - var cp = source.codePointAt(i); - if (cp < 0x80) { - appendUrlEncodedByte(out, cp); + if (spaceToPlus && ch == ' ') { + out.append('+'); i += 1; - } else if (Character.isBmpCodePoint(cp)) { - for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) { - appendUrlEncodedByte(out, b); + } else { + var cp = source.codePointAt(i); + if (cp < 0x80) { + appendUrlEncodedByte(out, cp); + i += 1; + } else if (Character.isBmpCodePoint(cp)) { + for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) { + appendUrlEncodedByte(out, b); + } + i += 1; + } else if (Character.isSupplementaryCodePoint(cp)) { + var high = Character.highSurrogate(cp); + var low = Character.lowSurrogate(cp); + for (var b : new String(new char[]{high, low}).getBytes(StandardCharsets.UTF_8)) { + appendUrlEncodedByte(out, b); + } + i += 2; } - i += 1; - } else if (Character.isSupplementaryCodePoint(cp)) { - var high = Character.highSurrogate(cp); - var low = Character.lowSurrogate(cp); - for (var b : new String(new char[]{high, low}).getBytes(StandardCharsets.UTF_8)) { - appendUrlEncodedByte(out, b); - } - i += 2; } } } @@ -230,26 +252,37 @@ public final class UrlEncoder { return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch); } - static class MainResult { - final String output; - final int status; - - public MainResult(String output, int status) { - this.output = output; - this.status = status; + /** + * Main method to encode/decode URLs on the command line + * + * @param arguments the command line arguments + * @since 1.1 + */ + public static void main(String[] arguments) { + try { + var result = processMain(arguments); + if (result.status == 0) { + System.out.println(result.output); + } else { + System.err.println(result.output); + } + System.exit(result.status); + } catch (IllegalArgumentException e) { + System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage()); + System.exit(1); } } - static MainResult processMain(String[] arguments) { + static MainResult processMain(String... arguments) { var valid_arguments = false; var perform_decode = false; var args = new ArrayList<>(List.of(arguments)); if (!args.isEmpty() && args.get(0).startsWith("-")) { var option = args.remove(0); - if (option.equals("-d")) { + if (("-d").equals(option)) { perform_decode = true; valid_arguments = (args.size() == 1); - } else if (option.equals("-e")) { + } else if (("-e").equals(option)) { valid_arguments = (args.size() == 1); } else { args.clear(); @@ -275,23 +308,13 @@ public final class UrlEncoder { } } - /** - * Main method to encode/decode URLs on the command line - * @param arguments the command line arguments - * @since 1.1 - */ - public static void main(String[] arguments) { - try { - var result = processMain(arguments); - if (result.status == 0) { - System.out.println(result.output); - } else { - System.err.println(result.output); - } - System.exit(result.status); - } catch(IllegalArgumentException e) { - System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage()); - System.exit(1); + static class MainResult { + final String output; + final int status; + + public MainResult(String output, int status) { + this.output = output; + this.status = status; } } } diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java index 99cf385..62e1db5 100644 --- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java +++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java @@ -75,10 +75,9 @@ class UrlEncoderTest { @Test void testEncodeWithAllowArg() { - assertEquals("?test=a%20test", UrlEncoder.encode("?test=a test", '=', '?'), "encode(x, =, ?)"); assertEquals("?test=a%20test", UrlEncoder.encode("?test=a test", "=?"), "encode(x, =?)"); - assertEquals("aaa", UrlEncoder.encode("aaa", 'a'), "encode(aaa, a)"); - assertEquals(" ", UrlEncoder.encode(" ", ' '), "encode(' ', ' ')"); + assertEquals("aaa", UrlEncoder.encode("aaa", "a"), "encode(aaa, a)"); + assertEquals(" ", UrlEncoder.encode(" ", " "), "encode(' ', ' ')"); } @Test @@ -91,8 +90,15 @@ class UrlEncoderTest { @Test void testEncodeWithNulls() { assertNull(UrlEncoder.encode(null), "encode(null)"); - assertNull(UrlEncoder.encode(null, (String) null), "encode(null, null)"); - assertEquals("foo", UrlEncoder.encode("foo", (String) null), "encode(foo, null"); + assertNull(UrlEncoder.encode(null, null), "encode(null, null)"); + assertEquals("foo", UrlEncoder.encode("foo", null), "encode(foo, null"); + } + + @Test + void testEncodeSpaceToPlus() { + assertEquals("foo+bar", UrlEncoder.encode("foo bar", true)); + assertEquals("foo+bar++foo", UrlEncoder.encode("foo bar foo", true)); + assertEquals("foo bar", UrlEncoder.encode("foo bar", " ", true)); } @ParameterizedTest(name = "processMain(-d {1}) should be {0}")