- * Rules determined by <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a>.
+ * Rules determined by combining the unreserved character set from
+ * <a href="https://www.rfc-editor.org/rfc/rfc3986#page-13">RFC 3986</a> with
+ * the percent-encode set from
+ * <a href="https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set">application/x-www-form-urlencoded</a>.
+ * <p>
+ * Both specs above support percent decoding of two hexadecimal digits to a
+ * binary octet; however, their unreserved sets of characters differ, and
+ * {@code application/x-www-form-urlencoded} adds conversion of space to {@code +},
+ * which has the potential to be misunderstood.
+ * <p>
+ * This class encodes with rules that will be decoded correctly in either case.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
@@ -22,17 +34,21 @@ public final class UrlEncoder {
static {
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
- var unreserved = new BitSet('~' + 1);
+ // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
+ var unreserved = new BitSet('z' + 1);
unreserved.set('-');
unreserved.set('.');
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
unreserved.set('_');
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
- unreserved.set('~');
UNRESERVED_URI_CHARS = unreserved;
}
+ private UrlEncoder() {
+ // no-op
+ }
+
private static void appendUrlEncodedByte(StringBuilder out, int ch) {
out.append("%");
appendUrlEncodedDigit(out, ch >> 4);
@@ -43,10 +59,6 @@ public final class UrlEncoder {
out.append(HEX_DIGITS[digit & 0x0F]);
}
- private UrlEncoder() {
- // no-op
- }
-
/**
* Transforms a provided <code>String</code> URL into a new string,
* containing decoded URL characters in the UTF-8 encoding.
@@ -57,7 +69,21 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String decode(String source) {
- if (source == null || source.isBlank()) {
+ return decode(source, false);
+ }
+
+ /**
+ * Transforms a provided <code>String</code> URL into a new string,
+ * containing decoded URL characters in the UTF-8 encoding.
+ *
+ * @param source The string URL that has to be decoded
+ * @param plusToSpace Convert any {@code +} to space.
+ * @return The decoded <code>String</code> object.
+ * @see #encode(String, String)
+ * @since 1.0
+ */
+ public static String decode(String source, boolean plusToSpace) {
+ if (source == null || source.isEmpty()) {
return source;
}
@@ -71,10 +97,7 @@ public final class UrlEncoder {
ch = source.charAt(i);
if (ch == '%') {
- if (out == null) {
- out = new StringBuilder(length);
- out.append(source, 0, i);
- }
+ out = startConstructingIfNeeded(out, source, i);
if (bytes_buffer == null) {
// the remaining characters divided by the length
@@ -97,7 +120,7 @@ public final class UrlEncoder {
i += 2;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage());
+ throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage(), e);
}
} else {
if (bytes_buffer != null) {
@@ -107,7 +130,10 @@ public final class UrlEncoder {
bytes_pos = 0;
}
- if (out != null) {
+ if (plusToSpace && ch == '+') {
+ out = startConstructingIfNeeded(out, source, i);
+ out.append(" ");
+ } else if (out != null) {
out.append(ch);
}
@@ -126,6 +152,14 @@ public final class UrlEncoder {
return out.toString();
}
+ private static StringBuilder startConstructingIfNeeded(StringBuilder out, String source, int currentSourcePosition) {
+ if (out == null) {
+ out = new StringBuilder(source.length());
+ out.append(source, 0, currentSourcePosition);
+ }
+ return out;
+ }
+
/**
* Transforms a provided <code>String</code> object into a new string,
* containing only valid URL characters in the UTF-8 encoding.
@@ -137,22 +171,7 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String encode(String source) {
- return encode(source, (String) null);
- }
-
- /**
- * Transforms a provided <code>String</code> object into a new string,
- * containing only valid URL characters in the UTF-8 encoding.
- *
- * @param source The string that has to be transformed into a valid URL
- * string.
- * @param allow Additional characters to allow.
- * @return The encoded <code>String</code> object.
- * @see #decode(String)
- * @since 1.0
- */
- public static String encode(String source, char... allow) {
- return encode(source, new String(allow));
+ return encode(source, null, false);
}
/**
@@ -167,6 +186,37 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String encode(String source, String allow) {
+ return encode(source, allow, false);
+ }
+
+ /**
+ * Transforms a provided <code>String</code> object into a new string,
+ * containing only valid URL characters in the UTF-8 encoding.
+ *
+ * @param source The string that has to be transformed into a valid URL
+ * string.
+ * @param spaceToPlus Convert any space to {@code +}.
+ * @return The encoded <code>String</code> object.
+ * @see #decode(String)
+ * @since 1.0
+ */
+ public static String encode(String source, boolean spaceToPlus) {
+ return encode(source, null, spaceToPlus);
+ }
+
+ /**
+ * Transforms a provided <code>String</code> object into a new string,
+ * containing only valid URL characters in the UTF-8 encoding.
+ *
+ * @param source The string that has to be transformed into a valid URL
+ * string.
+ * @param allow Additional characters to allow.
+ * @param spaceToPlus Convert any space to {@code +}.
+ * @return The encoded <code>String</code> object.
+ * @see #decode(String)
+ * @since 1.0
+ */
+ public static String encode(String source, String allow, boolean spaceToPlus) {
if (source == null || source.isEmpty()) {
return source;
}
@@ -182,14 +232,15 @@ public final class UrlEncoder {
}
i += 1;
} else {
- if (out == null) {
- out = new StringBuilder(source.length());
- out.append(source, 0, i);
- }
+ out = startConstructingIfNeeded(out, source, i);
var cp = source.codePointAt(i);
if (cp < 0x80) {
- appendUrlEncodedByte(out, cp);
+ if (spaceToPlus && ch == ' ') {
+ out.append('+');
+ } else {
+ appendUrlEncodedByte(out, cp);
+ }
i += 1;
} else if (Character.isBmpCodePoint(cp)) {
for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) {
@@ -215,30 +266,42 @@ public final class UrlEncoder {
}
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
+ // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private static boolean isUnreservedUriChar(char ch) {
- return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
+ return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
}
- static class MainResult {
- final String output;
- final int status;
-
- public MainResult(String output, int status) {
- this.output = output;
- this.status = status;
+ /**
+ * Main method to encode/decode URLs on the command line
+ *
+ * @param arguments the command line arguments
+ * @since 1.1
+ */
+ public static void main(String[] arguments) {
+ try {
+ var result = processMain(arguments);
+ if (result.status == 0) {
+ System.out.println(result.output);
+ } else {
+ System.err.println(result.output);
+ }
+ System.exit(result.status);
+ } catch (IllegalArgumentException e) {
+ System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage());
+ System.exit(1);
}
}
- static MainResult processMain(String[] arguments) {
+ static MainResult processMain(String... arguments) {
var valid_arguments = false;
var perform_decode = false;
var args = new ArrayList<>(List.of(arguments));
if (!args.isEmpty() && args.get(0).startsWith("-")) {
var option = args.remove(0);
- if (option.equals("-d")) {
+ if (("-d").equals(option)) {
perform_decode = true;
valid_arguments = (args.size() == 1);
- } else if (option.equals("-e")) {
+ } else if (("-e").equals(option)) {
valid_arguments = (args.size() == 1);
} else {
args.clear();
@@ -253,7 +316,7 @@ public final class UrlEncoder {
if (!valid_arguments) {
return new MainResult("Usage : java -jar urlencoder-*.jar [-ed] text" + System.lineSeparator() +
- "Encode and decode URL parameters." + System.lineSeparator() +
+ "Encode and decode URL components defensively." + System.lineSeparator() +
" -e encode (default)" + System.lineSeparator() +
" -d decode", 1);
}
@@ -264,23 +327,13 @@ public final class UrlEncoder {
}
}
- /**
- * Main method to encode/decode URLs on the command line
- * @param arguments the command line arguments
- * @since 1.1
- */
- public static void main(String[] arguments) {
- try {
- var result = processMain(arguments);
- if (result.status == 0) {
- System.out.println(result.output);
- } else {
- System.err.println(result.output);
- }
- System.exit(result.status);
- } catch(IllegalArgumentException e) {
- System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage());
- System.exit(1);
+ static class MainResult {
+ final String output;
+ final int status;
+
+ public MainResult(String output, int status) {
+ this.output = output;
+ this.status = status;
}
}
}
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
new file mode 100644
index 0000000..a0a140e
--- /dev/null
+++ b/src/main/java/module-info.java
@@ -0,0 +1,3 @@
+module com.uwyn.urlencoder {
+ exports com.uwyn.urlencoder;
+}
diff --git a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
similarity index 71%
rename from lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
rename to src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
index ecf6624..be7408d 100644
--- a/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
+++ b/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
@@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
class UrlEncoderTest {
- private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.~";
+ private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_."; // a-z, A-Z, 0-9, '-', '_', '.' — mirrors the unreserved set built in UrlEncoder (no '~')
private static Stream