- * Rules determined by combining the unreserved character set from
- * RFC 3986 with
- * the percent-encode set from
- * application/x-www-form-urlencoded.
- *
- * Both specs above support percent decoding of two hexadecimal digits to a
- * binary octet, however their unreserved set of characters differs and
- * {@code application/x-www-form-urlencoded} adds conversion of space to +,
- * which has the potential to be misunderstood.
- *
- * This class encodes with rules that will be decoded correctly in either case.
+ * Rules determined by RFC 3986.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
@@ -34,21 +22,17 @@ public final class UrlEncoder {
static {
// Populates the set of unreserved characters: '-', '.', '_', '~', ASCII digits and ASCII letters.
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
- // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
- var unreserved = new BitSet('z' + 1);
+ var unreserved = new BitSet('~' + 1); // sized so that '~', the highest character set below, has a bit
unreserved.set('-');
unreserved.set('.');
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
unreserved.set('_');
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
+ unreserved.set('~'); // tilde is part of the RFC 3986 unreserved set
UNRESERVED_URI_CHARS = unreserved; // consulted by isUnreservedUriChar
}
- private UrlEncoder() {
- // no-op
- }
-
private static void appendUrlEncodedByte(StringBuilder out, int ch) {
out.append("%");
appendUrlEncodedDigit(out, ch >> 4);
@@ -59,6 +43,10 @@ public final class UrlEncoder {
out.append(HEX_DIGITS[digit & 0x0F]);
}
+ private UrlEncoder() {
+ // no-op; private so that this final class of static helpers cannot be instantiated
+ }
+
/**
* Transforms a provided {@code String} URL into a new string,
* containing decoded URL characters in the UTF-8 encoding.
@@ -69,21 +57,7 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String decode(String source) {
- return decode(source, false);
- }
-
- /**
- * Transforms a provided String
URL into a new string,
- * containing decoded URL characters in the UTF-8 encoding.
- *
- * @param source The string URL that has to be decoded
- * @param plusToSpace Convert any {@code +} to space.
- * @return The decoded String
object.
- * @see #encode(String, String)
- * @since 1.0
- */
- public static String decode(String source, boolean plusToSpace) {
- if (source == null || source.isEmpty()) {
+ if (source == null || source.isBlank()) {
return source;
}
@@ -97,7 +71,10 @@ public final class UrlEncoder {
ch = source.charAt(i);
if (ch == '%') {
- out = startConstructingIfNeeded(out, source, i);
+ if (out == null) {
+ out = new StringBuilder(length);
+ out.append(source, 0, i);
+ }
if (bytes_buffer == null) {
// the remaining characters divided by the length
@@ -120,7 +97,7 @@ public final class UrlEncoder {
i += 2;
} catch (NumberFormatException e) {
- throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage(), e);
+ throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage());
}
} else {
if (bytes_buffer != null) {
@@ -130,10 +107,7 @@ public final class UrlEncoder {
bytes_pos = 0;
}
- if (plusToSpace && ch == '+') {
- out = startConstructingIfNeeded(out, source, i);
- out.append(" ");
- } else if (out != null) {
+ if (out != null) {
out.append(ch);
}
@@ -152,14 +126,6 @@ public final class UrlEncoder {
return out.toString();
}
- private static StringBuilder startConstructingIfNeeded(StringBuilder out, String source, int currentSourcePosition) {
- if (out == null) {
- out = new StringBuilder(source.length());
- out.append(source, 0, currentSourcePosition);
- }
- return out;
- }
-
/**
* Transforms a provided {@code String} object into a new string,
* containing only valid URL characters in the UTF-8 encoding.
@@ -171,7 +137,22 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String encode(String source) {
- return encode(source, null, false);
+ return encode(source, (String) null); // the cast selects the String overload; a bare null would also match char...
+ }
+
+ /**
+ * Transforms a provided {@code String} object into a new string,
+ * containing only valid URL characters in the UTF-8 encoding.
+ *
+ * @param source The string that has to be transformed into a valid URL
+ * string.
+ * @param allow Additional characters to allow; a {@code null} array is
+ * passed on as a {@code null} allow string.
+ * @return The encoded {@code String} object.
+ * @see #decode(String)
+ * @since 1.0
+ */
+ public static String encode(String source, char... allow) {
+ // new String((char[]) null) throws NullPointerException; forward null to the
+ // String overload instead, exactly as encode(String) does.
+ return encode(source, allow == null ? (String) null : new String(allow));
}
/**
@@ -186,37 +167,6 @@ public final class UrlEncoder {
* @since 1.0
*/
public static String encode(String source, String allow) {
- return encode(source, allow, false);
- }
-
- /**
- * Transforms a provided String
object into a new string,
- * containing only valid URL characters in the UTF-8 encoding.
- *
- * @param source The string that has to be transformed into a valid URL
- * string.
- * @param spaceToPlus Convert any space to {@code +}.
- * @return The encoded String
object.
- * @see #decode(String)
- * @since 1.0
- */
- public static String encode(String source, boolean spaceToPlus) {
- return encode(source, null, spaceToPlus);
- }
-
- /**
- * Transforms a provided String
object into a new string,
- * containing only valid URL characters in the UTF-8 encoding.
- *
- * @param source The string that has to be transformed into a valid URL
- * string.
- * @param allow Additional characters to allow.
- * @param spaceToPlus Convert any space to {@code +}.
- * @return The encoded String
object.
- * @see #decode(String)
- * @since 1.0
- */
- public static String encode(String source, String allow, boolean spaceToPlus) {
if (source == null || source.isEmpty()) {
return source;
}
@@ -232,15 +182,14 @@ public final class UrlEncoder {
}
i += 1;
} else {
- out = startConstructingIfNeeded(out, source, i);
+ if (out == null) {
+ out = new StringBuilder(source.length());
+ out.append(source, 0, i);
+ }
var cp = source.codePointAt(i);
if (cp < 0x80) {
- if (spaceToPlus && ch == ' ') {
- out.append('+');
- } else {
- appendUrlEncodedByte(out, cp);
- }
+ appendUrlEncodedByte(out, cp);
i += 1;
} else if (Character.isBmpCodePoint(cp)) {
for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) {
@@ -266,42 +215,30 @@ public final class UrlEncoder {
}
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
- // and https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
private static boolean isUnreservedUriChar(char ch) {
- return ch <= 'z' && UNRESERVED_URI_CHARS.get(ch);
+ return ch <= '~' && UNRESERVED_URI_CHARS.get(ch); // && short-circuits: the BitSet is only consulted for chars up to '~'
}
- /**
- * Main method to encode/decode URLs on the command line
- *
- * @param arguments the command line arguments
- * @since 1.1
- */
- public static void main(String[] arguments) {
- try {
- var result = processMain(arguments);
- if (result.status == 0) {
- System.out.println(result.output);
- } else {
- System.err.println(result.output);
- }
- System.exit(result.status);
- } catch (IllegalArgumentException e) {
- System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage());
- System.exit(1);
+ static class MainResult { // value returned by processMain and consumed by main
+ final String output; // text that main prints
+ final int status; // exit status: main prints to stdout when 0, to stderr otherwise
+
+ public MainResult(String output, int status) {
+ this.output = output;
+ this.status = status;
}
}
- static MainResult processMain(String... arguments) {
+ static MainResult processMain(String[] arguments) {
var valid_arguments = false;
var perform_decode = false;
var args = new ArrayList<>(List.of(arguments));
if (!args.isEmpty() && args.get(0).startsWith("-")) {
var option = args.remove(0);
- if (("-d").equals(option)) {
+ if (option.equals("-d")) {
perform_decode = true;
valid_arguments = (args.size() == 1);
- } else if (("-e").equals(option)) {
+ } else if (option.equals("-e")) {
valid_arguments = (args.size() == 1);
} else {
args.clear();
@@ -316,7 +253,7 @@ public final class UrlEncoder {
if (!valid_arguments) {
return new MainResult("Usage : java -jar urlencoder-*.jar [-ed] text" + System.lineSeparator() +
- "Encode and decode URL components defensively." + System.lineSeparator() +
+ "Encode and decode URL parameters." + System.lineSeparator() +
" -e encode (default)" + System.lineSeparator() +
" -d decode", 1);
}
@@ -327,13 +264,23 @@ public final class UrlEncoder {
}
}
- static class MainResult {
- final String output;
- final int status;
-
- public MainResult(String output, int status) {
- this.output = output;
- this.status = status;
+ /**
+ * Main method to encode/decode URLs on the command line.
+ * <p>
+ * Delegates to {@code processMain}. The resulting output is printed to
+ * standard output when the status is {@code 0} and to standard error
+ * otherwise, after which the JVM exits with that status. An
+ * {@link IllegalArgumentException} is reported on standard error,
+ * prefixed with the simple class name, and the JVM exits with status
+ * {@code 1}.
+ *
+ * @param arguments the command line arguments
+ * @since 1.1
+ */
+ public static void main(String[] arguments) {
+ try {
+ var result = processMain(arguments);
+ if (result.status == 0) {
+ System.out.println(result.output);
+ } else {
+ System.err.println(result.output);
+ }
+ System.exit(result.status);
+ } catch(IllegalArgumentException e) {
+ System.err.println(UrlEncoder.class.getSimpleName() + ": " + e.getMessage());
+ System.exit(1);
+ }
}
}
diff --git a/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
similarity index 71%
rename from src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
rename to lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
index be7408d..ecf6624 100644
--- a/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
+++ b/lib/src/test/java/com/uwyn/urlencoder/UrlEncoderTest.java
@@ -16,7 +16,7 @@ import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
class UrlEncoderTest {
- private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVQXYZ0123456789-_.";
+ // All RFC 3986 unreserved characters: a-z, A-Z, 0-9, '-', '_', '.', '~'.
+ private final String same = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~";
private static Stream