/* * Copyright 2001-2022 Geert Bevin (gbevin[remove] at uwyn dot com) * Licensed under the Apache License, Version 2.0 (the "License") */ package com.uwyn.urlencoder; import java.nio.charset.StandardCharsets; import java.util.BitSet; /** * URL encoding and decoding. *
* Rules determined by RFC 3986.
*
* @author Geert Bevin (gbevin[remove] at uwyn dot com)
* @author Erik C. Thauvin (erik@thauvin.net)
* @since 1.0
*/
public final class UrlEncoder {
static final BitSet UNRESERVED_URI_CHARS;
private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();
static {
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
var unreserved = new BitSet('~' + 1);
unreserved.set('-');
unreserved.set('.');
for (int c = '0'; c <= '9'; ++c) unreserved.set(c);
for (int c = 'A'; c <= 'Z'; ++c) unreserved.set(c);
unreserved.set('_');
for (int c = 'a'; c <= 'z'; ++c) unreserved.set(c);
unreserved.set('~');
UNRESERVED_URI_CHARS = unreserved;
}
private static void appendUrlEncodedByte(StringBuilder out, int ch) {
out.append("%");
appendUrlEncodedDigit(out, ch >> 4);
appendUrlEncodedDigit(out, ch);
}
private static void appendUrlEncodedDigit(StringBuilder out, int digit) {
out.append(HEX_DIGITS[digit & 0x0F]);
}
private UrlEncoder() {
// no-op
}
/**
* Transforms a provided String
URL into a new string,
* containing decoded URL characters in the UTF-8 encoding.
*
* @param source The string URL that has to be decoded
* @return The decoded String
object.
* @see #encode(String, String)
* @since 1.0
*/
public static String decode(String source) {
if (source == null || source.isBlank()) {
return source;
}
var length = source.length();
StringBuilder out = null;
char ch;
byte[] bytes_buffer = null;
var bytes_pos = 0;
var i = 0;
while (i < length) {
ch = source.charAt(i);
if (ch == '%') {
if (out == null) {
out = new StringBuilder(length);
out.append(source, 0, i);
}
if (bytes_buffer == null) {
// the remaining characters divided by the length
// of the encoding format %xx, is the maximum number of
// bytes that can be extracted
bytes_buffer = new byte[(length - i) / 3];
}
i += 1;
if (length < i + 2) {
throw new IllegalArgumentException("Illegal escape sequence");
}
try {
var v = Integer.parseInt(source, i, i + 2, 16);
if (v < 0 || v > 0xFF) {
throw new IllegalArgumentException("Illegal escape value");
}
bytes_buffer[bytes_pos++] = (byte) v;
i += 2;
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Illegal characters in escape sequence: " + e.getMessage());
}
} else {
if (bytes_buffer != null) {
out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8));
bytes_buffer = null;
bytes_pos = 0;
}
if (out != null) {
out.append(ch);
}
i += 1;
}
}
if (out == null) {
return source;
}
if (bytes_buffer != null) {
out.append(new String(bytes_buffer, 0, bytes_pos, StandardCharsets.UTF_8));
}
return out.toString();
}
/**
* Transforms a provided String
object into a new string,
* containing only valid URL characters in the UTF-8 encoding.
*
* @param source The string that has to be transformed into a valid URL
* string.
* @return The encoded String
object.
* @see #decode(String)
* @since 1.0
*/
public static String encode(String source) {
return encode(source, (String) null);
}
/**
* Transforms a provided String
object into a new string,
* containing only valid URL characters in the UTF-8 encoding.
*
* @param source The string that has to be transformed into a valid URL
* string.
* @param allow Additional characters to allow.
* @return The encoded String
object.
* @see #decode(String)
* @since 1.0
*/
public static String encode(String source, char... allow) {
return encode(source, new String(allow));
}
/**
* Transforms a provided String
object into a new string,
* containing only valid URL characters in the UTF-8 encoding.
*
* @param source The string that has to be transformed into a valid URL
* string.
* @param allow Additional characters to allow.
* @return The encoded String
object.
* @see #decode(String)
* @since 1.0
*/
public static String encode(String source, String allow) {
if (source == null || source.isBlank()) {
return source;
}
StringBuilder out = null;
char ch;
var i = 0;
while (i < source.length()) {
ch = source.charAt(i);
if (isUnreservedUriChar(ch) || (allow != null && allow.indexOf(ch) != -1)) {
if (out != null) {
out.append(ch);
}
i += 1;
} else {
if (out == null) {
out = new StringBuilder(source.length());
out.append(source, 0, i);
}
var cp = source.codePointAt(i);
if (cp < 0x80) {
appendUrlEncodedByte(out, cp);
i += 1;
} else if (Character.isBmpCodePoint(cp)) {
for (var b : Character.toString(ch).getBytes(StandardCharsets.UTF_8)) {
appendUrlEncodedByte(out, b);
}
i += 1;
} else if (Character.isSupplementaryCodePoint(cp)) {
var high = Character.highSurrogate(cp);
var low = Character.lowSurrogate(cp);
for (var b : new String(new char[]{high, low}).getBytes(StandardCharsets.UTF_8)) {
appendUrlEncodedByte(out, b);
}
i += 2;
}
}
}
if (out == null) {
return source;
}
return out.toString();
}
// see https://www.rfc-editor.org/rfc/rfc3986#page-13
private static boolean isUnreservedUriChar(char ch) {
return ch <= '~' && UNRESERVED_URI_CHARS.get(ch);
}
static class MainResult {
final String output;
final int status;
public MainResult(String output, int status) {
this.output = output;
this.status = status;
}
}
static MainResult handleMain(String[] arguments) {
var valid_arguments = true;
if (arguments.length < 1 ||
arguments.length > 2) {
valid_arguments = false;
} else if (!arguments[0].startsWith("-")) {
if (arguments.length > 1) {
valid_arguments = false;
}
} else {
if (!arguments[0].equals("-e") &&
!arguments[0].equals("-d")) {
valid_arguments = false;
}
}
if (!valid_arguments) {
return new MainResult("Usage : java " + UrlEncoder.class.getName() + " [-ed] text" + System.lineSeparator() +
"Encode and decode URL parameters." + System.lineSeparator() +
" -e encode (default)" + System.lineSeparator() +
" -d decode" + System.lineSeparator(), 1);
}
if (1 == arguments.length) {
return new MainResult(UrlEncoder.encode(arguments[0]), 0);
} else if (arguments[0].equals("-e")) {
return new MainResult(UrlEncoder.encode(arguments[1]), 0);
}
return new MainResult(UrlEncoder.decode(arguments[1]), 0);
}
public static void main(String[] arguments) {
var result = handleMain(arguments);
switch (result.status) {
case 0: {
System.out.println(result.output);
System.exit(0);
}
case 1: {
System.err.println(result.output);
System.exit(1);
}
}
}
}