diff --git a/zip/BitInputStream.java b/zip/BitInputStream.java new file mode 100644 index 0000000..7864ce1 --- /dev/null +++ b/zip/BitInputStream.java @@ -0,0 +1,169 @@ +package org.json.zip; + +import java.io.IOException; +import java.io.InputStream; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * This is a big endian bit reader. It reads its bits from an InputStream. + * + * @version 2013-04-18 + * + */ +public class BitInputStream implements BitReader { + /** + * 2^n - 1 + */ + static final int[] mask = { 0, 1, 3, 7, 15, 31, 63, 127, 255 }; + + /** + * The number of bits remaining in the current byte. + */ + private int available = 0; + + /** + * Up to a byte's worth of unread bits. + */ + private int unread = 0; + + /** + * The source of the bits. + */ + private InputStream in; + + /** + * The number of bits read so far. This is used in padding. 
+     */
+    private long nrBits = 0;
+
+    /**
+     * Wrap an InputStream so that it can be consumed as a sequence of bits.
+     * The constructor itself does not read from the stream.
+     *
+     * @param in
+     *            The InputStream that supplies the bytes.
+     */
+    public BitInputStream(InputStream in) {
+        this.in = in;
+    }
+
+    /**
+     * Wrap an InputStream whose first byte has already been taken by the
+     * caller, for example to inspect it before deciding that the stream
+     * should be read as bits. The 8 bits of that byte are delivered before
+     * any further byte is requested from the InputStream.
+     *
+     * @param in
+     *            The InputStream that supplies the remaining bytes.
+     * @param firstByte
+     *            The byte to deliver first, which was probably read from in.
+     */
+    public BitInputStream(InputStream in, int firstByte) {
+        this(in);
+        this.available = 8;
+        this.unread = firstByte;
+    }
+
+    /**
+     * Read a single bit.
+     *
+     * @return true if the bit is a 1, false if it is a 0.
+     * @throws IOException
+     *             If the bit cannot be read.
+     */
+    public boolean bit() throws IOException {
+        int value = read(1);
+        return value != 0;
+    }
+
+    /**
+     * Report how many bits this BitInputStream has delivered so far. Pad bits
+     * that were skipped are counted. Bits of a byte that has been fetched
+     * from the underlying InputStream but not yet delivered are not counted.
+     *
+     * @return The number of bits read so far.
+     */
+    public long nrBits() {
+        return this.nrBits;
+    }
+
+    /**
+     * Skip the padding at the end of a block and check that it is all zeroes.
+     * The number of bits skipped is factor - (nrBits % factor), so when the
+     * position is already a multiple of factor a whole block of factor bits
+     * is consumed.
+     *
+     * @param factor
+     *            The size in bits of the block to pad. This will typically
+     *            be 8, 16, 32, 64, 128, 256, etc.
+     * @return true if the block was zero padded, or false if the padding
+     *         contains any one bits.
+     * @throws IOException
+     *             If the padding bits cannot be read.
+     */
+    public boolean pad(int factor) throws IOException {
+        boolean zeroPadded = true;
+        int remaining = factor - (int) (this.nrBits % factor);
+
+// Every padding bit is consumed, even after a one bit has been seen.
+
+        while (remaining > 0) {
+            if (bit()) {
+                zeroPadded = false;
+            }
+            remaining -= 1;
+        }
+        return zeroPadded;
+    }
+
+    /**
+     * Read some bits.
+     *
+     * @param width
+     *            The number of bits to read.
(0..32) + * @throws IOException + * @return the bits + */ + public int read(int width) throws IOException { + if (width == 0) { + return 0; + } + if (width < 0 || width > 32) { + throw new IOException("Bad read width."); + } + int result = 0; + while (width > 0) { + if (this.available == 0) { + this.unread = this.in.read(); + if (this.unread < 0) { + throw new IOException("Attempt to read past end."); + } + this.available = 8; + } + int take = width; + if (take > this.available) { + take = this.available; + } + result |= ((this.unread >>> (this.available - take)) & mask[take]) + << (width - take); + this.nrBits += take; + this.available -= take; + width -= take; + } + return result; + } +} diff --git a/zip/BitOutputStream.java b/zip/BitOutputStream.java new file mode 100644 index 0000000..526ad61 --- /dev/null +++ b/zip/BitOutputStream.java @@ -0,0 +1,154 @@ +package org.json.zip; + +import java.io.IOException; +import java.io.OutputStream; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ */
+
+/**
+ * A big endian bit writer. Bits are collected into bytes, most significant
+ * bit first, and each completed byte is written to an OutputStream.
+ *
+ * @version 2013-04-18
+ *
+ */
+public class BitOutputStream implements BitWriter {
+
+    /**
+     * The total number of bits accepted by write so far.
+     */
+    private long nrBits = 0;
+
+    /**
+     * The OutputStream that receives the completed bytes.
+     */
+    private OutputStream out;
+
+    /**
+     * The byte under construction: bits accepted but not yet written out.
+     */
+    private int unwritten;
+
+    /**
+     * The number of bit positions still free in this.unwritten.
+     */
+    private int vacant = 8;
+
+    /**
+     * Make a BitWriter that sends its bits to an OutputStream. A byte is
+     * handed to the OutputStream each time 8 bits have accumulated.
+     *
+     * @param out
+     *            The OutputStream that will receive the bytes.
+     */
+    public BitOutputStream(OutputStream out) {
+        this.out = out;
+    }
+
+    /**
+     * Report how many bits have been given to this BitOutputStream. The
+     * count may include bits of a partial byte that has not yet been passed
+     * to the underlying OutputStream.
+     *
+     * @return The number of bits written so far.
+     */
+    public long nrBits() {
+        return this.nrBits;
+    }
+
+    /**
+     * Write a single 1 bit.
+     *
+     * @throws IOException
+     *             If the underlying OutputStream fails.
+     */
+    public void one() throws IOException {
+        write(1, 1);
+    }
+
+    /**
+     * Fill the rest of the block with zero bits and flush the underlying
+     * OutputStream. pad(8) completes the last unfinished byte. The number of
+     * zero bits written is factor - (nrBits % factor), so when the position
+     * is already a multiple of factor a whole block of zeroes is written.
+     *
+     * @param factor
+     *            The size in bits of the block to pad. This will typically
+     *            be 8, 16, 32, 64, 128, 256, etc.
+     * @throws IOException
+     *             If the underlying OutputStream fails.
+     */
+    public void pad(int factor) throws IOException {
+        int remaining = factor - (int) (this.nrBits % factor);
+
+// First write the bits that do not make up a whole byte, then whole bytes.
+
+        int partial = remaining & 7;
+        if (partial > 0) {
+            write(0, partial);
+            remaining -= partial;
+        }
+        for (; remaining > 0; remaining -= 8) {
+            write(0, 8);
+        }
+        this.out.flush();
+    }
+
+    /**
+     * Write some bits.
Up to 32 bits can be written at a time. + * + * @param bits + * The bits to be written. + * @param width + * The number of bits to write. (0..32) + * @throws IOException + */ + public void write(int bits, int width) throws IOException { + if (bits == 0 && width == 0) { + return; + } + if (width <= 0 || width > 32) { + throw new IOException("Bad write width."); + } + while (width > 0) { + int actual = width; + if (actual > this.vacant) { + actual = this.vacant; + } + this.unwritten |= ((bits >>> (width - actual)) & + BitInputStream.mask[actual]) << (this.vacant - actual); + width -= actual; + nrBits += actual; + this.vacant -= actual; + if (this.vacant == 0) { + this.out.write(this.unwritten); + this.unwritten = 0; + this.vacant = 8; + } + } + } + + /** + * Write a 0 bit. + * + * @throws IOException + */ + public void zero() throws IOException { + write(0, 1); + + } +} diff --git a/zip/BitReader.java b/zip/BitReader.java new file mode 100644 index 0000000..1987729 --- /dev/null +++ b/zip/BitReader.java @@ -0,0 +1,41 @@ +package org.json.zip; + +import java.io.IOException; + +public interface BitReader { + /** + * Read one bit. + * + * @return true if it is a 1 bit. + */ + public boolean bit() throws IOException; + + /** + * Returns the number of bits that have been read from this bitreader. + * + * @return The number of bits read so far. + */ + public long nrBits(); + + /** + * Check that the rest of the block has been padded with zeroes. + * + * @param factor + * The size in bits of the block to pad. This will typically be + * 8, 16, 32, 64, 128, 256, etc. + * @return true if the block was zero padded, or false if the the padding + * contained any one bits. + * @throws IOException + */ + public boolean pad(int factor) throws IOException; + + /** + * Read some bits. + * + * @param width + * The number of bits to read. 
(0..32) + * @throws IOException + * @return the bits + */ + public int read(int width) throws IOException; +} diff --git a/zip/BitWriter.java b/zip/BitWriter.java new file mode 100644 index 0000000..83eb7e3 --- /dev/null +++ b/zip/BitWriter.java @@ -0,0 +1,51 @@ +package org.json.zip; + +import java.io.IOException; + +/** + * A bitwriter is a an interface that allows for doing output at the bit level. + * Most IO interfaces only allow for writing at the byte level or higher. + */ +public interface BitWriter { + /** + * Returns the number of bits that have been written to this bitwriter. + */ + public long nrBits(); + + /** + * Write a 1 bit. + * + * @throws IOException + */ + public void one() throws IOException; + + /** + * Pad the rest of the block with zeros and flush. + * + * @param factor + * The size in bits of the block to pad. This will typically be + * 8, 16, 32, 64, 128, 256, etc. + * @return true if the block was zero padded, or false if the the padding + * contains any one bits. + * @throws IOException + */ + public void pad(int factor) throws IOException; + + /** + * Write some bits. Up to 32 bits can be written at a time. + * + * @param bits + * The bits to be written. + * @param width + * The number of bits to write. (0..32) + * @throws IOException + */ + public void write(int bits, int width) throws IOException; + + /** + * Write a 0 bit. 
+ * + * @throws IOException + */ + public void zero() throws IOException; +} diff --git a/zip/Compressor.java b/zip/Compressor.java new file mode 100644 index 0000000..6dddff4 --- /dev/null +++ b/zip/Compressor.java @@ -0,0 +1,575 @@ +package org.json.zip; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.Map; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.Kim; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * JSONzip is a compression scheme for JSON text. + * + * @author JSON.org + * @version 2013-04-18 + */ + +/** + * A compressor implements the compression behavior of JSONzip. It provides a + * zip method that takes a JSONObject or JSONArray and delivers a stream of + * bits to a BitWriter. + * + * FOR EVALUATION PURPOSES ONLY. 
THIS PACKAGE HAS NOT BEEN TESTED ADEQUATELY
+ * FOR PRODUCTION USE.
+ */
+public class Compressor extends JSONzip {
+
+    /**
+     * The BitWriter that receives the compressed bits.
+     */
+    final BitWriter bitwriter;
+
+    /**
+     * Make a compressor that writes to the given BitWriter. The compressor
+     * may be used for an entire session or subsession.
+     *
+     * @param bitwriter
+     *            The BitWriter this Compressor will output to. Don't forget
+     *            to flush.
+     */
+    public Compressor(BitWriter bitwriter) {
+        super();
+        this.bitwriter = bitwriter;
+    }
+
+    /**
+     * Map a character of a JSON number to its 4 bit code. The digits '0' to
+     * '9' map to 0 to 9, '.' to 10, '-' to 11 and '+' to 12. Every other
+     * character, which is expected to be 'E' or 'e', maps to 13.
+     *
+     * @param digit
+     *            An ASCII character from a JSON number.
+     * @return The 4 bit code, between 0 and 13.
+     */
+    private static int bcd(char digit) {
+        if (digit == '.') {
+            return 10;
+        }
+        if (digit == '-') {
+            return 11;
+        }
+        if (digit == '+') {
+            return 12;
+        }
+        if ('0' <= digit && digit <= '9') {
+            return digit - '0';
+        }
+        return 13;
+    }
+
+    /**
+     * Complete the final byte and flush the bitwriter. This is the same as
+     * calling pad(8).
+     *
+     * @throws JSONException
+     *             If the bitwriter fails.
+     */
+    public void flush() throws JSONException {
+        pad(8);
+    }
+
+    /**
+     * Output a one bit, logging it first when probing is on.
+     *
+     * @throws JSONException
+     *             If the bit cannot be written.
+     */
+    private void one() throws JSONException {
+        if (probe) {
+            log(1);
+        }
+        write(1, 1);
+    }
+
+    /**
+     * Pad the output to fill an allotment of bits.
+     *
+     * @param factor
+     *            The size of the bit allotment. A value of 8 will complete
+     *            and flush the current byte. If you don't pad, then some of
+     *            the last bits might not be sent to the Output Stream.
+     * @throws JSONException
+     *             Wrapping anything thrown by the bitwriter.
+     */
+    public void pad(int factor) throws JSONException {
+        try {
+            this.bitwriter.pad(factor);
+        } catch (Throwable cause) {
+            throw new JSONException(cause);
+        }
+    }
+
+    /**
+     * Write a number, using the number of bits necessary to hold the number.
+     *
+     * @param integer
+     *            The value to be encoded.
+     * @param width
+     *            The number of bits to encode the value, between 0 and 32.
+ * @throws JSONException + */ + private void write(int integer, int width) throws JSONException { + try { + this.bitwriter.write(integer, width); + if (probe) { + log(integer, width); + } + } catch (Throwable e) { + throw new JSONException(e); + } + } + + /** + * Write an integer with Huffman encoding. The bit pattern that is written + * will be determined by the Huffman encoder. + * + * @param integer + * The value to be written. + * @param huff + * The Huffman encoder. + * @throws JSONException + */ + private void write(int integer, Huff huff) throws JSONException { + huff.write(integer, this.bitwriter); + } + + /** + * Write each of the bytes in a kim with Huffman encoding. + * + * @param kim + * A kim containing the bytes to be written. + * @param huff + * The Huffman encoder. + * @throws JSONException + */ + private void write(Kim kim, Huff huff) throws JSONException { + write(kim, 0, kim.length, huff); + } + + /** + * Write a range of bytes from a Kim with Huffman encoding. + * + * @param kim + * A Kim containing the bytes to be written. + * @param from + * The index of the first byte to write. + * @param thru + * The index after the last byte to write. + * @param huff + * The Huffman encoder. + * @throws JSONException + */ + private void write(Kim kim, int from, int thru, Huff huff) + throws JSONException { + for (int at = from; at < thru; at += 1) { + write(kim.get(at), huff); + } + } + + /** + * Write an integer, using the number of bits necessary to hold the number + * as determined by its keep, and increment its usage count in the keep. + * + * @param integer + * The value to be encoded. + * @param keep + * The Keep that the integer is one of. + * @throws JSONException + */ + private void writeAndTick(int integer, Keep keep) throws JSONException { + int width = keep.bitsize(); + keep.tick(integer); + if (probe) { + log("\"" + keep.value(integer) + "\""); + } + write(integer, width); + } + + /** + * Write a JSON Array. 
+     * After the 3 bit array code and the first element, every further element
+     * is announced by a 1 bit if it is of the same kind (string or not) as
+     * the first element, or by the bits 0 1 if it is of the other kind. The
+     * bits 0 0 end the array.
+     *
+     * @param jsonarray
+     *            The array to write. Null elements are written as
+     *            JSONObject.NULL.
+     * @throws JSONException
+     *             If the array cannot be written.
+     */
+    private void writeArray(JSONArray jsonarray) throws JSONException {
+
+// JSONzip has three encodings for arrays:
+// The array is empty (zipEmptyArray).
+// First value in the array is a string (zipArrayString).
+// First value in the array is not a string (zipArrayValue).
+
+        int length = jsonarray.length();
+        if (length == 0) {
+            write(zipEmptyArray, 3);
+            return;
+        }
+
+// The first element chooses the array code and sets the expected kind.
+
+        Object first = jsonarray.get(0);
+        if (first == null) {
+            first = JSONObject.NULL;
+        }
+        boolean stringy = first instanceof String;
+        if (stringy) {
+            write(zipArrayString, 3);
+            writeString((String) first);
+        } else {
+            write(zipArrayValue, 3);
+            writeValue(first);
+        }
+
+// Each remaining element is prefixed with 1, or with 0 1 if its kind differs.
+
+        for (int index = 1; index < length; index += 1) {
+            if (probe) {
+                log();
+            }
+            Object element = jsonarray.get(index);
+            if (element == null) {
+                element = JSONObject.NULL;
+            }
+            boolean isString = element instanceof String;
+            if (isString != stringy) {
+                zero();
+            }
+            one();
+            if (isString) {
+                writeString((String) element);
+            } else {
+                writeValue(element);
+            }
+        }
+
+// Two zero bits mark the end of the array.
+
+        zero();
+        zero();
+    }
+
+    /**
+     * Write a JSON value.
+     *
+     * @param value
+     *            One of these types: JSONObject, JSONArray (or Map or
+     *            Collection or array), Number (or Integer or Long or Double),
+     *            or String, or Boolean, or JSONObject.NULL, or null.
+ * @throws JSONException + */ + private void writeJSON(Object value) throws JSONException { + if (JSONObject.NULL.equals(value)) { + write(zipNull, 3); + } else if (Boolean.FALSE.equals(value)) { + write(zipFalse, 3); + } else if (Boolean.TRUE.equals(value)) { + write(zipTrue, 3); + } else { + if (value instanceof Map) { + value = new JSONObject((Map) value); + } else if (value instanceof Collection) { + value = new JSONArray((Collection) value); + } else if (value.getClass().isArray()) { + value = new JSONArray(value); + } + if (value instanceof JSONObject) { + writeObject((JSONObject) value); + } else if (value instanceof JSONArray) { + writeArray((JSONArray) value); + } else { + throw new JSONException("Unrecognized object"); + } + } + } + + /** + * Write the name of an object property. Names have their own Keep and + * Huffman encoder because they are expected to be a more restricted set. + * + * @param name + * @throws JSONException + */ + private void writeName(String name) throws JSONException { + +// If this name has already been registered, then emit its integer and +// increment its usage count. + + Kim kim = new Kim(name); + int integer = this.namekeep.find(kim); + if (integer != none) { + one(); + writeAndTick(integer, this.namekeep); + } else { + +// Otherwise, emit the string with Huffman encoding, and register it. + + zero(); + write(kim, this.namehuff); + write(end, namehuff); + this.namekeep.register(kim); + } + } + + /** + * Write a JSON object. + * + * @param jsonobject + * @return + * @throws JSONException + */ + private void writeObject(JSONObject jsonobject) throws JSONException { + +// JSONzip has two encodings for objects: Empty Objects (zipEmptyObject) and +// non-empty objects (zipObject). 
+ + boolean first = true; + Iterator keys = jsonobject.keys(); + while (keys.hasNext()) { + if (probe) { + log("\n"); + } + Object key = keys.next(); + if (key instanceof String) { + if (first) { + first = false; + write(zipObject, 3); + } else { + one(); + } + writeName((String) key); + Object value = jsonobject.get((String) key); + if (value instanceof String) { + zero(); + writeString((String) value); + } else { + one(); + writeValue(value); + } + } + } + if (first) { + write(zipEmptyObject, 3); + } else { + zero(); + } + } + + /** + * Write a string. + * + * @param string + * @throws JSONException + */ + private void writeString(String string) throws JSONException { + +// Special case for empty strings. + + if (string.length() == 0) { + zero(); + zero(); + write(end, this.substringhuff); + zero(); + } else { + Kim kim = new Kim(string); + +// Look for the string in the strings keep. If it is found, emit its +// integer and count that as a use. + + int integer = this.stringkeep.find(kim); + if (integer != none) { + one(); + writeAndTick(integer, this.stringkeep); + } else { + +// But if it is not found, emit the string's substrings. Register the string +// so that the next lookup will succeed. + + writeSubstring(kim); + this.stringkeep.register(kim); + } + } + } + + /** + * Write a string, attempting to match registered substrings. + * + * @param kim + * @throws JSONException + */ + private void writeSubstring(Kim kim) throws JSONException { + this.substringkeep.reserve(); + zero(); + int from = 0; + int thru = kim.length; + int until = thru - JSONzip.minSubstringLength; + int previousFrom = none; + int previousThru = 0; + +// Find a substring from the substring keep. 
+ + while (true) { + int at; + int integer = none; + for (at = from; at <= until; at += 1) { + integer = this.substringkeep.match(kim, at, thru); + if (integer != none) { + break; + } + } + if (integer == none) { + break; + } + +// If a substring is found, emit any characters that were before the matched +// substring. Then emit the substring's integer and loop back to match the +// remainder with another substring. + + if (from != at) { + zero(); + write(kim, from, at, this.substringhuff); + write(end, this.substringhuff); + if (previousFrom != none) { + this.substringkeep.registerOne(kim, previousFrom, + previousThru); + previousFrom = none; + } + } + one(); + writeAndTick(integer, this.substringkeep); + from = at + this.substringkeep.length(integer); + if (previousFrom != none) { + this.substringkeep.registerOne(kim, previousFrom, + previousThru); + previousFrom = none; + } + previousFrom = at; + previousThru = from + 1; + } + +// If a substring is not found, then emit the remaining characters. + + zero(); + if (from < thru) { + write(kim, from, thru, this.substringhuff); + if (previousFrom != none) { + this.substringkeep.registerOne(kim, previousFrom, previousThru); + } + } + write(end, this.substringhuff); + zero(); + +// Register the string's substrings in the trie in hopes of future substring +// matching. + + substringkeep.registerMany(kim); + } + + /** + * Write a value. + * + * @param value + * One of these types: Boolean, Number, etc. 
+ * @throws JSONException + */ + private void writeValue(Object value) throws JSONException { + if (value instanceof Number) { + String string = JSONObject.numberToString((Number) value); + int integer = this.values.find(string); + if (integer != none) { + write(2, 2); + writeAndTick(integer, this.values); + return; + } + if (value instanceof Integer || value instanceof Long) { + long longer = ((Number) value).longValue(); + if (longer >= 0 && longer < int14) { + write(0, 2); + if (longer < int4) { + zero(); + write((int) longer, 4); + return; + } + one(); + if (longer < int7) { + zero(); + write((int) longer, 7); + return; + } + one(); + write((int) longer, 14); + return; + } + } + write(1, 2); + for (int i = 0; i < string.length(); i += 1) { + write(bcd(string.charAt(i)), 4); + } + write(endOfNumber, 4); + this.values.register(string); + } else { + write(3, 2); + writeJSON(value); + } + } + + /** + * Output a zero bit. + * + * @throws JSONException + * + * @throws IOException + */ + private void zero() throws JSONException { + if (probe) { + log(0); + } + write(0, 1); + } + + /** + * Compress a JSONObject. + * + * @param jsonobject + * @throws JSONException + */ + public void zip(JSONObject jsonobject) throws JSONException { + begin(); + writeJSON(jsonobject); + } + + /** + * Compress a JSONArray. 
+ * + * @param jsonarray + * @throws JSONException + */ + public void zip(JSONArray jsonarray) throws JSONException { + begin(); + writeJSON(jsonarray); + } +} diff --git a/zip/Decompressor.java b/zip/Decompressor.java new file mode 100644 index 0000000..108a2e2 --- /dev/null +++ b/zip/Decompressor.java @@ -0,0 +1,325 @@ +package org.json.zip; + +import java.io.UnsupportedEncodingException; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.Kim; + +/* + Copyright (c) 2012 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * JSONzip is a compression scheme for JSON text. + * + * @author JSON.org + * @version 2013-04-18 + */ + +public class Decompressor extends JSONzip { + + /** + * A decompressor reads bits from a BitReader. + */ + BitReader bitreader; + + /** + * Create a new compressor. It may be used for an entire session or + * subsession. 
+     *
+     * @param bitreader
+     *            The bitreader that this decompressor will read from.
+     */
+    public Decompressor(BitReader bitreader) {
+        super();
+        this.bitreader = bitreader;
+    }
+
+    /**
+     * Read one bit, logging it when probing is on.
+     *
+     * @return true if 1, false if 0.
+     * @throws JSONException
+     *             Wrapping anything thrown while reading.
+     */
+    private boolean bit() throws JSONException {
+        boolean value;
+        try {
+            value = this.bitreader.bit();
+            if (probe) {
+                log(value ? 1 : 0);
+            }
+            return value;
+        } catch (Throwable e) {
+            throw new JSONException(e);
+        }
+
+    }
+
+    /**
+     * Read enough bits to obtain an integer from the keep, and increase that
+     * integer's weight.
+     *
+     * @param keep
+     *            The keep that the integer indexes. Its bitsize determines
+     *            how many bits are read.
+     * @param bitreader
+     *            The source of the bits.
+     * @return The value that the keep holds for the integer that was read.
+     * @throws JSONException
+     *             With the message "Deep error." if the integer is outside
+     *             the keep, or wrapping anything else thrown while reading.
+     */
+    private Object getAndTick(Keep keep, BitReader bitreader)
+            throws JSONException {
+        try {
+            int width = keep.bitsize();
+            int integer = bitreader.read(width);
+
+// Validate the index before it is used to look anything up in the keep.
+
+            if (integer < 0 || integer >= keep.length) {
+                throw new JSONException("Deep error.");
+            }
+            Object value = keep.value(integer);
+            if (JSONzip.probe) {
+                JSONzip.log("\"" + value + "\"");
+                JSONzip.log(integer, width);
+            }
+            keep.tick(integer);
+            return value;
+        } catch (JSONException e) {
+
+// Do not wrap our own exception a second time.
+
+            throw e;
+        } catch (Throwable e) {
+            throw new JSONException(e);
+        }
+    }
+
+    /**
+     * The pad method skips the bits that padded a stream to fit some
+     * allocation. pad(8) will skip over the remainder of a byte.
+     *
+     * @param factor
+     *            The size in bits of the allocation.
+     * @return true if all of the padding bits were zero.
+     * @throws JSONException
+     *             Wrapping anything thrown by the bitreader.
+     */
+    public boolean pad(int factor) throws JSONException {
+        try {
+            return this.bitreader.pad(factor);
+        } catch (Throwable e) {
+            throw new JSONException(e);
+        }
+    }
+
+    /**
+     * Read an integer, specifying its width in bits. The value is logged
+     * when probing is on.
+     *
+     * @param width
+     *            0 to 32.
+     * @return An unsigned integer.
+     * @throws JSONException
+     *             Wrapping anything thrown by the bitreader.
+     */
+    private int read(int width) throws JSONException {
+        try {
+            int value = this.bitreader.read(width);
+            if (probe) {
+                log(value, width);
+            }
+            return value;
+        } catch (Throwable e) {
+            throw new JSONException(e);
+        }
+    }
+
+    /**
+     * Read a JSONArray.
+     * After the first element, each further element is announced by a 1 bit
+     * if it is of the same kind as the first element, or by the bits 0 1 if
+     * it is of the other kind. The bits 0 0 end the array.
+     *
+     * @param stringy
+     *            true if the first element is a string.
+     * @return The JSONArray that was read.
+     * @throws JSONException
+     *             If the array cannot be read.
+     */
+    private JSONArray readArray(boolean stringy) throws JSONException {
+        JSONArray jsonarray = new JSONArray();
+        jsonarray.put(stringy ? readString() : readValue());
+        while (true) {
+            if (probe) {
+                log("\n");
+            }
+            if (!bit()) {
+                if (!bit()) {
+                    return jsonarray;
+                }
+                jsonarray.put(stringy ? readValue() : readString());
+            } else {
+                jsonarray.put(stringy ? readString() : readValue());
+            }
+        }
+    }
+
+    /**
+     * Read a JSON value. The type of value is determined by the next 3 bits.
+     *
+     * @return A JSONObject, a JSONArray, Boolean.TRUE, Boolean.FALSE, or
+     *         JSONObject.NULL.
+     * @throws JSONException
+     *             If the value cannot be read.
+     */
+    private Object readJSON() throws JSONException {
+        switch (read(3)) {
+        case zipObject:
+            return readObject();
+        case zipArrayString:
+            return readArray(true);
+        case zipArrayValue:
+            return readArray(false);
+        case zipEmptyObject:
+            return new JSONObject();
+        case zipEmptyArray:
+            return new JSONArray();
+        case zipTrue:
+            return Boolean.TRUE;
+        case zipFalse:
+            return Boolean.FALSE;
+        default:
+            return JSONObject.NULL;
+        }
+    }
+
+    /**
+     * Read the name of an object property. A leading 1 bit means that the
+     * name is already in the name keep and is followed by its integer. A
+     * leading 0 bit means that the bytes of the name follow, Huffman encoded
+     * and terminated by the end symbol. Such a name is registered in the
+     * name keep unless it is empty.
+     *
+     * @return The name.
+     * @throws JSONException
+     *             If the name cannot be read or is longer than 65536 bytes.
+     */
+    private String readName() throws JSONException {
+        if (bit()) {
+            return getAndTick(this.namekeep, this.bitreader).toString();
+        }
+
+// The buffer is only needed when the name is spelled out.
+
+        byte[] bytes = new byte[65536];
+        int length = 0;
+        while (true) {
+            int c = this.namehuff.read(this.bitreader);
+            if (c == end) {
+                break;
+            }
+            if (length >= bytes.length) {
+                throw new JSONException("Name is too long.");
+            }
+            bytes[length] = (byte) c;
+            length += 1;
+        }
+        if (length == 0) {
+            return "";
+        }
+        Kim kim = new Kim(bytes, length);
+        this.namekeep.register(kim);
+        return kim.toString();
+    }
+
+    /**
+     * Read a non-empty JSONObject. Each name is followed by a 0 bit and a
+     * string, or by a 1 bit and a value. After each pair, a 1 bit announces
+     * another pair and a 0 bit ends the object.
+     *
+     * @return The JSONObject that was read.
+     * @throws JSONException
+     *             If the object cannot be read.
+     */
+    private JSONObject readObject() throws JSONException {
+        JSONObject jsonobject = new JSONObject();
+        while (true) {
+            if (probe) {
+                log("\n");
+            }
+            String name = readName();
+            jsonobject.put(name, !bit() ? readString() : readValue());
+            if (!bit()) {
+                return jsonobject;
+            }
+        }
+    }
+
+    /**
+     * Read a string. A leading 1 bit means that the string is in the string
+     * keep and is followed by its integer. Otherwise the string is assembled
+     * in a buffer from a sequence of pieces: pieces taken from the substring
+     * keep, and runs of Huffman encoded bytes terminated by the end symbol.
+     * A non-empty assembled string is registered in the string keep and its
+     * substrings are offered to the substring keep.
+     *
+     * @return The string.
+     * @throws JSONException
+     *             If the string cannot be read.
+     */
+    private String readString() throws JSONException {
+        Kim kim;
+        int from = 0;
+        int thru = 0;
+        int previousFrom = none;
+        int previousThru = 0;
+        if (bit()) {
+            return getAndTick(this.stringkeep, this.bitreader).toString();
+        }
+        byte[] bytes = new byte[65536];
+        boolean one = bit();
+        this.substringkeep.reserve();
+        while (true) {
+            if (one) {
+
+// A piece from the substring keep. NOTE(review): kim.copy is not bounds
+// checked here; presumably it returns the index after the last byte copied.
+
+                from = thru;
+                kim = (Kim) getAndTick(this.substringkeep, this.bitreader);
+                thru = kim.copy(bytes, from);
+                if (previousFrom != none) {
+                    this.substringkeep.registerOne(new Kim(bytes, previousFrom,
+                            previousThru + 1));
+                }
+                previousFrom = from;
+                previousThru = thru;
+                one = bit();
+            } else {
+
+// A run of Huffman encoded bytes, terminated by the end symbol.
+
+                from = none;
+                while (true) {
+                    int c = this.substringhuff.read(this.bitreader);
+                    if (c == end) {
+                        break;
+                    }
+                    if (thru >= bytes.length) {
+                        throw new JSONException("String is too long.");
+                    }
+                    bytes[thru] = (byte) c;
+                    thru += 1;
+                    if (previousFrom != none) {
+                        this.substringkeep.registerOne(new Kim(bytes,
+                                previousFrom, previousThru + 1));
+                        previousFrom = none;
+                    }
+                }
+
+// A 0 bit after the run ends the string. A 1 bit announces a keep piece.
+
+                if (!bit()) {
+                    break;
+                }
+                one = true;
+            }
+        }
+        if (thru == 0) {
+            return "";
+        }
+        kim = new Kim(bytes, thru);
+        this.stringkeep.register(kim);
+        this.substringkeep.registerMany(kim);
+        return kim.toString();
+    }
+
+    /**
+     * Read a value. The next 2 bits select the encoding:
+     * 0 is a small non-negative integer of 4, 7, or 14 bits,
+     * 1 is a number spelled out in 4 bit codes and ended by endOfNumber,
+     * 2 is a number that is already in the values keep,
+     * 3 is any other JSON value, as read by readJSON.
+     *
+     * @return The value.
+     * @throws JSONException
+     *             If the value cannot be read, or if a spelled out number is
+     *             longer than 256 characters.
+     */
+    private Object readValue() throws JSONException {
+        switch (read(2)) {
+        case 0:
+            return Integer.valueOf(read(!bit() ? 4 : !bit() ? 7 : 14));
+        case 1:
+            byte[] bytes = new byte[256];
+            int length = 0;
+            while (true) {
+                int c = read(4);
+                if (c == endOfNumber) {
+                    break;
+                }
+                if (length >= bytes.length) {
+                    throw new JSONException("Number is too long.");
+                }
+                bytes[length] = bcd[c];
+                length += 1;
+            }
+            Object value;
+            try {
+                value = JSONObject.stringToValue(new String(bytes, 0, length,
+                        "US-ASCII"));
+            } catch (UnsupportedEncodingException e) {
+                throw new JSONException(e);
+            }
+            this.values.register(value);
+            return value;
+        case 2:
+            return getAndTick(this.values, this.bitreader);
+        case 3:
+            return readJSON();
+        default:
+            throw new JSONException("Impossible.");
+        }
+    }
+
+    /**
+     * Decompress one JSON value from the bitreader.
+     *
+     * @return The value that was read, as produced by readJSON.
+     * @throws JSONException
+     *             If the value cannot be read.
+     */
+    public Object unzip() throws JSONException {
+        begin();
+        return readJSON();
+    }
+}
diff --git a/zip/Huff.java b/zip/Huff.java
new file mode 100644
index 0000000..2e1d1c9
--- /dev/null
+++ b/zip/Huff.java
@@ -0,0 +1,406 @@
+package org.json.zip;
+
+import org.json.JSONException;
+
+/*
+ Copyright (c) 2013 JSON.org
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ The Software shall be used for Good, not Evil.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+ */ + +/** + * JSONzip is a compression scheme for JSON text. + * @author JSON.org + * @version 2013-04-18 + */ + +/** + * A Huffman encoder/decoder. It operates over a domain of integers, which may + * map to characters or other symbols. Symbols that are used frequently are + * given shorter codes than symbols that are used infrequently. This usually + * produces shorter messages. + * + * Initially, all of the symbols are given the same weight. The weight of a + * symbol is incremented by the tick method. The generate method is used to + * generate the encoding table. The table must be generated before encoding or + * decoding. You may regenerate the table with the latest weights at any time. + */ +public class Huff implements None, PostMortem { + + /** + * The number of symbols known to the encoder. + */ + private final int domain; + + /** + * An array that maps symbol values to symbols. + */ + private final Symbol[] symbols; + + /** + * The root of the decoding table, and the terminal of the encoding table. + */ + private Symbol table; + + /** + * Have any weights changed since the table was last generated? + */ + private boolean upToDate = false; + + /** + * The number of bits in the last symbol. This is used in tracing. + */ + private int width; + + private static class Symbol implements PostMortem { + public Symbol back; + public Symbol next; + public Symbol zero; + public Symbol one; + public final int integer; + public long weight; + + /** + * Make a symbol representing a character or other value. 
+ * + * @param integer + * The symbol's number + */ + public Symbol(int integer) { + this.integer = integer; + this.weight = 0; + this.next = null; + this.back = null; + this.one = null; + this.zero = null; + } + + public boolean postMortem(PostMortem pm) { + boolean result = true; + Symbol that = (Symbol) pm; + + if (this.integer != that.integer || this.weight != that.weight) { + return false; + } + if ((this.back != null) != (that.back != null)) { + return false; + } + Symbol zero = this.zero; + Symbol one = this.one; + if (zero == null) { + if (that.zero != null) { + return false; + } + } else { + result = zero.postMortem(that.zero); + } + if (one == null) { + if (that.one != null) { + return false; + } + } else { + result = one.postMortem(that.one); + } + return result; + } + + } + + /** + * Construct a Huffman encoder/decoder. + * + * @param domain + * The number of values known to the object. + */ + public Huff(int domain) { + this.domain = domain; + int length = domain * 2 - 1; + this.symbols = new Symbol[length]; + +// Make the leaf symbols. + + for (int i = 0; i < domain; i += 1) { + symbols[i] = new Symbol(i); + } + +// SMake the links. + + for (int i = domain; i < length; i += 1) { + symbols[i] = new Symbol(none); + } + } + + /** + * Generate the encoding/decoding table. The table determines the bit + * sequences used by the read and write methods. + * + * @return this + */ + public void generate() { + if (!this.upToDate) { + +// Phase One: Sort the symbols by weight into a linked list. + + Symbol head = this.symbols[0]; + Symbol next; + Symbol previous = head; + Symbol symbol; + + this.table = null; + head.next = null; + for (int i = 1; i < this.domain; i += 1) { + symbol = symbols[i]; + +// If this symbol weights less than the head, then it becomes the new head. 
+ + if (symbol.weight < head.weight) { + symbol.next = head; + head = symbol; + } else { + +// To save time, we will start the search from the previous symbol instead +// of the head unless the current symbol weights less than the previous symbol. + + if (symbol.weight < previous.weight) { + previous = head; + } + +// Find a connected pair (previous and next) where the symbol weighs the same +// or more than previous but less than the next. Link the symbol between them. + + while (true) { + next = previous.next; + if (next == null || symbol.weight < next.weight) { + break; + } + previous = next; + } + symbol.next = next; + previous.next = symbol; + previous = symbol; + } + } + +// Phase Two: Make new symbols from the two lightest symbols until only one +// symbol remains. The final symbol becomes the root of the table binary tree. + + int avail = this.domain; + Symbol first; + Symbol second; + previous = head; + while (true) { + first = head; + second = first.next; + head = second.next; + symbol = this.symbols[avail]; + avail += 1; + symbol.weight = first.weight + second.weight; + symbol.zero = first; + symbol.one = second; + symbol.back = null; + first.back = symbol; + second.back = symbol; + if (head == null) { + break; + } + +// Insert the new symbol back into the sorted list. + + if (symbol.weight < head.weight) { + symbol.next = head; + head = symbol; + previous = head; + } else { + while (true) { + next = previous.next; + if (next == null || symbol.weight < next.weight) { + break; + } + previous = next; + } + symbol.next = next; + previous.next = symbol; + previous = symbol; + } + + } + +// The last remaining symbol is the root of the table. 
+ + this.table = symbol; + this.upToDate = true; + } + } + + private boolean postMortem(int integer) { + int[] bits = new int[this.domain]; + Symbol symbol = this.symbols[integer]; + if (symbol.integer != integer) { + return false; + } + int i = 0; + while (true) { + Symbol back = symbol.back; + if (back == null) { + break; + } + if (back.zero == symbol) { + bits[i] = 0; + } else if (back.one == symbol) { + bits[i] = 1; + } else { + return false; + } + i += 1; + symbol = back; + } + if (symbol != this.table) { + return false; + } + this.width = 0; + symbol = this.table; + while (symbol.integer == none) { + i -= 1; + symbol = bits[i] != 0 ? symbol.one : symbol.zero; + } + return symbol.integer == integer && i == 0; + } + + /** + * Compare two Huffman tables. + */ + public boolean postMortem(PostMortem pm) { + +// Go through every integer in the domain, generating its bit sequence, and +// then proving that that bit sequence produces the same integer. + + for (int integer = 0; integer < this.domain; integer += 1) { + if (!postMortem(integer)) { + JSONzip.log("\nBad huff "); + JSONzip.logchar(integer, integer); + return false; + } + } + return this.table.postMortem(((Huff) pm).table); + } + + /** + * Read bits until a symbol can be identified. The weight of the read + * symbol will be incremented. + * + * @param bitreader + * The source of bits. + * @return The integer value of the symbol. + * @throws JSONException + */ + public int read(BitReader bitreader) throws JSONException { + try { + this.width = 0; + Symbol symbol = this.table; + while (symbol.integer == none) { + this.width += 1; + symbol = bitreader.bit() ? symbol.one : symbol.zero; + } + tick(symbol.integer); + if (JSONzip.probe) { + JSONzip.logchar(symbol.integer, this.width); + } + return symbol.integer; + } catch (Throwable e) { + throw new JSONException(e); + } + } + + /** + * Increase by 1 the weight associated with a value. 
+ * + * @param value + * The number of the symbol to tick + * @return this + */ + public void tick(int value) { + this.symbols[value].weight += 1; + this.upToDate = false; + } + + /** + * Increase by 1 the weight associated with a range of values. + * + * @param from + * The first symbol to tick + * @param to + * The last symbol to tick + * @return this + */ + public void tick(int from, int to) { + for (int value = from; value <= to; value += 1) { + tick(value); + } + } + + /** + * Recur from a symbol back, emitting bits. We recur before emitting to + * make the bits come out in the right order. + * + * @param symbol + * The symbol to write. + * @param bitwriter + * The bitwriter to write it to. + * @throws JSONException + */ + private void write(Symbol symbol, BitWriter bitwriter) + throws JSONException { + try { + Symbol back = symbol.back; + if (back != null) { + this.width += 1; + write(back, bitwriter); + if (back.zero == symbol) { + bitwriter.zero(); + } else { + bitwriter.one(); + } + } + } catch (Throwable e) { + throw new JSONException(e); + } + } + + /** + * Write the bits corresponding to a symbol. The weight of the symbol will + * be incremented. + * + * @param value + * The number of the symbol to write + * @param bitwriter + * The destination of the bits. 
+ * @return this + * @throws JSONException + */ + public void write(int value, BitWriter bitwriter) throws JSONException { + this.width = 0; + write(this.symbols[value], bitwriter); + tick(value); + if (JSONzip.probe) { + JSONzip.logchar(value, this.width); + } + } +} diff --git a/zip/JSONzip.java b/zip/JSONzip.java new file mode 100644 index 0000000..2128742 --- /dev/null +++ b/zip/JSONzip.java @@ -0,0 +1,281 @@ +package org.json.zip; + + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * JSONzip is a binary-encoded JSON dialect. It is designed to compress the + * messages in a session. It is adaptive, so with each message seen, it should + * improve its compression. It minimizes JSON's overhead, reducing punctuation + * to a small number of bits. It uses Huffman encoding to reduce the average + * size of characters. 
It uses caches (or Keeps) to keep recently seen strings + * and values, so repetitive content (such as object keys) can be + * substantially reduced. It uses a character encoding called Kim (Keep it + * minimal) that is smaller than UTF-8 for most East European, African, and + * Asian scripts. + * + * JSONzip tends to reduce most content by about half. If there is a lot of + * recurring information, the reduction can be much more dramatic. + * + * FOR EVALUATION PURPOSES ONLY. THIS PACKAGE HAS NOT YET BEEN TESTED + * ADEQUATELY FOR PRODUCTION USE. + * + * @author JSON.org + * @version 2013-04-18 + */ +public abstract class JSONzip implements None, PostMortem { + /** + * Powers of 2. + */ + public static final int[] twos = { + 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, + 1024, 2048, 4096, 8192, 16384, 32768, 65536 + }; + + /** + * The characters in JSON numbers can be reduced to 4 bits each. + */ + public static final byte[] bcd = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '-', '+', 'E' + }; + + /** + * The number of integers that can be encoded in 4 bits. + */ + public static final long int4 = 16; + + /** + * The number of integers that can be encoded in 7 bits. + */ + public static final long int7 = 128; + + /** + * The number of integers that can be encoded in 14 bits. + */ + public static final long int14 = 16384; + + /** + * The end of string code. + */ + public static final int end = 256; + + /** + * The end of number code. + */ + public static final int endOfNumber = bcd.length; + + /** + * The maximum substring length when registering many. The registration of + * one substring may be longer. + */ + public static final int maxSubstringLength = 10; + + /** + * The minimum substring length. + */ + public static final int minSubstringLength = 3; + + /** + * The package supports tracing for debugging. + */ + public static final boolean probe = false; + + /** + * The maximum number of substrings added to the substrings keep per + * string. 
+ */ + public static final int substringLimit = 40; + + /** + * The value code for an empty object. + */ + public static final int zipEmptyObject = 0; + + /** + * The value code for an empty array. + */ + public static final int zipEmptyArray = 1; + + /** + * The value code for true. + */ + public static final int zipTrue = 2; + + /** + * The value code for false. + */ + public static final int zipFalse = 3; + + /** + * The value code for null. + */ + public static final int zipNull = 4; + + /** + * The value code for a non-empty object. + */ + public static final int zipObject = 5; + + /** + * The value code for an array with a string as its first element. + */ + public static final int zipArrayString = 6; + + /** + * The value code for an array with a non-string value as its first element. + */ + public static final int zipArrayValue = 7; + + /** + * A Huffman encoder for names. + */ + protected final Huff namehuff; + + /** + * A place to keep the names (keys). + */ + protected final MapKeep namekeep; + + /** + * A place to keep the strings. + */ + protected final MapKeep stringkeep; + + /** + * A Huffman encoder for string values. + */ + protected final Huff substringhuff; + + /** + * A place to keep the strings. + */ + protected final TrieKeep substringkeep; + + /** + * A place to keep the values. + */ + protected final MapKeep values; + + /** + * Initialize the data structures. + */ + protected JSONzip() { + this.namehuff = new Huff(end + 1); + this.namekeep = new MapKeep(9); + this.stringkeep = new MapKeep(11); + this.substringhuff = new Huff(end + 1); + this.substringkeep = new TrieKeep(12); + this.values = new MapKeep(10); + +// Increase the weights of the ASCII letters, digits, and special characters +// because they are highly likely to occur more frequently. The weight of each +// character will increase as it is used. The Huffman encoder will tend to +// use fewer bits to encode heavier characters. 
+ + this.namehuff.tick(' ', '}'); + this.namehuff.tick('a', 'z'); + this.namehuff.tick(end); + this.namehuff.tick(end); + this.substringhuff.tick(' ', '}'); + this.substringhuff.tick('a', 'z'); + this.substringhuff.tick(end); + this.substringhuff.tick(end); + } + + /** + * + */ + protected void begin() { + this.namehuff.generate(); + this.substringhuff.generate(); + } + + /** + * Write an end-of-line to the console. + */ + static void log() { + log("\n"); + } + + /** + * Write an integer to the console. + * + * @param integer + */ + static void log(int integer) { + log(integer + " "); + } + + /** + * Write two integers, separated by ':' to the console. + * + * @param integer + * @param width + */ + static void log(int integer, int width) { + log(integer + ":" + width + " "); + } + + /** + * Write a string to the console. + * + * @param string + */ + static void log(String string) { + System.out.print(string); + } + + /** + * Write a character or its code to the console. + * + * @param integer + * @param width + */ + static void logchar(int integer, int width) { + if (integer > ' ' && integer <= '}') { + log("'" + (char) integer + "':" + width + " "); + } else { + log(integer, width); + } + } + + /** + * This method is used for testing the implementation of JSONzip. It is not + * suitable for any other purpose. It is used to compare a Compressor and a + * Decompressor, verifying that the data structures that were built during + * zipping and unzipping were the same. + * + * @return true if the structures match. 
+ */ + public boolean postMortem(PostMortem pm) { + JSONzip that = (JSONzip) pm; + return this.namehuff.postMortem(that.namehuff) + && this.namekeep.postMortem(that.namekeep) + && this.stringkeep.postMortem(that.stringkeep) + && this.substringhuff.postMortem(that.substringhuff) + && this.substringkeep.postMortem(that.substringkeep) + && this.values.postMortem(that.values); + } +} diff --git a/zip/Keep.java b/zip/Keep.java new file mode 100644 index 0000000..377e344 --- /dev/null +++ b/zip/Keep.java @@ -0,0 +1,84 @@ +package org.json.zip; + + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * A keep is a data structure that associates strings (or substrings) with + * numbers. This allows the sending of small integers instead of strings. 
+ * + * @author JSON.org + * @version 2013-04-18 + */ +abstract class Keep implements None, PostMortem { + protected int capacity; + protected int length; + protected int power; + protected long[] uses; + + public Keep(int bits) { + this.capacity = JSONzip.twos[bits]; + this.length = 0; + this.power = 0; + this.uses = new long[this.capacity]; + } + + /** + * When an item ages, its use count is reduced by at least half. + * + * @param use + * The current use count of an item. + * @return The new use count for that item. + */ + public static long age(long use) { + return use >= 32 ? 16 : use / 2; + } + + /** + * Return the number of bits required to contain an integer based on the + * current length of the keep. As the keep fills up, the number of bits + * required to identify one of its items goes up. + */ + public int bitsize() { + while (JSONzip.twos[this.power] < this.length) { + this.power += 1; + } + return this.power; + } + + /** + * Increase the usage count on an integer value. + */ + public void tick(int integer) { + this.uses[integer] += 1; + } + + /** + * Get the value associated with an integer. + * @param integer The number of an item in the keep. + * @return The value. 
+ */ + abstract public Object value(int integer); +} diff --git a/zip/MapKeep.java b/zip/MapKeep.java new file mode 100644 index 0000000..1374e08 --- /dev/null +++ b/zip/MapKeep.java @@ -0,0 +1,160 @@ +package org.json.zip; + +import java.util.HashMap; + +import org.json.Kim; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * A keep is an associative data structure that maintains usage counts of each + * of the associations in its keeping. When the keep becomes full, it purges + * little used associations, and ages the survivors. Each key is assigned an + * integer value. When the keep is compacted, each key can be given a new + * value. + */ +class MapKeep extends Keep { + private Object[] list; + private HashMap map; + + /** + * Create a new Keep. + * @param bits + * The capacity of the keep expressed in the number of bits + * required to hold an integer. 
+ */ + public MapKeep(int bits) { + super(bits); + this.list = new Object[this.capacity]; + this.map = new HashMap(this.capacity); + } + + /** + * Compact the keep. A keep may contain at most this.capacity elements. + * The keep contents can be reduced by deleting all elements with low use + * counts, and by reducing the use counts of the survivors. + */ + private void compact() { + int from = 0; + int to = 0; + while (from < this.capacity) { + Object key = this.list[from]; + long usage = age(this.uses[from]); + if (usage > 0) { + this.uses[to] = usage; + this.list[to] = key; + this.map.put(key, new Integer(to)); + to += 1; + } else { + this.map.remove(key); + } + from += 1; + } + if (to < this.capacity) { + this.length = to; + } else { + this.map.clear(); + this.length = 0; + } + this.power = 0; + } + + /** + * Find the integer value associated with this key, or nothing if this key + * is not in the keep. + * + * @param key + * An object. + * @return An integer + */ + public int find(Object key) { + Object o = this.map.get(key); + return o instanceof Integer ? ((Integer) o).intValue() : none; + } + + public boolean postMortem(PostMortem pm) { + MapKeep that = (MapKeep) pm; + if (this.length != that.length) { + JSONzip.log(this.length + " <> " + that.length); + return false; + } + for (int i = 0; i < this.length; i += 1) { + boolean b; + if (this.list[i] instanceof Kim) { + b = ((Kim) this.list[i]).equals(that.list[i]); + } else { + Object o = this.list[i]; + Object q = that.list[i]; + if (o instanceof Number) { + o = o.toString(); + } + if (q instanceof Number) { + q = q.toString(); + } + b = o.equals(q); + } + if (!b) { + JSONzip.log("\n[" + i + "]\n " + this.list[i] + "\n " + + that.list[i] + "\n " + this.uses[i] + "\n " + + that.uses[i]); + return false; + } + } + return true; + } + + /** + * Register a value in the keep. Compact the keep if it is full. The next + * time this value is encountered, its integer can be sent instead. + * @param value A value. 
+ */ + public void register(Object value) { + if (JSONzip.probe) { + int integer = find(value); + if (integer >= 0) { + JSONzip.log("\nDuplicate key " + value); + } + } + if (this.length >= this.capacity) { + compact(); + } + this.list[this.length] = value; + this.map.put(value, new Integer(this.length)); + this.uses[this.length] = 1; + if (JSONzip.probe) { + JSONzip.log("<" + this.length + " " + value + "> "); + } + this.length += 1; + } + + /** + * Return the value associated with the integer. + * @param integer The number of an item in the keep. + * @return The value. + */ + public Object value(int integer) { + return this.list[integer]; + } +} diff --git a/zip/None.java b/zip/None.java new file mode 100644 index 0000000..818e68d --- /dev/null +++ b/zip/None.java @@ -0,0 +1,15 @@ +package org.json.zip; + +/** + * None is an interface that makes the constant none (short for + * negative one or long for -1) available to any class that implements it. + * The none value is used to stand for an integer that is not an integer, + * such as the negative result of a search. + */ +public interface None { + /** + * Negative One. + */ + public static final int none = -1; + +} diff --git a/zip/PostMortem.java b/zip/PostMortem.java new file mode 100644 index 0000000..22416d7 --- /dev/null +++ b/zip/PostMortem.java @@ -0,0 +1,47 @@ +package org.json.zip; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. 
/**
 * The PostMortem interface allows for testing the internal state of JSONzip
 * processors. Testing that JSONzip can compress an object and reproduce a
 * corresponding object is not sufficient. Complete testing requires that the
 * same internal data structures were constructed on both ends. If those
 * structures are not equivalent, then it is likely that the implementations
 * are not correct, even if conventional tests are passed.
 *
 * PostMortem allows for testing of deep structures without breaking
 * encapsulation.
 */
public interface PostMortem {
    /**
     * Determine if two objects are equivalent.
     *
     * @param pm
     *            Another object of the same type.
     * @return true if they match.
     */
    boolean postMortem(PostMortem pm);
}
diff --git a/zip/TrieKeep.java b/zip/TrieKeep.java new file mode 100644 index 0000000..dcb13c7 --- /dev/null +++ b/zip/TrieKeep.java @@ -0,0 +1,396 @@ +package org.json.zip; + +import org.json.Kim; + +/* + Copyright (c) 2013 JSON.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + +/** + * A TrieKeep is a Keep that implements a Trie. + */ +class TrieKeep extends Keep { + + /** + * The trie is made of nodes. + */ + class Node implements PostMortem { + private int integer; + private Node[] next; + + /** + * Each non-leaf node contains links to up to 256 next nodes. Each node + * has an integer value. + */ + public Node() { + this.integer = none; + this.next = null; + } + + /** + * Get one of a node's 256 links. If it is a leaf node, it returns + * null. + * + * @param cell + * A integer between 0 and 255. + * @return + */ + public Node get(int cell) { + return this.next == null ? 
null : this.next[cell]; + } + + /** + * Get one of a node's 256 links. If it is a leap node, it returns + * null. The argument is treated as an unsigned integer. + * + * @param cell + * A byte. + * @return + */ + public Node get(byte cell) { + return get(((int) cell) & 0xFF); + } + + /** + * Compare two nodes. Their lengths must be equal. Their links must + * also compare. + */ + public boolean postMortem(PostMortem pm) { + Node that = (Node) pm; + if (that == null) { + JSONzip.log("\nMisalign"); + return false; + } + if (this.integer != that.integer) { + JSONzip.log("\nInteger " + this.integer + " <> " + + that.integer); + return false; + } + if (this.next == null) { + if (that.next == null) { + return true; + } + JSONzip.log("\nNext is null " + this.integer); + return false; + } + for (int i = 0; i < 256; i += 1) { + Node node = this.next[i]; + if (node != null) { + if (!node.postMortem(that.next[i])) { + return false; + } + } else if (that.next[i] != null) { + JSONzip.log("\nMisalign " + i); + return false; + } + } + return true; + } + + /** + * Set a node's link to another node. + * + * @param cell + * An integer between 0 and 255. + * @param node + * The new value for the cell. + */ + public void set(int cell, Node node) { + if (this.next == null) { + this.next = new Node[256]; + } + if (JSONzip.probe) { + if (node == null || this.next[cell] != null) { + JSONzip.log("\nUnexpected set.\n"); + } + } + this.next[cell] = node; + } + + /** + * Set a node's link to another node. + * + * @param cell + * A byte. + * @param node + * The new value for the cell. + */ + public void set(byte cell, Node node) { + set(((int) cell) & 0xFF, node); + } + + /** + * Get one of a node's 256 links. It will not return null. If there is + * no link, then a link is manufactured. + * + * @param cell + * A integer between 0 and 255. 
+ * @return + */ + public Node vet(int cell) { + Node node = get(cell); + if (node == null) { + node = new Node(); + set(cell, node); + } + return node; + } + + /** + * Get one of a node's 256 links. It will not return null. If there is + * no link, then a link is manufactured. + * + * @param cell + * A byte. + * @return + */ + public Node vet(byte cell) { + return vet(((int) cell) & 0xFF); + } + } + + private int[] froms; + private int[] thrus; + private Node root; + private Kim[] kims; + + /** + * Create a new Keep of kims. + * + * @param bits + * The log2 of the capacity of the Keep. For example, if bits is + * 12, then the keep's capacity will be 4096. + */ + public TrieKeep(int bits) { + super(bits); + this.froms = new int[this.capacity]; + this.thrus = new int[this.capacity]; + this.kims = new Kim[this.capacity]; + this.root = new Node(); + } + + /** + * Get the kim associated with an integer. + * + * @param integer + * @return + */ + public Kim kim(int integer) { + Kim kim = this.kims[integer]; + int from = this.froms[integer]; + int thru = this.thrus[integer]; + if (from != 0 || thru != kim.length) { + kim = new Kim(kim, from, thru); + this.froms[integer] = 0; + this.thrus[integer] = kim.length; + this.kims[integer] = kim; + } + return kim; + } + + /** + * Get the length of the Kim associated with an integer. This is sometimes + * much faster than get(integer).length. + * + * @param integer + * @return + */ + public int length(int integer) { + return this.thrus[integer] - this.froms[integer]; + } + + /** + * Find the integer value associated with this key, or nothing if this key + * is not in the keep. + * + * @param key + * An object. 
+ * @return An integer + */ + public int match(Kim kim, int from, int thru) { + Node node = this.root; + int best = none; + for (int at = from; at < thru; at += 1) { + node = node.get(kim.get(at)); + if (node == null) { + break; + } + if (node.integer != none) { + best = node.integer; + } + from += 1; + } + return best; + } + + public boolean postMortem(PostMortem pm) { + boolean result = true; + TrieKeep that = (TrieKeep) pm; + if (this.length != that.length) { + JSONzip.log("\nLength " + this.length + " <> " + that.length); + return false; + } + if (this.capacity != that.capacity) { + JSONzip.log("\nCapacity " + this.capacity + " <> " + + that.capacity); + return false; + } + for (int i = 0; i < this.length; i += 1) { + Kim thiskim = this.kim(i); + Kim thatkim = that.kim(i); + if (!thiskim.equals(thatkim)) { + JSONzip.log("\n[" + i + "] " + thiskim + " <> " + thatkim); + result = false; + } + } + return result && this.root.postMortem(that.root); + } + + public void registerMany(Kim kim) { + int length = kim.length; + int limit = this.capacity - this.length; + if (limit > JSONzip.substringLimit) { + limit = JSONzip.substringLimit; + } + int until = length - (JSONzip.minSubstringLength - 1); + for (int from = 0; from < until; from += 1) { + int len = length - from; + if (len > JSONzip.maxSubstringLength) { + len = JSONzip.maxSubstringLength; + } + len += from; + Node node = this.root; + for (int at = from; at < len; at += 1) { + Node next = node.vet(kim.get(at)); + if (next.integer == none + && at - from >= (JSONzip.minSubstringLength - 1)) { + next.integer = this.length; + this.uses[this.length] = 1; + this.kims[this.length] = kim; + this.froms[this.length] = from; + this.thrus[this.length] = at + 1; + if (JSONzip.probe) { + try { + JSONzip.log("<<" + this.length + " " + + new Kim(kim, from, at + 1) + ">> "); + } catch (Throwable ignore) { + } + } + this.length += 1; + limit -= 1; + if (limit <= 0) { + return; + } + } + node = next; + } + } + } + + public void 
registerOne(Kim kim) { + int integer = registerOne(kim, 0, kim.length); + if (integer != none) { + this.kims[integer] = kim; + } + } + + public int registerOne(Kim kim, int from, int thru) { + if (this.length < this.capacity) { + Node node = this.root; + for (int at = from; at < thru; at += 1) { + node = node.vet(kim.get(at)); + } + if (node.integer == none) { + int integer = this.length; + node.integer = integer; + this.uses[integer] = 1; + this.kims[integer] = kim; + this.froms[integer] = from; + this.thrus[integer] = thru; + if (JSONzip.probe) { + try { + JSONzip.log("<<" + integer + " " + new Kim(kim, from, thru) + ">> "); + } catch (Throwable ignore) { + } + } + this.length += 1; + return integer; + } + } + return none; + } + + /** + * Reserve space in the keep, compacting if necessary. A keep may contain + * at most -capacity- elements. The keep contents can be reduced by + * deleting all elements with low use counts, rebuilding the trie with the + * survivors. + */ + public void reserve() { + if (this.capacity - this.length < JSONzip.substringLimit) { + int from = 0; + int to = 0; + this.root = new Node(); + while (from < this.capacity) { + if (this.uses[from] > 1) { + Kim kim = this.kims[from]; + int thru = this.thrus[from]; + Node node = this.root; + for (int at = this.froms[from]; at < thru; at += 1) { + Node next = node.vet(kim.get(at)); + node = next; + } + node.integer = to; + this.uses[to] = age(this.uses[from]); + this.froms[to] = this.froms[from]; + this.thrus[to] = thru; + this.kims[to] = kim; + to += 1; + } + from += 1; + } + +// It is possible, but highly unlikely, that too many items survive. +// If that happens, clear the keep. 
+ + if (this.capacity - to < JSONzip.substringLimit) { + this.power = 0; + this.root = new Node(); + to = 0; + } + this.length = to; + while (to < this.capacity) { + this.uses[to] = 0; + this.kims[to] = null; + this.froms[to] = 0; + this.thrus[to] = 0; + to += 1; + + } + } + } + + public Object value(int integer) { + return kim(integer); + } +}