Skip to content

Commit

Permalink
Merge pull request #10 from doubledutch/feat-improved-compression
Browse files Browse the repository at this point in the history
Feat improved compression
  • Loading branch information
kasperjj authored Aug 23, 2016
2 parents 5d5954d + 7cc8f8c commit 2270e1c
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 15 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version = '1.2.1'
version = '1.2.2'

group = "me.doubledutch"
archivesBaseName = "lazyjson"
Expand Down Expand Up @@ -64,7 +64,7 @@ check{


jar {
def versionFile = file('./src/main/resources/version.properties')
def versionFile = file('./src/main/resources/lazyjson_version.properties')

if(versionFile.canRead()){
def Properties props=new Properties()
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/me/doubledutch/lazyjson/LazyParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,14 @@ protected void tokenize() throws LazyException{
root=stack[1];
stackTop=root;
n++;
boolean expectValue=false;
LazyNode token=null;
for(;n<length;n++){
c=cbuf[n];
switch(c){
case '{':
push(LazyNode.cObject(n));
expectValue=false;
break;
case '}':
// The end of an object, pop off the last value and field if any
Expand Down Expand Up @@ -223,8 +225,13 @@ protected void tokenize() throws LazyException{
if(stackTop!=null && stackTop.type==LazyNode.FIELD){
drop();
}
// Was there a trailing comma?
if(expectValue){
throw new LazyException("Unexpected comma without another value",n-1);
}
break;
case '"':
expectValue=false;
if(stackTop.type==LazyNode.ARRAY){
token=LazyNode.cStringValue(n+1);
stackTop.addChild(token);
Expand Down Expand Up @@ -258,12 +265,14 @@ protected void tokenize() throws LazyException{
break;
case ',':
// This must be the end of a value and the start of another
expectValue=true;
break;
case '[':
if(stackTop.type==LazyNode.OBJECT){
throw new LazyException("Missing field name for array",n);
}
push(LazyNode.cArray(n));
expectValue=false;
break;
case ']':
token=pop();
Expand All @@ -283,6 +292,10 @@ protected void tokenize() throws LazyException{
if(stackTop!=null && stackTop.type==LazyNode.FIELD){
drop();
}
// Was there a trailing comma?
if(expectValue){
throw new LazyException("Unexpected comma without another value",n-1);
}
break;
case ' ':
case '\t':
Expand All @@ -292,6 +305,7 @@ protected void tokenize() throws LazyException{
break;
default:
// This must be a new value
expectValue=false;
if(c=='n'){
// Must be null value
if(cbuf[++n]=='u' && cbuf[++n]=='l' && cbuf[++n]=='l'){
Expand Down Expand Up @@ -354,5 +368,8 @@ protected void tokenize() throws LazyException{
if(size()!=0){
throw new LazyException("Unexpected end of JSON data");
}
if(expectValue){
throw new LazyException("Unexpected trailing comma");
}
}
}
32 changes: 26 additions & 6 deletions src/main/java/me/doubledutch/lazyjson/compressor/Compressor.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ public class Compressor{
private boolean dirtyFlag=false;
private DictionaryCache dictionary;

private int templateHit=0;
private int templateMiss=0;

public Compressor(String prefix,int windowSizeArg,int minRepetitions) throws IOException{
this.windowSize=windowSizeArg;
this.minRepetitions=minRepetitions;
Expand Down Expand Up @@ -69,11 +72,9 @@ public byte[] compress(String str){


public byte[] compress(LazyElement elm){
// 1. Parse data
// LazyElement elm=LazyElement.parse(str);
// 2. Generate template
// First, extract the template
Template t=elm.extractTemplate();
// 3. If template satisfies criterea - compress
// If the template satisfies our compression criteria - compress
if(shouldCompress(t)){
try{
ByteBuffer buf=ByteBuffer.allocate(elm.getSourceLength()-2);
Expand All @@ -83,17 +84,19 @@ public byte[] compress(LazyElement elm){
buf.rewind();
byte[] result=new byte[pos];
buf.get(result);
templateHit++;
return result;
}catch(BufferOverflowException boe){
// Compressed output larger than raw data
// Compressed output equal to or larger than raw data
}
}
// 4. return encoded data
// Return raw encoded data
// TODO: this is incredibly inefficient... fix!
byte[] encoded=elm.toString().getBytes(StandardCharsets.UTF_8);
ByteBuffer buf=ByteBuffer.allocate(2+encoded.length);
buf.putShort((short)-1);
buf.put(encoded);
templateMiss++;
return buf.array();
}

Expand Down Expand Up @@ -168,4 +171,21 @@ public void commit() throws IOException{
ftest.renameTo(new File(prefix+".dictionary"));
}
}

/**
 * Returns the number of elements currently held in the template set.
 *
 * @return the size of the template set
 */
public int getTemplateCount(){
    final int count=templateSet.size();
    return count;
}

/**
 * Returns the size reported by the dictionary backing this compressor.
 *
 * @return the value returned by the dictionary's getSize()
 */
public int getDictionaryCount(){
    final int entries=dictionary.getSize();
    return entries;
}

/**
 * Returns the ratio of template hits to the combined number of template
 * hits and misses recorded so far.
 *
 * @return templateHit divided by (templateHit + templateMiss), or 0.0 when
 *         the two counters sum to zero
 */
public double getTemplateUtilization(){
    final int attempts=templateHit+templateMiss;
    if(attempts==0){
        // Nothing counted yet - avoid dividing by zero
        return 0.0;
    }
    // Widen to double before summing so the division is floating point
    final double total=(double)templateHit+templateMiss;
    return templateHit/total;
}

/**
 * Returns the utilization figure reported by the dictionary backing this
 * compressor.
 *
 * @return the value returned by the dictionary's getDictionaryUtilization()
 */
public double getDictionaryUtilization(){
    final double utilization=dictionary.getDictionaryUtilization();
    return utilization;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ public class DictionaryCache{
private int minRepetitions;
private boolean dirty=false;

private int dictionaryHit=0;
private int dictionaryMiss=0;

/**
* Create a new dictionary with the given window size and given repetition
* requirement before new values are added to the dictionary.
Expand All @@ -39,6 +42,15 @@ protected boolean removeEldestEntry(Map.Entry<String,Integer> eldest){
};
}

/**
 * Returns the current value of the next-slot counter, i.e. the number of
 * entries that have been committed to this dictionary.
 *
 * @return the number of entries in the dictionary
 */
public int getSize(){
    final int size=next;
    return size;
}

/**
* Returns a flag specifying whether or not the dictionary has been modified.
*
Expand Down Expand Up @@ -132,14 +144,21 @@ public short get(String value){
*/
public short put(String value){
// Do we already have this value?
if(dataMap.containsKey(value))return dataMap.get(value);
if(dataMap.containsKey(value)){
dictionaryHit++;
return dataMap.get(value);
}
// Are we filled up?
if(next==MAX_SIZE)return -1;
if(next==MAX_SIZE){
dictionaryMiss++;
return -1;
}
// Should we add values without actual repetitions?
if(minRepetitions==0){
data[next]=value;
dataMap.put(value,next);
dirty=true;
dictionaryHit++;
return next++;
}
// Have we seen this value before?
Expand All @@ -151,6 +170,7 @@ public short put(String value){
data[next]=value;
dataMap.put(value,next);
dirty=true;
dictionaryHit++;
return next++;
}else{
slidingWindow.put(value,count);
Expand All @@ -159,6 +179,12 @@ public short put(String value){
// Add this new value to the map
slidingWindow.put(value,1);
}
dictionaryMiss++;
return -1;
}

/**
 * Returns the ratio of dictionary hits to the combined number of dictionary
 * hits and misses recorded so far.
 *
 * @return dictionaryHit divided by (dictionaryHit + dictionaryMiss), or 0.0
 *         when the two counters sum to zero
 */
public double getDictionaryUtilization(){
    final int lookups=dictionaryHit+dictionaryMiss;
    if(lookups==0){
        // Nothing counted yet - avoid dividing by zero
        return 0.0;
    }
    // Widen to double before summing so the division is floating point
    final double total=(double)dictionaryHit+dictionaryMiss;
    return dictionaryHit/total;
}
}
4 changes: 4 additions & 0 deletions src/main/resources/lazyjson_version.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#Tue Aug 23 10:23:09 PDT 2016
BUILD_VERSION=1.2.2
BUILD_DATE=2016-08-23T17\:23\:09Z
BUILD_NUMBER=590
4 changes: 0 additions & 4 deletions src/main/resources/version.properties

This file was deleted.

24 changes: 24 additions & 0 deletions src/test/java/me/doubledutch/lazyjson/BadJSONDataTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,30 @@ public void testBadNumber1() throws LazyException{
LazyObject obj=new LazyObject(str);
}

/**
 * An object with a comma directly before its closing brace is expected to
 * be rejected with a LazyException.
 */
@Test(expected=LazyException.class)
public void testBadComma1() throws LazyException{
    final String json="{\"foo\":42,}";
    new LazyObject(json);
}

/**
 * An array with a comma directly before its closing bracket is expected to
 * be rejected with a LazyException.
 */
@Test(expected=LazyException.class)
public void testBadComma2() throws LazyException{
    final String json="[\"foo\",42,]";
    new LazyObject(json);
}

/**
 * An empty array followed by a dangling comma is expected to be rejected
 * with a LazyException.
 */
@Test(expected=LazyException.class)
public void testBadComma3() throws LazyException{
    final String json="[],";
    new LazyObject(json);
}

/**
 * An empty object followed by a dangling comma is expected to be rejected
 * with a LazyException.
 */
@Test(expected=LazyException.class)
public void testBadComma4() throws LazyException{
    final String json="{},";
    new LazyObject(json);
}

@Test(expected=LazyException.class)
public void testBadNumber2() throws LazyException{
String str="{\"foo\":-9.f}";
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/me/doubledutch/lazyjson/LazyObjectTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ public void testComplexObject() throws Exception{
"\"Item\":{"+
"\"ID\":2983980,"+
"\"Rating\":5,"+
"\"Type\":null,"+
"\"Type\":null"+
"},"+
"\"User\":{"+
"\"ID\":478830012,"+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public void testSet() throws Exception{
LazyObject obj=new LazyObject(str);
assertEquals(i,obj.getInt("foo"));
}
assertTrue(c.getTemplateUtilization()>0.0);
}

@Test
Expand Down

0 comments on commit 2270e1c

Please sign in to comment.