Skip to content

Commit

Permalink
Add support for Malmo MDP (pull #21)
Browse files Browse the repository at this point in the history
  • Loading branch information
howard-abrams authored and saudet committed Sep 8, 2017
1 parent 013900a commit 55818ac
Show file tree
Hide file tree
Showing 16 changed files with 682 additions and 0 deletions.
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ Doom is not ready yet but you can make it work if you feel adventurous with some
* export MAVEN_OPTS=-Djava.library.path=THEFOLDEROFTHELIB
* mvn compile exec:java -Dexec.mainClass="YOURMAINCLASS"

# Malmo (Minecraft)

![Malmo](malmo.gif)

* Download and unzip Malmo from [here](https://github.com/Microsoft/malmo/releases)
* export MALMO_HOME=YOURMALMO_FOLDER
* export MALMO_XSD_PATH=$MALMO_HOME/Schemas
* Launch Malmo per the [instructions](https://github.com/Microsoft/malmo#launching-minecraft-with-our-mod)
* Run with this [main](https://github.com/deeplearning4j/dl4j-examples/blob/master/rl4j-examples/src/main/java/org/deeplearning4j/examples/rl4j/MalmoPixels.java)



# WIP

* Documentation
Expand Down
Binary file added malmo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<module>rl4j-gym</module>
<module>rl4j-doom</module>
<module>rl4j-ale</module>
<module>rl4j-malmo</module>
</modules>
<packaging>pom</packaging>

Expand Down Expand Up @@ -200,6 +201,7 @@
<directory>rl4j-gym</directory>
<directory>rl4j-doom</directory>
<directory>rl4j-ale</directory>
<directory>rl4j-malmo</directory>
</directories>
</configuration>
</plugin>
Expand Down
31 changes: 31 additions & 0 deletions rl4j-malmo/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<!-- Maven module descriptor for rl4j-malmo; the group id and version are inherited from the rl4j parent below. -->
<parent>
<groupId>org.deeplearning4j</groupId>
<artifactId>rl4j</artifactId>
<version>0.9.2-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>rl4j-malmo</artifactId>
<packaging>jar</packaging>

<name>rl4j-malmo</name>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<!-- Sibling rl4j-api module, pinned to the same version as this module. -->
<dependency>
<groupId>org.deeplearning4j</groupId>
<artifactId>rl4j-api</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Malmo Java bindings (provides com.microsoft.msr.malmo.*), fixed at release 0.30.0. -->
<dependency>
<groupId>com.microsoft.msr.malmo</groupId>
<artifactId>MalmoJavaJar</artifactId>
<version>0.30.0</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package org.deeplearning4j.malmo;

import org.deeplearning4j.rl4j.space.DiscreteSpace;

/**
 * Common parent of all Malmo action spaces: a discrete space whose action
 * indices are translated into the action strings that are sent to Malmo.
 * @author howard-abrams (howard.abrams@ca.com) on 1/12/17.
 */
public abstract class MalmoActionSpace extends DiscreteSpace {
    /**
     * Action strings that will be sent to Malmo, indexed by discrete action number.
     * This class never assigns the array itself; subclasses must populate it.
     */
    protected String[] actions;

    /**
     * Creates a space with the given number of discrete actions.
     * @param size number of discrete actions in this space
     */
    protected MalmoActionSpace(int size) {
        super(size);
    }

    /**
     * Re-seeds the random number generator used for random generation of actions.
     * @param seed random number generator seed
     */
    public void setRandomSeed(long seed) {
        rd.setSeed(seed);
    }

    /**
     * Returns the index used for "no operation".
     * NOTE(review): -1 is not a valid index into {@link #actions}, so passing it
     * to {@link #encode(Integer)} would fail - confirm callers never do that.
     * @return always -1
     */
    @Override
    public Integer noOp() {
        return Integer.valueOf(-1);
    }

    /**
     * Translates a discrete action index into its Malmo action string.
     * @param action index into the action string array
     * @return the action string stored at that index
     */
    @Override
    public Object encode(Integer action) {
        final int index = action;
        return actions[index];
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.deeplearning4j.malmo;

/**
 * Malmo action space made up of a fixed, caller-supplied set of action strings.
 * @author howard-abrams (howard.abrams@ca.com) on 1/12/17.
 */
public class MalmoActionSpaceDiscrete extends MalmoActionSpace {
    /**
     * Builds an action space containing one discrete action per supplied string.
     * The array is stored as given (no copy is made), and its length becomes the
     * size of the space.
     * @param actions action strings, in discrete-index order
     */
    public MalmoActionSpaceDiscrete(String... actions) {
        super(actions.length);
        this.actions = actions;
    }
}
32 changes: 32 additions & 0 deletions rl4j-malmo/src/main/java/org/deeplearning4j/malmo/MalmoBox.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.deeplearning4j.malmo;

import java.util.Arrays;

import org.deeplearning4j.rl4j.space.Encodable;

/**
 * Encodable state held as a plain array of doubles, similar to the Gym Box model
 * but without a JSON constructor.
 * @author howard-abrams (howard.abrams@ca.com) on 1/12/17.
 */
public class MalmoBox implements Encodable {
    /** Backing state values; shared with the caller, never copied. */
    double[] value;

    /**
     * Wraps the given doubles as a state. The array is kept by reference.
     * @param value state values
     */
    //TODO: If this constructor was added to "Box", we wouldn't need this class at all.
    public MalmoBox(double... value) {
        this.value = value;
    }

    /**
     * Renders the state values in {@link Arrays#toString(double[])} format.
     * @return textual form of the backing array
     */
    @Override
    public String toString() {
        return Arrays.toString(this.value);
    }

    /**
     * Exposes the state values.
     * @return the backing array itself, not a copy
     */
    @Override
    public double[] toArray() {
        return this.value;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package org.deeplearning4j.malmo;

/**
 * Exception thrown when Malmo cannot connect to a client after multiple retries.
 * It is unchecked, so callers are not forced to declare or catch it.
 * @author howard-abrams (howard.abrams@ca.com) on 1/12/17.
 */
public class MalmoConnectionError extends RuntimeException {
    private static final long serialVersionUID = -9034754802977073358L;

    /**
     * Creates the exception with a description of the connection failure.
     * @param string detail message
     */
    public MalmoConnectionError(String string) {
        super(string);
    }

    /**
     * Creates the exception with a description and the underlying failure, so the
     * root cause is kept in the stack trace instead of being dropped.
     * @param string detail message
     * @param cause the exception that made the connection attempt fail; may be null
     */
    public MalmoConnectionError(String string, Throwable cause) {
        super(string, cause);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.deeplearning4j.malmo;

import java.util.Arrays;

import com.microsoft.msr.malmo.WorldState;

/**
 * A Malmo consistency policy that accepts a world state only when it carries at
 * least one observation and one reward, and its observation differs from the
 * observation of the previous world state.
 * This will only work for your mission if you require that every action moves to a new location.
 * @author howard-abrams (howard.abrams@ca.com) on 1/12/17.
 */
public class MalmoDescretePositionPolicy implements MalmoObservationPolicy {
    /** Observation space used to extract the values that are compared between states. */
    MalmoObservationSpacePosition observationSpace = new MalmoObservationSpacePosition();

    /**
     * Decides whether the new world state can be used as the outcome of the last action.
     * @param world_state the most recent world state
     * @param original_world_state the world state from before the action
     * @return false if the new state has no observations, has no rewards, or yields
     *         the same observation values as the original state (two missing
     *         observations also count as the same); true otherwise
     */
    @Override
    public boolean isObservationConsistant(WorldState world_state, WorldState original_world_state) {
        MalmoBox currentObservation = observationSpace.getObservation(world_state);
        MalmoBox previousObservation = observationSpace.getObservation(original_world_state);

        double[] previousValues = null;
        if (previousObservation != null) {
            previousValues = previousObservation.toArray();
        }

        double[] currentValues = null;
        if (currentObservation != null) {
            currentValues = currentObservation.toArray();
        }

        if (world_state.getObservations().isEmpty()) {
            return false;
        }
        if (world_state.getRewards().isEmpty()) {
            return false;
        }
        // Unchanged values mean the agent has not visibly moved yet.
        return !Arrays.equals(currentValues, previousValues);
    }

}
Loading

0 comments on commit 55818ac

Please sign in to comment.