Pull latest tensorflow master #10

Merged (9 commits, Feb 11, 2021)
9 changes: 6 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -50,9 +50,12 @@ jobs:
steps:
- name: Install environment
run: |
yum -y update
yum -y install centos-release-scl-rh epel-release
yum -y install java-1.8.0-openjdk-devel devtoolset-7 rh-git218 patch python36-devel python36-numpy python36-pip python36-six
echo Not updating glibc since CUDA fails with updated versions
GLIBC="glibc glibc-common glibc-devel glibc-headers"
yum --disablerepo updates -y install $GLIBC
yum -x "$GLIBC" -y update
yum -x "$GLIBC" -y install centos-release-scl-rh epel-release
yum -x "$GLIBC" -y install java-1.8.0-openjdk-devel devtoolset-7 rh-git218 patch perl-Data-Dumper python36-devel python36-numpy python36-pip python36-six
echo Downloading Maven
curl -L https://archive.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz -o $HOME/apache-maven-3.6.3-bin.tar.gz
tar xzf $HOME/apache-maven-3.6.3-bin.tar.gz -C /opt/
2 changes: 2 additions & 0 deletions .gitignore
@@ -53,3 +53,5 @@ gradleBuild
.classpath

**/target
.tf_configure.bazelrc
.clwb/
110 changes: 110 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,110 @@
# Building and Contributing to TensorFlow Java

## Building

To build all the artifacts, simply invoke `mvn install` at the root of this repository (or the Maven command of your choice). It is also
possible to build artifacts with MKL support enabled using
`mvn install -Djavacpp.platform.extension=-mkl`, with CUDA using `mvn install -Djavacpp.platform.extension=-gpu`,
or with both using `mvn install -Djavacpp.platform.extension=-mkl-gpu`.

When building this project for the first time in a given workspace, the script will attempt to download
the [TensorFlow runtime library sources](https://github.com/tensorflow/tensorflow) and build all of the native code for your platform. This requires a
valid environment for building TensorFlow, including the [bazel](https://bazel.build/)
build tool and a few Python dependencies (please read [TensorFlow documentation](https://www.tensorflow.org/install/source)
for more details).

This step can take multiple hours on a regular laptop. You can, however, skip the native build entirely if you are working on a version that
already has pre-compiled native artifacts for your platform [available on the Sonatype OSS Nexus repository](#Snapshots). Just activate
the `dev` profile in your Maven command to use those artifacts instead of building them from scratch
(e.g. `mvn install -Pdev`).

Modifying the native op generation code (not the annotation processor) or the JavaCPP configuration (not the abstract Pointers) requires a
complete build to reflect the changes; otherwise `-Pdev` should be fine.

### Native Builds

In some cases, like when adding GPU support or re-generating op classes, you will need to re-build the native library. 99% of this is building
TensorFlow, which by default is configured for the [CI](.github/workflows/ci.yml). The build configuration can be customized using the same methods as
TensorFlow, so if you're building locally, you may need to clone the [tensorflow](https://github.com/tensorflow/tensorflow) project, run its
configuration script (`./configure`), and copy the resulting
`.tf_configure.bazelrc` to `tensorflow-core-api`. This overrides the default options, and you can add to it manually (e.g. adding `build --copt="-g"`
to build with debugging info).

### GPU Support

Currently, due to build time constraints, the GPU binaries only support compute capabilities 3.5 and 7.0.
To use them with unsupported GPUs, you have to build them yourself, after changing the value [here](tensorflow-core/tensorflow-core-api/build.sh#L27),
setting the environment variable `TF_CUDA_COMPUTE_CAPABILITIES`, or configuring it in a bazel rc file
(e.g. `build --action_env TF_CUDA_COMPUTE_CAPABILITIES="6.1"`). While this is far from ideal, we are working on getting more build resources, and for
now this is the best option.

To build for GPU, pass `-Djavacpp.platform.extension=-gpu` to Maven. By default, the CI options are used for the bazel build; see the above section
for more info. If you add `bazelrc` files, make sure the `TF_CUDA_COMPUTE_CAPABILITIES` value in them matches the value set elsewhere, as it will take
precedence if present.
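As a sketch, a minimal local `.tf_configure.bazelrc` override for a Pascal-class GPU might look like the following (the exact option set depends on your CUDA installation, so treat this fragment as an assumption rather than a reference configuration):

```
# Hypothetical local override; adjust to your GPU and CUDA install
build --config=cuda
build --action_env TF_CUDA_COMPUTE_CAPABILITIES="6.1"
```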

## Running Tests

`ndarray` can be tested using the Maven `test` target. `tensorflow-core` and `tensorflow-framework`, however, should be tested using
the `integration-test` target, due to the need to include native binaries. Their tests will **not** be run when using the `test` target of parent
projects, but will be run by `install` or `integration-test`. If you see a `no jnitensorflow in java.library.path` error from tests, it is likely
because you're running the wrong test target.

### Native Crashes

Occasionally tests will fail with a message like:

```
Failed to execute goal org.apache.maven.plugins:maven-surefire-plugin:2.22.0:test (default-test) on project tensorflow-core-api: There are test failures.

Please refer to C:\mpicbg\workspace\tensorflow\java\tensorflow-core\tensorflow-core-api\target\surefire-reports for the individual test results.
Please refer to dump files (if any exist) [date]-jvmRun[N].dump, [date].dumpstream and [date]-jvmRun[N].dumpstream.
The forked VM terminated without properly saying goodbye. VM crash or System.exit called?
Command was cmd.exe /X /C "C:\Users\me\.jdks\adopt-openj9-1.8.0_275\jre\bin\java -jar C:\Users\me\AppData\Local\Temp\surefire236563113746082396\surefirebooter5751859365434514212.jar C:\Users\me\AppData\Local\Temp\surefire236563113746082396 2020-12-18T13-57-26_766-jvmRun1 surefire2445852067572510918tmp surefire_05950149004635894208tmp"
Error occurred in starting fork, check output in log
Process Exit Code: -1
Crashed tests:
org.tensorflow.TensorFlowTest
org.apache.maven.surefire.booter.SurefireBooterForkException: The forked VM terminated without properly saying goodbye. VM crash or System.exit called?
Command was cmd.exe /X /C "C:\Users\me\.jdks\adopt-openj9-1.8.0_275\jre\bin\java -jar C:\Users\me\AppData\Local\Temp\surefire236563113746082396\surefirebooter5751859365434514212.jar C:\Users\me\AppData\Local\Temp\surefire236563113746082396 2020-12-18T13-57-26_766-jvmRun1 surefire2445852067572510918tmp surefire_05950149004635894208tmp"
Error occurred in starting fork, check output in log
Process Exit Code: -1
Crashed tests:
org.tensorflow.TensorFlowTest
at org.apache.maven.plugin.surefire.booterclient.ForkStarter.fork(ForkStarter.java:671)
at org.apache.maven.plugin.surefire.booterclient.ForkStarter.fork(ForkStarter.java:533)
at org.apache.maven.plugin.surefire.booterclient.ForkStarter.run(ForkStarter.java:278)
at org.apache.maven.plugin.surefire.booterclient.ForkStarter.run(ForkStarter.java:244)
```

This means the native code crashed (e.g. because of a segfault), and it should have created a dump file somewhere in the project that you can use
to tell what caused the issue.

## Contributing

### Formatting

Java sources should be formatted according to the [Google style guide](https://google.github.io/styleguide/javaguide.html). Formatter configurations
are available for [IntelliJ](https://github.com/google/styleguide/blob/gh-pages/intellij-java-google-style.xml) and
[Eclipse](https://github.com/google/styleguide/blob/gh-pages/eclipse-java-google-style.xml).
[Google's C++ style guide](https://google.github.io/styleguide/cppguide.html) should also be used for C++ code.

### Code generation

Code generation for `Ops` and related classes is done during `tensorflow-core-api`'s `compile` phase, using the annotation processor in
`tensorflow-core-generator`. If you change or add any operator classes (annotated with `org.tensorflow.op.annotation.Operator`), endpoint methods (
annotated with `org.tensorflow.op.annotation.Endpoint`), or change the annotation processor, be sure to re-run
`mvn install` in `tensorflow-core-api` (`-Pdev` is fine for this; it just needs to run the annotation processor).

### Working with Bazel generation

`tensorflow-core-api` uses Bazel-built C++ code generation to generate most of the `@Operator` classes. See [Native Builds](#native-builds) for
instructions on configuring the bazel build. To run the code generation, use the `//:java_op_generator` target. The resulting binary has good help
text (viewable in
[op_gen_main.cc](tensorflow-core/tensorflow-core-api/src/bazel/op_generator/op_gen_main.cc#L31-L48)). Generally, it should be called with arguments
that are something like:

```
bazel-out/k8-opt/bin/external/org_tensorflow/tensorflow/libtensorflow_cc.so --output_dir=src/gen/java --api_dirs=bazel-tensorflow-core-api/external/org_tensorflow/tensorflow/core/api_def/base_api,src/bazel/api_def
```

(called in `tensorflow-core-api`).
2 changes: 0 additions & 2 deletions LICENSE
@@ -1,5 +1,3 @@
Copyright 2020 The TensorFlow Authors. All rights reserved.

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
27 changes: 8 additions & 19 deletions README.md
@@ -34,26 +34,17 @@ The following describes the layout of the repository and its different artifacts
* Intended audience: any developer who needs a Java n-dimensional array implementation, whether or not they
use it with TensorFlow

## Building Sources

To build all the artifacts, simply invoke the command `mvn install` at the root of this repository (or
the Maven command of your choice). It is also possible to build artifacts with support for MKL enabled with
`mvn install -Djavacpp.platform.extension=-mkl` or CUDA with `mvn install -Djavacpp.platform.extension=-gpu`
or both with `mvn install -Djavacpp.platform.extension=-mkl-gpu`.
## Communication

When building this project for the first time in a given workspace, the script will attempt to download
the [TensorFlow runtime library sources](https://github.com/tensorflow/tensorflow) and build all of the native code
for your platform. This requires a valid environment for building TensorFlow, including the [bazel](https://bazel.build/)
build tool and a few Python dependencies (please read [TensorFlow documentation](https://www.tensorflow.org/install/source)
for more details).
This repository is maintained by the TensorFlow JVM Special Interest Group (SIG). You can easily join the group
by subscribing to the [jvm@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/jvm)
mailing list, or you can simply send pull requests and raise issues to this repository.
There is also a [sig-jvm Gitter channel](https://gitter.im/tensorflow/sig-jvm).

This step can take multiple hours on a regular laptop. It is possible, though, to skip the native build completely if you are
working on a version that already has pre-compiled native artifacts for your platform [available on Sonatype OSS Nexus repository](#Snapshots).
You just need to activate the `dev` profile in your Maven command to use those artifacts instead of building them from scratch
(e.g. `mvn install -Pdev`).
## Building Sources

Note that modifying any source files under `tensorflow-core` may impact the low-level TensorFlow bindings, in which case a
complete build could be required to reflect the changes.
See [CONTRIBUTING.md](CONTRIBUTING.md#building).

## Using Maven Artifacts

@@ -162,6 +153,4 @@ This table shows the mapping between different version of TensorFlow for Java an

## How to Contribute?

This repository is maintained by the TensorFlow JVM Special Interest Group (SIG). You can easily join the group
by subscribing to the [jvm@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/jvm)
mailing list, or you can simply send pull requests and raise issues to this repository.
Contributions are welcome, guidelines are located in [CONTRIBUTING.md](CONTRIBUTING.md).
1 change: 0 additions & 1 deletion ndarray/pom.xml
@@ -80,7 +80,6 @@
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<argLine>-Xmx2G -XX:MaxPermSize=256m</argLine>
<skipTests>false</skipTests>
<includes>
<include>**/*Test.java</include>
</includes>
ndarray/src/main/java/org/tensorflow/ndarray/impl/dimension/DimensionalSpace.java
@@ -18,8 +18,8 @@
package org.tensorflow.ndarray.impl.dimension;

import java.util.Arrays;
import org.tensorflow.ndarray.index.Index;
import org.tensorflow.ndarray.Shape;
import org.tensorflow.ndarray.index.Index;

public class DimensionalSpace {

@@ -35,24 +35,42 @@ public static DimensionalSpace create(Shape shape) {
}

public RelativeDimensionalSpace mapTo(Index[] indices) {
if (dimensions == null || indices.length > dimensions.length) {
if (dimensions == null) {
throw new ArrayIndexOutOfBoundsException();
}
int dimIdx = 0;
int indexIdx = 0;
int newDimIdx = 0;
int segmentationIdx = -1;
long initialOffset = 0;

Dimension[] newDimensions = new Dimension[dimensions.length];
while (dimIdx < indices.length) {
int newAxes = 0;
boolean seenEllipsis = false;
for (Index idx : indices) {
if (idx.isNewAxis()) {
newAxes += 1;
}
if (idx.isEllipsis()) {
if (seenEllipsis) {
throw new IllegalArgumentException("Only one ellipsis allowed");
} else {
seenEllipsis = true;
}
}
}
int newLength = dimensions.length + newAxes;

Dimension[] newDimensions = new Dimension[newLength];
while (indexIdx < indices.length) {

if (indices[dimIdx].isPoint()) {
if (indices[indexIdx].isPoint()) {
// When an index targets a single point in a given dimension, calculate the offset of this
// point and accumulate the offset of any subsequent points as well
long offset = 0;
do {
offset += indices[dimIdx].mapCoordinate(0, dimensions[dimIdx]);
} while (++dimIdx < indices.length && indices[dimIdx].isPoint());
offset += indices[indexIdx].mapCoordinate(0, dimensions[dimIdx]);
dimIdx++;
} while (++indexIdx < indices.length && indices[indexIdx].isPoint());

// If this is the first index, then the offset is the position of the whole dimension
// space within the original one. If not, then we apply the offset to the last vectorial
@@ -65,14 +83,47 @@ public RelativeDimensionalSpace mapTo(Index[] indices) {
segmentationIdx = newDimIdx - 1;
}

} else if (indices[indexIdx].isNewAxis()) {
long newSize;
if (dimIdx == 0) {
// includes everything. Should really include future reduction (at()) but that doesn't seem to cause issues
// elsewhere
newSize = dimensions[0].numElements() * dimensions[0].elementSize();
} else {
newSize = dimensions[dimIdx - 1].elementSize();
}

newDimensions[newDimIdx] = new Axis(1, newSize);
segmentationIdx = newDimIdx; // is this correct?
++newDimIdx;
++indexIdx;
} else if (indices[indexIdx].isEllipsis()) {
int remainingDimensions = dimensions.length - dimIdx;
int requiredDimensions = 0;
for (int i = indexIdx + 1; i < indices.length; i++) {
if (!indices[i].isNewAxis()) {
requiredDimensions++;
}
}
// while more dimensions remain than the remaining indices consume, the ellipsis absorbs them
while (remainingDimensions > requiredDimensions) {
Dimension dim = dimensions[dimIdx++];
if (dim.isSegmented()) {
segmentationIdx = newDimIdx;
}
newDimensions[newDimIdx++] = dim;
remainingDimensions--;
}
indexIdx++;
} else {
// Map any other index to the appropriate dimension of this space
Dimension newDimension = indices[dimIdx].apply(dimensions[dimIdx++]);
Dimension newDimension = indices[indexIdx].apply(dimensions[dimIdx++]);
newDimensions[newDimIdx] = newDimension;
if (newDimension.isSegmented()) {
segmentationIdx = newDimIdx;
}
++newDimIdx;
++indexIdx;
}
}

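The pre-scan pass added above (counting `newAxis` entries and rejecting a second ellipsis before sizing the new dimension array) can be illustrated in isolation. This sketch uses a hypothetical `Idx` enum as a stand-in for the real `Index` interface, so names and structure here are assumptions, not the library's API:

```java
import java.util.List;

public class IndexPreScan {

  // Hypothetical stand-in for org.tensorflow.ndarray.index.Index
  enum Idx { POINT, ALL, NEW_AXIS, ELLIPSIS }

  /**
   * Returns the rank of the resulting dimensional space: the original rank
   * plus one per newAxis index. Throws if more than one ellipsis is present,
   * mirroring the validation in DimensionalSpace.mapTo.
   */
  static int expandedRank(int rank, List<Idx> indices) {
    int newAxes = 0;
    boolean seenEllipsis = false;
    for (Idx idx : indices) {
      if (idx == Idx.NEW_AXIS) {
        newAxes += 1;
      }
      if (idx == Idx.ELLIPSIS) {
        if (seenEllipsis) {
          throw new IllegalArgumentException("Only one ellipsis allowed");
        }
        seenEllipsis = true;
      }
    }
    return rank + newAxes;
  }

  public static void main(String[] args) {
    // a rank-3 space indexed with [newAxis, ellipsis, point] becomes rank 4
    System.out.println(expandedRank(3, List.of(Idx.NEW_AXIS, Idx.ELLIPSIS, Idx.POINT)));
  }
}
```

A second ellipsis (e.g. `[ELLIPSIS, ALL, ELLIPSIS]`) would be ambiguous about which dimensions each one spans, which is why the pass rejects it up front.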
15 changes: 15 additions & 0 deletions ndarray/src/main/java/org/tensorflow/ndarray/index/All.java
@@ -39,4 +39,19 @@ public Dimension apply(Dimension dim) {

private All() {
}

@Override
public boolean beginMask() {
return true;
}

@Override
public boolean endMask() {
return true;
}

@Override
public String toString() {
return All.class.getSimpleName() + "()";
}
}
34 changes: 30 additions & 4 deletions ndarray/src/main/java/org/tensorflow/ndarray/index/At.java
@@ -16,6 +16,7 @@
*/
package org.tensorflow.ndarray.index;

import java.util.StringJoiner;
import org.tensorflow.ndarray.impl.dimension.Dimension;

final class At implements Index {
Expand All @@ -27,22 +28,47 @@ public long numElements(Dimension dim) {

@Override
public long mapCoordinate(long coordinate, Dimension dim) {
return dim.positionOf(coord); // TODO validate coordinate is 0?
long coord = this.coord >= 0 ? this.coord : dim.numElements() + this.coord;
return dim.positionOf(coord);
}

@Override
public Dimension apply(Dimension dim) {
throw new IllegalStateException(); // FIXME?
if (!keepDim) {
throw new UnsupportedOperationException("Should be handled in DimensionalSpace.");
}

return dim.withIndex(this);
}

@Override
public boolean isPoint() {
return true;
return !keepDim;
}

At(long coord) {
At(long coord, boolean keepDim) {
this.coord = coord;
this.keepDim = keepDim;
}

private final long coord;
private final boolean keepDim;

@Override
public long begin() {
return coord;
}

@Override
public long end() {
return coord + 1;
}

@Override
public String toString() {
return new StringJoiner(", ", At.class.getSimpleName() + "(", ")")
.add("coord=" + coord)
.add("keepDim=" + keepDim)
.toString();
}
}
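The negative-coordinate handling added to `At.mapCoordinate` follows the usual Python-style convention: a negative coordinate counts back from the end of the dimension. A standalone sketch of that resolution step (a hypothetical helper, not part of the library; it also adds an explicit range check, which the diff itself leaves to `Dimension.positionOf`):

```java
public class NegativeIndex {

  /**
   * Resolves a possibly-negative coordinate against a dimension of the given
   * size: -1 maps to the last element, -2 to the one before it, and so on.
   */
  static long resolve(long coord, long numElements) {
    long resolved = coord >= 0 ? coord : numElements + coord;
    if (resolved < 0 || resolved >= numElements) {
      throw new IndexOutOfBoundsException(
          "coordinate " + coord + " out of range for size " + numElements);
    }
    return resolved;
  }

  public static void main(String[] args) {
    System.out.println(resolve(-1, 5)); // last element of a size-5 dimension -> 4
    System.out.println(resolve(2, 5));  // non-negative coordinates pass through -> 2
  }
}
```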