diff --git a/build.xml b/build.xml index bc50a95c7..ada68f949 100644 --- a/build.xml +++ b/build.xml @@ -85,7 +85,7 @@ - + @@ -95,6 +95,7 @@ + @@ -136,6 +137,7 @@ + @@ -151,6 +153,7 @@ + @@ -450,6 +453,7 @@ + diff --git a/src/java/boa/functions/BoaAstIntrinsics.java b/src/java/boa/functions/BoaAstIntrinsics.java index badbf1a91..c8d738486 100644 --- a/src/java/boa/functions/BoaAstIntrinsics.java +++ b/src/java/boa/functions/BoaAstIntrinsics.java @@ -1,5 +1,5 @@ /* - * Copyright 2017-20222, Hridesh Rajan, Robert Dyer, + * Copyright 2017-2023, Hridesh Rajan, Robert Dyer, * Iowa State University of Science and Technology * Bowling Green State University * and University of Nebraska Board of Regents @@ -64,7 +64,7 @@ */ public class BoaAstIntrinsics { @SuppressWarnings("rawtypes") - static Context context; + public static Context context; private static MapFile.Reader map; private static MapFile.Reader commentsMap; private static MapFile.Reader issuesMap; diff --git a/src/java/boa/functions/BoaIntrinsics.java b/src/java/boa/functions/BoaIntrinsics.java index f81ac7223..c9141a806 100644 --- a/src/java/boa/functions/BoaIntrinsics.java +++ b/src/java/boa/functions/BoaIntrinsics.java @@ -17,6 +17,8 @@ */ package boa.functions; +import java.io.BufferedInputStream; +import java.io.ObjectInputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -29,6 +31,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import boa.datagen.DefaultProperties; import boa.types.Code.CodeRepository; import boa.types.Code.Revision; import boa.types.Diff.ChangedFile; @@ -41,12 +49,41 @@ * @author rdyer */ public class BoaIntrinsics { + private static Set forksData; + + private static void loadForksData() { + try { + final Configuration conf = BoaAstIntrinsics.context.getConfiguration(); + final FileSystem fs; + final Path p; + if (DefaultProperties.localDataPath != null) { + p = new Path(DefaultProperties.localDataPath, "forks.bin"); + fs = FileSystem.getLocal(conf); + } else { + p = new Path( + BoaAstIntrinsics.context.getConfiguration().get("fs.default.name", "hdfs://boa-njt/"), + new Path(conf.get("boa.forks.file", conf.get("boa.ast.dir", conf.get("boa.input.dir", "")) + "/forks.bin")) + ); + fs = FileSystem.get(conf); + } + + try (final FSDataInputStream data = fs.open(p); + final BufferedInputStream bis = new BufferedInputStream(data); + final ObjectInputStream ois = new ObjectInputStream(bis)) { + forksData = (Set)ois.readObject(); + } + } catch (final Exception e) { + System.err.println("Error reading forks.bin: " + e.getMessage()); + e.printStackTrace(); + forksData = new HashSet(); + } + } + @FunctionSpec(name = "isfork", returnType = "bool", formalParameters = { "Project" }) public static boolean isfork(final Project p) { - final String[] knownForks = { }; - final Set forks = new HashSet<>(); - Collections.addAll(forks, knownForks); - return p.getForked() || forks.contains(p.getId()); + if (forksData == null) + loadForksData(); + return p.getForked() || forksData.contains(Integer.parseInt(p.getId())); } private final static String[] fixingRegex = { diff --git a/src/java/boa/runtime/BoaRunner.java b/src/java/boa/runtime/BoaRunner.java index 651a977d4..62459d338 100644 --- a/src/java/boa/runtime/BoaRunner.java +++ b/src/java/boa/runtime/BoaRunner.java @@ -1,5 +1,5 @@ /* - * Copyright 2014-2021, Anthony Urso, Hridesh Rajan, Robert Dyer, + * Copyright 2014-2023, Anthony Urso, Hridesh Rajan, Robert Dyer, * Iowa State University of Science and Technology * and University of Nebraska Board of Regents * @@ -120,6 +120,11 @@ public Job job(final Path[] ins, final Path out) throws IOException { .hasArg() .withArgName("INPUT") .create("c")); + options.addOption(org.apache.commons.cli.OptionBuilder.withLongOpt("forks") + .withDescription("which INPUT to use for forks data") + .hasArg() + .withArgName("INPUT") + .create("f")); options.addOption(org.apache.commons.cli.OptionBuilder.withLongOpt("splitsize") .withDescription("split size in BYTES") .hasArg() diff --git a/templates/BoaJavaHadoop.stg b/templates/BoaJavaHadoop.stg index cd2da8a48..012dd5290 100644 --- a/templates/BoaJavaHadoop.stg +++ b/templates/BoaJavaHadoop.stg @@ -56,6 +56,8 @@ public class extends boa.runtime.BoaRunner { configuration.set("boa.ast.dir", line.getOptionValue("ast")); if (line.hasOption("comments")) configuration.set("boa.comments.dir", line.getOptionValue("comments")); + if (line.hasOption("forks")) + configuration.set("boa.forks.file", line.getOptionValue("forks")); if (line.hasOption("splitsize")) configuration.setInt("mapred.max.split.size", Integer.parseInt(line.getOptionValue("splitsize")));