From a94ccab403583ccc37f8b530b933aef0504b0450 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Sat, 22 Jan 2022 19:46:03 -0600 Subject: [PATCH] add support for a samplesize -ss flag on the runner --- src/java/boa/functions/BoaAstIntrinsics.java | 11 +++++++++-- src/java/boa/runtime/BoaMapper.java | 2 ++ src/java/boa/runtime/BoaRunner.java | 4 ++++ templates/BoaJavaHadoop.stg | 5 +++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/java/boa/functions/BoaAstIntrinsics.java b/src/java/boa/functions/BoaAstIntrinsics.java index c6323bf11..da988012b 100644 --- a/src/java/boa/functions/BoaAstIntrinsics.java +++ b/src/java/boa/functions/BoaAstIntrinsics.java @@ -1,7 +1,8 @@ /* - * Copyright 2017, Hridesh Rajan, Robert Dyer, + * Copyright 2017-2022, Hridesh Rajan, Robert Dyer, * Iowa State University of Science and Technology - * and Bowling Green State University + * Bowling Green State University + * and University of Nebraska Board of Regents * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -66,6 +67,7 @@ public class BoaAstIntrinsics { private static MapFile.Reader map; private static MapFile.Reader commentsMap; private static MapFile.Reader issuesMap; + private static int samplecounter; private static final Revision emptyRevision; static { @@ -272,9 +274,14 @@ public static IssuesRoot getissues(final IssueRepository f) { @SuppressWarnings("rawtypes") public static void setup(final Context context) { + BoaAstIntrinsics.samplecounter = 0; BoaAstIntrinsics.context = context; } + public static boolean testsample(final int samplesize) { // returns true when the caller should skip the current input + return samplesize != 0 && BoaAstIntrinsics.samplecounter++ % samplesize != 0; // a samplesize of 0 disables sampling instead of dividing by zero + } + private static void openMap() { try { final Configuration conf = context.getConfiguration(); diff --git a/src/java/boa/runtime/BoaMapper.java b/src/java/boa/runtime/BoaMapper.java index 3508e6d40..48f6af5c1 100644 --- a/src/java/boa/runtime/BoaMapper.java +++ b/src/java/boa/runtime/BoaMapper.java @@ -42,6 +42,7 @@ public abstract class BoaMapper extends Mapper excludeProjects; + protected int samplesize; /** {@inheritDoc} */ @Override @@ -62,6 +63,7 @@ protected void setup(final Mapper.Conte this.context = context; this.excludeProjects = this.conf.getStringCollection("boa.exclude.projects"); + this.samplesize = (int)this.conf.getLong("boa.samplesize", 1); } protected boolean excludeProject(final String id) { diff --git a/src/java/boa/runtime/BoaRunner.java b/src/java/boa/runtime/BoaRunner.java index 57674249d..651a977d4 100644 --- a/src/java/boa/runtime/BoaRunner.java +++ b/src/java/boa/runtime/BoaRunner.java @@ -125,6 +125,10 @@ public Job job(final Path[] ins, final Path out) throws IOException { .hasArg() .withArgName("BYTES") .create("s")); + options.addOption(org.apache.commons.cli.OptionBuilder.withLongOpt("samplesize") + .withDescription("sample size") + .hasArg() + .create("ss")); options.addOption(org.apache.commons.cli.OptionBuilder.withLongOpt("excludelist") .withDescription("A comma-separated list of project IDs to exclude when running.") 
.hasArg() diff --git a/templates/BoaJavaHadoop.stg b/templates/BoaJavaHadoop.stg index b9c46cea4..cd2da8a48 100644 --- a/templates/BoaJavaHadoop.stg +++ b/templates/BoaJavaHadoop.stg @@ -86,6 +86,8 @@ public class extends boa.runtime.BoaRunner { // pass any arguments to map/reduce classes via configuration if (line.hasOption("excludelist")) configuration.setStrings("boa.exclude.projects", line.getOptionValue("excludelist")); + if (line.hasOption("samplesize")) + configuration.setLong("boa.samplesize", Integer.parseInt(line.getOptionValue("samplesize"))); if (line.hasOption("time")) { configuration.setBoolean("boa.debug.timings", true); configuration.setLong("mapred.job.reuse.jvm.num.tasks", 1); @@ -149,6 +151,9 @@ public class extends boa.runtime.BoaRunner { return; } + if (boa.functions.BoaAstIntrinsics.testsample(this.samplesize)) + return; + if (context.getConfiguration().getBoolean("boa.debug.timings", false)) LOG.info(key.toString());