From f74be09fe537db0a06c2b57d6b977d5d3d00dfa3 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Fri, 13 Aug 2021 14:35:30 -0500 Subject: [PATCH 01/11] add --time option to boa runner to generate debug timings per project --- src/java/boa/runtime/BoaRunner.java | 1 + templates/BoaJavaHadoop.stg | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/java/boa/runtime/BoaRunner.java b/src/java/boa/runtime/BoaRunner.java index 974074616..686b9da5f 100644 --- a/src/java/boa/runtime/BoaRunner.java +++ b/src/java/boa/runtime/BoaRunner.java @@ -105,6 +105,7 @@ public Job job(final Path[] ins, final Path out) throws IOException { static { options.addOption("p", "profile", false, "if true, profiles the execution of 1 map task"); options.addOption("b", "block", false, "if true, wait for job to finish and show status"); + options.addOption("t", "time", false, "if true, dump debug timings for each project"); options.addOption(OptionBuilder.withLongOpt("job") .withDescription("sets the MySql ID to update with this job's status") .hasArg() diff --git a/templates/BoaJavaHadoop.stg b/templates/BoaJavaHadoop.stg index 0edc9d6aa..b07a00316 100644 --- a/templates/BoaJavaHadoop.stg +++ b/templates/BoaJavaHadoop.stg @@ -80,6 +80,8 @@ public class extends boa.runtime.BoaRunner { // pass any arguments to map/reduce classes via configuration if (line.hasOption("excludelist")) configuration.setStrings("boa.exclude.projects", line.getOptionValue("excludelist")); + if (line.hasOption("time")) + configuration.setBoolean("boa.debug.timings", true); jb.submit(); @@ -139,6 +141,9 @@ public class extends boa.runtime.BoaRunner { return; } + if (context.getConfiguration().getBoolean("boa.debug.timings", false)) + LOG.error(DateTimeFormatter.ofPattern("HH:mm:ss").format(LocalDateTime.now()) + " - " + key.toString()); + try { boa.functions.BoaMathIntrinsics.random = new java.util.Random( + key.hashCode()); boa.types.Toplevel.Project _input = boa.types.Toplevel.Project.parseFrom(com.google.protobuf.CodedInputStream.newInstance(value.getBytes(), 0, value.getLength())); @@ -148,6 +153,9 @@ public class extends boa.runtime.BoaRunner { boa.io.BoaOutputCommitter.lastSeenEx = e; throw new java.io.IOException("map failure for key '" + key.toString() + "'", e); } + + if (context.getConfiguration().getBoolean("boa.debug.timings", false)) + LOG.error(DateTimeFormatter.ofPattern("HH:mm:ss").format(LocalDateTime.now()) + " - " + key.toString()); } /** {@inheritDoc} */ From d62a505bd96637cf0c6f7c0c996299dbb8c7c401 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Fri, 13 Aug 2021 14:40:07 -0500 Subject: [PATCH 02/11] logs need to use fqn --- templates/BoaJavaHadoop.stg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/BoaJavaHadoop.stg b/templates/BoaJavaHadoop.stg index b07a00316..b3fd90db8 100644 --- a/templates/BoaJavaHadoop.stg +++ b/templates/BoaJavaHadoop.stg @@ -142,7 +142,7 @@ public class extends boa.runtime.BoaRunner { } if (context.getConfiguration().getBoolean("boa.debug.timings", false)) - LOG.error(DateTimeFormatter.ofPattern("HH:mm:ss").format(LocalDateTime.now()) + " - " + key.toString()); + LOG.error(java.time.format.DateTimeFormatter.ofPattern("HH:mm:ss").format(java.time.LocalDateTime.now()) + " - " + key.toString()); try { boa.functions.BoaMathIntrinsics.random = new java.util.Random( + key.hashCode()); @@ -155,7 +155,7 @@ public class extends boa.runtime.BoaRunner { } if (context.getConfiguration().getBoolean("boa.debug.timings", false)) - LOG.error(DateTimeFormatter.ofPattern("HH:mm:ss").format(LocalDateTime.now()) + " - " + key.toString()); + LOG.error(java.time.format.DateTimeFormatter.ofPattern("HH:mm:ss").format(java.time.LocalDateTime.now()) + " - " + key.toString()); } /** {@inheritDoc} */ From 440213f59276af9045d9946c10debc02dc70f21f Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Fri, 13 Aug 2021 14:47:08 -0500 Subject: [PATCH 03/11] simplify debug logs --- templates/BoaJavaHadoop.stg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/BoaJavaHadoop.stg b/templates/BoaJavaHadoop.stg index b3fd90db8..20ef4ae6a 100644 --- a/templates/BoaJavaHadoop.stg +++ b/templates/BoaJavaHadoop.stg @@ -137,12 +137,12 @@ public class extends boa.runtime.BoaRunner { @Override protected void map(final org.apache.hadoop.io.Text key, final org.apache.hadoop.io.BytesWritable value, final org.apache.hadoop.mapreduce.Mapper\.Context context) throws java.io.IOException { if (excludeProject(key.toString())) { - LOG.error("EXCLUDED PROJECT: " + key.toString()); + LOG.info("EXCLUDED PROJECT: " + key.toString()); return; } if (context.getConfiguration().getBoolean("boa.debug.timings", false)) - LOG.error(java.time.format.DateTimeFormatter.ofPattern("HH:mm:ss").format(java.time.LocalDateTime.now()) + " - " + key.toString()); + LOG.info(key.toString()); try { boa.functions.BoaMathIntrinsics.random = new java.util.Random( + key.hashCode()); @@ -155,7 +155,7 @@ public class extends boa.runtime.BoaRunner { } if (context.getConfiguration().getBoolean("boa.debug.timings", false)) - LOG.error(java.time.format.DateTimeFormatter.ofPattern("HH:mm:ss").format(java.time.LocalDateTime.now()) + " - " + key.toString()); + LOG.info(key.toString()); } /** {@inheritDoc} */ From a9c22be46b34fa32dd63091f6d50c60004dd251d Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Fri, 13 Aug 2021 15:10:39 -0500 Subject: [PATCH 04/11] fix fragile test --- src/test/boa/test/compiler/TestTraversalBad.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/boa/test/compiler/TestTraversalBad.java b/src/test/boa/test/compiler/TestTraversalBad.java index 31a376ebd..452e7609f 100644 --- a/src/test/boa/test/compiler/TestTraversalBad.java +++ b/src/test/boa/test/compiler/TestTraversalBad.java @@ -37,6 +37,6 @@ public void traversalWithStop() throws IOException { @Test public void traversalWithNoReturn() throws IOException { - codegen(load(badDir + "traverse-with-no-return-statement.boa"), "Error on line 137: missing return statement"); + codegen(load(badDir + "traverse-with-no-return-statement.boa"), "Error on line 142: missing return statement"); } } From 2f46cdc71de7833da3eafdef924348648273420a Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Fri, 13 Aug 2021 15:40:22 -0500 Subject: [PATCH 05/11] fix inher attrs push/pop bug --- .../transforms/InheritedAttributeTransformer.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/java/boa/compiler/transforms/InheritedAttributeTransformer.java b/src/java/boa/compiler/transforms/InheritedAttributeTransformer.java index 30956bebb..730ddf9a8 100644 --- a/src/java/boa/compiler/transforms/InheritedAttributeTransformer.java +++ b/src/java/boa/compiler/transforms/InheritedAttributeTransformer.java @@ -227,7 +227,10 @@ private void updateVisitClause(final boolean isBefore, final SymbolTable env, fi if (visitMap.containsKey(typeToFind)) { vs = visitMap.get(typeToFind); - vs.getBody().getStatements().add(0, generatePushExpStatement(b, token, vs.getComponent().getIdentifier().getToken(), e)); + if (isBefore) + vs.getBody().getStatements().add(0, generatePushExpStatement(b, token, vs.getComponent().getIdentifier().getToken(), e)); + else + vs.getBody().getStatements().add(0, generatePopExpStatement(b, token, e)); } else { // 2) Otherwise, add a 'before T' clause with a 's_t_#.push(node)' final Block blk; @@ -236,7 +239,10 @@ private void updateVisitClause(final boolean isBefore, final SymbolTable env, fi else blk = new Block(); - blk.getStatements().add(0, generatePushExpStatement(b, token, "_n", e)); + if (isBefore) + blk.getStatements().add(0, generatePushExpStatement(b, token, "_n", e)); + else + blk.getStatements().add(0, generatePopExpStatement(b, token, e)); vs = new VisitStatement(isBefore, new Component(ASTFactory.createIdentifier("_n", env), ASTFactory.createIdentifier(typeToFind, env)), blk); TypeCheckingVisitor.instance.start(vs, e.env); From 40576c75a10138527d6a9a97dfb86aa4c11a10af Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Sat, 14 Aug 2021 13:19:44 -0500 Subject: [PATCH 06/11] fix duplicate warnings --- src/java/boa/compiler/BoaCompiler.java | 2 ++ .../compiler/visitors/TypeCheckingVisitor.java | 16 +++++++++++----- src/test/boa/test/compiler/BaseTest.java | 1 + 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/java/boa/compiler/BoaCompiler.java b/src/java/boa/compiler/BoaCompiler.java index 16c86dfe2..db5b7dc4a 100644 --- a/src/java/boa/compiler/BoaCompiler.java +++ b/src/java/boa/compiler/BoaCompiler.java @@ -136,7 +136,9 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int try { if (!parserErrorListener.hasError) { + TypeCheckingVisitor.warn = true; TypeCheckingVisitor.instance.start(p, new SymbolTable()); + TypeCheckingVisitor.warn = false; final TaskClassifyingVisitor simpleVisitor = new TaskClassifyingVisitor(); simpleVisitor.start(p); diff --git a/src/java/boa/compiler/visitors/TypeCheckingVisitor.java b/src/java/boa/compiler/visitors/TypeCheckingVisitor.java index 40101f6ed..8004b4ff4 100644 --- a/src/java/boa/compiler/visitors/TypeCheckingVisitor.java +++ b/src/java/boa/compiler/visitors/TypeCheckingVisitor.java @@ -1,7 +1,8 @@ /* - * Copyright 2017, Anthony Urso, Hridesh Rajan, Robert Dyer, + * Copyright 2017-2021, Anthony Urso, Hridesh Rajan, Robert Dyer, * Iowa State University of Science and Technology - * and Bowling Green State University + * Bowling Green State University + * and University of Nebraska Board of Regents * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,6 +45,7 @@ public class TypeCheckingVisitor extends AbstractVisitorNoReturn { BoaType lastRetType; public static final TypeCheckingVisitor instance = new TypeCheckingVisitor(); + public static boolean warn = true; /** * This verifies visitors have at most 1 before/after for a type. @@ -416,7 +418,10 @@ public void visit(final Factor n, final SymbolTable env) { node.accept(this, env); n.getOperand().env = env; + boolean oldwarn = warn; + warn = false; final List formalParameters = this.check((Call) node, env); + warn = oldwarn; try { type = env.getFunction(((Identifier)n.getOperand()).getToken(), formalParameters).erase(formalParameters); @@ -566,9 +571,9 @@ public void visit(final AssignmentStatement n, final SymbolTable env) { n.env = env; try { - n.env.setIsLhs(true); + n.env.setIsLhs(true); n.getLhs().accept(this, env); - n.env.setIsLhs(false); + n.env.setIsLhs(false); } catch (final TypeCheckException e) { if (!e.getMessage().startsWith("expected a call to function")) throw e; @@ -1566,6 +1571,7 @@ protected BoaType checkPairs(final List pl, final SymbolTable env) { } protected void warn(final Node node, final String msg) { - System.err.println("WARNING at line " + node.beginLine + ", columns " + node.beginColumn + "-" + node.endColumn + ": " + msg); + if (warn) + System.err.println("WARNING at line " + node.beginLine + ", columns " + node.beginColumn + "-" + node.endColumn + ": " + msg); } } diff --git a/src/test/boa/test/compiler/BaseTest.java b/src/test/boa/test/compiler/BaseTest.java index eee518a59..4518b5bcc 100644 --- a/src/test/boa/test/compiler/BaseTest.java +++ b/src/test/boa/test/compiler/BaseTest.java @@ -221,6 +221,7 @@ protected StartContext typecheck(final String input, final String error) throws final StartContext ctx = parse(input); try { + TypeCheckingVisitor.warn = false; TypeCheckingVisitor.instance.start(ctx.ast, new SymbolTable()); if (error != null) fail("expected error: " + error); From 8ae98f72e583d97e82bf92890f3eb5cae3fea129 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Sat, 14 Aug 2021 15:51:46 -0500 Subject: [PATCH 07/11] better error messages for assigning to output variables --- src/java/boa/compiler/visitors/TypeCheckingVisitor.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/java/boa/compiler/visitors/TypeCheckingVisitor.java b/src/java/boa/compiler/visitors/TypeCheckingVisitor.java index 8004b4ff4..184b3903e 100644 --- a/src/java/boa/compiler/visitors/TypeCheckingVisitor.java +++ b/src/java/boa/compiler/visitors/TypeCheckingVisitor.java @@ -412,6 +412,8 @@ public void visit(final Factor n, final SymbolTable env) { warn(node, "directly indexing maps can lead to runtime crashes - replace with lookup(" + n.getOperand() + ", " + new PrettyPrintVisitor().startAndReturn(((Index)node).getStart()) + ", )"); type = ((BoaMap) type).getType(); } else { + if (n.getOperand().type instanceof BoaTable) + throw new TypeCheckException(n.getOperand(), "can not assign to output variable '" + n.getOperand() + "' - did you mean to use < Date: Sat, 14 Aug 2021 15:54:39 -0500 Subject: [PATCH 08/11] add test case for bad input: o[3][3] = 3; --- src/test/boa/test/compiler/TestTypecheckBad.java | 5 +++++ test/typecheck/errors/assignoutput.boa | 3 +++ 2 files changed, 8 insertions(+) create mode 100644 test/typecheck/errors/assignoutput.boa diff --git a/src/test/boa/test/compiler/TestTypecheckBad.java b/src/test/boa/test/compiler/TestTypecheckBad.java index b85c12bb1..1a7b2167c 100644 --- a/src/test/boa/test/compiler/TestTypecheckBad.java +++ b/src/test/boa/test/compiler/TestTypecheckBad.java @@ -125,4 +125,9 @@ public void aggregatorInEmit() throws IOException { public void stopInAfter() throws IOException { typecheck(load(badDir + "stop-in-after.boa"), "Stop statement not allowed inside 'after' visits"); } + + @Test + public void assignOutput() throws IOException { + typecheck(load(badDir + "assignoutput.boa"), "can not assign to output variable 'o' - did you mean to use < Date: Sun, 15 Aug 2021 09:13:41 -0500 Subject: [PATCH 09/11] dont clone if -cache is used --- src/java/boa/datagen/SeqRepoImporter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/java/boa/datagen/SeqRepoImporter.java b/src/java/boa/datagen/SeqRepoImporter.java index e9b055f3f..42036bc28 100644 --- a/src/java/boa/datagen/SeqRepoImporter.java +++ b/src/java/boa/datagen/SeqRepoImporter.java @@ -368,6 +368,8 @@ private synchronized Project storeRepository(final Project project, final int i) // clone repository if (!gitDir.exists()) { + if (cache) + return null; // return null to skip non-cached project final String[] args = { repo.getUrl(), gitDir.getAbsolutePath() }; try { RepositoryCloner.clone(args); @@ -448,7 +450,9 @@ private synchronized boolean isFiltered(Project project) { return false; final String lang = project.getMainLanguage(); if (lang != null - && (lang.equals("Java") || lang.equals("JavaScript") || lang.equals("PHP"))) + && (lang.equals("Java") + || lang.equals("JavaScript") + || lang.equals("PHP"))) return false; return true; } From 7e8e7fc5d8993345dc2b56f337c625aa173eed08 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Sun, 15 Aug 2021 09:17:23 -0500 Subject: [PATCH 10/11] add -recover datagen option --- src/java/boa/datagen/BoaGenerator.java | 61 ++++++++++++++------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/java/boa/datagen/BoaGenerator.java b/src/java/boa/datagen/BoaGenerator.java index fdb1e8050..f5c6e13eb 100644 --- a/src/java/boa/datagen/BoaGenerator.java +++ b/src/java/boa/datagen/BoaGenerator.java @@ -52,37 +52,41 @@ public static void main(final String[] args) throws IOException { } BoaGenerator.handleCmdOptions(cl, options, args); - /* - * 1. if user provides local json files - * 2. if user provides username and password - * in both the cases json files are going to be available - */ - - if (jsonAvailable) { - try { - SeqRepoImporter.main(new String[0]); - } catch (InterruptedException e) { - e.printStackTrace(); - } - SeqCombiner.main(new String[0]); - } else if (tokenAvailable) { // when user provides local repo and doesn't have json files - MetaDataMaster mdm = new MetaDataMaster(); - mdm.downloadRepoNames(DefaultProperties.TOKEN, DefaultProperties.OUTPUT); - + if (cl.hasOption("recover")) { SeqCombiner.main(new String[0]); - } else { // when user provides local repo and does not have json files - File output = new File(DefaultProperties.OUTPUT); - if (!output.exists()) - output.mkdirs(); - LocalGitSequenceGenerator.localGitSequenceGenerate(DefaultProperties.GH_GIT_PATH, DefaultProperties.OUTPUT); - try { - MapFileGen.main(new String[0]); - } catch (Exception e) { - e.printStackTrace(); + } else { + /* + * 1. if user provides local json files + * 2. if user provides username and password + * in both the cases json files are going to be available + */ + + if (jsonAvailable) { + try { + SeqRepoImporter.main(new String[0]); + } catch (InterruptedException e) { + e.printStackTrace(); + } + SeqCombiner.main(new String[0]); + } else if (tokenAvailable) { // when user provides local repo and doesn't have json files + MetaDataMaster mdm = new MetaDataMaster(); + mdm.downloadRepoNames(DefaultProperties.TOKEN, DefaultProperties.OUTPUT); + + SeqCombiner.main(new String[0]); + } else { // when user provides local repo and does not have json files + File output = new File(DefaultProperties.OUTPUT); + if (!output.exists()) + output.mkdirs(); + LocalGitSequenceGenerator.localGitSequenceGenerate(DefaultProperties.GH_GIT_PATH, DefaultProperties.OUTPUT); + try { + MapFileGen.main(new String[0]); + } catch (Exception e) { + e.printStackTrace(); + } } - } - clear(); + clear(); + } } private static final void printHelp(Options options, String message) { @@ -113,6 +117,7 @@ private static void addOptions(Options options) { options.addOption("targetUser", true, "username of target repository"); options.addOption("targetRepo", true, "name of the target repository"); options.addOption("cache", false, "enable if you want to use already cloned repositories"); + options.addOption("recover", false, "enable to recover partially built dataset - this will only combine generated data"); options.addOption("debug", false, "enable for debug mode"); options.addOption("debugparse", false, "enable for debug mode when parsing source files"); options.addOption("help", false, "shows this help"); From dcc749e542e9afcd557067d4484021fcec01a1d0 Mon Sep 17 00:00:00 2001 From: Robert Dyer Date: Sun, 15 Aug 2021 09:22:30 -0500 Subject: [PATCH 11/11] add -skip datagen option --- src/java/boa/datagen/BoaGenerator.java | 15 +++++--- src/java/boa/datagen/DefaultProperties.java | 11 +++--- src/java/boa/datagen/SeqRepoImporter.java | 38 ++++++++++++--------- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/src/java/boa/datagen/BoaGenerator.java b/src/java/boa/datagen/BoaGenerator.java index f5c6e13eb..69bbb34f2 100644 --- a/src/java/boa/datagen/BoaGenerator.java +++ b/src/java/boa/datagen/BoaGenerator.java @@ -1,6 +1,7 @@ /* - * Copyright 2015, Hridesh Rajan, Robert Dyer, - * and Iowa State University of Science and Technology + * Copyright 2015-2021, Hridesh Rajan, Robert Dyer, + * Iowa State University of Science and Technology + * and University of Nebraska Board of Regents * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +33,7 @@ * The main entry point for Boa tools for generating datasets. * * @author hridesh - * + * @author rdyer */ public class BoaGenerator { private static boolean jsonAvailable = true; @@ -56,8 +57,8 @@ public static void main(final String[] args) throws IOException { SeqCombiner.main(new String[0]); } else { /* - * 1. if user provides local json files - * 2. if user provides username and password + * 1. if user provides local json files + * 2. if user provides username and password * in both the cases json files are going to be available */ @@ -117,6 +118,7 @@ private static void addOptions(Options options) { options.addOption("targetUser", true, "username of target repository"); options.addOption("targetRepo", true, "name of the target repository"); options.addOption("cache", false, "enable if you want to use already cloned repositories"); + options.addOption("skip", true, "skip every Nth project (useful for sampling)"); options.addOption("recover", false, "enable to recover partially built dataset - this will only combine generated data"); options.addOption("debug", false, "enable for debug mode"); options.addOption("debugparse", false, "enable for debug mode when parsing source files"); @@ -193,6 +195,9 @@ private static void handleCmdOptions(CommandLine cl, Options options, final Stri if (cl.hasOption("cache")) { DefaultProperties.CACHE = true; } + if (cl.hasOption("skip")) { + DefaultProperties.SKIPS = cl.getOptionValue("skip"); + } if (cl.hasOption("libs")) { DefaultProperties.CLASSPATH_ROOT = cl.getOptionValue("libs"); } diff --git a/src/java/boa/datagen/DefaultProperties.java b/src/java/boa/datagen/DefaultProperties.java index a162d48b0..9e5428caf 100644 --- a/src/java/boa/datagen/DefaultProperties.java +++ b/src/java/boa/datagen/DefaultProperties.java @@ -28,7 +28,8 @@ public class DefaultProperties { public static String TOTAL_MAX_PROJECTS = "" + Long.MAX_VALUE; public static String MAX_COMMITS = "10000"; public static String MAX_SIZE_FOR_PROJECT_WITH_COMMITS = String.valueOf(1 << 26); // Integer.MAX_VALUE / 3 - + public static String SKIPS = "1"; + public static boolean DEBUG = false; public static boolean DEBUGPARSE = false; public static boolean CACHE = false; @@ -62,22 +63,22 @@ public class DefaultProperties { public static final String SF_JSON_CACHE_PATH = "json_cache"; public static final String SF_SVN_PATH = "svn"; public static final String SF_TICKETS_PATH = "tickets"; - + // GitHub paths public static String GH_JSON_PATH = "repos-metadata-Boa-upto1213"; public static String GH_GIT_PATH = ""; public static String GH_ISSUE_PATH = ""; public static final String GH_TICKETS_PATH = "tickets"; public static String TOKEN = null; - + public static String CLASSPATH_ROOT = getClasspathRoot(); public static String OUTPUT = "output"; - + public static boolean STORE_ASCII_PRINTABLE_CONTENTS = true; public static boolean STORE_COMMITS = true; public static String localDataPath = null; - + @SuppressWarnings("unused") private static String getRoot() { File dir = new File(System.getProperty("user.dir")); diff --git a/src/java/boa/datagen/SeqRepoImporter.java b/src/java/boa/datagen/SeqRepoImporter.java index 42036bc28..52d69acb7 100644 --- a/src/java/boa/datagen/SeqRepoImporter.java +++ b/src/java/boa/datagen/SeqRepoImporter.java @@ -49,6 +49,7 @@ public class SeqRepoImporter { private final static boolean debug = Properties.getBoolean("debug", DefaultProperties.DEBUG); private final static boolean cache = Properties.getBoolean("cache", DefaultProperties.CACHE); + private final static long skips = Long.parseLong(Properties.getProperty("skip", DefaultProperties.SKIPS)) + 1; private final static File gitRootPath = new File(Properties.getProperty("gh.svn.path", DefaultProperties.GH_GIT_PATH)); final static String jsonPath = Properties.getProperty("gh.json.path", DefaultProperties.GH_JSON_PATH); @@ -130,25 +131,30 @@ static void processJSON(final File file) { try { final JsonObject rp = repoArray.get(i).getAsJsonObject(); final RepoMetadata repo = new RepoMetadata(rp); - if (repo.id != null && repo.name != null && !processedProjectIds.contains(repo.id)) { - final Project project = repo.toBoaMetaDataProtobuf(); // current project instance only contains metadata - - // System.out.println(jRepo.toString()); - boolean assigned = false; - while (!getDone() && !assigned) { - for (int j = 0; !getDone() && j < POOL_SIZE; j++) { - if (workers[j].isReady() && !workers[j].isAssigned()) { - workers[j].setProject(project); - workers[j].setAssigned(true); - assigned = true; - break; + if (counter % skips == 0) { + if (repo.id != null && repo.name != null && !processedProjectIds.contains(repo.id)) { + final Project project = repo.toBoaMetaDataProtobuf(); // current project instance only contains metadata + + // System.out.println(jRepo.toString()); + boolean assigned = false; + while (!getDone() && !assigned) { + for (int j = 0; !getDone() && j < POOL_SIZE; j++) { + if (workers[j].isReady() && !workers[j].isAssigned()) { + workers[j].setProject(project); + workers[j].setAssigned(true); + assigned = true; + break; + } } + // Thread.sleep(100); } - // Thread.sleep(100); + if (assigned) + System.out.println("Assigned the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id + + " from the " + i + "th object of the json file: " + file.getPath()); } - if (assigned) - System.out.println("Assigned the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id - + " from the " + i + "th object of the json file: " + file.getPath()); + } else { + System.out.println("Skipped the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id + + " from the " + i + "th object of the json file: " + file.getPath()); } } catch (final Exception e) { System.err.println("Error proccessing item " + i + " of page " + file.getPath());