diff --git a/.vscode/settings.json b/.vscode/settings.json index f4b38fb..b037f46 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,4 +23,10 @@ "/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing" ], "pylint.importStrategy": "useBundled", + "java.configuration.updateBuildConfiguration": "automatic", + "java.project.sourcePaths": [ + "." + ], + "java.project.referencedLibraries": [], + "java.format.settings.url": ".vscode/java-formatter.xml", } \ No newline at end of file diff --git a/java/README.md b/java/README.md new file mode 100644 index 0000000..febf653 --- /dev/null +++ b/java/README.md @@ -0,0 +1,107 @@ +# Java Snippets + +The Java snippets are contained in the `snippets` directory under various Java package directories. They can be built using the `pom.xml` in this directory using `mvn package`. The result will be the `sz-sdk-snippets.jar` file in the `target` directory. + +There are several ways to run the code snippets. + +## Run Directly + +You may run any individual Snippet class directly, provided you have a Senzing repository to run it with and the `SENZING_ENGINE_CONFIGURATION_JSON` environment variable set for connecting to that repository. Many of the snippets will find a default data file to run with if run from this directory, but also allow the caller to use a different data file if given by the first command-line argument. + +1. Run a snippet that takes no command-line arguments. + ``` + java -cp target/sz-sdk-snippets.jar loading.LoadRecords + ``` + +2. Run a snippet and override the input file using command-line arguments + ``` + java -cp target/sz-sdk-snippets.jar loading.LoadViaLoop ../../resources/data/load-500-with-errors.jsonl + ``` + +## Run Individually via Runner + +The `com.senzing.runner.SnippetRunner` class will run one or more snippets for you and create a temporary Senzing repository to run +them against. 
This is the `Main-Class` of the `sz-sdk-snippets.jar` file so it can be executed using `java -jar target/sz-sdk-snippets.jar`. + +**NOTE:** When code snippets are run this way you cannot specify command-line arguments for individual snippets, nor can you respond to command-line input requests (they will automatically be answered by the runner -- including forced termination of a snippet that is intended to run indefinitely). + +1. Execute all code snippets: + ``` + java -jar target/sz-sdk-snippets.jar all + ``` + +2. Execute all code snippets in a Java package: + ``` + java -jar target/sz-sdk-snippets.jar loading + ``` + +3. Execute all code snippets from multiple packages: + ``` + java -jar target/sz-sdk-snippets.jar loading redo + ``` +4. Execute specific code snippets: + ``` + java -jar target/sz-sdk-snippets.jar loading.LoadViaLoop loading.LoadViaQueue + ``` +5. Mix and match packages with individual snippets: + ``` + java -jar target/sz-sdk-snippets.jar redo loading.LoadViaLoop + ``` +6. 
Generate a help message by specifying no arguments: + ``` + java -jar target/sz-sdk-snippets.jar + + java -jar sz-sdk-snippets.jar [ all | <group> | <snippet> ]* + + - Specifying no arguments will print this message + - Specifying "all" will run all snippets + - Specifying one or more groups will run all snippets in those groups + - Specifying one or more snippets will run those snippet + + Examples: + + java -jar sz-sdk-snippets.jar all + + java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures + + java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords + + Snippet Group Names: + - configuration + - deleting + - information + - initialization + - loading + - redo + - searching + - stewardship + + Snippet Names: + - configuration.AddDataSources + - configuration.InitDefaultConfig + - deleting.DeleteViaFutures + - deleting.DeleteViaLoop + - deleting.DeleteWithInfoViaFutures + - information.CheckDatastorePerformance + - information.GetDatastoreInfo + - information.GetLicense + - information.GetVersion + - initialization.EnginePriming + - initialization.EnvironmentAndHubs + - initialization.PurgeRepository + - loading.LoadRecords + - loading.LoadTruthSetWithInfoViaLoop + - loading.LoadViaFutures + - loading.LoadViaLoop + - loading.LoadViaQueue + - loading.LoadWithInfoViaFutures + - loading.LoadWithStatsViaLoop + - redo.LoadWithRedoViaLoop + - redo.RedoContinuous + - redo.RedoContinuousViaFutures + - redo.RedoWithInfoContinuous + - searching.SearchRecords + - searching.SearchViaFutures + - stewardship.ForceResolve + - stewardship.ForceUnresolve + ``` diff --git a/java/pom.xml b/java/pom.xml new file mode 100644 index 0000000..0ecd293 --- /dev/null +++ b/java/pom.xml @@ -0,0 +1,130 @@ + + 4.0.0 + com.senzing + sz-sdk-snippets + jar + 4.0.0 + Senzing Java SDK + The Code Snippet Examples for Senzing V4 Java SDK. 
+ http://github.com/Senzing/code-snippets-v4 + + + The Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0 + + + + + com.senzing + sz-sdk + 4.0.0 + system + ${SENZING_DIR}/lib/sz-sdk.jar + + + org.glassfish + javax.json + 1.1.4 + + + org.xerial + sqlite-jdbc + 3.47.2.0 + + + + 17 + 17 + 17 + UTF-8 + UTF-8 + + + snippets + + + ${project.basedir}/runner/resources + + + ${project.basedir}/../resources + + + ${project.artifactId} + + + org.apache.maven.plugins + maven-compiler-plugin + 3.13.0 + + + -Xlint:unchecked + -Xlint:deprecation + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.2.0 + + + generate-sources + + add-source + + + + ${project.basedir}/runner/java + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.6.0 + + false + + ${SENZING_DIR}/lib/sz-sdk.jar + + + + *:* + + META-INF/MANIFEST.MF + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + + + com.senzing.runner.SnippetRunner + ${project.version} + + + + + + + + + + diff --git a/java/runner/java/com/senzing/runner/InstallLocations.java b/java/runner/java/com/senzing/runner/InstallLocations.java new file mode 100644 index 0000000..63fcd51 --- /dev/null +++ b/java/runner/java/com/senzing/runner/InstallLocations.java @@ -0,0 +1,467 @@ +package com.senzing.runner; + +import javax.json.JsonObject; +import java.io.File; +import java.io.StringWriter; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; + +import static com.senzing.runner.Utilities.*; + +/** + * Describes the directories on disk used to find the Senzing product + * installation and the support directories. + */ +public class InstallLocations { + /** + * The installation location. + */ + private File installDir; + + /** + * The location of the configuration files for the config directory. + */ + private File configDir; + + /** + * The location of the resource files for the resource directory. 
+ */ + private File resourceDir; + + /** + * The location of the support files for the support directory. + */ + private File supportDir; + + /** + * The location of the template files for the template directory. + */ + private File templatesDir; + + /** + * Indicates if the installation directory is from a development build. + */ + private boolean devBuild = false; + + /** + * Private default constructor; fields are populated by {@link #findLocations()}. + */ + private InstallLocations() { + this.installDir = null; + this.configDir = null; + this.resourceDir = null; + this.supportDir = null; + this.templatesDir = null; + this.devBuild = false; + } + + /** + * Gets the primary installation directory. + * + * @return The primary installation directory. + */ + public File getInstallDirectory() { + return this.installDir; + } + + /** + * Gets the configuration directory. + * + * @return The configuration directory, or null if none was found. + */ + public File getConfigDirectory() { + return this.configDir; + } + + /** + * Gets the resource directory. + * + * @return The resource directory, or null if no valid one was found. + */ + public File getResourceDirectory() { + return this.resourceDir; + } + + /** + * Gets the support directory. + * + * @return The support directory. + */ + public File getSupportDirectory() { + return this.supportDir; + } + + /** + * Gets the templates directory. + * + * @return The templates directory, or null if no valid one was found. + */ + public File getTemplatesDirectory() { + return this.templatesDir; + } + + /** + * Checks if the installation is actually a development build. + * + * @return true if this installation represents a development + * build, otherwise false. + */ + public boolean isDevelopmentBuild() { + return this.devBuild; + } + + /** + * Produces a {@link String} describing this instance. + * + * @return A {@link String} describing this instance. 
+ */ + public String toString() { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + + pw.println(); + pw.println("--------------------------------------------------"); + pw.println("installDirectory : " + this.getInstallDirectory()); + pw.println("configDirectory : " + this.getConfigDirectory()); + pw.println("supportDirectory : " + this.getSupportDirectory()); + pw.println("resourceDirectory : " + this.getResourceDirectory()); + pw.println("templatesDirectory : " + this.getTemplatesDirectory()); + pw.println("developmentBuild : " + this.isDevelopmentBuild()); + + return sw.toString(); + } + + /** + * Finds the install directories and returns the {@link InstallLocations} + * instance describing those locations. + * + * Explicit paths are taken from system properties (e.g. senzing.install.dir) + * or environment variables, otherwise platform defaults are used. + * + * @return The {@link InstallLocations} instance describing the install + * locations, or null if the installation directory is missing or invalid. + * + * @throws IllegalStateException If the support, config or resource directory + * is missing or invalid. + */ + public static InstallLocations findLocations() { + final String osName = System.getProperty("os.name"); + + boolean windows = false; + boolean macOS = false; + + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") + || lowerOSName.indexOf("darwin") >= 0) + { + macOS = true; + } + + File installDir = null; + File configDir = null; + File resourceDir = null; + File supportDir = null; + File templatesDir = null; + try { + String defaultInstallPath; + String defaultConfigPath = null; + + if (windows) { + defaultInstallPath = "C:\\\\Program Files\\Senzing\\er"; + } else if (macOS) { + defaultInstallPath = "/opt/senzing/er"; + } else { + defaultInstallPath = "/opt/senzing/er"; + defaultConfigPath = "/etc/opt/senzing"; + } + + // set the install path if one has been provided + String installPath = System.getProperty("senzing.install.dir"); + String configPath = System.getProperty("senzing.config.dir"); + String supportPath 
= System.getProperty("senzing.support.dir"); + String resourcePath = System.getProperty("senzing.resource.dir"); + + // try environment variables if system properties don't work + if (installPath == null || installPath.trim().length() == 0) { + installPath = System.getenv("SENZING_DIR"); + } + if (configPath == null || configPath.trim().length() == 0) { + configPath = System.getenv("SENZING_ETC_DIR"); + } + if (supportPath == null || supportPath.trim().length() == 0) { + supportPath = System.getenv("SENZING_DATA_DIR"); + } + + // normalize empty strings as null + if (installPath != null && installPath.trim().length() == 0) { + installPath = null; + } + if (configPath != null && configPath.trim().length() == 0) { + configPath = null; + } + if (supportPath != null && supportPath.trim().length() == 0) { + supportPath = null; + } + if (resourcePath != null && resourcePath.trim().length() == 0) { + resourcePath = null; + } + + // check the senzing directory + installDir = new File(installPath == null ? 
defaultInstallPath : installPath); + if (!installDir.exists()) { + System.err.println("Could not find Senzing installation directory:"); + System.err.println(" " + installDir); + System.err.println(); + if (installPath != null) { + System.err.println( + "Check the -Dsenzing.install.dir=[path] command line option."); + } else { + System.err.println( + "Use the -Dsenzing.install.dir=[path] command line option to " + + "specify a path"); + } + + return null; + } + + // normalize the senzing directory + String dirName = installDir.getName(); + if (installDir.isDirectory() && !dirName.equalsIgnoreCase("er") + && dirName.equalsIgnoreCase("senzing")) + { + // for windows or linux allow the "Senzing" dir as well + installDir = new File(installDir, "er"); + } + + if (!installDir.isDirectory()) { + System.err.println("Senzing installation directory appears invalid:"); + System.err.println(" " + installDir); + System.err.println(); + if (installPath != null) { + System.err.println( + "Check the -Dsenzing.install.dir=[path] command line option."); + } else { + System.err.println( + "Use the -Dsenzing.install.dir=[path] command line option to " + + "specify a path"); + } + + return null; + } + + if (supportPath == null || supportPath.trim().length() == 0) { + // try to determine the support path + File installParent = installDir.getParentFile(); + File dataRoot = new File(installParent, "data"); + if (dataRoot.exists() && dataRoot.isDirectory()) { + File versionFile = new File(installDir, "szBuildVersion.json"); + String dataVersion = null; + if (versionFile.exists()) { + String text = readTextFileAsString(versionFile, UTF_8); + JsonObject jsonObject = parseJsonObject(text); + dataVersion = (jsonObject.containsKey("DATA_VERSION") + ? jsonObject.getString("DATA_VERSION") : null); + } + + // try the data version directory + supportDir = (dataVersion == null) ? 
null : new File(dataRoot, dataVersion.trim()); + + // check if data version was not found + if (supportDir == null || !supportDir.exists()) { + // look to see if we only have one data version installed + File[] versionDirs = dataRoot.listFiles(f -> { + return f.getName().matches("\\d+\\.\\d+\\.\\d+"); + }); + if (versionDirs.length == 1 && supportDir == null) { + // use the single data version found + supportDir = versionDirs[0]; + + } else if (versionDirs.length > 1) { + System.err.println( + "Could not infer support directory. Multiple data " + + "directory versions at: "); + System.err.println(" " + dataRoot); + if (supportDir != null) { + System.err.println(); + System.err.println("Expected to find: " + supportDir); + } + throw new IllegalStateException( + ((supportDir == null) ? "Could not infer support directory." + : "Could not find support directory (" + supportDir + ").") + + " Multiple data directory versions found at: " + dataRoot); + } else { + // no version directories were found, maybe the data root is + // the actual support directory (mapped in a docker image) + File[] ibmFiles = dataRoot.listFiles(f -> { + return f.getName().toLowerCase().endsWith(".ibm"); + }); + File libPostalDir = new File(dataRoot, "libpostal"); + + // require the .ibm files and libpostal to exist + if (ibmFiles.length > 0 && libPostalDir.exists()) { + supportDir = dataRoot; + } + } + } + + } + if (supportDir == null) { + // use the default path + supportDir = new File(installDir, "data"); + } + + } else { + // use the specified explicit path + supportDir = new File(supportPath); + } + + if (!supportDir.exists()) { + System.err.println("The support directory does not exist:"); + System.err.println(" " + supportDir); + if (supportPath != null) { + System.err.println("Check the -Dsenzing.support.dir=[path] command line option."); + } else { + System.err.println("Use the -Dsenzing.support.dir=[path] command line option to " + + "specify a path"); + } + + throw new 
IllegalStateException("The support directory does not exist: " + supportDir); + } + + if (!supportDir.isDirectory()) { + System.err.println("The support directory is invalid:"); + System.err.println(" " + supportDir); + if (supportPath != null) { + System.err.println("Check the -Dsenzing.support.dir=[path] command line option."); + } else { + System.err.println("Use the -Dsenzing.support.dir=[path] command line option to " + + "specify a path"); + } + throw new IllegalStateException("The support directory is invalid: " + supportDir); + + } + + // check the config path + if (configPath != null) { + configDir = new File(configPath); + } + + // check for a dev build installation + if (configDir == null && installDir != null && "dist".equals(installDir.getName())) { + configDir = new File(installDir, "data"); + } + + // if still null and there is a default, then use it + if (configDir == null && defaultConfigPath != null) { + configDir = new File(defaultConfigPath); + if (!configDir.exists()) { + configDir = null; + } + } + + // if still null, try to use the install's etc directory + if (configDir == null && installDir != null) { + configDir = new File(installDir, "etc"); + if (!configDir.exists()) { + configDir = null; + } + } + + if (configPath != null && !configDir.exists()) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies a path that does not exist:"); + System.err.println(" " + configPath); + + throw new IllegalStateException("Explicit config path does not exist: " + configPath); + } + if (configDir != null && configDir.exists()) { + if (!configDir.isDirectory()) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies a file, not a directory:"); + System.err.println(" " + configPath); + + throw new IllegalStateException( + "Explicit config path is not directory: " + configPath); + } + + String[] requiredFiles = { "cfgVariant.json" }; + List missingFiles = new ArrayList<>(requiredFiles.length); + + for (String fileName 
: requiredFiles) { + File configFile = new File(configDir, fileName); + File supportFile = new File(supportDir, fileName); + if (!configFile.exists() && !supportFile.exists()) { + missingFiles.add(fileName); + } + } + if (missingFiles.size() > 0 && configPath != null) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies an invalid config directory:"); + for (String missing : missingFiles) { + System.err.println(" " + missing + " was not found"); + } + throw new IllegalStateException( + "Explicit config path missing required files: " + missingFiles); + } + } + + // now determine the resource path + resourceDir = (resourcePath == null) ? null : new File(resourcePath); + if (resourceDir == null) { + resourceDir = new File(installDir, "resources"); + if (!resourceDir.exists()) + resourceDir = null; + } + + if (resourceDir != null && resourceDir.exists() && resourceDir.isDirectory()) { + templatesDir = new File(resourceDir, "templates"); + } + + if (resourcePath != null) { + if (!resourceDir.exists()) { + System.err.println( + "The -Dsenzing.resource.dir=[path] option specifies a path that does not exist:"); + System.err.println(" " + resourcePath); + + throw new IllegalStateException( + "Explicit resource path does not exist: " + resourcePath); + } + + if (!resourceDir.isDirectory() || !templatesDir.exists() || !templatesDir.isDirectory()) { + System.err.println( + "The -Dsenzing.resource.dir=[path] option specifies an invalid " + + "resource directory:"); + System.err.println(" " + resourcePath); + + throw new IllegalStateException( + "Explicit resource path is not valid: " + resourcePath); + } + + } else if (resourceDir == null || !resourceDir.exists() || !resourceDir.isDirectory() || !templatesDir.exists() + || !templatesDir.isDirectory()) { + // no usable default resource directory (resourceDir is null when the + // default "resources" directory does not exist), so report none + resourceDir = null; + templatesDir = null; + } + + // construct and initialize the result + InstallLocations result = new InstallLocations(); + result.installDir = installDir; + result.configDir = configDir; + 
result.supportDir = supportDir; + result.resourceDir = resourceDir; + result.templatesDir = templatesDir; + result.devBuild = ("dist".equals(installDir.getName())); + + // return the result + return result; + + } catch (RuntimeException e) { + e.printStackTrace(); + throw e; + + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } +} diff --git a/java/runner/java/com/senzing/runner/SnippetRunner.java b/java/runner/java/com/senzing/runner/SnippetRunner.java new file mode 100644 index 0000000..0516b71 --- /dev/null +++ b/java/runner/java/com/senzing/runner/SnippetRunner.java @@ -0,0 +1,504 @@ +package com.senzing.runner; + +import java.io.*; +import java.sql.*; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.zip.*; +import javax.json.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.*; + +import static com.senzing.runner.Utilities.*; +import static com.senzing.sdk.SzFlag.SZ_NO_FLAGS; + +/** + * Helper class to run each of the snippets. + */ +public class SnippetRunner { + public static final String SOURCE_KEY_PREFIX = "source."; + + public static final String LOAD_KEY_PREFIX = "load."; + + public static final String INPUT_KEY_PREFIX = "input."; + + public static final String DESTROY_AFTER_KEY = "destroyAfter"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String TEST_SOURCE = "TEST"; + + private static final long ONE_MILLION = 1000000L; + + private static final String JAR_PATH = getJarPath(); + + private static final int SIGTERM_EXIT_CODE = 143; + + /** + * Harness for running one or more of the code snippets. + * + * @param args The command line arguments. 
+ */ + public static void main(String[] args) { + try { + SortedMap<String, SortedSet<String>> snippetMap = getSnippetMap(); + Set<String> snippetOptions = new LinkedHashSet<>(); + snippetOptions.addAll(snippetMap.keySet()); + for (Set<String> set : snippetMap.values()) { + snippetOptions.addAll(set); + } + + if (args.length == 0) { + printUsage(snippetMap); + System.exit(1); + } + String settings = System.getProperty("senzing.settings"); + if (settings != null) { + settings = settings.trim(); + } + + // check for settings in the environment if needed + if (settings == null) { + settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings != null) { + settings = settings.trim(); + } + } + + // validate the settings if we have them + if (settings != null) { + JsonObject settingsJson = null; + try { + settingsJson = parseJsonObject(settings); + } catch (Exception e) { + System.err.println("The provided Senzing settings were not valid JSON:"); + System.err.println(); + // print the raw settings text since nothing was successfully parsed + System.err.println(settings); + System.exit(1); + } + } + + // validate the SENZING_DIR + InstallLocations installLocations = null; + try { + installLocations = InstallLocations.findLocations(); + + } catch (Exception e) { + System.exit(1); + } + + Set<String> snippets = new LinkedHashSet<>(); + for (int index = 0; index < args.length; index++) { + String arg = args[index]; + if (arg.equals("all")) { + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); + } + } + }); + continue; + } + if (!snippetOptions.contains(arg)) { + System.err.println("Unrecognized code snippet or snippet group: " + arg); + System.exit(1); + } + if (snippetMap.containsKey(arg)) { + for (String snippet : snippetMap.get(arg)) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); + } + } + } else { + if (!snippets.contains(arg)) { + snippets.add(arg); + } + } + } + + // check if we do not have settings and if not setup a temporary repository + 
if (settings == null) { + settings = setupTempRepository(installLocations); + } + + Long defaultConfigId = null; + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + defaultConfigId = configMgr.getDefaultConfigId(); + + } catch (SzException e) { + e.printStackTrace(); + } finally { + env.destroy(); + env = null; + } + + // execute each snippet + for (String snippet : snippets) { + System.out.println(); + long start = System.nanoTime(); + Properties properties = new Properties(); + String resourceName = "/" + snippet.replaceAll("\\.", "/") + + ".properties"; + try (InputStream is = SnippetRunner.class.getResourceAsStream(resourceName)) { + if (is != null) { + properties.load(is); + } + } + + System.out.println("Preparing repository for " + snippet + "..."); + env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + // first purge the repository + SzDiagnostic diagnostic = env.getDiagnostic(); + diagnostic.purgeRepository(); + + // now set the configuration + SzConfigManager configMgr = env.getConfigManager(); + // check if we need to configure sources + if (properties.containsKey(SOURCE_KEY_PREFIX + 0)) { + SzConfig config = env.getConfig(); + long handle = config.createConfig(); + String snippetConfig = null; + try { + for (int index = 0; + properties.containsKey(SOURCE_KEY_PREFIX + index); + index++) + { + String sourceKey = SOURCE_KEY_PREFIX + index; + String source = properties.getProperty(sourceKey); + source = source.trim(); + System.out.println("Adding data source: " + source); + config.addDataSource(handle, source); + } + snippetConfig = config.exportConfig(handle); + + } finally { + config.closeConfig(handle); + } + + // register the config + long configId = configMgr.addConfig(snippetConfig, snippet); + + // set the default config to the snippet config + configMgr.setDefaultConfigId(configId); + + } else { + // set the default config to the initial 
default + configMgr.setDefaultConfigId(defaultConfigId); + } + + // check if there are files we need to load + if (properties.containsKey(LOAD_KEY_PREFIX + 0)) { + SzEngine engine = env.getEngine(); + for (int index = 0; properties.containsKey(LOAD_KEY_PREFIX + index); index++) + { + String loadKey = LOAD_KEY_PREFIX + index; + String fileName = properties.getProperty(loadKey); + fileName = fileName.trim(); + System.out.println("Loading records from file resource: " + fileName); + try (InputStream is = SnippetRunner.class.getResourceAsStream(fileName)) + { + if (is == null) { + throw new IllegalArgumentException( + "Missing resource (" + fileName + ") for load file (" + + loadKey + ") for snippet (" + snippet + ")"); + } + InputStreamReader isr = new InputStreamReader(is, UTF_8); + BufferedReader br = new BufferedReader(isr); + for (String line = br.readLine(); line != null; line = br.readLine()) { + line = line.trim(); + if (line.length() == 0) continue; + if (line.startsWith("#")) continue; + JsonObject record = Json.createReader(new StringReader(line)).readObject(); + String dataSource = record.getString(DATA_SOURCE, TEST_SOURCE); + String recordId = record.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSource, recordId); + engine.addRecord(recordKey, line, SZ_NO_FLAGS); + } + } + } + } + + } catch (SzException e) { + e.printStackTrace(); + } finally { + env.destroy(); + } + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Prepared repository for " + snippet + ". 
(" + duration + "ms)"); + + executeSnippet(snippet, installLocations, settings, properties); + } + System.out.println(); + + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + /** + * Builds the environment (NAME=value entries) for a snippet sub-process from + * the current environment with SENZING_ENGINE_CONFIGURATION_JSON set to the + * specified settings. + * + * @param senzingInstall The Senzing install locations (currently unused). + * @param settings The settings JSON to pass to the snippet. + * + * @return The array of environment entries. + */ + private static String[] createRuntimeEnv(InstallLocations senzingInstall, String settings) { + final String settingsKey = "SENZING_ENGINE_CONFIGURATION_JSON"; + Map<String, String> origEnv = System.getenv(); + List<String> envList = new ArrayList<>(origEnv.size() + 10); + origEnv.forEach((envKey, envVal) -> { + // skip any inherited value so the variable is defined exactly once + if (!settingsKey.equals(envKey)) { + envList.add(envKey + "=" + envVal); + } + }); + envList.add(settingsKey + "=" + settings); + return envList.toArray(new String[envList.size()]); + } + + /** + * Starts a thread that copies each line read from the specified stream to + * the specified {@link PrintStream} until the stream ends. + * + * @param stream The stream to read lines from. + * @param ps The {@link PrintStream} to write the lines to. + * + * @return The started {@link Thread}. + */ + private static Thread startOutputThread(InputStream stream, PrintStream ps) { + Thread thread = new Thread(() -> { + final String UTF8 = "UTF-8"; + try (InputStreamReader isr = new InputStreamReader(stream, UTF8); + BufferedReader br = new BufferedReader(isr)) + { + for (String line = br.readLine(); line != null; line = br.readLine()) { + ps.println(line); + ps.flush(); + } + } catch (IOException e) { + e.printStackTrace(); + } + }); + thread.start(); + return thread; + } + + /** + * Runs the specified snippet class in a java sub-process using this JAR as + * the class path, feeding it any configured input lines and optionally + * killing it after a configured delay. + * + * @param snippet The fully-qualified snippet class name. + * @param senzingInstall The Senzing install locations. + * @param settings The settings JSON to pass to the snippet. + * @param properties The optional snippet properties. + * + * @throws Exception If the snippet exits with an unexpected exit value. + */ + private static void executeSnippet(String snippet, + InstallLocations senzingInstall, + String settings, + Properties properties) + throws Exception + { + String[] cmdArray = new String[] { "java", "-cp", JAR_PATH, snippet }; + + String[] runtimeEnv = createRuntimeEnv(senzingInstall, settings); + + System.out.println(); + System.out.println("Executing " + snippet + "..."); + long start = System.nanoTime(); + Runtime runtime = Runtime.getRuntime(); + Process process = runtime.exec(cmdArray, runtimeEnv); + Thread errThread = startOutputThread(process.getErrorStream(), System.err); + Thread outThread = startOutputThread(process.getInputStream(), System.out); + if (properties != null && properties.containsKey(INPUT_KEY_PREFIX + 0)) { + try { + // sleep for 1 second to give the process a chance to start up + Thread.sleep(1000L); + } catch (InterruptedException ignore) { + // ignore interruptions + } + PrintWriter pw 
= new PrintWriter( + new OutputStreamWriter(process.getOutputStream(), UTF_8)); + for (int index = 0; + properties.containsKey(INPUT_KEY_PREFIX + index); + index++) + { + String inputLine = properties.getProperty(INPUT_KEY_PREFIX + index); + System.out.println(inputLine); + System.out.flush(); + inputLine = (inputLine == null) ? "" : inputLine.trim(); + pw.println(inputLine); + pw.flush(); + } + } + int exitValue = 0; + int expectedExitValue = 0; + // guard against null properties just as the input handling does + if (properties != null && properties.containsKey(DESTROY_AFTER_KEY)) { + // trim since Properties.load() keeps trailing whitespace in values + String propValue = properties.getProperty(DESTROY_AFTER_KEY).trim(); + long delay = Long.parseLong(propValue); + boolean exited = process.waitFor(delay, TimeUnit.MILLISECONDS); + if (!exited && process.isAlive()) { + expectedExitValue = SIGTERM_EXIT_CODE; + System.out.println(); + System.out.println("Runner destroying " + snippet + " process..."); + // NOTE: using process.destroy() does not trigger the registered + // shutdown hooks in the snippet sub-process for some reason + // (the command is passed as an array so it is not re-tokenized) + Process killer = runtime.exec(new String[] { "kill", String.valueOf(process.pid()) }); + killer.waitFor(); // wait for the kill process to complete + } + exitValue = process.waitFor(); + + } else { + // wait indefinitely for the process to terminate + exitValue = process.waitFor(); + } + + errThread.join(); + outThread.join(); + if (exitValue != expectedExitValue) { + throw new Exception("Failed to execute snippet; " + snippet + + " (" + exitValue + ")"); + } + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Executed " + snippet + ". 
(" + duration + "ms)"); + } + + private static void printUsage(SortedMap> snippetMap) { + System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]*"); + System.err.println(); + System.err.println(" - Specifying no arguments will print this message"); + System.err.println(" - Specifying \"all\" will run all snippets"); + System.err.println(" - Specifying one or more groups will run all snippets in those groups"); + System.err.println(" - Specifying one or more snippets will run those snippet"); + System.err.println(); + System.err.println("Examples:"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar all"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords"); + System.err.println(); + System.err.println("Snippet Group Names:"); + snippetMap.keySet().forEach(group -> { + System.err.println(" - " + group); + }); + System.err.println(); + System.err.println("Snippet Names:"); + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + System.err.println(" - " + snippet); + } + }); + System.err.println(); + } + + private static String getJarPath() throws RuntimeException { + try { + final String osName = System.getProperty("os.name"); + + boolean windows = false; + boolean macOS = false; + + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") || lowerOSName.indexOf("darwin") >= 0) { + macOS = true; + } + + String resourceName = SnippetRunner.class.getSimpleName() + ".class"; + String url = SnippetRunner.class.getResource(resourceName).toString(); + String jarPath = url.replaceAll("jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); + + if (windows && jarPath.startsWith("/")) { + jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); 
+ } + + if (windows && jarPath.startsWith("/")) { + jarPath = jarPath.substring(1); + } + return jarPath; + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static SortedMap> getSnippetMap() throws Exception { + SortedMap> snippetMap = new TreeMap<>(); + File jarFile = new File(JAR_PATH); + try (FileInputStream fis = new FileInputStream(jarFile); ZipInputStream zis = new ZipInputStream(fis)) { + for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) { + String name = entry.getName(); + if (name.startsWith("com/")) { + continue; + } + if (name.startsWith("org/")) { + continue; + } + if (name.startsWith("javax/")) { + continue; + } + if (name.startsWith("META-INF/")) { + continue; + } + if (!name.endsWith(".class")) { + continue; + } + if (name.indexOf('$') >= 0) { + continue; + } + int index = name.indexOf('/'); + if (index < 0) { + continue; + } + String group = name.substring(0, index); + String snippet = name.substring(0, name.length() - ".class".length()).replace('/', '.'); + SortedSet snippetSet = snippetMap.get(group); + if (snippetSet == null) { + snippetSet = new TreeSet<>(); + snippetMap.put(group, snippetSet); + } + snippetSet.add(snippet); + } + } + return snippetMap; + } + + /** + * + */ + private static String setupTempRepository(InstallLocations senzingInstall) throws Exception { + File resourcesDir = senzingInstall.getResourceDirectory(); + File templatesDir = senzingInstall.getTemplatesDirectory(); + File configDir = senzingInstall.getConfigDirectory(); + File schemaDir = new File(resourcesDir, "schema"); + File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); + File configFile = new File(templatesDir, "g2config.json"); + + // lay down the database schema + File databaseFile = File.createTempFile("G2C-", ".db"); + String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); + + try (FileReader rdr = new 
FileReader(schemaFile, UTF_8_CHARSET); + BufferedReader br = new BufferedReader(rdr); + Connection conn = DriverManager.getConnection(jdbcUrl); + Statement stmt = conn.createStatement()) { + for (String sql = br.readLine(); sql != null; sql = br.readLine()) { + sql = sql.trim(); + if (sql.length() == 0) + continue; + stmt.execute(sql); + } + } + + String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath(); + String configPath = configDir.getCanonicalPath(); + String resourcePath = resourcesDir.toString(); + String databasePath = databaseFile.getCanonicalPath(); + String baseConfig = readTextFileAsString(configFile, UTF_8); + String settings = """ + { + "PIPELINE": { + "SUPPORTPATH": "%s", + "CONFIGPATH": "%s", + "RESOURCEPATH": "%s" + }, + "SQL": { + "CONNECTION": "sqlite3://na:na@%s" + } + } + """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); + + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + + long configId = configMgr.addConfig(baseConfig, "Default Config"); + configMgr.setDefaultConfigId(configId); + + } finally { + env.destroy(); + } + + return settings; + } +} diff --git a/java/runner/java/com/senzing/runner/Utilities.java b/java/runner/java/com/senzing/runner/Utilities.java new file mode 100644 index 0000000..cf01796 --- /dev/null +++ b/java/runner/java/com/senzing/runner/Utilities.java @@ -0,0 +1,166 @@ +package com.senzing.runner; + +import java.util.*; +import java.io.*; +import javax.json.*; +import java.nio.charset.Charset; + +import static javax.json.stream.JsonGenerator.PRETTY_PRINTING; + +/** + * Provides utilities for the snippet runner. + */ +public class Utilities { + /** + * Constant for the name of the UTF-8 character encoding. + */ + public static final String UTF_8 = "UTF-8"; + + /** + * Constant for the UTF-8 {@link Charset}. 
+ */ + public static final Charset UTF_8_CHARSET = Charset.forName(UTF_8); + + /** + * Pretty printing {@link JsonWriterFactory}. + */ + private static JsonWriterFactory PRETTY_WRITER_FACTORY = Json + .createWriterFactory(Collections.singletonMap(PRETTY_PRINTING, true)); + + /** + * Parses JSON text as a {@link JsonObject}. If the specified text is not + * formatted as a JSON object then an exception will be thrown. + * + * @param jsonText The JSON text to be parsed. + * + * @return The parsed {@link JsonObject}. + */ + public static JsonObject parseJsonObject(String jsonText) { + if (jsonText == null) { + return null; + } + StringReader sr = new StringReader(jsonText); + JsonReader jsonReader = Json.createReader(sr); + return jsonReader.readObject(); + } + + /** + * Converts the specified {@link JsonValue} to a JSON string. + * + * @param writer The {@link Writer} to write to. + * + * @param jsonValue The {@link JsonValue} describing the JSON. + * + * @param prettyPrint Whether or not to pretty-print the JSON text. + * + * @return The specified {@link Writer}. + * + * @param The type of the writer to which the write the {@link JsonValue}. + */ + public static T toJsonText(T writer, JsonValue jsonValue, boolean prettyPrint) { + Objects.requireNonNull(writer, "Writer cannot be null"); + + JsonWriter jsonWriter = (prettyPrint) + ? PRETTY_WRITER_FACTORY.createWriter(writer) : Json.createWriter(writer); + + if (jsonValue != null) { + jsonWriter.write(jsonValue); + } else { + jsonWriter.write(JsonValue.NULL); + } + + return writer; + } + + /** + * Converts the specified {@link JsonValue} to a JSON string. + * + * @param jsonValue The {@link JsonValue} describing the JSON. + * + * @param prettyPrint Whether or not to pretty-print the JSON text. + * + * @return The specified {@link JsonValue} converted to a JSON string. 
+ */ + public static String toJsonText(JsonValue jsonValue, boolean prettyPrint) { + return toJsonText(new StringWriter(), jsonValue, prettyPrint).toString(); + } + + /** + * Using the specified character encoding, this method wraps the specified + * {@link Reader} in a new {@link Reader} that will skip the "byte order mark" + * (BOM) character at the beginning of the file for UTF character encodings + * (e.g.: "UTF-8", "UTF-16" or "UTF-32"). If the specified character encoding is + * not a "UTF" character encoding then the source {@link Reader} is returned as-is. + * + * @param src The source {@link Reader}. + * @param encoding The character encoding. + * @return The new {@link Reader} that will skip the byte-order mark. + * @throws IOException If an I/O failure occurs. + * @throws NullPointerException If either parameter is null. + */ + public static Reader bomSkippingReader(Reader src, String encoding) throws IOException, NullPointerException { + // check if encoding is null (illegal) + if (encoding == null) { + throw new NullPointerException("Cannot skip byte order mark without specifying the encoding."); + } + + // check if we have an encoding that is NOT a UTF encoding + if (!encoding.toUpperCase().startsWith("UTF")) { + // if not UTF encoding then there should not be a BOM to skip + return src; + } + + // create a pushback reader and peek at the first character + PushbackReader result = new PushbackReader(src, 1); + int first = result.read(); + + // check if already at EOF + if (first == -1) { + // just return the source stream + return src; + } + + // check if we do NOT have a byte order mark + if (first != 0xFEFF) { + // push the character back on to the stream so it can be read + result.unread(first); + } + + // return the pushback reader + return result; + } + + /** + * Reads the contents of the file as text and returns the {@link String} + * representing the contents. The text is expected to be encoded in the + * specified character encoding.
If the specified character encoding is + * null then the system default encoding is used. + * + * @param file The {@link File} whose contents should be read. + * @param charEncoding The character encoding for the text in the file. + * @return The {@link String} representing the contents of the file. + * @throws IOException If an I/O failure occurs. + */ + public static String readTextFileAsString(File file, String charEncoding) throws IOException { + Charset charset = (charEncoding == null) + ? Charset.defaultCharset() : Charset.forName(charEncoding); + + try (FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis, charset); + Reader reader = bomSkippingReader(isr, charset.name()); + BufferedReader br = new BufferedReader(reader)) { + long size = file.length(); + if (size > Integer.MAX_VALUE) + size = Integer.MAX_VALUE; + + StringBuilder sb = new StringBuilder((int) size); + for (int nextChar = br.read(); nextChar >= 0; nextChar = br.read()) { + if (nextChar == 0) + continue; + sb.append((char) nextChar); + } + return sb.toString(); + } + } + +} diff --git a/java/runner/resources/deleting/DeleteViaFutures.properties b/java/runner/resources/deleting/DeleteViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteViaFutures.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/runner/resources/deleting/DeleteViaLoop.properties b/java/runner/resources/deleting/DeleteViaLoop.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteViaLoop.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties b/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties @@ -0,0 +1 @@ 
+load.0=/data/load-500.jsonl diff --git a/java/runner/resources/initialization/PurgeRepository.properties b/java/runner/resources/initialization/PurgeRepository.properties new file mode 100644 index 0000000..82d8e06 --- /dev/null +++ b/java/runner/resources/initialization/PurgeRepository.properties @@ -0,0 +1,2 @@ +load.0=/data/load-500.jsonl +input.0=yes diff --git a/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties new file mode 100644 index 0000000..c5f7301 --- /dev/null +++ b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties @@ -0,0 +1,3 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE diff --git a/java/runner/resources/redo/LoadWithRedoViaLoop.properties b/java/runner/resources/redo/LoadWithRedoViaLoop.properties new file mode 100644 index 0000000..c5f7301 --- /dev/null +++ b/java/runner/resources/redo/LoadWithRedoViaLoop.properties @@ -0,0 +1,3 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE diff --git a/java/runner/resources/redo/RedoContinuous.properties b/java/runner/resources/redo/RedoContinuous.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoContinuous.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git a/java/runner/resources/redo/RedoContinuousViaFutures.properties b/java/runner/resources/redo/RedoContinuousViaFutures.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoContinuousViaFutures.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git 
a/java/runner/resources/redo/RedoWithInfoContinuous.properties b/java/runner/resources/redo/RedoWithInfoContinuous.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoWithInfoContinuous.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git a/java/runner/resources/searching/SearchRecords.properties b/java/runner/resources/searching/SearchRecords.properties new file mode 100644 index 0000000..d90c3bc --- /dev/null +++ b/java/runner/resources/searching/SearchRecords.properties @@ -0,0 +1,6 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl diff --git a/java/runner/resources/searching/SearchViaFutures.properties b/java/runner/resources/searching/SearchViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/searching/SearchViaFutures.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/snippets/configuration/AddDataSources.java b/java/snippets/configuration/AddDataSources.java new file mode 100644 index 0000000..ca29e52 --- /dev/null +++ b/java/snippets/configuration/AddDataSources.java @@ -0,0 +1,99 @@ +package configuration; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding data sources to the Senzing configuration.
+ */ +public class AddDataSources { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddDataSources.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the config and config manager from the environment + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + + // setup a loop to handle race-condition conflicts on + // replacing the default config ID + boolean replacedConfig = false; + while (!replacedConfig) { + // get the current default config ID and associated config JSON + long configId = configMgr.getDefaultConfigId(); + String configDefinition = configMgr.getConfig(configId); + + // prepare an in-memory config to be modified and get the handle + long configHandle = config.importConfig(configDefinition); + String modifiedConfig = null; + try { + // create an array of the data sources to add + String[] dataSources = { "CUSTOMERS", "EMPLOYEES", "WATCHLIST" }; + + // loop through the array and add each data source + for (String dataSource : dataSources) { + config.addDataSource(configHandle, dataSource); + } + + // export the modified config to JSON text + modifiedConfig = config.exportConfig(configHandle); + + } finally { + config.closeConfig(configHandle); + } + + // add the modified config to the repository with a comment + long newConfigId = configMgr.addConfig( + modifiedConfig, "Added truth set data sources"); + + try { + // replace the default config + configMgr.replaceDefaultConfigId(configId, newConfigId); + + // if we 
get here then set the flag indicating success + replacedConfig = true; + + } catch (SzReplaceConflictException e) { + // if we get here then another thread or process has + // changed the default config ID since we retrieved it + // (i.e.: we have a race condition) so we allow the + // loop to repeat with the latest default config ID + } + } + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/configuration/InitDefaultConfig.java b/java/snippets/configuration/InitDefaultConfig.java new file mode 100644 index 0000000..b28fd2a --- /dev/null +++ b/java/snippets/configuration/InitDefaultConfig.java @@ -0,0 +1,70 @@ +package configuration; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of initializing the default configuration of the Senzing repository.
+ */ +public class InitDefaultConfig { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = InitDefaultConfig.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the config and config manager from the environment + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + + // prepare an in-memory config to be modified and get the handle + long configHandle = config.createConfig(); + String configDefinition = null; + try { + configDefinition = config.exportConfig(configHandle); + + } finally { + config.closeConfig(configHandle); + } + + // add the modified config to the repository with a comment + long configId = configMgr.addConfig( + configDefinition, "Initial configuration"); + + // replace the default config + configMgr.setDefaultConfigId(configId); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/configuration/README.md b/java/snippets/configuration/README.md new file mode 100644 index 0000000..5537d31 --- 
/dev/null +++ b/java/snippets/configuration/README.md @@ -0,0 +1,11 @@ +# Configuration +The configuration snippets outline how to modify the Senzing configuration, register the modified configuration with a configuration ID and update the default configuration ID for the repository. + +You may either `setDefaultConfigId()` or `replaceDefaultConfigId()`. Initially, the default config ID must be set since there is no existing config ID to replace. However, when updating you may use `replaceDefaultConfigId()` to guard against race conditions of multiple threads or processes updating at the same time. + +## Snippets +* **AddDataSources.java** + * Gets the current default config, creates a modified config with additional data sources, registers that modified config and then replaces the default config ID. +* **InitDefaultConfig.java** + * Initializes the repository with a default config ID using the template configuration provided by Senzing. + diff --git a/java/snippets/deleting/DeleteViaFutures.java b/java/snippets/deleting/DeleteViaFutures.java new file mode 100644 index 0000000..2372b0b --- /dev/null +++ b/java/snippets/deleting/DeleteViaFutures.java @@ -0,0 +1,286 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of deleting records from the Senzing repository.
+ */ +public class DeleteViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = DeleteViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the deleteRecord() function with no flags + engine.deleteRecord(recordKey, SZ_NO_FLAGS); + + return null; + }); + + // add the future to the pending futures map + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT
blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && 
!future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/deleting/DeleteViaLoop.java b/java/snippets/deleting/DeleteViaLoop.java new file mode 100644 index 0000000..f14c3ca --- /dev/null +++ b/java/snippets/deleting/DeleteViaLoop.java @@ -0,0 +1,168 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of deleting records from the Senzing repository. + */ +public class DeleteViaLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String
instanceName = DeleteViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + // loop through the example records and delete them from the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the deleteRecord() function with no flags + engine.deleteRecord( + SzRecordKey.of(dataSourceCode, recordId), SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } +
retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/deleting/DeleteWithInfoViaFutures.java b/java/snippets/deleting/DeleteWithInfoViaFutures.java new file mode 100644 index 0000000..5b770d9 --- /dev/null +++ b/java/snippets/deleting/DeleteWithInfoViaFutures.java @@ -0,0 +1,325 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides an example of deleting records from the Senzing repository using futures and processing the "with info" responses. + */ +public class DeleteWithInfoViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 
0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set<Long> entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = DeleteWithInfoViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map<Future<String>, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record 
record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future<String> future = executor.submit(() -> { + // call the deleteRecord() function with the SZ_WITH_INFO_FLAGS + return engine.deleteRecord(recordKey, SZ_WITH_INFO_FLAGS); + }); + + // add the future to the map of pending futures + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + + // if we have still exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations 
: " + successCount); + System.out.println("Total entities deleted : " + entityIdSet.size()); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(SzEngine engine, + Map<Future<String>, Record> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator<Map.Entry<Future<String>,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry<Future<String>,Record> entry = iter.next(); + Future<String> future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then rethrow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as 
retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities deleted along with the records -- essentially a contrived + * data mart. + * @param engine The engine used to check whether each affected entity still exists. + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.remove(entityId); + } catch (SzNotFoundException e) { + entityIdSet.add(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. 
+ * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/deleting/README.md b/java/snippets/deleting/README.md new file mode 100644 index 0000000..3afe0f7 --- /dev/null +++ b/java/snippets/deleting/README.md @@ -0,0 +1,15 @@ +# Deleting Data + +The deletion snippets outline deleting previously added source records. Deleting a source record removes the previously added record from the system, completes the entity resolution process and persists the outcomes in the Senzing repository. + +Deleting a record only requires the data source code and record ID for the record to be deleted. + +## Snippets + +- **DeleteViaFutures.java** + - Read and delete source records from a file using multiple threads +- **DeleteViaLoop.java** + - Basic read and delete of source records from a file +- **DeleteWithInfoViaFutures.java** + - Read and delete source records from a file using multiple threads + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `deleteRecord()` method and track the entity IDs. diff --git a/java/snippets/information/CheckDatastorePerformance.java b/java/snippets/information/CheckDatastorePerformance.java new file mode 100644 index 0000000..2779bd2 --- /dev/null +++ b/java/snippets/information/CheckDatastorePerformance.java @@ -0,0 +1,57 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of checking the performance of the Senzing datastore. 
+ */ +public class CheckDatastorePerformance { + private static final int SECONDS_TO_RUN = 3; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = CheckDatastorePerformance.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + String result = diagnostic.checkDatastorePerformance(SECONDS_TO_RUN); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetDatastoreInfo.java b/java/snippets/information/GetDatastoreInfo.java new file mode 100644 index 0000000..06aae5a --- /dev/null +++ b/java/snippets/information/GetDatastoreInfo.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of retrieving information about the Senzing datastore. 
+ */ +public class GetDatastoreInfo { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetDatastoreInfo.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + String result = diagnostic.getDatastoreInfo(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetLicense.java b/java/snippets/information/GetLicense.java new file mode 100644 index 0000000..b56378b --- /dev/null +++ b/java/snippets/information/GetLicense.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of retrieving the Senzing license details. 
+ */ +public class GetLicense { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetLicense.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + + String result = product.getLicense(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetVersion.java b/java/snippets/information/GetVersion.java new file mode 100644 index 0000000..c2ab665 --- /dev/null +++ b/java/snippets/information/GetVersion.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of retrieving the Senzing product version details. 
+ */ +public class GetVersion { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetVersion.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + + String result = product.getVersion(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/README.md b/java/snippets/information/README.md new file mode 100644 index 0000000..11c2b95 --- /dev/null +++ b/java/snippets/information/README.md @@ -0,0 +1,14 @@ +# System & Repository Information + +The information snippets outline the retrieval of different informational aspects of a Senzing instance or engine. 
+ +## Snippets + +- **CheckDatastorePerformance.java** + - Run an insert test against the Senzing repository to gauge performance +- **GetDatastoreInfo.java** + - Return basic information about the Senzing repository(s) +- **GetLicense.java** + - Return the details of the license currently in use +- **GetVersion.java** + - Return the current Senzing product version details diff --git a/java/snippets/initialization/EnginePriming.java b/java/snippets/initialization/EnginePriming.java new file mode 100644 index 0000000..7e58f1e --- /dev/null +++ b/java/snippets/initialization/EnginePriming.java @@ -0,0 +1,61 @@ +package initialization; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of priming the Senzing engine and timing how long it takes. + */ +public class EnginePriming { + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = EnginePriming.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzEngine engine = env.getEngine(); + + long start = System.nanoTime(); + + System.out.println("Priming Senzing engine..."); + engine.primeEngine(); + + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Primed Senzing engine. 
(" + duration + "ms)"); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/initialization/EnvironmentAndHubs.java b/java/snippets/initialization/EnvironmentAndHubs.java new file mode 100644 index 0000000..9bd0c1e --- /dev/null +++ b/java/snippets/initialization/EnvironmentAndHubs.java @@ -0,0 +1,66 @@ +package initialization; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of creating the Senzing environment and obtaining each of its hub handles. 
+ */ +public class EnvironmentAndHubs { + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = EnvironmentAndHubs.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + SzDiagnostic diagnostic = env.getDiagnostic(); + SzEngine engine = env.getEngine(); + + System.out.println(product); + System.out.println(config); + System.out.println(configMgr); + System.out.println(diagnostic); + System.out.println(engine); + + // do work with the hub handles which are valid + // until the env.destroy() function is called + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/initialization/PurgeRepository.java b/java/snippets/initialization/PurgeRepository.java new file mode 100644 index 0000000..e47a911 --- /dev/null +++ b/java/snippets/initialization/PurgeRepository.java @@ -0,0 +1,87 @@ +package 
initialization; + +import java.io.*; +import java.util.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of purging all data from the Senzing repository after prompting for confirmation. + */ +public class PurgeRepository { + private static final String PURGE_MESSAGE = """ + **************************************** WARNING **************************************** + + This example will purge all currently loaded data from the Senzing datastore! + Before proceeding, all instances of Senzing (custom code, tools, etc.) must be shut down. + + ***************************************************************************************** + + Are you sure you want to continue and purge the Senzing datastore? (y/n) """; + + private static final Set<String> YES_ANSWERS + = Set.of("y", "Y", "Yes", "yes", "YES"); + + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + System.out.println(PURGE_MESSAGE); + try { + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); + String response = br.readLine(); + if (response == null || !YES_ANSWERS.contains(response)) { + System.exit(1); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = PurgeRepository.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + long start = System.nanoTime(); + + System.out.println("Purging Senzing repository..."); + diagnostic.purgeRepository(); + + long duration = 
(System.nanoTime() - start) / ONE_MILLION; + System.out.println("Purged Senzing repository. (" + duration + "ms)"); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + } +} \ No newline at end of file diff --git a/java/snippets/initialization/README.md b/java/snippets/initialization/README.md new file mode 100644 index 0000000..316ff63 --- /dev/null +++ b/java/snippets/initialization/README.md @@ -0,0 +1,11 @@ +# Initialization + +## Snippets + +- **EnginePriming.java** + - Priming the Senzing engine before use loads resource-intensive assets upfront. Without priming, the first SDK call to the engine will appear slower than usual because it causes these assets to be loaded +- **EnvironmentAndHubs.java** + - Basic example of how to create the Senzing environment and obtain each of the available hub handles (product, config, config manager, diagnostic and engine) +- **PurgeRepository.java** + - **WARNING** This snippet will remove all data from a Senzing repository, use with caution! **WARNING** + - It will prompt first, but still use with caution! diff --git a/java/snippets/loading/LoadRecords.java b/java/snippets/loading/LoadRecords.java new file mode 100644 index 0000000..8de197c --- /dev/null +++ b/java/snippets/loading/LoadRecords.java @@ -0,0 +1,171 @@ +package loading; + +import java.util.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class LoadRecords { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadRecords.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (Map.Entry<SzRecordKey, String> entry : getRecords().entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. 
+ */ + public static Map<SzRecordKey, String> getRecords() { + Map<SzRecordKey, String> records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1001"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1001", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Robert", + "PRIMARY_NAME_LAST": "Smith", + "DATE_OF_BIRTH": "12/11/1978", + "ADDR_TYPE": "MAILING", + "ADDR_FULL": "123 Main Street, Las Vegas, NV 89132", + "PHONE_TYPE": "HOME", + "PHONE_NUMBER": "702-919-1300", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1002"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1002", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_GENERATION": "II", + "DATE_OF_BIRTH": "11/12/1978", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Lane", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89111", + "PHONE_TYPE": "MOBILE", + "PHONE_NUMBER": "702-919-1300" + } + """); + + records.put( + SzRecordKey.of("TEST", "1003"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1003", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_MIDDLE": "J", + "DATE_OF_BIRTH": "12/11/1978", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1004"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1004", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "B", + "PRIMARY_NAME_LAST": "Smith", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Ln", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1005"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1005", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Rob", + "PRIMARY_NAME_MIDDLE": "E", + "PRIMARY_NAME_LAST": "Smith", + "DRIVERS_LICENSE_NUMBER": "112233", + "DRIVERS_LICENSE_STATE": "NV", + "ADDR_TYPE": "MAILING", + 
"ADDR_LINE1": "123 E Main St", + "ADDR_CITY": "Henderson", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java new file mode 100644 index 0000000..0ba3127 --- /dev/null +++ b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java @@ -0,0 +1,220 @@ +package loading; + +import java.io.*; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadTruthSetWithInfoViaLoop { + private static final List<String> INPUT_FILES = List.of( + "../resources/data/truthset/customers.jsonl", + "../resources/data/truthset/reference.jsonl", + "../resources/data/truthset/watchlist.jsonl"); + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + 
if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadTruthSetWithInfoViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the input files + for (String filePath : INPUT_FILES) { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + String info = engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_WITH_INFO_FLAGS); + + successCount++; + + // process the info + processInfo(engine, info); + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, filePath, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, filePath, lineNumber, line); + errorCount++; // 
increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, filePath, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + } + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Total entities created : " + entityIdSet.size()); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. 
+ */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + String filePath, + int lineNumber, + String recordJson) + { + File file = new File(filePath); + String fileName = file.getName(); + + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName + + " AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/LoadViaFutures.java b/java/snippets/loading/LoadViaFutures.java new file mode 100644 index 0000000..b2eee67 --- /dev/null +++ b/java/snippets/loading/LoadViaFutures.java @@ -0,0 +1,287 @@ +package loading; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class LoadViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + engine.addRecord(recordKey, record.line, SZ_NO_FLAGS); + + // return null since we have no "info" to return + return null; + }); + + // add the future to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + 
} + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is 
not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/LoadViaLoop.java b/java/snippets/loading/LoadViaLoop.java new file mode 100644 index 0000000..334d789 --- /dev/null +++ b/java/snippets/loading/LoadViaLoop.java @@ -0,0 +1,168 @@ +package loading; + +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadViaLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = 
LoadViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + 
+ } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/LoadViaQueue.java b/java/snippets/loading/LoadViaQueue.java new file mode 100644 index 0000000..38c1e06 --- /dev/null +++ b/java/snippets/loading/LoadViaQueue.java @@ -0,0 +1,271 @@ +package loading; + +import java.io.*; +import java.util.concurrent.*; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; +import static java.lang.Thread.State.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadViaQueue { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final int MAXIMUM_BACKLOG = 100; + private static final long POLL_TIMEOUT = 3000L; + private static final TimeUnit POLL_TIME_UNIT = TimeUnit.MILLISECONDS; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static final Object MONITOR = new Object(); + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public record Record(int lineNumber, String line) { } + + private static final 
BlockingQueue recordQueue + = new LinkedBlockingQueue<>(MAXIMUM_BACKLOG); + + private static volatile Exception producerFailure = null; + private static volatile Exception consumerFailure = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadViaQueue.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + Thread producer = new Thread(() -> { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // add the record to the queue + recordQueue.put(new Record(lineNumber, line)); + } + + } catch (Exception e) { + producerFailure = e; + } + }); + + // start the producer + producer.start(); + + Thread consumer = new Thread(() -> { + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop while producer has not failed and is either still running + // or there are remaining records + while 
(producerFailure == null + && (!isTerminated(producer) || recordQueue.size() > 0)) + { + Record record = recordQueue.poll(POLL_TIMEOUT, POLL_TIME_UNIT); + + // check if we timed out getting the next record + if (record == null) { + // continue the loop to check if we are done + continue; + } + + // get the line number and line from the record + int lineNumber = record.lineNumber; + String line = record.line; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + synchronized (MONITOR) { + successCount++; + } + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + } + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter( + new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + } + + } catch (Exception e) { + // catch any other exception (incl. 
SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + } + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + consumerFailure = e; + } + }); + + // start the consumer + consumer.start(); + + // join the threads + while (!isTerminated(producer)) { + try { + producer.join(); + } catch (InterruptedException ignore) { + ignore.printStackTrace(); + } + } + while (!isTerminated(consumer)) { + try { + consumer.join(); + } catch (InterruptedException ignore) { + ignore.printStackTrace(); + } + } + + try { + // check for producer and consumer failures + if (producerFailure != null) { + throw producerFailure; + } + if (consumerFailure != null) { + throw consumerFailure; + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + synchronized (MONITOR) { + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + } + + } + + } + + private static boolean isTerminated(Thread thread) { + synchronized (thread) { + return (thread.getState() == TERMINATED); + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. 
+ * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/LoadWithInfoViaFutures.java b/java/snippets/loading/LoadWithInfoViaFutures.java new file mode 100644 index 0000000..e082f9b --- /dev/null +++ b/java/snippets/loading/LoadWithInfoViaFutures.java @@ -0,0 +1,325 @@ +package loading; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadWithInfoViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int 
errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadWithInfoViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + 
+ // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + return engine.addRecord(recordKey, record.line, SZ_WITH_INFO_FLAGS); + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + 
System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Total entities created : " + entityIdSet.size()); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(SzEngine engine, + Map, Record> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch 
(SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. 
+ */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/LoadWithStatsViaLoop.java b/java/snippets/loading/LoadWithStatsViaLoop.java new file mode 100644 index 0000000..c246fdc --- /dev/null +++ b/java/snippets/loading/LoadWithStatsViaLoop.java @@ -0,0 +1,187 @@ +package loading; + +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadWithStatsViaLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final int STATS_INTERVAL = 100; + private static final int STATS_TRUNCATE = 70; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new 
IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadWithStatsViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + // check if it is time obtain stats + if ((successCount % STATS_INTERVAL) == 0) { + try { + String stats = engine.getStats(); + if (stats.length() > STATS_TRUNCATE) { + stats = stats.substring(0, STATS_TRUNCATE) + " ..."; + } + System.out.println("* STATS: " + stats); + + } catch (SzException e) { + // trap the stats exeption so it is not misinterpreted + // as an exception from engine.addRecord() + System.err.println("**** FAILED TO OBTAIN STATS: 
" + e); + } + } + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/README.md b/java/snippets/loading/README.md new file mode 100644 index 0000000..035b9a8 --- /dev/null +++ b/java/snippets/loading/README.md @@ -0,0 +1,23 @@ +# Loading Data + +The loading snippets outline adding new source records. Adding source records ingests [mapped](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) JSON data, completes the entity resolution process and persists outcomes in the Senzing repository. Adding a source record with the same data source code and record ID as an existing record will replace it. + +## Snippets + +- **LoadRecords.java** + - Basic iteration over a few records, adding each one +- **LoadTruthSetWithInfoViaLoop.java** + - Read and load from multiple source files, adding a sample truth set + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `addRecord()` method and track the entity IDs for the records. +- **LoadViaFutures.java** + - Read and load source records from a file using multiple threads +- **LoadViaLoop.java** + - Basic read and add source records from a file +- **LoadViaQueue.java** + - Read and load source records using a queue +- **LoadWithInfoViaFutures.java** + - Read and load source records from a file using multiple threads + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `addRecord()` method and track the entity IDs for the records. 
+- **LoadWithStatsViaLoop.java** + - Basic read and add source records from a file + - Periodic calling to `getStats()` method during load to track loading statistics. diff --git a/java/snippets/redo/LoadWithRedoViaLoop.java b/java/snippets/redo/LoadWithRedoViaLoop.java new file mode 100644 index 0000000..6c10e60 --- /dev/null +++ b/java/snippets/redo/LoadWithRedoViaLoop.java @@ -0,0 +1,242 @@ +package redo; + +import java.io.*; +import java.util.List; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadWithRedoViaLoop { + private static final List INPUT_FILES = List.of( + "../resources/data/truthset/customers.jsonl", + "../resources/data/truthset/reference.jsonl", + "../resources/data/truthset/watchlist.jsonl"); + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = 
LoadWithRedoViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the input files + for (String filePath: INPUT_FILES) { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, filePath, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, filePath, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + trackRetryRecord(line); + + } catch (Exception e) { + // catch any other exception (incl. 
SzException) here + logFailedRecord(CRITICAL, e, filePath, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + } + + // now that we have loaded the records, check for redos and handle them + while (engine.countRedoRecords() > 0) { + // get the next redo record + String redo = engine.getRedoRecord(); + + try { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // increment the redone count + redoneCount++; + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); + + } catch (Exception e) { + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + String filePath, + int lineNumber, + String recordJson) + { + File file = new File(filePath); + String fileName = file.getName(); + + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName + + " AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + /** + * Example method for logging failed redo records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The JSON text of the redo record that failed to be + * processed. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } +} \ No newline at end of file diff --git a/java/snippets/redo/README.md b/java/snippets/redo/README.md new file mode 100644 index 0000000..918a2b3 --- /dev/null +++ b/java/snippets/redo/README.md @@ -0,0 +1,21 @@ +# Redo Records + +The redo snippets outline processing redo records. 
During normal processing of loading, deleting and replacing data, the Senzing engine may determine additional work needs to be completed for an entity. There are times the Senzing engine will decide to defer this additional work. Examples of why this may happen include: + +- Records loaded in parallel are clustering around the same entities causing contention +- Automatic corrections +- Cleansing decisions made on attributes determined to no longer be useful for entity resolution + +When an entity requires additional work, a record is automatically created in the system indicating this requirement. These records are called redo records. Redo records need to be periodically or continuously checked for and processed. Periodic processing is suitable after manipulating smaller portions of data, for example, at the end of a batch load of data. In contrast, a continuous process checking for and processing redo records is suitable in a streaming system that is constantly manipulating data. In general, it is recommended to have a continuous redo process checking for any redo records to process and processing them. + +## Snippets + +- **LoadWithRedoViaLoop.java** + - Read and load source records from a file and then process any redo records +- **RedoContinuous.java** + - Basic example of continuously monitoring for redo records to process +- **RedoContinuousViaFutures.java** + - Continuously monitor for redo records to process using multiple threads +- **RedoWithInfoContinuous.java** + - Continuously monitor for redo records to process + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `processRedoRecord()` method and track the entity IDs for the records. 
diff --git a/java/snippets/redo/RedoContinuous.java b/java/snippets/redo/RedoContinuous.java new file mode 100644 index 0000000..cc9da66 --- /dev/null +++ b/java/snippets/redo/RedoContinuous.java @@ -0,0 +1,176 @@ +package redo; + +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of continuously monitoring for and processing redo records. + */ +public class RedoContinuous { + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final long REDO_PAUSE_TIMEOUT = 30000L; + + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuous.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = 
env.getEngine(); + + while (true) { + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo reords are available + if (redo == null) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; + } + + try { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // increment the redone count + redoneCount++; + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); + + } catch (Exception e) { + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; + } + } + + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception + } + + } + + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. 
+ * @param exception The exception itself. + * @param redoRecord The JSON text of the redo record that failed to be + * processed. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } +} \ No newline at end of file diff --git a/java/snippets/redo/RedoContinuousViaFutures.java b/java/snippets/redo/RedoContinuousViaFutures.java new file mode 100644 index 0000000..84352e2 --- /dev/null +++ b/java/snippets/redo/RedoContinuousViaFutures.java @@ -0,0 +1,280 @@ +package redo; + +import java.io.*; +import java.util.*; +import java.util.concurrent.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides an example of continuously processing redo records using multiple threads. 
+ */ +public class RedoContinuousViaFutures { + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long HANDLE_PAUSE_TIMEOUT = 100L; + + private static final long REDO_PAUSE_TIMEOUT = 30000L; + + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuousViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, String> pendingFutures = new IdentityHashMap<>(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // shutdown the executor service + if (!executor.isShutdown()) executor.shutdown(); + + try { + 
handlePendingFutures(pendingFutures, true); + } catch (Exception e) { + e.printStackTrace(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo reords are available + if (redo == null) break; + + Future future = executor.submit(() -> { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // return null since we have no "info" to return + return null; + }); + + // add the future to the pending future list + pendingFutures.put(future, redo); + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(HANDLE_PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + + // check if there are no redo records right now + if (engine.countRedoRecords() == 0) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. 
Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; + } + } + + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception + } + + } + + private static void handlePendingFutures(Map, String> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator,String>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,String> entry = iter.next(); + Future future = entry.getKey(); + String redoRecord = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + redoneCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch 
(SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRedo(WARNING, e, redoRecord); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + trackRetryRecord(redoRecord); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRedo(CRITICAL, e, redoRecord); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. 
+ */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } +} \ No newline at end of file diff --git a/java/snippets/redo/RedoWithInfoContinuous.java b/java/snippets/redo/RedoWithInfoContinuous.java new file mode 100644 index 0000000..9b54e3d --- /dev/null +++ b/java/snippets/redo/RedoWithInfoContinuous.java @@ -0,0 +1,224 @@ +package redo; + +import java.io.*; +import java.util.HashSet; +import java.util.Set; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonNumber; +import javax.json.JsonObject; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class RedoWithInfoContinuous { + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final long REDO_PAUSE_TIMEOUT = 30000L; + + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoWithInfoContinuous.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo reords are available + if (redo == null) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No 
redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; + } + + try { + // process the redo record + String info = engine.processRedoRecord(redo, SZ_WITH_INFO_FLAGS); + + // increment the redone count + redoneCount++; + + // process the info + processInfo(engine, info); + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); + + } catch (Exception e) { + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; + } + } + + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception + } + + } + + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total entities affected : " + entityIdSet.size()); + System.out.println("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. 
+ * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. + */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. 
+ */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + +} \ No newline at end of file diff --git a/java/snippets/searching/README.md b/java/snippets/searching/README.md new file mode 100644 index 0000000..0525212 --- /dev/null +++ b/java/snippets/searching/README.md @@ -0,0 +1,14 @@ +# Searching for Entities + +The search snippets outline searching for entities in the system. Searching for entities uses the same mapped JSON data [specification](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) as SDK methods such as `addRecord()` to format the search request. + +There are [considerations](https://senzing.zendesk.com/hc/en-us/articles/360007880814-Guidelines-for-Successful-Entity-Searching) to be aware of when searching. 
+ +## Snippets + +- **SearchRecords.java** + - Basic iteration over a few records, searching for each one + - To see results first load records with [LoadTruthSetWithInfoViaLoop.java](../loading/LoadTruthSetViaLoop.java) +- **SearchViaFutures.java** + - Read and search for records from a file using multiple threads + - To see results first load records with [LoadViaFutures.java](../loading/LoadViaFutures.java) diff --git a/java/snippets/searching/SearchRecords.java b/java/snippets/searching/SearchRecords.java new file mode 100644 index 0000000..b151b25 --- /dev/null +++ b/java/snippets/searching/SearchRecords.java @@ -0,0 +1,122 @@ +package searching; + +import java.io.StringReader; +import java.util.*; +import javax.json.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class SearchRecords { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = SearchRecords.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (String criteria : getSearchCriteria()) { + // call the searchByAttributes() function with default flags + String result = engine.searchByAttributes( + criteria, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); + + JsonObject jsonObj = Json.createReader( + new 
StringReader(result)).readObject(); + + System.out.println(); + JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); + if (jsonArr.size() == 0) { + System.out.println("No results for criteria: " + criteria); + } else { + System.out.println("Results for criteria: " + criteria); + for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { + obj = obj.getJsonObject("ENTITY"); + obj = obj.getJsonObject("RESOLVED_ENTITY"); + long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); + String name = obj.getString("ENTITY_NAME", null); + System.out.println(entityId + ": " + name); + } + } + System.out.flush(); + } + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing a list of criteria to search on. + * + * @return A {@link List} {@link String} JSON text values desribing the + * sets of criteria with which to search. 
+ */ + public static List getSearchCriteria() { + List records = new LinkedList<>(); + records.add( + """ + { + "NAME_FULL": "Susan Moony", + "DATE_OF_BIRTH": "15/6/1998", + "SSN_NUMBER": "521212123" + } + """); + + records.add( + """ + { + "NAME_FIRST": "Robert", + "NAME_LAST": "Smith", + "ADDR_FULL": "123 Main Street Las Vegas NV 89132" + } + """); + + records.add( + """ + { + "NAME_FIRST": "Makio", + "NAME_LAST": "Yamanaka", + "ADDR_FULL": "787 Rotary Drive Rotorville FL 78720" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/searching/SearchViaFutures.java b/java/snippets/searching/SearchViaFutures.java new file mode 100644 index 0000000..cb7d0fe --- /dev/null +++ b/java/snippets/searching/SearchViaFutures.java @@ -0,0 +1,296 @@ +package searching; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class SearchViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/search-5K.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Criteria(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + private static Set foundEntities = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = SearchViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Criteria> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Criteria criteria = new Criteria(lineNumber, line); + + try { + Future future = executor.submit(() -> { + // call the searchByAttributes() function with default flags + return engine.searchByAttributes( + criteria.line, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS); + }); + + // add the future to the pending future list + pendingFutures.put(future, criteria); + + } catch (JsonException e) { + logFailedSearch(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing 
+ } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println( + "Searches successfully completed : " + successCount); + System.out.println( + "Total entities found via searches : " + foundEntities.size()); + System.out.println( + "Searches failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(Map, Criteria> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Criteria>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Criteria> entry = iter.next(); + Future future = entry.getKey(); + Criteria criteria = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value and check for an exception + String results = future.get(); + + // if we get here then 
increment the success count + successCount++; + + // parse the results + JsonObject jsonObj = Json.createReader( + new StringReader(results)).readObject(); + + JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES"); + for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) { + obj = obj.getJsonObject("ENTITY"); + obj = obj.getJsonObject("RESOLVED_ENTITY"); + long entityId = obj.getJsonNumber("ENTITY_ID").longValue(); + foundEntities.add(entityId); + } + + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedSearch(ERROR, e, criteria.lineNumber, criteria.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedSearch(WARNING, e, criteria.lineNumber, criteria.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(criteria.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedSearch(CRITICAL, e, criteria.lineNumber, criteria.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. 
+ * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param criteriaJson The JSON text for the failed search criteria. + */ + private static void logFailedSearch(String errorType, + Exception exception, + int lineNumber, + String criteriaJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " + lineNumber + ": "); + System.err.println(criteriaJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/stewardship/ForceResolve.java b/java/snippets/stewardship/ForceResolve.java new file mode 100644 index 0000000..21b9ff6 --- /dev/null +++ b/java/snippets/stewardship/ForceResolve.java @@ -0,0 +1,172 @@ +package stewardship; + +import java.util.*; +import javax.json.*; +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of force-resolving records that + * otherwise will not resolve to one another. 
+ */ +public class ForceResolve { + private static final String TEST = "TEST"; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = ForceResolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID to force resolve..."); + SzRecordKey key1 = SzRecordKey.of(TEST, "1"); + SzRecordKey key3 = SzRecordKey.of(TEST, "3"); + + String record1 = engine.getRecord(key1, SZ_RECORD_DEFAULT_FLAGS); + String record3 = engine.getRecord(key3, SZ_RECORD_DEFAULT_FLAGS); + + 
JsonObject obj1 = Json.createReader(new StringReader(record1)).readObject(); + JsonObject obj3 = Json.createReader(new StringReader(record3)).readObject(); + + obj1 = obj1.getJsonObject("JSON_DATA"); + obj3 = obj3.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job1 = Json.createObjectBuilder(obj1); + JsonObjectBuilder job3 = Json.createObjectBuilder(obj3); + + for (JsonObjectBuilder job : List.of(job1, job3)) { + job.add("TRUSTED_ID_NUMBER", "TEST_R1-TEST_R3"); + job.add("TRUSTED_ID_TYPE", "FORCE_RESOLVE"); + } + + record1 = job1.build().toString(); + record3 = job3.build().toString(); + + engine.addRecord(key1, record1, SZ_NO_FLAGS); + engine.addRecord(key3, record3, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values desribing the records to be added. 
+ */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1", + "PRIMARY_NAME_FULL": "Patrick Smith", + "AKA_NAME_FULL": "Paddy Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "2"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "2", + "PRIMARY_NAME_FULL": "Patricia Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688", + "DATE_OF_BIRTH": "5/4/1994" + } + """); + + records.put( + SzRecordKey.of("TEST", "3"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "3", + "PRIMARY_NAME_FULL": "Pat Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/stewardship/ForceUnresolve.java b/java/snippets/stewardship/ForceUnresolve.java new file mode 100644 index 0000000..6ff8475 --- /dev/null +++ b/java/snippets/stewardship/ForceUnresolve.java @@ -0,0 +1,172 @@ +package stewardship; + +import java.util.*; +import javax.json.*; +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of force-unresolving records that + * otherwise will not resolve to one another. 
+ */ +public class ForceUnresolve { + private static final String TEST = "TEST"; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = ForceUnresolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID to force unresolve..."); + SzRecordKey key4 = SzRecordKey.of(TEST, "4"); + SzRecordKey key6 = SzRecordKey.of(TEST, "6"); + + String record4 = engine.getRecord(key4, SZ_RECORD_DEFAULT_FLAGS); + String record6 = engine.getRecord(key6, 
SZ_RECORD_DEFAULT_FLAGS); + + JsonObject obj4 = Json.createReader(new StringReader(record4)).readObject(); + JsonObject obj6 = Json.createReader(new StringReader(record6)).readObject(); + + obj4 = obj4.getJsonObject("JSON_DATA"); + obj6 = obj6.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job4 = Json.createObjectBuilder(obj4); + JsonObjectBuilder job6 = Json.createObjectBuilder(obj6); + + job4.add("TRUSTED_ID_NUMBER", "TEST_R4-TEST_R6"); + job4.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + job6.add("TRUSTED_ID_NUMBER", "TEST_R6-TEST_R4"); + job6.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + record4 = job4.build().toString(); + record6 = job6.build().toString(); + + engine.addRecord(key4, record4, SZ_NO_FLAGS); + engine.addRecord(key6, record6, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values desribing the records to be added. 
+ */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "4"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "4", + "PRIMARY_NAME_FULL": "Elizabeth Jonas", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "SSN_NUMBER": "767-87-7678", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "5"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "5", + "PRIMARY_NAME_FULL": "Beth Jones", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "SSN_NUMBER": "767-87-7678", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "6"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "6", + "PRIMARY_NAME_FULL": "Betsey Jones", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "202-787-7678" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/stewardship/README.md b/java/snippets/stewardship/README.md new file mode 100644 index 0000000..993dcc5 --- /dev/null +++ b/java/snippets/stewardship/README.md @@ -0,0 +1,29 @@ +# Stewardship + +The stewardship snippets outline forced resolution and forced un-resolution of records from entities. Stewardship provides the ability to force records to resolve or un-resolve when, for example, Senzing doesn't have enough information at a point in time, but you may have knowledge outside of Senzing to override a decision Senzing has made. Basic stewardship utilizes the `TRUSTED_ID` feature to influence entity resolution. See the [Entity Specification](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) for additional details. 
+ +In these examples, the current JSON data for a record is first retrieved and additional `TRUSTED_ID` attributes are appended before replacing the records and completing entity resolution, now taking into account the influence of the `TRUSTED_ID` attributes: + +- `TRUSTED_ID_NUMBER` - when the value is the same across records, the records resolve to the same entity. If the values used across records differ, the records will not resolve to the same entity. +- `TRUSTED_ID_TYPE` - an arbitrary value to indicate the use of the `TRUSTED_ID_NUMBER`. + +## Snippets + +- **ForceResolve.java** + - Force resolve records together to a single entity +- **ForceUnresolve.java** + - Force un-resolve a record from an entity into a new entity + +## Example Usage + +### Force Resolve + +Force resolve first adds 3 records and reports which entity each belongs to. + +With additional knowledge not represented in Senzing, you know record 3 "Pat Smith" represents the same person as record 1 "Patrick Smith". To force resolve these 2 records to the same entity, first fetch the current representation of each record with `getRecord()`. Next add `TRUSTED_ID_NUMBER` and `TRUSTED_ID_TYPE` attributes to each of the retrieved records. `TRUSTED_ID_NUMBER` uses the same value to indicate these records should always be considered the same entity and resolve together. In this example, the data source of the records and their record IDs are used to create `TRUSTED_ID_NUMBER`. `TRUSTED_ID_TYPE` is set to `FORCE_RESOLVE` as an indicator that they were forced together. + +### Force UnResolve + +Force UnResolve first adds 3 records and shows that all of them resolve to the same entity. + +With additional knowledge not represented in Senzing, you know record 6 "Betsey Jones" is not the same as records 4 and 5; Betsey is a twin to "Elizabeth Jones". To force unresolve "Betsey" from the "Elizabeth" entity, first fetch the current representation of each record with `getRecord()`. 
Next add `TRUSTED_ID_NUMBER` and `TRUSTED_ID_TYPE` attributes to each of the retrieved records. `TRUSTED_ID_NUMBER` uses a different value for each record to indicate these records should always be considered different entities and not resolve together. In this example, the data source of the records and their record IDs are used to create `TRUSTED_ID_NUMBER`. `TRUSTED_ID_TYPE` is set to `FORCE_UNRESOLVE` as an indicator that they were forced apart.