From 6b8abb78da016b5793b7f735e7baf21d1ac78063 Mon Sep 17 00:00:00 2001 From: "Barry M. Caceres" Date: Fri, 17 Jan 2025 12:07:31 -0800 Subject: [PATCH 1/9] Interim commit --- java/pom.xml | 122 +++++ .../com/senzing/runner/InstallLocations.java | 467 ++++++++++++++++++ .../com/senzing/runner/SnippetRunner.java | 325 ++++++++++++ java/runner/com/senzing/runner/Utilities.java | 165 +++++++ java/snippets/loading/AddRecords.java | 176 +++++++ java/snippets/loading/AddRecordsLoop.java | 176 +++++++ 6 files changed, 1431 insertions(+) create mode 100644 java/pom.xml create mode 100644 java/runner/com/senzing/runner/InstallLocations.java create mode 100644 java/runner/com/senzing/runner/SnippetRunner.java create mode 100644 java/runner/com/senzing/runner/Utilities.java create mode 100644 java/snippets/loading/AddRecords.java create mode 100644 java/snippets/loading/AddRecordsLoop.java diff --git a/java/pom.xml b/java/pom.xml new file mode 100644 index 0000000..b7cf3da --- /dev/null +++ b/java/pom.xml @@ -0,0 +1,122 @@ + + 4.0.0 + com.senzing + sz-sdk-snippets + jar + 4.0.0 + Senzing Java SDK + The Code Snippet Examples for Senzing V4 Java SDK. 
+ http://github.com/Senzing/code-snippets-v4 + + + The Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0 + + + + + com.senzing + sz-sdk + 4.0.0 + system + ${SENZING_DIR}/lib/sz-sdk.jar + + + org.glassfish + javax.json + 1.1.4 + + + org.xerial + sqlite-jdbc + 3.47.2.0 + + + + 17 + 17 + 17 + UTF-8 + UTF-8 + + + snippets + ${project.artifactId} + + + org.apache.maven.plugins + maven-compiler-plugin + 3.13.0 + + + -Xlint:unchecked + -Xlint:deprecation + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.2.0 + + + generate-sources + + add-source + + + + runner + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.6.0 + + false + + ${SENZING_DIR}/lib/sz-sdk.jar + + + + *:* + + META-INF/MANIFEST.MF + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + + + com.senzing.runner.SnippetRunner + ${project.version} + + + + + + + + + + diff --git a/java/runner/com/senzing/runner/InstallLocations.java b/java/runner/com/senzing/runner/InstallLocations.java new file mode 100644 index 0000000..63fcd51 --- /dev/null +++ b/java/runner/com/senzing/runner/InstallLocations.java @@ -0,0 +1,467 @@ +package com.senzing.runner; + +import javax.json.JsonObject; +import java.io.File; +import java.io.StringWriter; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; + +import static com.senzing.runner.Utilities.*; + +/** + * Describes the directories on disk used to find the Senzing product + * installation and the support directories. + */ +public class InstallLocations { + /** + * The installation location. + */ + private File installDir; + + /** + * The location of the configuration files for the config directory. + */ + private File configDir; + + /** + * The location of the resource files for the resource directory. + */ + private File resourceDir; + + /** + * The location of the support files for the support directory. 
+ */ + private File supportDir; + + /** + * The location of the template files for the template directory. + */ + private File templatesDir; + + /** + * Indicates if the installation direction is from a development build. + */ + private boolean devBuild = false; + + /** + * Default constructor. + */ + private InstallLocations() { + this.installDir = null; + this.configDir = null; + this.resourceDir = null; + this.supportDir = null; + this.templatesDir = null; + this.devBuild = false; + } + + /** + * Gets the primary installation directory. + * + * @return The primary installation directory. + */ + public File getInstallDirectory() { + return this.installDir; + } + + /** + * Gets the configuration directory. + * + * @return The configuration directory. + */ + public File getConfigDirectory() { + return this.configDir; + } + + /** + * Gets the resource directory. + * + * @return The resource directory. + */ + public File getResourceDirectory() { + return this.resourceDir; + } + + /** + * Gets the support directory. + * + * @return The support directory. + */ + public File getSupportDirectory() { + return this.supportDir; + } + + /** + * Gets the templates directory. + * + * @return The templates directory. + */ + public File getTemplatesDirectory() { + return this.templatesDir; + } + + /** + * Checks if the installation is actually a development build. + * + * @return true if this installation represents a development + * build, otherwise false. + */ + public boolean isDevelopmentBuild() { + return this.devBuild; + } + + /** + * Produces a {@link String} describing this instance. + * + * @return A {@link String} describing this instance. 
+ */ + public String toString() { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + + pw.println(); + pw.println("--------------------------------------------------"); + pw.println("installDirectory : " + this.getInstallDirectory()); + pw.println("configDirectory : " + this.getConfigDirectory()); + pw.println("supportDirectory : " + this.getSupportDirectory()); + pw.println("resourceDirectory : " + this.getResourceDirectory()); + pw.println("templatesDirectory : " + this.getTemplatesDirectory()); + pw.println("developmentBuild : " + this.isDevelopmentBuild()); + + return sw.toString(); + } + + /** + * Finds the install directories and returns the {@link InstallLocations} + * instance describing those locations. + * + * @param senzingDir The optional Senzing installation directory if one has + * been provided, null otherwise. + * + * @return The {@link InstallLocations} instance describing the install + * locations. + */ + public static InstallLocations findLocations() { + final String osName = System.getProperty("os.name"); + + boolean windows = false; + boolean macOS = false; + + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") + || lowerOSName.indexOf("darwin") >= 0) + { + macOS = true; + } + + File installDir = null; + File configDir = null; + File resourceDir = null; + File supportDir = null; + File templatesDir = null; + try { + String defaultInstallPath; + String defaultConfigPath = null; + + if (windows) { + defaultInstallPath = "C:\\\\Program Files\\Senzing\\er"; + } else if (macOS) { + defaultInstallPath = "/opt/senzing/er"; + } else { + defaultInstallPath = "/opt/senzing/er"; + defaultConfigPath = "/etc/opt/senzing"; + } + + // set the install path if one has been provided + String installPath = System.getProperty("senzing.install.dir"); + String configPath = System.getProperty("senzing.config.dir"); + String supportPath 
= System.getProperty("senzing.support.dir"); + String resourcePath = System.getProperty("senzing.resource.dir"); + + // try environment variables if system properties don't work + if (installPath == null || installPath.trim().length() == 0) { + installPath = System.getenv("SENZING_DIR"); + } + if (configPath == null || configPath.trim().length() == 0) { + configPath = System.getenv("SENZING_ETC_DIR"); + } + if (supportPath == null || supportPath.trim().length() == 0) { + supportPath = System.getenv("SENZING_DATA_DIR"); + } + + // normalize empty strings as null + if (installPath != null && installPath.trim().length() == 0) { + installPath = null; + } + if (configPath != null && configPath.trim().length() == 0) { + configPath = null; + } + if (supportPath != null && supportPath.trim().length() == 0) { + supportPath = null; + } + if (resourcePath != null && resourcePath.trim().length() == 0) { + resourcePath = null; + } + + // check the senzing directory + installDir = new File(installPath == null ? 
defaultInstallPath : installPath); + if (!installDir.exists()) { + System.err.println("Could not find Senzing installation directory:"); + System.err.println(" " + installDir); + System.err.println(); + if (installPath != null) { + System.err.println( + "Check the -Dsenzing.install.dir=[path] command line option."); + } else { + System.err.println( + "Use the -Dsenzing.install.dir=[path] command line option to " + + "specify a path"); + } + + return null; + } + + // normalize the senzing directory + String dirName = installDir.getName(); + if (installDir.isDirectory() && !dirName.equalsIgnoreCase("er") + && dirName.equalsIgnoreCase("senzing")) + { + // for windows or linux allow the "Senzing" dir as well + installDir = new File(installDir, "er"); + } + + if (!installDir.isDirectory()) { + System.err.println("Senzing installation directory appears invalid:"); + System.err.println(" " + installDir); + System.err.println(); + if (installPath != null) { + System.err.println( + "Check the -Dsenzing.install.dir=[path] command line option."); + } else { + System.err.println( + "Use the -Dsenzing.install.dir=[path] command line option to " + + "specify a path"); + } + + return null; + } + + if (supportPath == null || supportPath.trim().length() == 0) { + // try to determine the support path + File installParent = installDir.getParentFile(); + File dataRoot = new File(installParent, "data"); + if (dataRoot.exists() && dataRoot.isDirectory()) { + File versionFile = new File(installDir, "szBuildVersion.json"); + String dataVersion = null; + if (versionFile.exists()) { + String text = readTextFileAsString(versionFile, UTF_8); + JsonObject jsonObject = parseJsonObject(text); + dataVersion = (jsonObject.containsKey("DATA_VERSION") + ? jsonObject.getString("DATA_VERSION") : null); + } + + // try the data version directory + supportDir = (dataVersion == null) ? 
null : new File(dataRoot, dataVersion.trim()); + + // check if data version was not found + if (supportDir == null || !supportDir.exists()) { + // look to see if we only have one data version installed + File[] versionDirs = dataRoot.listFiles(f -> { + return f.getName().matches("\\d+\\.\\d+\\.\\d+"); + }); + if (versionDirs.length == 1 && supportDir == null) { + // use the single data version found + supportDir = versionDirs[0]; + + } else if (versionDirs.length > 1) { + System.err.println( + "Could not infer support directory. Multiple data " + + "directory versions at: "); + System.err.println(" " + dataRoot); + if (supportDir != null) { + System.err.println(); + System.err.println("Expected to find: " + supportDir); + } + throw new IllegalStateException( + ((supportDir == null) ? "Could not infer support directory." + : "Could not find support directory (" + supportDir + ").") + + " Multiple data directory versions found at: " + dataRoot); + } else { + // no version directories were found, maybe the data root is + // the actual support directory (mapped in a docker image) + File[] ibmFiles = dataRoot.listFiles(f -> { + return f.getName().toLowerCase().endsWith(".ibm"); + }); + File libPostalDir = new File(dataRoot, "libpostal"); + + // require the .ibm files and libpostal to exist + if (ibmFiles.length > 0 && libPostalDir.exists()) { + supportDir = dataRoot; + } + } + } + + } + if (supportDir == null) { + // use the default path + supportDir = new File(installDir, "data"); + } + + } else { + // use the specified explicit path + supportDir = new File(supportPath); + } + + if (!supportDir.exists()) { + System.err.println("The support directory does not exist:"); + System.err.println(" " + supportDir); + if (supportPath != null) { + System.err.println("Check the -Dsenzing.support.dir=[path] command line option."); + } else { + System.err.println("Use the -Dsenzing.support.dir=[path] command line option to " + + "specify a path"); + } + + throw new 
IllegalStateException("The support directory does not exist: " + supportDir); + } + + if (!supportDir.isDirectory()) { + System.err.println("The support directory is invalid:"); + System.err.println(" " + supportDir); + if (supportPath != null) { + System.err.println("Check the -Dsenzing.support.dir=[path] command line option."); + } else { + System.err.println("Use the -Dsenzing.support.dir=[path] command line option to " + + "specify a path"); + } + throw new IllegalStateException("The support directory is invalid: " + supportDir); + + } + + // check the config path + if (configPath != null) { + configDir = new File(configPath); + } + + // check for a dev build installation + if (configDir == null && installDir != null && "dist".equals(installDir.getName())) { + configDir = new File(installDir, "data"); + } + + // if still null and there is a default, then use it + if (configDir == null && defaultConfigPath != null) { + configDir = new File(defaultConfigPath); + if (!configDir.exists()) { + configDir = null; + } + } + + // if still null, try to use the install's etc directory + if (configDir == null && installDir != null) { + configDir = new File(installDir, "etc"); + if (!configDir.exists()) { + configDir = null; + } + } + + if (configPath != null && !configDir.exists()) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies a path that does not exist:"); + System.err.println(" " + configPath); + + throw new IllegalStateException("Explicit config path does not exist: " + configPath); + } + if (configDir != null && configDir.exists()) { + if (!configDir.isDirectory()) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies a file, not a directory:"); + System.err.println(" " + configPath); + + throw new IllegalStateException( + "Explicit config path is not directory: " + configPath); + } + + String[] requiredFiles = { "cfgVariant.json" }; + List missingFiles = new ArrayList<>(requiredFiles.length); + + for (String fileName 
: requiredFiles) { + File configFile = new File(configDir, fileName); + File supportFile = new File(supportDir, fileName); + if (!configFile.exists() && !supportFile.exists()) { + missingFiles.add(fileName); + } + } + if (missingFiles.size() > 0 && configPath != null) { + System.err.println( + "The -Dsenzing.config.dir=[path] option specifies an invalid config directory:"); + for (String missing : missingFiles) { + System.err.println(" " + missing + " was not found"); + } + throw new IllegalStateException( + "Explicit config path missing required files: " + missingFiles); + } + } + + // now determine the resource path + resourceDir = (resourcePath == null) ? null : new File(resourcePath); + if (resourceDir == null) { + resourceDir = new File(installDir, "resources"); + if (!resourceDir.exists()) + resourceDir = null; + } + + if (resourceDir != null && resourceDir.exists() && resourceDir.isDirectory()) { + templatesDir = new File(resourceDir, "templates"); + } + + if (resourcePath != null) { + if (!resourceDir.exists()) { + System.err.println( + "The -Dsenzing.resource.dir=[path] option specifies a path that does not exist:"); + System.err.println(" " + resourcePath); + + throw new IllegalStateException( + "Explicit resource path does not exist: " + resourcePath); + } + + if (!resourceDir.isDirectory() || !templatesDir.exists() || !templatesDir.isDirectory()) { + System.err.println( + "The -Dsenzing.resource.dir=[path] option specifies an invalid " + + "resource directory:"); + System.err.println(" " + resourcePath); + + throw new IllegalStateException( + "Explicit resource path is not valid: " + resourcePath); + } + + } else if (!resourceDir.exists() || !resourceDir.isDirectory() || !templatesDir.exists() + || !templatesDir.isDirectory()) { + resourceDir = null; + templatesDir = null; + } + + // construct and initialize the result + InstallLocations result = new InstallLocations(); + result.installDir = installDir; + result.configDir = configDir; + 
result.supportDir = supportDir; + result.resourceDir = resourceDir; + result.templatesDir = templatesDir; + result.devBuild = ("dist".equals(installDir.getName())); + + // return the result + return result; + + } catch (RuntimeException e) { + e.printStackTrace(); + throw e; + + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } +} diff --git a/java/runner/com/senzing/runner/SnippetRunner.java b/java/runner/com/senzing/runner/SnippetRunner.java new file mode 100644 index 0000000..ba07ddd --- /dev/null +++ b/java/runner/com/senzing/runner/SnippetRunner.java @@ -0,0 +1,325 @@ +package com.senzing.runner; + +import java.io.*; +import java.lang.reflect.Method; +import java.sql.*; +import java.util.*; +import java.util.zip.*; +import javax.json.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.*; + +import static com.senzing.runner.Utilities.*; + +/** + * Helper class to run each of the snippetts. + */ +public class SnippetRunner { + private static final long ONE_MILLION = 1000000L; + + /** + * Harness for running one or more of the code snippets. + * @param args The command line arguments. 
+ */ + public static void main(String[] args) { + try { + SortedMap> snippetMap = getSnippetMap(); + Set snippetOptions = new LinkedHashSet<>(); + snippetOptions.addAll(snippetMap.keySet()); + for (Set set : snippetMap.values()) { + snippetOptions.addAll(set); + } + + if (args.length == 0) { + printUsage(snippetMap); + System.exit(1); + } + String settings = System.getProperty("senzing.settings"); + if (settings != null) { + settings = settings.trim(); + } + + // check for settings in the environment if needed + if (settings == null) { + settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings != null) { + settings = settings.trim(); + } + } + + // validate the settings if we have them + if (settings != null) { + JsonObject settingsJson = null; + try { + settingsJson = parseJsonObject(settings); + } catch (Exception e) { + System.err.println( + "The provided Senzing settings were not valid JSON:"); + System.err.println(); + System.err.println(toJsonText(settingsJson, true)); + System.exit(1); + } + } + + // validate the SENZING_DIR + InstallLocations installLocations = null; + try { + installLocations = InstallLocations.findLocations(); + + } catch (Exception e) { + System.exit(1); + } + + Set snippets = new LinkedHashSet<>(); + for (int index = 0; index < args.length; index++) { + String arg = args[index]; + if (arg.equals("all")) { + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); + } + } + }); + continue; + } + if (!snippetOptions.contains(arg)) { + System.err.println("Unrecognized code snippet or snippet group: " + arg); + System.exit(1); + } + if (snippetMap.containsKey(arg)) { + for (String snippet : snippetMap.get(arg)) { + if (!snippets.contains(snippet)) { + snippets.add(snippet); + } + } + } else { + if (!snippets.contains(arg)) { + snippets.add(arg); + } + } + } + + // check if we do not have settings and if not setup a temporary 
repository + if (settings == null) { + settings = setupTempRepository(installLocations); + } + + Long defaultConfigId = null; + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + defaultConfigId = configMgr.getDefaultConfigId(); + + } catch (SzException e) { + e.printStackTrace(); + } finally { + env.destroy(); + env = null; + } + + // execute each snippet + for (String snippet : snippets) { + System.out.println(); + long start = System.nanoTime(); + System.out.println("Preparing repository for " + snippet + "..."); + env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + configMgr.setDefaultConfigId(defaultConfigId); + + SzDiagnostic diagnostic = env.getDiagnostic(); + diagnostic.purgeRepository(); + + } catch (SzException e) { + e.printStackTrace(); + } finally { + env.destroy(); + } + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Prepared repository for " + snippet + ". (" + duration + "ms)"); + + executeSnippet(snippet, installLocations, settings); + } + System.out.println(); + + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + private static void executeSnippet(String snippet, InstallLocations senzingInstall, String settings) + throws Exception + { + String[] args = { settings }; + Class snippetClass = Class.forName(snippet); + Object snippetInstance = snippetClass.getConstructor().newInstance(); + Method mainMethod = snippetClass.getMethod("main", args.getClass()); + Object[] methodArgs = { args }; + System.out.println(); + System.out.println("Executing " + snippet + "..."); + long start = System.nanoTime(); + mainMethod.invoke(snippetInstance, methodArgs); + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Executed " + snippet + ". 
(" + duration + "ms)"); + } + + private static void printUsage(SortedMap> snippetMap) { + System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]* ]"); + System.err.println(); + System.err.println(" - Specifying no arguments will print this message"); + System.err.println(" - Specifying \"all\" will run all snippets"); + System.err.println(" - Specifying one or more groups will run all snippets in those groups"); + System.err.println(" - Specifying one or more snippets will run those snippet"); + System.err.println(); + System.err.println("Examples:"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar all"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar loading.AddRecords loading.AddFutures"); + System.err.println(); + System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.AddRecords"); + System.err.println(); + System.err.println("Snippet Group Names:"); + snippetMap.keySet().forEach(group -> { + System.err.println(" - " + group); + }); + System.err.println(); + System.err.println("Snippet Names:"); + snippetMap.values().forEach(snippetSet -> { + for (String snippet : snippetSet) { + System.err.println(" - " + snippet); + } + }); + System.err.println(); + } + + private static String getJarPath() throws Exception { + final String osName = System.getProperty("os.name"); + + boolean windows = false; + boolean macOS = false; + + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") + || lowerOSName.indexOf("darwin") >= 0) + { + macOS = true; + } + + String resourceName = SnippetRunner.class.getSimpleName() + ".class"; + String url = SnippetRunner.class.getResource(resourceName).toString(); + String jarPath = url.replaceAll( + "jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); + + if (windows && jarPath.startsWith("/")) { + jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); + } + + if 
(windows && jarPath.startsWith("/")) { + jarPath = jarPath.substring(1); + } + return jarPath; + } + + private static SortedMap> getSnippetMap() + throws Exception + { + String jarPath = getJarPath(); + + SortedMap> snippetMap = new TreeMap<>(); + File jarFile = new File(jarPath); + try (FileInputStream fis = new FileInputStream(jarFile); + ZipInputStream zis = new ZipInputStream(fis)) + { + for (ZipEntry entry = zis.getNextEntry(); + entry != null; + entry = zis.getNextEntry()) + { + String name = entry.getName(); + if (name.startsWith("com/")) continue; + if (name.startsWith("org/")) continue; + if (name.startsWith("javax/")) continue; + if (name.startsWith("META-INF/")) continue; + if (!name.endsWith(".class")) continue; + int index = name.indexOf('/'); + if (index < 0) continue; + String group = name.substring(0, index); + String snippet = name.substring( + 0, name.length() - ".class".length()).replace('/','.'); + SortedSet snippetSet = snippetMap.get(group); + if (snippetSet == null) { + snippetSet = new TreeSet<>(); + snippetMap.put(group, snippetSet); + } + snippetSet.add(snippet); + } + } + return snippetMap; + } + + /** + * + */ + private static String setupTempRepository(InstallLocations senzingInstall) + throws Exception + { + File resourcesDir = senzingInstall.getResourceDirectory(); + File templatesDir = senzingInstall.getTemplatesDirectory(); + File configDir = senzingInstall.getConfigDirectory(); + File schemaDir = new File(resourcesDir, "schema"); + File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); + File configFile = new File(templatesDir, "g2config.json"); + + // lay down the database schema + File databaseFile = File.createTempFile("G2C-", ".db"); + String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); + + try (FileReader rdr = new FileReader(schemaFile, UTF_8_CHARSET); + BufferedReader br = new BufferedReader(rdr); + Connection conn = DriverManager.getConnection(jdbcUrl); + Statement stmt = 
conn.createStatement()) + { + for (String sql = br.readLine(); sql != null; sql = br.readLine()) { + sql = sql.trim(); + if (sql.length() == 0) continue; + stmt.execute(sql); + } + } + + String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath(); + String configPath = configDir.getCanonicalPath(); + String resourcePath = resourcesDir.toString(); + String databasePath = databaseFile.getCanonicalPath(); + String baseConfig = readTextFileAsString(configFile, UTF_8); + String settings = """ + { + "PIPELINE": { + "SUPPORTPATH": "%s", + "CONFIGPATH": "%s", + "RESOURCEPATH": "%s" + }, + "SQL": { + "CONNECTION": "sqlite3://na:na@%s" + } + } + """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); + + SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); + try { + SzConfigManager configMgr = env.getConfigManager(); + + long configId = configMgr.addConfig(baseConfig, "Default Config"); + configMgr.setDefaultConfigId(configId); + + } finally { + env.destroy(); + } + + return settings; + } +} diff --git a/java/runner/com/senzing/runner/Utilities.java b/java/runner/com/senzing/runner/Utilities.java new file mode 100644 index 0000000..fe023b3 --- /dev/null +++ b/java/runner/com/senzing/runner/Utilities.java @@ -0,0 +1,165 @@ +package com.senzing.runner; + +import java.util.*; +import java.io.*; +import javax.json.*; +import java.nio.charset.Charset; + +import static javax.json.stream.JsonGenerator.PRETTY_PRINTING; + +/** + * Provides utilities for the snippet runner. + */ +public class Utilities { + /** + * Constant for the name of the UTF-8 character encoding. + */ + public static final String UTF_8 = "UTF-8"; + + /** + * Constant for the UTF-8 {@link Charset}. + */ + public static final Charset UTF_8_CHARSET = Charset.forName(UTF_8); + + /** + * Pretty printing {@link JsonWriterFactory}. 
+ */ + private static JsonWriterFactory PRETTY_WRITER_FACTORY = Json + .createWriterFactory(Collections.singletonMap(PRETTY_PRINTING, true)); + + /** + * Parses JSON text as a {@link JsonObject}. If the specified text is not + * formatted as a JSON object then an exception will be thrown. + * + * @param jsonText The JSON text to be parsed. + * + * @return The parsed {@link JsonObject}. + */ + public static JsonObject parseJsonObject(String jsonText) { + if (jsonText == null) + return null; + StringReader sr = new StringReader(jsonText); + JsonReader jsonReader = Json.createReader(sr); + return jsonReader.readObject(); + } + + /** + * Converts the specified {@link JsonValue} to a JSON string. + * + * @param writer The {@link Writer} to write to. + * + * @param jsonValue The {@link JsonValue} describing the JSON. + * + * @param prettyPrint Whether or not to pretty-print the JSON text. + * + * @return The specified {@link Writer}. + * + * @param The type of the writer to which the write the {@link JsonValue}. + */ + public static T toJsonText(T writer, JsonValue jsonValue, boolean prettyPrint) { + Objects.requireNonNull(writer, "Writer cannot be null"); + + JsonWriter jsonWriter = (prettyPrint) + ? PRETTY_WRITER_FACTORY.createWriter(writer) : Json.createWriter(writer); + + if (jsonValue != null) { + jsonWriter.write(jsonValue); + } else { + jsonWriter.write(JsonValue.NULL); + } + + return writer; + } + + /** + * Converts the specified {@link JsonValue} to a JSON string. + * + * @param jsonValue The {@link JsonValue} describing the JSON. + * + * @param prettyPrint Whether or not to pretty-print the JSON text. + * + * @return The specified {@link JsonValue} converted to a JSON string. 
+ */ + public static String toJsonText(JsonValue jsonValue, boolean prettyPrint) { + return toJsonText(new StringWriter(), jsonValue, prettyPrint).toString(); + } + + /** + * Using the specified character encoding, this method will wraps the specified + * {@link Reader} in a new {@link Reader} that will skip the "byte order mark" + * (BOM) character at the beginning of the file for UTF character encodings + * (e.g.: "UTF-8", "UTF-16" or "UTF-32"). If the specified character encoding is + * not a "UTF" character encoding then it is simply returned as-is. + * + * @param src The source {@link Reader}. + * @param encoding The character encoding. + * @return The new {@link Reader} that will skip the byte-order mark. + * @throws IOException If an I/O failure occurs. + * @throws NullPointerException If either parameter is null. + */ + public static Reader bomSkippingReader(Reader src, String encoding) throws IOException, NullPointerException { + // check if encoding is null (illegal) + if (encoding == null) { + throw new NullPointerException("Cannot skip byte order mark without specifying the encoding."); + } + + // check if we have an encoding that is NOT a UTF encoding + if (!encoding.toUpperCase().startsWith("UTF")) { + // if not UTF encoding then there should not be a BOM to skip + return src; + } + + // create a pushback reader and peek at the first character + PushbackReader result = new PushbackReader(src, 1); + int first = result.read(); + + // check if already at EOF + if (first == -1) { + // just return the source stream + return src; + } + + // check if we do NOT have a byte order mark + if (first != 0xFEFF) { + // push the character back on to the stream so it can be read + result.unread(first); + } + + // return the pushback reader + return result; + } + + /** + * Reads the contents of the file as text and returns the {@link String} + * representing the contents. The text is expected to be encoded in the + * specified character encoding. 
If the specified character encoding is + * null then the system default encoding is used. + * + * @param file The {@link File} whose contents should be read. + * @param charEncoding The character encoding for the text in the file. + * @return The {@link String} representing the contents of the file. + * @throws IOException If an I/O failure occurs. + */ + public static String readTextFileAsString(File file, String charEncoding) throws IOException { + Charset charset = (charEncoding == null) + ? Charset.defaultCharset() : Charset.forName(charEncoding); + + try (FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis, charset); + Reader reader = bomSkippingReader(isr, charset.name()); + BufferedReader br = new BufferedReader(reader)) { + long size = file.length(); + if (size > Integer.MAX_VALUE) + size = Integer.MAX_VALUE; + + StringBuilder sb = new StringBuilder((int) size); + for (int nextChar = br.read(); nextChar >= 0; nextChar = br.read()) { + if (nextChar == 0) + continue; + sb.append((char) nextChar); + } + return sb.toString(); + } + } + +} diff --git a/java/snippets/loading/AddRecords.java b/java/snippets/loading/AddRecords.java new file mode 100644 index 0000000..12e35bc --- /dev/null +++ b/java/snippets/loading/AddRecords.java @@ -0,0 +1,176 @@ +package loading; + +import java.util.Map; +import java.util.LinkedHashMap; + +import com.senzing.sdk.SzEnvironment; +import com.senzing.sdk.core.SzCoreEnvironment; +import com.senzing.sdk.SzEngine; +import com.senzing.sdk.SzException; +import com.senzing.sdk.SzRecordKey; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class AddRecords { + public static void main(String[] args) { + // get the senzing repository settings + String settings = (args.length > 0) ? 
args[0] + : System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddRecords.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (Map.Entry entry : getRecords().entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values desribing the records to be added. 
+ */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1001"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1001", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Robert", + "PRIMARY_NAME_LAST": "Smith", + "DATE_OF_BIRTH": "12/11/1978", + "ADDR_TYPE": "MAILING", + "ADDR_FULL": "123 Main Street, Las Vegas, NV 89132", + "PHONE_TYPE": "HOME", + "PHONE_NUMBER": "702-919-1300", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1002"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1002", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_GENERATION": "II", + "DATE_OF_BIRTH": "11/12/1978", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Lane", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89111", + "PHONE_TYPE": "MOBILE", + "PHONE_NUMBER": "702-919-1300" + } + """); + + records.put( + SzRecordKey.of("TEST", "1003"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1003", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_MIDDLE": "J", + "DATE_OF_BIRTH": "12/11/1978", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1004"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1004", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "B", + "PRIMARY_NAME_LAST": "Smith", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Ln", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1005"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1005", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Rob", + "PRIMARY_NAME_MIDDLE": "E", + "PRIMARY_NAME_LAST": "Smith", + "DRIVERS_LICENSE_NUMBER": "112233", + "DRIVERS_LICENSE_STATE": "NV", + "ADDR_TYPE": "MAILING", + 
"ADDR_LINE1": "123 E Main St", + "ADDR_CITY": "Henderson", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/loading/AddRecordsLoop.java b/java/snippets/loading/AddRecordsLoop.java new file mode 100644 index 0000000..2f9ca99 --- /dev/null +++ b/java/snippets/loading/AddRecordsLoop.java @@ -0,0 +1,176 @@ +package loading; + +import java.util.Map; +import java.util.LinkedHashMap; + +import com.senzing.sdk.SzEnvironment; +import com.senzing.sdk.core.SzCoreEnvironment; +import com.senzing.sdk.SzEngine; +import com.senzing.sdk.SzException; +import com.senzing.sdk.SzRecordKey; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class AddRecordsLoop { + public static void main(String[] args) { + // get the senzing repository settings + String settings = (args.length > 0) ? args[0] + : System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddRecordsLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the example records and add them to the repository + for (Map.Entry entry : getRecords().entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + } catch 
(SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values desribing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1001"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1001", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Robert", + "PRIMARY_NAME_LAST": "Smith", + "DATE_OF_BIRTH": "12/11/1978", + "ADDR_TYPE": "MAILING", + "ADDR_FULL": "123 Main Street, Las Vegas, NV 89132", + "PHONE_TYPE": "HOME", + "PHONE_NUMBER": "702-919-1300", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1002"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1002", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_GENERATION": "II", + "DATE_OF_BIRTH": "11/12/1978", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Lane", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89111", + "PHONE_TYPE": "MOBILE", + "PHONE_NUMBER": "702-919-1300" + } + """); + + records.put( + SzRecordKey.of("TEST", "1003"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1003", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Bob", + "PRIMARY_NAME_LAST": "Smith", + "PRIMARY_NAME_MIDDLE": "J", + "DATE_OF_BIRTH": "12/11/1978", + "EMAIL_ADDRESS": "bsmith@work.com" + } + 
"""); + + records.put( + SzRecordKey.of("TEST", "1004"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1004", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "B", + "PRIMARY_NAME_LAST": "Smith", + "ADDR_TYPE": "HOME", + "ADDR_LINE1": "1515 Adela Ln", + "ADDR_CITY": "Las Vegas", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132", + "EMAIL_ADDRESS": "bsmith@work.com" + } + """); + + records.put( + SzRecordKey.of("TEST", "1005"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1005", + "RECORD_TYPE": "PERSON", + "PRIMARY_NAME_FIRST": "Rob", + "PRIMARY_NAME_MIDDLE": "E", + "PRIMARY_NAME_LAST": "Smith", + "DRIVERS_LICENSE_NUMBER": "112233", + "DRIVERS_LICENSE_STATE": "NV", + "ADDR_TYPE": "MAILING", + "ADDR_LINE1": "123 E Main St", + "ADDR_CITY": "Henderson", + "ADDR_STATE": "NV", + "ADDR_POSTAL_CODE": "89132" + } + """); + + return records; + } +} \ No newline at end of file From 925cdfc4824a3e413ba487fea5696164fb8d65b3 Mon Sep 17 00:00:00 2001 From: "Barry M. Caceres" Date: Fri, 17 Jan 2025 12:10:59 -0800 Subject: [PATCH 2/9] Updated .gitignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index bcbe78a..8e76eec 100644 --- a/.gitignore +++ b/.gitignore @@ -127,4 +127,8 @@ dmypy.json # Pyre type checker .pyre/ -.history \ No newline at end of file +.history + +# VS Code +.vscode/ + From 179bf12bb3d55120b247d5702f2f3b378f6b5acb Mon Sep 17 00:00:00 2001 From: "Barry M. Caceres" Date: Fri, 17 Jan 2025 12:11:22 -0800 Subject: [PATCH 3/9] Updated .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8e76eec..a527bd8 100644 --- a/.gitignore +++ b/.gitignore @@ -130,5 +130,5 @@ dmypy.json .history # VS Code -.vscode/ +.vscode/* From bfce9f758ba48edbee5eecdd8d25d70f2a364ed9 Mon Sep 17 00:00:00 2001 From: "Barry M. 
Caceres" Date: Fri, 17 Jan 2025 12:13:05 -0800 Subject: [PATCH 4/9] Updated .gitignore --- .gitignore | 2 +- .vscode/settings.json | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a527bd8..8e76eec 100644 --- a/.gitignore +++ b/.gitignore @@ -130,5 +130,5 @@ dmypy.json .history # VS Code -.vscode/* +.vscode/ diff --git a/.vscode/settings.json b/.vscode/settings.json index f4b38fb..b037f46 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -23,4 +23,10 @@ "/home/ant/Work/Senzing/git/sz-sdk-python/src/senzing" ], "pylint.importStrategy": "useBundled", + "java.configuration.updateBuildConfiguration": "automatic", + "java.project.sourcePaths": [ + "." + ], + "java.project.referencedLibraries": [], + "java.format.settings.url": ".vscode/java-formatter.xml", } \ No newline at end of file From b2ff26f2660dd491a8aa8231db47257c96af759c Mon Sep 17 00:00:00 2001 From: "Barry M. Caceres" Date: Wed, 22 Jan 2025 10:49:33 -0800 Subject: [PATCH 5/9] Interim commit --- .../com/senzing/runner/SnippetRunner.java | 248 ++++++++------ java/runner/com/senzing/runner/Utilities.java | 5 +- java/snippets/loading/AddFutures.java | 283 +++++++++++++++ java/snippets/loading/AddRecords.java | 3 +- java/snippets/loading/AddRecordsLoop.java | 239 +++++++------ java/snippets/loading/AddWithInfoFutures.java | 322 ++++++++++++++++++ 6 files changed, 868 insertions(+), 232 deletions(-) create mode 100644 java/snippets/loading/AddFutures.java create mode 100644 java/snippets/loading/AddWithInfoFutures.java diff --git a/java/runner/com/senzing/runner/SnippetRunner.java b/java/runner/com/senzing/runner/SnippetRunner.java index ba07ddd..c3eff49 100644 --- a/java/runner/com/senzing/runner/SnippetRunner.java +++ b/java/runner/com/senzing/runner/SnippetRunner.java @@ -17,14 +17,17 @@ */ public class SnippetRunner { private static final long ONE_MILLION = 1000000L; - + + private static final String JAR_PATH = getJarPath(); + /** * Harness 
for running one or more of the code snippets. + * * @param args The command line arguments. */ public static void main(String[] args) { try { - SortedMap> snippetMap = getSnippetMap(); + SortedMap> snippetMap = getSnippetMap(); Set snippetOptions = new LinkedHashSet<>(); snippetOptions.addAll(snippetMap.keySet()); for (Set set : snippetMap.values()) { @@ -33,7 +36,7 @@ public static void main(String[] args) { if (args.length == 0) { printUsage(snippetMap); - System.exit(1); + System.exit(1); } String settings = System.getProperty("senzing.settings"); if (settings != null) { @@ -54,8 +57,7 @@ public static void main(String[] args) { try { settingsJson = parseJsonObject(settings); } catch (Exception e) { - System.err.println( - "The provided Senzing settings were not valid JSON:"); + System.err.println("The provided Senzing settings were not valid JSON:"); System.err.println(); System.err.println(toJsonText(settingsJson, true)); System.exit(1); @@ -66,7 +68,7 @@ public static void main(String[] args) { InstallLocations installLocations = null; try { installLocations = InstallLocations.findLocations(); - + } catch (Exception e) { System.exit(1); } @@ -79,14 +81,14 @@ public static void main(String[] args) { for (String snippet : snippetSet) { if (!snippets.contains(snippet)) { snippets.add(snippet); - } + } } }); continue; } if (!snippetOptions.contains(arg)) { System.err.println("Unrecognized code snippet or snippet group: " + arg); - System.exit(1); + System.exit(1); } if (snippetMap.containsKey(arg)) { for (String snippet : snippetMap.get(arg)) { @@ -103,7 +105,7 @@ public static void main(String[] args) { // check if we do not have settings and if not setup a temporary repository if (settings == null) { - settings = setupTempRepository(installLocations); + settings = setupTempRepository(installLocations); } Long defaultConfigId = null; @@ -143,30 +145,64 @@ public static void main(String[] args) { executeSnippet(snippet, installLocations, settings); } 
System.out.println(); - + } catch (Exception e) { e.printStackTrace(); System.exit(1); } } - private static void executeSnippet(String snippet, InstallLocations senzingInstall, String settings) - throws Exception - { - String[] args = { settings }; - Class snippetClass = Class.forName(snippet); - Object snippetInstance = snippetClass.getConstructor().newInstance(); - Method mainMethod = snippetClass.getMethod("main", args.getClass()); - Object[] methodArgs = { args }; + private static String[] createRuntimeEnv(InstallLocations senzingInstall, String settings) { + Map origEnv = System.getenv(); + List envList = new ArrayList<>(origEnv.size() + 10); + origEnv.forEach((envKey, envVal) -> { + envList.add(envKey + "=" + envVal); + }); + envList.add("SENZING_ENGINE_CONFIGURATION_JSON=" + settings); + return envList.toArray(new String[envList.size()]); + } + + private static Thread startOutputThread(InputStream stream, PrintStream ps) { + Thread thread = new Thread(() -> { + final String UTF8 = "UTF-8"; + try (InputStreamReader isr = new InputStreamReader(stream, UTF8); + BufferedReader br = new BufferedReader(isr)) + { + for (String line = br.readLine(); line != null; line = br.readLine()) { + ps.println(line); + ps.flush(); + } + } catch (IOException e) { + e.printStackTrace(); + } + }); + thread.start(); + return thread; + } + + private static void executeSnippet(String snippet, InstallLocations senzingInstall, String settings) + throws Exception { + String[] cmdArray = new String[] { "java", "-cp", JAR_PATH, snippet }; + + String[] runtimeEnv = createRuntimeEnv(senzingInstall, settings); + System.out.println(); System.out.println("Executing " + snippet + "..."); long start = System.nanoTime(); - mainMethod.invoke(snippetInstance, methodArgs); + Process process = Runtime.getRuntime().exec(cmdArray, runtimeEnv); + Thread errThread = startOutputThread(process.getErrorStream(), System.err); + Thread outThread = startOutputThread(process.getInputStream(), System.out); + 
int exitValue = process.waitFor(); + errThread.join(); + outThread.join(); + if (exitValue != 0) { + throw new Exception("Failed to execute snippet; " + snippet); + } long duration = (System.nanoTime() - start) / ONE_MILLION; System.out.println("Executed " + snippet + ". (" + duration + "ms)"); } - private static void printUsage(SortedMap> snippetMap) { + private static void printUsage(SortedMap> snippetMap) { System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]* ]"); System.err.println(); System.err.println(" - Specifying no arguments will print this message"); @@ -195,62 +231,70 @@ private static void printUsage(SortedMap> snippetMap) { }); System.err.println(); } - - private static String getJarPath() throws Exception { - final String osName = System.getProperty("os.name"); - - boolean windows = false; - boolean macOS = false; - - String lowerOSName = osName.toLowerCase().trim(); - if (lowerOSName.startsWith("windows")) { - windows = true; - } else if (lowerOSName.startsWith("mac") - || lowerOSName.indexOf("darwin") >= 0) - { - macOS = true; - } - String resourceName = SnippetRunner.class.getSimpleName() + ".class"; - String url = SnippetRunner.class.getResource(resourceName).toString(); - String jarPath = url.replaceAll( - "jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); - - if (windows && jarPath.startsWith("/")) { - jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); - } + private static String getJarPath() throws RuntimeException { + try { + final String osName = System.getProperty("os.name"); + + boolean windows = false; + boolean macOS = false; + + String lowerOSName = osName.toLowerCase().trim(); + if (lowerOSName.startsWith("windows")) { + windows = true; + } else if (lowerOSName.startsWith("mac") || lowerOSName.indexOf("darwin") >= 0) { + macOS = true; + } + + String resourceName = SnippetRunner.class.getSimpleName() + ".class"; + String url = SnippetRunner.class.getResource(resourceName).toString(); + String jarPath = 
url.replaceAll("jar:file:(.*\\.jar)\\!/.*\\.class", "$1"); - if (windows && jarPath.startsWith("/")) { - jarPath = jarPath.substring(1); + if (windows && jarPath.startsWith("/")) { + jarPath = jarPath.replaceAll("[/]+([^/].*)", "$1"); + } + + if (windows && jarPath.startsWith("/")) { + jarPath = jarPath.substring(1); + } + return jarPath; + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); } - return jarPath; } - private static SortedMap> getSnippetMap() - throws Exception - { - String jarPath = getJarPath(); - - SortedMap> snippetMap = new TreeMap<>(); - File jarFile = new File(jarPath); - try (FileInputStream fis = new FileInputStream(jarFile); - ZipInputStream zis = new ZipInputStream(fis)) - { - for (ZipEntry entry = zis.getNextEntry(); - entry != null; - entry = zis.getNextEntry()) - { + private static SortedMap> getSnippetMap() throws Exception { + SortedMap> snippetMap = new TreeMap<>(); + File jarFile = new File(JAR_PATH); + try (FileInputStream fis = new FileInputStream(jarFile); ZipInputStream zis = new ZipInputStream(fis)) { + for (ZipEntry entry = zis.getNextEntry(); entry != null; entry = zis.getNextEntry()) { String name = entry.getName(); - if (name.startsWith("com/")) continue; - if (name.startsWith("org/")) continue; - if (name.startsWith("javax/")) continue; - if (name.startsWith("META-INF/")) continue; - if (!name.endsWith(".class")) continue; + if (name.startsWith("com/")) { + continue; + } + if (name.startsWith("org/")) { + continue; + } + if (name.startsWith("javax/")) { + continue; + } + if (name.startsWith("META-INF/")) { + continue; + } + if (!name.endsWith(".class")) { + continue; + } + if (name.indexOf('$') >= 0) { + continue; + } int index = name.indexOf('/'); - if (index < 0) continue; - String group = name.substring(0, index); - String snippet = name.substring( - 0, name.length() - ".class".length()).replace('/','.'); + if (index < 0) { + continue; + } + String group = 
name.substring(0, index); + String snippet = name.substring(0, name.length() - ".class".length()).replace('/', '.'); SortedSet snippetSet = snippetMap.get(group); if (snippetSet == null) { snippetSet = new TreeSet<>(); @@ -265,61 +309,59 @@ private static SortedMap> getSnippetMap() /** * */ - private static String setupTempRepository(InstallLocations senzingInstall) - throws Exception - { - File resourcesDir = senzingInstall.getResourceDirectory(); - File templatesDir = senzingInstall.getTemplatesDirectory(); - File configDir = senzingInstall.getConfigDirectory(); - File schemaDir = new File(resourcesDir, "schema"); - File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); - File configFile = new File(templatesDir, "g2config.json"); + private static String setupTempRepository(InstallLocations senzingInstall) throws Exception { + File resourcesDir = senzingInstall.getResourceDirectory(); + File templatesDir = senzingInstall.getTemplatesDirectory(); + File configDir = senzingInstall.getConfigDirectory(); + File schemaDir = new File(resourcesDir, "schema"); + File schemaFile = new File(schemaDir, "szcore-schema-sqlite-create.sql"); + File configFile = new File(templatesDir, "g2config.json"); // lay down the database schema - File databaseFile = File.createTempFile("G2C-", ".db"); - String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); - - try (FileReader rdr = new FileReader(schemaFile, UTF_8_CHARSET); - BufferedReader br = new BufferedReader(rdr); - Connection conn = DriverManager.getConnection(jdbcUrl); - Statement stmt = conn.createStatement()) - { + File databaseFile = File.createTempFile("G2C-", ".db"); + String jdbcUrl = "jdbc:sqlite:" + databaseFile.getCanonicalPath(); + + try (FileReader rdr = new FileReader(schemaFile, UTF_8_CHARSET); + BufferedReader br = new BufferedReader(rdr); + Connection conn = DriverManager.getConnection(jdbcUrl); + Statement stmt = conn.createStatement()) { for (String sql = br.readLine(); sql != null; 
sql = br.readLine()) { sql = sql.trim(); - if (sql.length() == 0) continue; + if (sql.length() == 0) + continue; stmt.execute(sql); } } - String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath(); - String configPath = configDir.getCanonicalPath(); + String supportPath = senzingInstall.getSupportDirectory().getCanonicalPath(); + String configPath = configDir.getCanonicalPath(); String resourcePath = resourcesDir.toString(); String databasePath = databaseFile.getCanonicalPath(); - String baseConfig = readTextFileAsString(configFile, UTF_8); - String settings = """ - { - "PIPELINE": { - "SUPPORTPATH": "%s", - "CONFIGPATH": "%s", - "RESOURCEPATH": "%s" - }, - "SQL": { - "CONNECTION": "sqlite3://na:na@%s" + String baseConfig = readTextFileAsString(configFile, UTF_8); + String settings = """ + { + "PIPELINE": { + "SUPPORTPATH": "%s", + "CONFIGPATH": "%s", + "RESOURCEPATH": "%s" + }, + "SQL": { + "CONNECTION": "sqlite3://na:na@%s" + } } - } - """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); + """.formatted(supportPath, configPath, resourcePath, databasePath).trim(); SzEnvironment env = SzCoreEnvironment.newBuilder().settings(settings).build(); try { SzConfigManager configMgr = env.getConfigManager(); - + long configId = configMgr.addConfig(baseConfig, "Default Config"); configMgr.setDefaultConfigId(configId); } finally { env.destroy(); } - + return settings; } } diff --git a/java/runner/com/senzing/runner/Utilities.java b/java/runner/com/senzing/runner/Utilities.java index fe023b3..cf01796 100644 --- a/java/runner/com/senzing/runner/Utilities.java +++ b/java/runner/com/senzing/runner/Utilities.java @@ -20,7 +20,7 @@ public class Utilities { * Constant for the UTF-8 {@link Charset}. */ public static final Charset UTF_8_CHARSET = Charset.forName(UTF_8); - + /** * Pretty printing {@link JsonWriterFactory}. */ @@ -36,8 +36,9 @@ public class Utilities { * @return The parsed {@link JsonObject}. 
*/ public static JsonObject parseJsonObject(String jsonText) { - if (jsonText == null) + if (jsonText == null) { return null; + } StringReader sr = new StringReader(jsonText); JsonReader jsonReader = Json.createReader(sr); return jsonReader.readObject(); diff --git a/java/snippets/loading/AddFutures.java b/java/snippets/loading/AddFutures.java new file mode 100644 index 0000000..a685ee4 --- /dev/null +++ b/java/snippets/loading/AddFutures.java @@ -0,0 +1,283 @@ +package loading; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class AddFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); 
+ throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = 
executor.submit(() -> { + // call the addRecord() function with no flags + engine.addRecord(recordKey, record.line, SZ_NO_FLAGS); + + return null; + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + 
throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile))); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. 
SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/AddRecords.java b/java/snippets/loading/AddRecords.java index 12e35bc..14699be 100644 --- a/java/snippets/loading/AddRecords.java +++ b/java/snippets/loading/AddRecords.java @@ -17,8 +17,7 @@ public class AddRecords { public static void main(String[] args) { // get the senzing repository settings - String settings = (args.length > 0) ? 
args[0] - : System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { System.err.println("Unable to get settings."); throw new IllegalArgumentException("Unable to get settings"); diff --git a/java/snippets/loading/AddRecordsLoop.java b/java/snippets/loading/AddRecordsLoop.java index 2f9ca99..40a9c3a 100644 --- a/java/snippets/loading/AddRecordsLoop.java +++ b/java/snippets/loading/AddRecordsLoop.java @@ -1,13 +1,9 @@ package loading; -import java.util.Map; -import java.util.LinkedHashMap; - -import com.senzing.sdk.SzEnvironment; +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; import com.senzing.sdk.core.SzCoreEnvironment; -import com.senzing.sdk.SzEngine; -import com.senzing.sdk.SzException; -import com.senzing.sdk.SzRecordKey; import static com.senzing.sdk.SzFlag.*; @@ -15,10 +11,26 @@ * Provides a simple example of adding records to the Senzing repository. */ public class AddRecordsLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + public static void main(String[] args) { // get the senzing repository settings - String settings = (args.length > 0) ? 
args[0] - : System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); if (settings == null) { System.err.println("Unable to get settings."); throw new IllegalArgumentException("Unable to get settings"); @@ -34,31 +46,74 @@ public static void main(String[] args) { .verboseLogging(false) .build(); - try { + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + BufferedReader br = new BufferedReader(isr)) + { // get the engine from the environment SzEngine engine = env.getEngine(); + int lineNumber = 0; // loop through the example records and add them to the repository - for (Map.Entry entry : getRecords().entrySet()) { - SzRecordKey recordKey = entry.getKey(); - String recordDefinition = entry.getValue(); - - // call the addRecord() function with no flags - engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); - - System.out.println("Record " + recordKey.recordId() + " added"); - System.out.flush(); - } + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; - } catch (SzException e) { - // handle any exception that may have occurred - System.err.println("Senzing Error Message : " + e.getMessage()); - System.err.println("Senzing Error Code : " + e.getErrorCode()); - e.printStackTrace(); - throw new RuntimeException(e); + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + 
// call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile))); + } + retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } } catch (Exception e) { - e.printStackTrace(); + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); if (e instanceof RuntimeException) { throw ((RuntimeException) e); } @@ -67,110 +122,44 @@ public static void main(String[] args) { } finally { // IMPORTANT: make sure to destroy the environment env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + } } /** - * This is a support method for providing example records to add. + * Example method for logging failed records. * - * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} - * JSON text values desribing the records to be added. 
+ * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. */ - public static Map getRecords() { - Map records = new LinkedHashMap<>(); - records.put( - SzRecordKey.of("TEST", "1001"), - """ - { - "DATA_SOURCE": "TEST", - "RECORD_ID": "1001", - "RECORD_TYPE": "PERSON", - "PRIMARY_NAME_FIRST": "Robert", - "PRIMARY_NAME_LAST": "Smith", - "DATE_OF_BIRTH": "12/11/1978", - "ADDR_TYPE": "MAILING", - "ADDR_FULL": "123 Main Street, Las Vegas, NV 89132", - "PHONE_TYPE": "HOME", - "PHONE_NUMBER": "702-919-1300", - "EMAIL_ADDRESS": "bsmith@work.com" - } - """); - - records.put( - SzRecordKey.of("TEST", "1002"), - """ - { - "DATA_SOURCE": "TEST", - "RECORD_ID": "1002", - "RECORD_TYPE": "PERSON", - "PRIMARY_NAME_FIRST": "Bob", - "PRIMARY_NAME_LAST": "Smith", - "PRIMARY_NAME_GENERATION": "II", - "DATE_OF_BIRTH": "11/12/1978", - "ADDR_TYPE": "HOME", - "ADDR_LINE1": "1515 Adela Lane", - "ADDR_CITY": "Las Vegas", - "ADDR_STATE": "NV", - "ADDR_POSTAL_CODE": "89111", - "PHONE_TYPE": "MOBILE", - "PHONE_NUMBER": "702-919-1300" - } - """); - - records.put( - SzRecordKey.of("TEST", "1003"), - """ - { - "DATA_SOURCE": "TEST", - "RECORD_ID": "1003", - "RECORD_TYPE": "PERSON", - "PRIMARY_NAME_FIRST": "Bob", - "PRIMARY_NAME_LAST": "Smith", - "PRIMARY_NAME_MIDDLE": "J", - "DATE_OF_BIRTH": "12/11/1978", - "EMAIL_ADDRESS": "bsmith@work.com" - } - """); - - records.put( - SzRecordKey.of("TEST", "1004"), - """ - { - "DATA_SOURCE": "TEST", - "RECORD_ID": "1004", - "RECORD_TYPE": "PERSON", - "PRIMARY_NAME_FIRST": "B", - "PRIMARY_NAME_LAST": "Smith", - "ADDR_TYPE": "HOME", - "ADDR_LINE1": "1515 Adela Ln", - "ADDR_CITY": "Las Vegas", - "ADDR_STATE": "NV", - "ADDR_POSTAL_CODE": "89132", - "EMAIL_ADDRESS": "bsmith@work.com" - } - """); - - records.put( - SzRecordKey.of("TEST", "1005"), - """ - { - "DATA_SOURCE": "TEST", - 
"RECORD_ID": "1005", - "RECORD_TYPE": "PERSON", - "PRIMARY_NAME_FIRST": "Rob", - "PRIMARY_NAME_MIDDLE": "E", - "PRIMARY_NAME_LAST": "Smith", - "DRIVERS_LICENSE_NUMBER": "112233", - "DRIVERS_LICENSE_STATE": "NV", - "ADDR_TYPE": "MAILING", - "ADDR_LINE1": "123 E Main St", - "ADDR_CITY": "Henderson", - "ADDR_STATE": "NV", - "ADDR_POSTAL_CODE": "89132" - } - """); - - return records; + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); } + } \ No newline at end of file diff --git a/java/snippets/loading/AddWithInfoFutures.java b/java/snippets/loading/AddWithInfoFutures.java new file mode 100644 index 0000000..d08e542 --- /dev/null +++ b/java/snippets/loading/AddWithInfoFutures.java @@ -0,0 +1,322 @@ +package loading; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class AddWithInfoFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddWithInfoFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + return engine.addRecord(recordKey, record.line, SZ_WITH_INFO_FLAGS); + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures 
WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Total entities created : " + entityIdSet.size()); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(SzEngine engine, + Map, Record> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record 
record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile))); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). 
This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file From 3bce82a553e049f8196a3a9469f59f3ccbc4fd19 Mon Sep 17 00:00:00 2001 From: "Barry M. 
Caceres" Date: Wed, 22 Jan 2025 16:53:12 -0800 Subject: [PATCH 6/9] Interim commit --- java/pom.xml | 7 +- .../com/senzing/runner/InstallLocations.java | 0 .../com/senzing/runner/SnippetRunner.java | 47 ++- .../com/senzing/runner/Utilities.java | 0 .../LoadTruthSetWithInfoViaLoop.properties | 1 + .../{AddRecords.java => LoadRecords.java} | 12 +- .../loading/LoadTruthSetWithInfoViaLoop.java | 215 ++++++++++++++ .../{AddFutures.java => LoadViaFutures.java} | 11 +- .../{AddRecordsLoop.java => LoadViaLoop.java} | 11 +- java/snippets/loading/LoadViaQueue.java | 271 ++++++++++++++++++ ...tures.java => LoadWithInfoViaFutures.java} | 11 +- 11 files changed, 561 insertions(+), 25 deletions(-) rename java/runner/{ => java}/com/senzing/runner/InstallLocations.java (100%) rename java/runner/{ => java}/com/senzing/runner/SnippetRunner.java (87%) rename java/runner/{ => java}/com/senzing/runner/Utilities.java (100%) create mode 100644 java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties rename java/snippets/loading/{AddRecords.java => LoadRecords.java} (95%) create mode 100644 java/snippets/loading/LoadTruthSetWithInfoViaLoop.java rename java/snippets/loading/{AddFutures.java => LoadViaFutures.java} (98%) rename java/snippets/loading/{AddRecordsLoop.java => LoadViaLoop.java} (97%) create mode 100644 java/snippets/loading/LoadViaQueue.java rename java/snippets/loading/{AddWithInfoFutures.java => LoadWithInfoViaFutures.java} (98%) diff --git a/java/pom.xml b/java/pom.xml index b7cf3da..bda2e47 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -42,6 +42,11 @@ snippets + + + ${project.basedir}/runner/resources + + ${project.artifactId} @@ -67,7 +72,7 @@ - runner + ${project.basedir}/runner/java diff --git a/java/runner/com/senzing/runner/InstallLocations.java b/java/runner/java/com/senzing/runner/InstallLocations.java similarity index 100% rename from java/runner/com/senzing/runner/InstallLocations.java rename to 
java/runner/java/com/senzing/runner/InstallLocations.java diff --git a/java/runner/com/senzing/runner/SnippetRunner.java b/java/runner/java/com/senzing/runner/SnippetRunner.java similarity index 87% rename from java/runner/com/senzing/runner/SnippetRunner.java rename to java/runner/java/com/senzing/runner/SnippetRunner.java index c3eff49..a0ccc1d 100644 --- a/java/runner/com/senzing/runner/SnippetRunner.java +++ b/java/runner/java/com/senzing/runner/SnippetRunner.java @@ -1,7 +1,6 @@ package com.senzing.runner; import java.io.*; -import java.lang.reflect.Method; import java.sql.*; import java.util.*; import java.util.zip.*; @@ -16,6 +15,8 @@ * Helper class to run each of the snippetts. */ public class SnippetRunner { + public static final String SOURCES_KEY = "sources"; + private static final long ONE_MILLION = 1000000L; private static final String JAR_PATH = getJarPath(); @@ -125,15 +126,53 @@ public static void main(String[] args) { for (String snippet : snippets) { System.out.println(); long start = System.nanoTime(); + Properties snippetProperties = new Properties(); + String resourceName = "/" + snippet.replaceAll("\\.", "/") + + ".properties"; + InputStream is = SnippetRunner.class.getResourceAsStream(resourceName); + if (is != null) { + snippetProperties.load(is); + } + String sourceList = snippetProperties.getProperty(SOURCES_KEY); + System.out.println("Preparing repository for " + snippet + "..."); env = SzCoreEnvironment.newBuilder().settings(settings).build(); try { - SzConfigManager configMgr = env.getConfigManager(); - configMgr.setDefaultConfigId(defaultConfigId); - + // first purge the repository SzDiagnostic diagnostic = env.getDiagnostic(); diagnostic.purgeRepository(); + // now set the configuration + SzConfigManager configMgr = env.getConfigManager(); + // check if we need to configure sources + if (sourceList != null) { + SzConfig config = env.getConfig(); + long handle = config.createConfig(); + String snippetConfig = null; + try { + String[] 
sources = sourceList.split(","); + for (String source : sources) { + source = source.trim(); + System.out.println("Adding data source: " + source); + config.addDataSource(handle, source); + } + snippetConfig = config.exportConfig(handle); + + } finally { + config.closeConfig(handle); + } + + // register the config + long configId = configMgr.addConfig(snippetConfig, snippet); + + // set the default config to the snippet config + configMgr.setDefaultConfigId(configId); + + } else { + // set the default config to the initial default + configMgr.setDefaultConfigId(defaultConfigId); + } + } catch (SzException e) { e.printStackTrace(); } finally { diff --git a/java/runner/com/senzing/runner/Utilities.java b/java/runner/java/com/senzing/runner/Utilities.java similarity index 100% rename from java/runner/com/senzing/runner/Utilities.java rename to java/runner/java/com/senzing/runner/Utilities.java diff --git a/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties new file mode 100644 index 0000000..f48afe2 --- /dev/null +++ b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties @@ -0,0 +1 @@ +sources=CUSTOMERS,WATCHLIST,REFERENCE diff --git a/java/snippets/loading/AddRecords.java b/java/snippets/loading/LoadRecords.java similarity index 95% rename from java/snippets/loading/AddRecords.java rename to java/snippets/loading/LoadRecords.java index 14699be..8de197c 100644 --- a/java/snippets/loading/AddRecords.java +++ b/java/snippets/loading/LoadRecords.java @@ -1,20 +1,16 @@ package loading; -import java.util.Map; -import java.util.LinkedHashMap; +import java.util.*; -import com.senzing.sdk.SzEnvironment; +import com.senzing.sdk.*; import com.senzing.sdk.core.SzCoreEnvironment; -import com.senzing.sdk.SzEngine; -import com.senzing.sdk.SzException; -import com.senzing.sdk.SzRecordKey; import static com.senzing.sdk.SzFlag.*; /** * Provides a simple example of adding records to 
the Senzing repository. */ -public class AddRecords { +public class LoadRecords { public static void main(String[] args) { // get the senzing repository settings String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); @@ -24,7 +20,7 @@ public static void main(String[] args) { } // create a descriptive instance name (can be anything) - String instanceName = AddRecords.class.getSimpleName(); + String instanceName = LoadRecords.class.getSimpleName(); // initialize the Senzing environment SzEnvironment env = SzCoreEnvironment.newBuilder() diff --git a/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java new file mode 100644 index 0000000..c56860a --- /dev/null +++ b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java @@ -0,0 +1,215 @@ +package loading; + +import java.io.*; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class LoadTruthSetWithInfoViaLoop { + private static final List INPUT_FILES = List.of( + "../resources/data/truthset/customers.json", + "../resources/data/truthset/reference.json", + "../resources/data/truthset/watchlist.json"); + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadTruthSetWithInfoViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // loop through the input files + for (String filePath : INPUT_FILES) { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = 
env.getEngine(); + + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + String info = engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_WITH_INFO_FLAGS); + + successCount++; + + // process the info + processInfo(engine, info); + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. 
SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + } + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Total entities created : " + entityIdSet.size()); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. 
+ */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.add(entityId); + } catch (SzNotFoundException e) { + entityIdSet.remove(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/AddFutures.java b/java/snippets/loading/LoadViaFutures.java similarity index 98% rename from java/snippets/loading/AddFutures.java rename to java/snippets/loading/LoadViaFutures.java index a685ee4..f570f16 100644 --- a/java/snippets/loading/AddFutures.java +++ b/java/snippets/loading/LoadViaFutures.java @@ -12,8 +12,11 @@ /** * Provides a simple example of adding records to the Senzing repository. 
*/ -public class AddFutures { +public class LoadViaFutures { private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + private static final String RETRY_PREFIX = "retry-"; private static final String RETRY_SUFFIX = ".jsonl"; @@ -49,7 +52,7 @@ public static void main(String[] args) { } // create a descriptive instance name (can be anything) - String instanceName = AddFutures.class.getSimpleName(); + String instanceName = LoadViaFutures.class.getSimpleName(); // initialize the Senzing environment SzEnvironment env = SzCoreEnvironment.newBuilder() @@ -67,7 +70,7 @@ public static void main(String[] args) { Map, Record> pendingFutures = new IdentityHashMap<>(); try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); BufferedReader br = new BufferedReader(isr)) { // get the engine from the environment @@ -246,7 +249,7 @@ private static void handlePendingFutures(Map, Record> pendingFutures, if (retryFile == null) { retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile))); + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } retryWriter.println(record.line); diff --git a/java/snippets/loading/AddRecordsLoop.java b/java/snippets/loading/LoadViaLoop.java similarity index 97% rename from java/snippets/loading/AddRecordsLoop.java rename to java/snippets/loading/LoadViaLoop.java index 40a9c3a..334d789 100644 --- a/java/snippets/loading/AddRecordsLoop.java +++ b/java/snippets/loading/LoadViaLoop.java @@ -10,8 +10,11 @@ /** * Provides a simple example of adding records to the Senzing repository. 
*/ -public class AddRecordsLoop { +public class LoadViaLoop { private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + private static final String RETRY_PREFIX = "retry-"; private static final String RETRY_SUFFIX = ".jsonl"; @@ -37,7 +40,7 @@ public static void main(String[] args) { } // create a descriptive instance name (can be anything) - String instanceName = AddRecordsLoop.class.getSimpleName(); + String instanceName = LoadViaLoop.class.getSimpleName(); // initialize the Senzing environment SzEnvironment env = SzCoreEnvironment.newBuilder() @@ -49,7 +52,7 @@ public static void main(String[] args) { String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); BufferedReader br = new BufferedReader(isr)) { // get the engine from the environment @@ -98,7 +101,7 @@ public static void main(String[] args) { if (retryFile == null) { retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile))); + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } retryWriter.println(line); diff --git a/java/snippets/loading/LoadViaQueue.java b/java/snippets/loading/LoadViaQueue.java new file mode 100644 index 0000000..38c1e06 --- /dev/null +++ b/java/snippets/loading/LoadViaQueue.java @@ -0,0 +1,271 @@ +package loading; + +import java.io.*; +import java.util.concurrent.*; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; +import static java.lang.Thread.State.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class LoadViaQueue { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final int MAXIMUM_BACKLOG = 100; + private static final long POLL_TIMEOUT = 3000L; + private static final TimeUnit POLL_TIME_UNIT = TimeUnit.MILLISECONDS; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static final Object MONITOR = new Object(); + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public record Record(int lineNumber, String line) { } + + private static final BlockingQueue<Record> recordQueue + = new LinkedBlockingQueue<>(MAXIMUM_BACKLOG); + + private static volatile Exception producerFailure = null; + private static volatile Exception consumerFailure = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadViaQueue.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ?
args[0] : DEFAULT_FILE_PATH; + + Thread producer = new Thread(() -> { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // add the record to the queue + recordQueue.put(new Record(lineNumber, line)); + } + + } catch (Exception e) { + producerFailure = e; + } + }); + + // start the producer + producer.start(); + + Thread consumer = new Thread(() -> { + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop while producer has not failed and is either still running + // or there are remaining records + while (producerFailure == null + && (!isTerminated(producer) || recordQueue.size() > 0)) + { + Record record = recordQueue.poll(POLL_TIMEOUT, POLL_TIME_UNIT); + + // check if we timed out getting the next record + if (record == null) { + // continue the loop to check if we are done + continue; + } + + // get the line number and line from the record + int lineNumber = record.lineNumber; + String line = record.line; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, 
SZ_NO_FLAGS); + + synchronized (MONITOR) { + successCount++; + } + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + } + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter( + new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + } + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + synchronized (MONITOR) { + errorCount++; // increment the error count + } + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + consumerFailure = e; + } + }); + + // start the consumer + consumer.start(); + + // join the threads + while (!isTerminated(producer)) { + try { + producer.join(); + } catch (InterruptedException ignore) { + ignore.printStackTrace(); + } + } + while (!isTerminated(consumer)) { + try { + consumer.join(); + } catch (InterruptedException ignore) { + ignore.printStackTrace(); + } + } + + try { + // check for producer and consumer failures + if (producerFailure != null) { + throw producerFailure; + } + if (consumerFailure != null) { + throw consumerFailure; + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + synchronized (MONITOR) { + System.out.println(); + 
System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + } + + } + + } + + private static boolean isTerminated(Thread thread) { + synchronized (thread) { + return (thread.getState() == TERMINATED); + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/loading/AddWithInfoFutures.java b/java/snippets/loading/LoadWithInfoViaFutures.java similarity index 98% rename from java/snippets/loading/AddWithInfoFutures.java rename to java/snippets/loading/LoadWithInfoViaFutures.java index d08e542..e082f9b 100644 --- a/java/snippets/loading/AddWithInfoFutures.java +++ b/java/snippets/loading/LoadWithInfoViaFutures.java @@ -12,8 +12,11 @@ /** * Provides a simple example of adding records to the Senzing repository. 
*/ -public class AddWithInfoFutures { +public class LoadWithInfoViaFutures { private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + private static final String RETRY_PREFIX = "retry-"; private static final String RETRY_SUFFIX = ".jsonl"; @@ -52,7 +55,7 @@ public static void main(String[] args) { } // create a descriptive instance name (can be anything) - String instanceName = AddWithInfoFutures.class.getSimpleName(); + String instanceName = LoadWithInfoViaFutures.class.getSimpleName(); // initialize the Senzing environment SzEnvironment env = SzCoreEnvironment.newBuilder() @@ -70,7 +73,7 @@ public static void main(String[] args) { Map, Record> pendingFutures = new IdentityHashMap<>(); try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); BufferedReader br = new BufferedReader(isr)) { // get the engine from the environment @@ -253,7 +256,7 @@ private static void handlePendingFutures(SzEngine engine, if (retryFile == null) { retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); retryWriter = new PrintWriter( - new OutputStreamWriter(new FileOutputStream(retryFile))); + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); } retryWriter.println(record.line); From 83f9843c9592a7406143efc17526131f41c6a2b3 Mon Sep 17 00:00:00 2001 From: "Barry M. 
Caceres" Date: Mon, 27 Jan 2025 17:17:23 -0800 Subject: [PATCH 7/9] Interim commit --- java/pom.xml | 3 + .../com/senzing/runner/SnippetRunner.java | 130 ++++++- .../deleting/DeleteViaFutures.properties | 1 + .../deleting/DeleteViaLoop.properties | 1 + .../DeleteWithInfoViaFutures.properties | 1 + .../initialization/PurgeRepository.properties | 2 + .../LoadTruthSetWithInfoViaLoop.properties | 4 +- .../redo/LoadWithRedoViaLoop.properties | 3 + .../resources/redo/RedoContinuous.properties | 7 + .../redo/RedoContinuousViaFutures.properties | 7 + .../redo/RedoWithInfoContinuous.properties | 7 + .../searching/SearchRecords.properties | 6 + .../searching/SearchViaFutures.properties | 1 + .../configuration/AddDataSources.java | 99 ++++++ java/snippets/deleting/DeleteViaFutures.java | 286 +++++++++++++++ java/snippets/deleting/DeleteViaLoop.java | 168 +++++++++ .../deleting/DeleteWithInfoViaFutures.java | 325 ++++++++++++++++++ .../CheckDatastorePerformance.java | 57 +++ .../information/GetDatastoreInfo.java | 55 +++ java/snippets/information/GetLicense.java | 55 +++ java/snippets/information/GetVersion.java | 55 +++ .../initialization/EnginePriming.java | 61 ++++ .../initialization/EnvironmentAndHubs.java | 66 ++++ .../initialization/PurgeRepository.java | 87 +++++ .../loading/LoadTruthSetWithInfoViaLoop.java | 31 +- java/snippets/loading/LoadViaFutures.java | 3 +- .../loading/LoadWithStatsViaLoop.java | 187 ++++++++++ java/snippets/redo/LoadWithRedoViaLoop.java | 242 +++++++++++++ java/snippets/redo/RedoContinuous.java | 176 ++++++++++ .../redo/RedoContinuousViaFutures.java | 280 +++++++++++++++ .../snippets/redo/RedoWithInfoContinuous.java | 224 ++++++++++++ java/snippets/searching/SearchRecords.java | 122 +++++++ java/snippets/searching/SearchViaFutures.java | 296 ++++++++++++++++ resources/data/truthset/customers.jsonl | 120 +++++++ resources/data/truthset/reference.jsonl | 22 ++ resources/data/truthset/watchlist.jsonl | 17 + 36 files changed, 3176 insertions(+), 31 
deletions(-) create mode 100644 java/runner/resources/deleting/DeleteViaFutures.properties create mode 100644 java/runner/resources/deleting/DeleteViaLoop.properties create mode 100644 java/runner/resources/deleting/DeleteWithInfoViaFutures.properties create mode 100644 java/runner/resources/initialization/PurgeRepository.properties create mode 100644 java/runner/resources/redo/LoadWithRedoViaLoop.properties create mode 100644 java/runner/resources/redo/RedoContinuous.properties create mode 100644 java/runner/resources/redo/RedoContinuousViaFutures.properties create mode 100644 java/runner/resources/redo/RedoWithInfoContinuous.properties create mode 100644 java/runner/resources/searching/SearchRecords.properties create mode 100644 java/runner/resources/searching/SearchViaFutures.properties create mode 100644 java/snippets/configuration/AddDataSources.java create mode 100644 java/snippets/deleting/DeleteViaFutures.java create mode 100644 java/snippets/deleting/DeleteViaLoop.java create mode 100644 java/snippets/deleting/DeleteWithInfoViaFutures.java create mode 100644 java/snippets/information/CheckDatastorePerformance.java create mode 100644 java/snippets/information/GetDatastoreInfo.java create mode 100644 java/snippets/information/GetLicense.java create mode 100644 java/snippets/information/GetVersion.java create mode 100644 java/snippets/initialization/EnginePriming.java create mode 100644 java/snippets/initialization/EnvironmentAndHubs.java create mode 100644 java/snippets/initialization/PurgeRepository.java create mode 100644 java/snippets/loading/LoadWithStatsViaLoop.java create mode 100644 java/snippets/redo/LoadWithRedoViaLoop.java create mode 100644 java/snippets/redo/RedoContinuous.java create mode 100644 java/snippets/redo/RedoContinuousViaFutures.java create mode 100644 java/snippets/redo/RedoWithInfoContinuous.java create mode 100644 java/snippets/searching/SearchRecords.java create mode 100644 java/snippets/searching/SearchViaFutures.java create mode 
100644 resources/data/truthset/customers.jsonl create mode 100644 resources/data/truthset/reference.jsonl create mode 100644 resources/data/truthset/watchlist.jsonl diff --git a/java/pom.xml b/java/pom.xml index bda2e47..0ecd293 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -46,6 +46,9 @@ ${project.basedir}/runner/resources + + ${project.basedir}/../resources + ${project.artifactId} diff --git a/java/runner/java/com/senzing/runner/SnippetRunner.java b/java/runner/java/com/senzing/runner/SnippetRunner.java index a0ccc1d..7898848 100644 --- a/java/runner/java/com/senzing/runner/SnippetRunner.java +++ b/java/runner/java/com/senzing/runner/SnippetRunner.java @@ -3,6 +3,7 @@ import java.io.*; import java.sql.*; import java.util.*; +import java.util.concurrent.TimeUnit; import java.util.zip.*; import javax.json.*; @@ -10,17 +11,30 @@ import com.senzing.sdk.core.*; import static com.senzing.runner.Utilities.*; +import static com.senzing.sdk.SzFlag.SZ_NO_FLAGS; /** * Helper class to run each of the snippetts. */ public class SnippetRunner { - public static final String SOURCES_KEY = "sources"; + public static final String SOURCE_KEY_PREFIX = "source."; + + public static final String LOAD_KEY_PREFIX = "load."; + + public static final String INPUT_KEY_PREFIX = "input."; + + public static final String DESTROY_AFTER_KEY = "destroyAfter"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String TEST_SOURCE = "TEST"; private static final long ONE_MILLION = 1000000L; private static final String JAR_PATH = getJarPath(); + private static final int SIGTERM_EXIT_CODE = 143; + /** * Harness for running one or more of the code snippets. 
* @@ -126,14 +140,14 @@ public static void main(String[] args) { for (String snippet : snippets) { System.out.println(); long start = System.nanoTime(); - Properties snippetProperties = new Properties(); + Properties properties = new Properties(); String resourceName = "/" + snippet.replaceAll("\\.", "/") + ".properties"; - InputStream is = SnippetRunner.class.getResourceAsStream(resourceName); - if (is != null) { - snippetProperties.load(is); + try (InputStream is = SnippetRunner.class.getResourceAsStream(resourceName)) { + if (is != null) { + properties.load(is); + } } - String sourceList = snippetProperties.getProperty(SOURCES_KEY); System.out.println("Preparing repository for " + snippet + "..."); env = SzCoreEnvironment.newBuilder().settings(settings).build(); @@ -145,13 +159,17 @@ public static void main(String[] args) { // now set the configuration SzConfigManager configMgr = env.getConfigManager(); // check if we need to configure sources - if (sourceList != null) { + if (properties.containsKey(SOURCE_KEY_PREFIX + 0)) { SzConfig config = env.getConfig(); long handle = config.createConfig(); String snippetConfig = null; try { - String[] sources = sourceList.split(","); - for (String source : sources) { + for (int index = 0; + properties.containsKey(SOURCE_KEY_PREFIX + index); + index++) + { + String sourceKey = SOURCE_KEY_PREFIX + index; + String source = properties.getProperty(sourceKey); source = source.trim(); System.out.println("Adding data source: " + source); config.addDataSource(handle, source); @@ -173,6 +191,38 @@ public static void main(String[] args) { configMgr.setDefaultConfigId(defaultConfigId); } + // check if there are files we need to load + if (properties.containsKey(LOAD_KEY_PREFIX + 0)) { + SzEngine engine = env.getEngine(); + for (int index = 0; properties.containsKey(LOAD_KEY_PREFIX + index); index++) + { + String loadKey = LOAD_KEY_PREFIX + index; + String fileName = properties.getProperty(loadKey); + fileName = fileName.trim(); + 
System.out.println("Loading records from file resource: " + fileName); + try (InputStream is = SnippetRunner.class.getResourceAsStream(fileName)) + { + if (is == null) { + throw new IllegalArgumentException( + "Missing resource (" + fileName + ") for load file (" + + loadKey + ") for snippet (" + snippet + ")"); + } + InputStreamReader isr = new InputStreamReader(is, UTF_8); + BufferedReader br = new BufferedReader(isr); + for (String line = br.readLine(); line != null; line = br.readLine()) { + line = line.trim(); + if (line.length() == 0) continue; + if (line.startsWith("#")) continue; + JsonObject record = Json.createReader(new StringReader(line)).readObject(); + String dataSource = record.getString(DATA_SOURCE, TEST_SOURCE); + String recordId = record.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSource, recordId); + engine.addRecord(recordKey, line, SZ_NO_FLAGS); + } + } + } + } + } catch (SzException e) { e.printStackTrace(); } finally { @@ -181,7 +231,7 @@ public static void main(String[] args) { long duration = (System.nanoTime() - start) / ONE_MILLION; System.out.println("Prepared repository for " + snippet + ". 
(" + duration + "ms)"); - executeSnippet(snippet, installLocations, settings); + executeSnippet(snippet, installLocations, settings, properties); } System.out.println(); @@ -219,8 +269,12 @@ private static Thread startOutputThread(InputStream stream, PrintStream ps) { return thread; } - private static void executeSnippet(String snippet, InstallLocations senzingInstall, String settings) - throws Exception { + private static void executeSnippet(String snippet, + InstallLocations senzingInstall, + String settings, + Properties properties) + throws Exception + { String[] cmdArray = new String[] { "java", "-cp", JAR_PATH, snippet }; String[] runtimeEnv = createRuntimeEnv(senzingInstall, settings); @@ -228,14 +282,58 @@ private static void executeSnippet(String snippet, InstallLocations senzingInsta System.out.println(); System.out.println("Executing " + snippet + "..."); long start = System.nanoTime(); - Process process = Runtime.getRuntime().exec(cmdArray, runtimeEnv); + Runtime runtime = Runtime.getRuntime(); + Process process = runtime.exec(cmdArray, runtimeEnv); Thread errThread = startOutputThread(process.getErrorStream(), System.err); Thread outThread = startOutputThread(process.getInputStream(), System.out); - int exitValue = process.waitFor(); + if (properties != null && properties.containsKey(INPUT_KEY_PREFIX + 0)) { + try { + // sleep for 1 second to give the process a chance to start up + Thread.sleep(1000L); + } catch (InterruptedException ignore) { + // ignore interruptions + } + PrintWriter pw = new PrintWriter( + new OutputStreamWriter(process.getOutputStream(), UTF_8)); + for (int index = 0; + properties.containsKey(INPUT_KEY_PREFIX + index); + index++) + { + String inputLine = properties.getProperty(INPUT_KEY_PREFIX + index); + System.out.println(inputLine); + System.out.flush(); + inputLine = (inputLine == null) ? 
"" : inputLine.trim(); + pw.println(inputLine); + pw.flush(); + } + } + int exitValue = 0; + int expectedExitValue = 0; + if (properties.containsKey(DESTROY_AFTER_KEY)) { + String propValue = properties.getProperty(DESTROY_AFTER_KEY); + long delay = Long.parseLong(propValue); + boolean exited = process.waitFor(delay, TimeUnit.MILLISECONDS); + if (!exited && process.isAlive()) { + expectedExitValue = SIGTERM_EXIT_CODE; + System.out.println(); + System.out.println("Runner destroying " + snippet + " process..."); + // NOTE: using process.destroy() does not trigger the registered + // shutdown hooks in the snippet sub-process for some reason + Process killer = runtime.exec("kill " + process.pid()); + killer.waitFor(); // wait for the kill process to complete + } + exitValue = process.waitFor(); + + } else { + // wait indefinitely for the process to terminate + exitValue = process.waitFor(); + } + errThread.join(); outThread.join(); - if (exitValue != 0) { - throw new Exception("Failed to execute snippet; " + snippet); + if (exitValue != expectedExitValue) { + throw new Exception("Failed to execute snippet; " + snippet + + " (" + exitValue + ")"); } long duration = (System.nanoTime() - start) / ONE_MILLION; System.out.println("Executed " + snippet + ". 
(" + duration + "ms)"); diff --git a/java/runner/resources/deleting/DeleteViaFutures.properties b/java/runner/resources/deleting/DeleteViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteViaFutures.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/runner/resources/deleting/DeleteViaLoop.properties b/java/runner/resources/deleting/DeleteViaLoop.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteViaLoop.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties b/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/deleting/DeleteWithInfoViaFutures.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/runner/resources/initialization/PurgeRepository.properties b/java/runner/resources/initialization/PurgeRepository.properties new file mode 100644 index 0000000..82d8e06 --- /dev/null +++ b/java/runner/resources/initialization/PurgeRepository.properties @@ -0,0 +1,2 @@ +load.0=/data/load-500.jsonl +input.0=yes diff --git a/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties index f48afe2..c5f7301 100644 --- a/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties +++ b/java/runner/resources/loading/LoadTruthSetWithInfoViaLoop.properties @@ -1 +1,3 @@ -sources=CUSTOMERS,WATCHLIST,REFERENCE +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE diff --git a/java/runner/resources/redo/LoadWithRedoViaLoop.properties b/java/runner/resources/redo/LoadWithRedoViaLoop.properties new file mode 100644 index 0000000..c5f7301 --- /dev/null +++ b/java/runner/resources/redo/LoadWithRedoViaLoop.properties @@ -0,0 +1,3 @@ +source.0=CUSTOMERS 
+source.1=WATCHLIST +source.2=REFERENCE diff --git a/java/runner/resources/redo/RedoContinuous.properties b/java/runner/resources/redo/RedoContinuous.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoContinuous.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git a/java/runner/resources/redo/RedoContinuousViaFutures.properties b/java/runner/resources/redo/RedoContinuousViaFutures.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoContinuousViaFutures.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git a/java/runner/resources/redo/RedoWithInfoContinuous.properties b/java/runner/resources/redo/RedoWithInfoContinuous.properties new file mode 100644 index 0000000..16be70f --- /dev/null +++ b/java/runner/resources/redo/RedoWithInfoContinuous.properties @@ -0,0 +1,7 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl +destroyAfter=15000 diff --git a/java/runner/resources/searching/SearchRecords.properties b/java/runner/resources/searching/SearchRecords.properties new file mode 100644 index 0000000..d90c3bc --- /dev/null +++ b/java/runner/resources/searching/SearchRecords.properties @@ -0,0 +1,6 @@ +source.0=CUSTOMERS +source.1=WATCHLIST +source.2=REFERENCE +load.0=/data/truthset/customers.jsonl +load.1=/data/truthset/reference.jsonl +load.2=/data/truthset/watchlist.jsonl diff --git a/java/runner/resources/searching/SearchViaFutures.properties 
b/java/runner/resources/searching/SearchViaFutures.properties new file mode 100644 index 0000000..bad401a --- /dev/null +++ b/java/runner/resources/searching/SearchViaFutures.properties @@ -0,0 +1 @@ +load.0=/data/load-500.jsonl diff --git a/java/snippets/configuration/AddDataSources.java b/java/snippets/configuration/AddDataSources.java new file mode 100644 index 0000000..ca29e52 --- /dev/null +++ b/java/snippets/configuration/AddDataSources.java @@ -0,0 +1,99 @@ +package configuration; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding data sources to the Senzing configuration. + */ +public class AddDataSources { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = AddDataSources.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the config and config manager from the environment + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + + // setup a loop to handle race-condition conflicts on + // replacing the default config ID + boolean replacedConfig = false; + while (!replacedConfig) { + // get the current default config ID and associated config JSON + long configId = configMgr.getDefaultConfigId(); + String configDefinition = configMgr.getConfig(configId); + + // prepare an in-memory config to be modified and get the handle + long configHandle = config.importConfig(configDefinition); + String modifiedConfig = null; + try { + // create an array of the data sources to
add + String[] dataSources = { "CUSTOMERS", "EMPLOYEES", "WATCHLIST" }; + + // loop through the array and add each data source + for (String dataSource : dataSources) { + config.addDataSource(configHandle, dataSource); + } + + // export the modified config to JSON text + modifiedConfig = config.exportConfig(configHandle); + + } finally { + config.closeConfig(configHandle); + } + + // add the modified config to the repository with a comment + long newConfigId = configMgr.addConfig( + modifiedConfig, "Added truth set data sources"); + + try { + // replace the default config + configMgr.replaceDefaultConfigId(configId, newConfigId); + + // if we get here then set the flag indicating success + replacedConfig = true; + + } catch (SzReplaceConflictException e) { + // if we get here then another thread or process has + // changed the default config ID since we retrieved it + // (i.e.: we have a race condition) so we allow the + // loop to repeat with the latest default config ID + } + } + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/deleting/DeleteViaFutures.java b/java/snippets/deleting/DeleteViaFutures.java new file mode 100644 index 0000000..2372b0b --- /dev/null +++ b/java/snippets/deleting/DeleteViaFutures.java @@ -0,0 +1,286 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static 
com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class DeleteViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = DeleteViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + engine.deleteRecord(recordKey, SZ_NO_FLAGS); + + return null; + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT 
blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(Map, Record> pendingFutures, boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && 
!future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + future.get(); + + // if we get here then increment the success count + successCount++; + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/deleting/DeleteViaLoop.java b/java/snippets/deleting/DeleteViaLoop.java new file mode 100644 index 0000000..f14c3ca --- /dev/null +++ b/java/snippets/deleting/DeleteViaLoop.java @@ -0,0 +1,168 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class DeleteViaLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String 
instanceName = DeleteViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.deleteRecord( + SzRecordKey.of(dataSourceCode, recordId), SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + 
retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations : " + successCount); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/deleting/DeleteWithInfoViaFutures.java b/java/snippets/deleting/DeleteWithInfoViaFutures.java new file mode 100644 index 0000000..5b770d9 --- /dev/null +++ b/java/snippets/deleting/DeleteWithInfoViaFutures.java @@ -0,0 +1,325 @@ +package deleting; + +import java.io.*; +import javax.json.*; +import java.util.*; +import java.util.concurrent.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class DeleteWithInfoViaFutures { + private static final String DEFAULT_FILE_PATH = "../resources/data/del-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long PAUSE_TIMEOUT = 100L; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES"; + private static final String ENTITY_ID = "ENTITY_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + public record Record(int lineNumber, String line) { } + + private static int errorCount = 0; + private static int successCount = 
0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + private static final Set entityIdSet = new HashSet<>(); + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = DeleteWithInfoViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH; + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, Record> pendingFutures = new IdentityHashMap<>(); + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + boolean eof = false; + + while (!eof) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + // read the next line + String line = br.readLine(); + lineNumber++; + + // check for EOF + if (line == null) { + eof = true; + break; + } + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + // construct the Record instance + Record 
record = new Record(lineNumber, line); + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + SzRecordKey recordKey = SzRecordKey.of(dataSourceCode, recordId); + + Future future = executor.submit(() -> { + // call the addRecord() function with no flags + return engine.deleteRecord(recordKey, SZ_WITH_INFO_FLAGS); + }); + + // add the futures to the pending future list + pendingFutures.put(future, record); + + } catch (JsonException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + } + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(engine, pendingFutures, false); + + // if we still have exceeded the backlog size then pause + // briefly before trying again + if (pendingFutures.size() >= MAXIMUM_BACKLOG) { + try { + Thread.sleep(PAUSE_TIMEOUT); + + } catch (InterruptedException ignore) { + // do nothing + } + } + } while (pendingFutures.size() >= MAXIMUM_BACKLOG); + } + + // shutdown the executor service + executor.shutdown(); + + // after we have submitted all records we need to handle the remaining + // pending futures so this time we block on each future + handlePendingFutures(engine, pendingFutures, true); + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // check if executor service is shutdown + if (!executor.isShutdown()) { + executor.shutdown(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Successful delete operations 
: " + successCount); + System.out.println("Total entities deleted : " + entityIdSet.size()); + System.out.println("Failed delete operations : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " deletions to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + private static void handlePendingFutures(SzEngine engine, + Map, Record> pendingFutures, + boolean blocking) + throws Exception + { + // check for completed futures + Iterator,Record>> iter + = pendingFutures.entrySet().iterator(); + + // loop through the pending futures + while (iter.hasNext()) { + // get the next pending future + Map.Entry,Record> entry = iter.next(); + Future future = entry.getKey(); + Record record = entry.getValue(); + + // if not blocking and this one is not done then continue + if (!blocking && !future.isDone()) continue; + + // remove the pending future from the map + iter.remove(); + + try { + try { + // get the value to see if there was an exception + String info = future.get(); + + // if we get here then increment the success count + successCount++; + + // process the info + processInfo(engine, info); + + } catch (InterruptedException e) { + // this could only happen if blocking is true, just + // rethrow as retryable and log the interruption + throw e; + + } catch (ExecutionException e) { + // if execution failed with an exception then retrhow + Throwable cause = e.getCause(); + if ((cause == null) || !(cause instanceof Exception)) { + // rethrow the execution exception + throw e; + } + // cast to an Exception and rethrow + throw ((Exception) cause); + } + + } catch (SzBadInputException e) { + logFailedRecord(ERROR, e, record.lineNumber, record.line); + errorCount++; // increment the error count + + } catch (SzRetryableException|InterruptedException|CancellationException e) { + // handle thread interruption and cancellation as 
retries + logFailedRecord(WARNING, e, record.lineNumber, record.line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(record.line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, record.lineNumber, record.line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + + /** + * Example method for parsing and handling the INFO message (formatted + * as JSON). This example implementation simply tracks all entity ID's + * that appear as "AFFECTED_ENTITIES" to count the number + * of entities created for the records -- essentially a contrived + * data mart. + * + * @param info The info message. + */ + private static void processInfo(SzEngine engine, String info) { + JsonObject jsonObject = Json.createReader(new StringReader(info)).readObject(); + if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return; + JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES); + for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) { + JsonNumber number = affected.getJsonNumber(ENTITY_ID); + long entityId = number.longValue(); + + try { + engine.getEntity(entityId, null); + entityIdSet.remove(entityId); + } catch (SzNotFoundException e) { + entityIdSet.add(entityId); + } catch (SzException e) { + // simply log the exception, do not rethrow + System.err.println(); + System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId); + System.err.println(e.toString()); + System.err.flush(); + } + } + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. 
+ * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. + */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO DELETE RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/information/CheckDatastorePerformance.java b/java/snippets/information/CheckDatastorePerformance.java new file mode 100644 index 0000000..2779bd2 --- /dev/null +++ b/java/snippets/information/CheckDatastorePerformance.java @@ -0,0 +1,57 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class CheckDatastorePerformance { + private static final int SECONDS_TO_RUN = 3; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = CheckDatastorePerformance.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + String result = diagnostic.checkDatastorePerformance(SECONDS_TO_RUN); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + 
System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetDatastoreInfo.java b/java/snippets/information/GetDatastoreInfo.java new file mode 100644 index 0000000..06aae5a --- /dev/null +++ b/java/snippets/information/GetDatastoreInfo.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class GetDatastoreInfo { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetDatastoreInfo.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + String result = diagnostic.getDatastoreInfo(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) 
e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetLicense.java b/java/snippets/information/GetLicense.java new file mode 100644 index 0000000..b56378b --- /dev/null +++ b/java/snippets/information/GetLicense.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class GetLicense { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetLicense.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + + String result = product.getLicense(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/information/GetVersion.java b/java/snippets/information/GetVersion.java new file mode 
100644 index 0000000..c2ab665 --- /dev/null +++ b/java/snippets/information/GetVersion.java @@ -0,0 +1,55 @@ +package information; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class GetVersion { + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = GetVersion.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + + String result = product.getVersion(); + + System.out.println(result); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/initialization/EnginePriming.java b/java/snippets/initialization/EnginePriming.java new file mode 100644 index 0000000..7e58f1e --- /dev/null +++ b/java/snippets/initialization/EnginePriming.java @@ -0,0 +1,61 @@ +package initialization; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records 
to the Senzing repository. + */ +public class EnginePriming { + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = EnginePriming.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzEngine engine = env.getEngine(); + + long start = System.nanoTime(); + + System.out.println("Priming Senzing engine..."); + engine.primeEngine(); + + long duration = (System.nanoTime() - start) / ONE_MILLION; + System.out.println("Primed Senzing engine. (" + duration + "ms)"); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/initialization/EnvironmentAndHubs.java b/java/snippets/initialization/EnvironmentAndHubs.java new file mode 100644 index 0000000..9bd0c1e --- /dev/null +++ b/java/snippets/initialization/EnvironmentAndHubs.java @@ -0,0 +1,66 @@ +package initialization; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of adding records to the Senzing 
repository. + */ +public class EnvironmentAndHubs { + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = EnvironmentAndHubs.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzProduct product = env.getProduct(); + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + SzDiagnostic diagnostic = env.getDiagnostic(); + SzEngine engine = env.getEngine(); + + System.out.println(product); + System.out.println(config); + System.out.println(configMgr); + System.out.println(diagnostic); + System.out.println(engine); + + // do work with the hub handles which are valid + // until the env.destroy() function is called + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git a/java/snippets/initialization/PurgeRepository.java b/java/snippets/initialization/PurgeRepository.java new file mode 100644 index 0000000..e47a911 --- /dev/null +++ b/java/snippets/initialization/PurgeRepository.java @@ -0,0 +1,87 @@ 
+package initialization; + +import java.io.*; +import java.util.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of purging all loaded data from the Senzing repository. + */ +public class PurgeRepository { + private static final String PURGE_MESSAGE = """ + **************************************** WARNING **************************************** + + This example will purge all currently loaded data from the Senzing datastore! + Before proceeding, all instances of Senzing (custom code, tools, etc.) must be shut down. + + ***************************************************************************************** + + Are you sure you want to continue and purge the Senzing datastore? (y/n) """; + + private static final Set<String> YES_ANSWERS + = Set.of("y", "Y", "Yes", "yes", "YES"); + + private static final long ONE_MILLION = 1000000L; + + public static void main(String[] args) { + System.out.println(PURGE_MESSAGE); + try { + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); + String response = br.readLine(); + if (response == null || !YES_ANSWERS.contains(response.trim())) { // null means end of input: treat as "no" (Set.of rejects null lookups) + System.exit(1); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = PurgeRepository.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + SzDiagnostic diagnostic = env.getDiagnostic(); + + long start = System.nanoTime(); + + System.out.println("Purging Senzing repository..."); + diagnostic.purgeRepository(); + + long duration = 
(System.nanoTime() - start) / ONE_MILLION; + System.out.println("Purged Senzing repository. (" + duration + "ms)"); + + } catch (SzException e) { + // handle any exception that may have occurred + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + } +} \ No newline at end of file diff --git a/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java index c56860a..0ba3127 100644 --- a/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java +++ b/java/snippets/loading/LoadTruthSetWithInfoViaLoop.java @@ -16,9 +16,9 @@ */ public class LoadTruthSetWithInfoViaLoop { private static final List INPUT_FILES = List.of( - "../resources/data/truthset/customers.json", - "../resources/data/truthset/reference.json", - "../resources/data/truthset/watchlist.json"); + "../resources/data/truthset/customers.jsonl", + "../resources/data/truthset/reference.jsonl", + "../resources/data/truthset/watchlist.jsonl"); private static final String UTF_8 = "UTF-8"; @@ -60,15 +60,15 @@ public static void main(String[] args) { .build(); try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + // loop through the input files for (String filePath : INPUT_FILES) { try (FileInputStream fis = new FileInputStream(filePath); - InputStreamReader isr = new InputStreamReader(fis, UTF_8); - BufferedReader br = new BufferedReader(isr)) + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) { - // get the engine from the environment - SzEngine engine = env.getEngine(); - int lineNumber = 0; // 
loop through the example records and add them to the repository for (String line = br.readLine(); line != null; line = br.readLine()) { @@ -103,11 +103,11 @@ public static void main(String[] args) { processInfo(engine, info); } catch (JsonException|SzBadInputException e) { - logFailedRecord(ERROR, e, lineNumber, line); + logFailedRecord(ERROR, e, filePath, lineNumber, line); errorCount++; // increment the error count } catch (SzRetryableException e) { - logFailedRecord(WARNING, e, lineNumber, line); + logFailedRecord(WARNING, e, filePath, lineNumber, line); errorCount++; // increment the error count retryCount++; // increment the retry count @@ -121,7 +121,7 @@ public static void main(String[] args) { } catch (Exception e) { // catch any other exception (incl. SzException) here - logFailedRecord(CRITICAL, e, lineNumber, line); + logFailedRecord(CRITICAL, e, filePath, lineNumber, line); errorCount++; throw e; // rethrow since exception is critical } @@ -200,13 +200,18 @@ private static void processInfo(SzEngine engine, String info) { * @param recordJson The JSON text for the failed record. 
*/ private static void logFailedRecord(String errorType, - Exception exception, + Exception exception, + String filePath, int lineNumber, String recordJson) { + File file = new File(filePath); + String fileName = file.getName(); + System.err.println(); System.err.println( - "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName + + " AT LINE " + lineNumber + ": "); System.err.println(recordJson); System.err.println(exception); System.err.flush(); diff --git a/java/snippets/loading/LoadViaFutures.java b/java/snippets/loading/LoadViaFutures.java index f570f16..b2eee67 100644 --- a/java/snippets/loading/LoadViaFutures.java +++ b/java/snippets/loading/LoadViaFutures.java @@ -119,10 +119,11 @@ public static void main(String[] args) { // call the addRecord() function with no flags engine.addRecord(recordKey, record.line, SZ_NO_FLAGS); + // return null since we have no "info" to return return null; }); - // add the futures to the pending future list + // add the future to the pending future list pendingFutures.put(future, record); } catch (JsonException e) { diff --git a/java/snippets/loading/LoadWithStatsViaLoop.java b/java/snippets/loading/LoadWithStatsViaLoop.java new file mode 100644 index 0000000..c246fdc --- /dev/null +++ b/java/snippets/loading/LoadWithStatsViaLoop.java @@ -0,0 +1,187 @@ +package loading; + +import java.io.*; +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. 
+ */ +public class LoadWithStatsViaLoop { + private static final String DEFAULT_FILE_PATH = "../resources/data/load-500.jsonl"; + + private static final String UTF_8 = "UTF-8"; + + private static final int STATS_INTERVAL = 100; + private static final int STATS_TRUNCATE = 70; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadWithStatsViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + String filePath = (args.length > 0) ? 
args[0] : DEFAULT_FILE_PATH; + + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + // check if it is time to obtain stats + if ((successCount % STATS_INTERVAL) == 0) { + try { + String stats = engine.getStats(); + if (stats.length() > STATS_TRUNCATE) { + stats = stats.substring(0, STATS_TRUNCATE) + " ..."; + } + System.out.println("* STATS: " + stats); + + } catch (SzException e) { + // trap the stats exception so it is not misinterpreted + // as an exception from engine.addRecord() + System.err.println("**** FAILED TO OBTAIN STATS: " + e); + } + } + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + // track the retry record so it can be retried later + if 
(retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(line); + + } catch (Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Records failed with errors : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println(retryCount + " records to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + int lineNumber, + String recordJson) + { + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + +} \ No newline at end of file diff --git a/java/snippets/redo/LoadWithRedoViaLoop.java b/java/snippets/redo/LoadWithRedoViaLoop.java new file mode 100644 index 0000000..6c10e60 --- /dev/null +++ b/java/snippets/redo/LoadWithRedoViaLoop.java @@ -0,0 +1,242 @@ +package redo; + +import java.io.*; +import java.util.List; + +import javax.json.*; +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class LoadWithRedoViaLoop { + private static final List INPUT_FILES = List.of( + "../resources/data/truthset/customers.jsonl", + "../resources/data/truthset/reference.jsonl", + "../resources/data/truthset/watchlist.jsonl"); + + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final String DATA_SOURCE = "DATA_SOURCE"; + private static final String RECORD_ID = "RECORD_ID"; + + private static final String ERROR = "ERROR"; + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int successCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + 
System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = LoadWithRedoViaLoop.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + // loop through the input files + for (String filePath: INPUT_FILES) { + try (FileInputStream fis = new FileInputStream(filePath); + InputStreamReader isr = new InputStreamReader(fis, UTF_8); + BufferedReader br = new BufferedReader(isr)) + { + int lineNumber = 0; + // loop through the example records and add them to the repository + for (String line = br.readLine(); line != null; line = br.readLine()) { + // increment the line number + lineNumber++; + + // trim the line + line = line.trim(); + + // skip any blank lines + if (line.length() == 0) continue; + + // skip any commented lines + if (line.startsWith("#")) continue; + + try { + // parse the line as a JSON object + JsonObject recordJson + = Json.createReader(new StringReader(line)).readObject(); + + // extract the data source code and record ID + String dataSourceCode = recordJson.getString(DATA_SOURCE, null); + String recordId = recordJson.getString(RECORD_ID, null); + + // call the addRecord() function with no flags + engine.addRecord( + SzRecordKey.of(dataSourceCode, recordId), line, SZ_NO_FLAGS); + + successCount++; + + } catch (JsonException|SzBadInputException e) { + logFailedRecord(ERROR, e, filePath, lineNumber, line); + errorCount++; // increment the error count + + } catch (SzRetryableException e) { + logFailedRecord(WARNING, e, filePath, lineNumber, line); + errorCount++; // increment the error count + retryCount++; // increment the retry count + + trackRetryRecord(line); + + } catch 
(Exception e) { + // catch any other exception (incl. SzException) here + logFailedRecord(CRITICAL, e, filePath, lineNumber, line); + errorCount++; + throw e; // rethrow since exception is critical + } + } + } + } + + // now that we have loaded the records, check for redos and handle them + while (engine.countRedoRecords() > 0) { + // get the next redo record + String redo = engine.getRedoRecord(); + + try { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // increment the redone count + redoneCount++; + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); + + } catch (Exception e) { + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; + } + } + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + + System.out.println(); + System.out.println("Records successfully added : " + successCount); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed records/redos : " + errorCount); + + // check on any retry records + if (retryWriter != null) { + retryWriter.flush(); + retryWriter.close(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + + } + + } + + /** + * Example method for logging failed records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param lineNumber The line number of the failed record in the JSON input file. + * @param recordJson The JSON text for the failed record. 
+ */ + private static void logFailedRecord(String errorType, + Exception exception, + String filePath, + int lineNumber, + String recordJson) + { + File file = new File(filePath); + String fileName = file.getName(); + + System.err.println(); + System.err.println( + "** " + errorType + " ** FAILED TO ADD RECORD IN " + fileName + + " AT LINE " + lineNumber + ": "); + System.err.println(recordJson); + System.err.println(exception); + System.err.flush(); + } + + /** + * Example method for logging failed redo records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The text of the redo record that failed to process; + * it is printed verbatim to standard error. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. 
+ */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } +} \ No newline at end of file diff --git a/java/snippets/redo/RedoContinuous.java b/java/snippets/redo/RedoContinuous.java new file mode 100644 index 0000000..cc9da66 --- /dev/null +++ b/java/snippets/redo/RedoContinuous.java @@ -0,0 +1,176 @@ +package redo; + +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class RedoContinuous { + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final long REDO_PAUSE_TIMEOUT = 30000L; + + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuous.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = 
SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo reords are available + if (redo == null) { + outputRedoStatistics(); + System.out.println(); + System.out.println( + "No redo records to process. Pausing for " + + REDO_PAUSE_DESCRIPTION + "...."); + System.out.println("Press CTRL-C to exit."); + try { + Thread.sleep(REDO_PAUSE_TIMEOUT); + } catch (InterruptedException ignore) { + // ignore the exception + } + continue; + } + + try { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // increment the redone count + redoneCount++; + + } catch (SzRetryableException e) { + logFailedRedo(WARNING, e, redo); + errorCount++; + retryCount++; + trackRetryRecord(redo); + + } catch (Exception e) { + logFailedRedo(CRITICAL, e, redo); + errorCount++; + throw e; + } + } + + + } catch (Exception e) { + System.err.println(); + System.err.println("*** Terminated due to critical error ***"); + System.err.flush(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // normally we would call env.destroy() here, but we have registered + // a shutdown hook to do that since termination will typically occur + // via CTRL-C being pressed, and the shutdown hook will still run if + // we get an exception + } + + } + + private static void outputRedoStatistics() { + System.out.println(); + System.out.println("Redos successfully processed : " + redoneCount); + System.out.println("Total failed 
records/redos : " + errorCount); + + // flush (but do not close) the retry writer: this method runs on every + // pause, and a closed PrintWriter silently drops later retry records + if (retryWriter != null) { + retryWriter.flush(); + } + if (retryCount > 0) { + System.out.println( + retryCount + " records/redos to be retried in " + retryFile); + } + System.out.flush(); + } + + /** + * Example method for logging failed redo records. + * + * @param errorType The error type description. + * @param exception The exception itself. + * @param redoRecord The text of the redo record that failed to process; + * it is printed verbatim to standard error. + */ + private static void logFailedRedo(String errorType, + Exception exception, + String redoRecord) + { + System.err.println(); + System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: "); + System.err.println(redoRecord); + System.err.println(exception); + System.err.flush(); + } + + /** + * Tracks the specified JSON record definition to be retried in a + * retry file. + * + * @param recordJson The JSON text defining the record to be retried. + * + * @throws IOException If a failure occurs in writing the record to the + * retry file. 
+ */ + private static void trackRetryRecord(String recordJson) + throws IOException + { + // track the retry record so it can be retried later + if (retryFile == null) { + retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX); + retryWriter = new PrintWriter( + new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8)); + } + retryWriter.println(recordJson); + } +} \ No newline at end of file diff --git a/java/snippets/redo/RedoContinuousViaFutures.java b/java/snippets/redo/RedoContinuousViaFutures.java new file mode 100644 index 0000000..84352e2 --- /dev/null +++ b/java/snippets/redo/RedoContinuousViaFutures.java @@ -0,0 +1,280 @@ +package redo; + +import java.io.*; +import java.util.*; +import java.util.concurrent.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of adding records to the Senzing repository. + */ +public class RedoContinuousViaFutures { + private static final String UTF_8 = "UTF-8"; + + private static final String RETRY_PREFIX = "retry-"; + private static final String RETRY_SUFFIX = ".jsonl"; + + private static final int THREAD_COUNT = 8; + + private static final int BACKLOG_FACTOR = 10; + + private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR; + + private static final long HANDLE_PAUSE_TIMEOUT = 100L; + + private static final long REDO_PAUSE_TIMEOUT = 30000L; + + private static final String REDO_PAUSE_DESCRIPTION = "30 seconds"; + + private static final String WARNING = "WARNING"; + private static final String CRITICAL = "CRITICAL"; + + private static int errorCount = 0; + private static int redoneCount = 0; + private static int retryCount = 0; + private static File retryFile = null; + private static PrintWriter retryWriter = null; + + public static void main(String[] args) { + // get the senzing repository settings + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == 
null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = RedoContinuousViaFutures.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + // create the thread pool and executor service + ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT); + + // keep track of pending futures and don't backlog too many for memory's sake + Map, String> pendingFutures = new IdentityHashMap<>(); + + // make sure we cleanup if exiting by CTRL-C or due to an exception + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + // shutdown the executor service + if (!executor.isShutdown()) executor.shutdown(); + + try { + handlePendingFutures(pendingFutures, true); + } catch (Exception e) { + e.printStackTrace(); + } + + // IMPORTANT: make sure to destroy the environment + env.destroy(); + outputRedoStatistics(); + })); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + while (true) { + // loop through the example records and queue them up so long + // as we have more records and backlog is not too large + while (pendingFutures.size() < MAXIMUM_BACKLOG) { + + // get the next redo record + String redo = engine.getRedoRecord(); + + // check if no redo reords are available + if (redo == null) break; + + Future future = executor.submit(() -> { + // process the redo record + engine.processRedoRecord(redo, SZ_NO_FLAGS); + + // return null since we have no "info" to return + return null; + }); + + // add the future to the pending future list + pendingFutures.put(future, redo); + } + + do { + // handle any pending futures WITHOUT blocking to reduce the backlog + handlePendingFutures(pendingFutures, false); + + // if we still have 
exceeded the backlog size then pause
+                    // briefly before trying again
+                    if (pendingFutures.size() >= MAXIMUM_BACKLOG) {
+                        try {
+                            Thread.sleep(HANDLE_PAUSE_TIMEOUT);
+
+                        } catch (InterruptedException ignore) {
+                            // do nothing
+                        }
+                    }
+                } while (pendingFutures.size() >= MAXIMUM_BACKLOG);
+
+                // check if there are no redo records right now
+                if (engine.countRedoRecords() == 0) {
+                    outputRedoStatistics();
+                    System.out.println();
+                    System.out.println(
+                        "No redo records to process. Pausing for "
+                        + REDO_PAUSE_DESCRIPTION + "....");
+                    System.out.println("Press CTRL-C to exit.");
+                    try {
+                        Thread.sleep(REDO_PAUSE_TIMEOUT);
+                    } catch (InterruptedException ignore) {
+                        // ignore the exception
+                    }
+                    continue;
+                }
+            }
+
+
+        } catch (Exception e) {
+            System.err.println();
+            System.err.println("*** Terminated due to critical error ***");
+            System.err.flush();
+            if (e instanceof RuntimeException) {
+                throw ((RuntimeException) e);
+            }
+            throw new RuntimeException(e);
+
+        } finally {
+            // normally we would call env.destroy() here, but we have registered
+            // a shutdown hook to do that since termination will typically occur
+            // via CTRL-C being pressed, and the shutdown hook will still run if
+            // we get an exception
+        }
+
+    }
+
+    /**
+     * Handles the futures in the specified map, removing each one that is
+     * handled and updating the success, error and retry counters according
+     * to its outcome.
+     *
+     * @param pendingFutures The map of pending futures to the redo records
+     *                       being processed.  The wildcard key type accepts
+     *                       a map keyed on any kind of {@link Future}.
+     * @param blocking <code>true</code> to wait for every future to complete,
+     *                 or <code>false</code> to skip futures that are not done.
+     *
+     * @throws Exception If a redo failed with a non-retryable exception.
+     */
+    private static void handlePendingFutures(
+        Map<? extends Future<?>, String> pendingFutures,
+        boolean blocking)
+        throws Exception
+    {
+        // check for completed futures
+        Iterator<? extends Map.Entry<? extends Future<?>, String>> iter
+            = pendingFutures.entrySet().iterator();
+
+        // loop through the pending futures
+        while (iter.hasNext()) {
+            // get the next pending future
+            Map.Entry<? extends Future<?>, String> entry = iter.next();
+            Future<?> future = entry.getKey();
+            String redoRecord = entry.getValue();
+
+            // if not blocking and this one is not done then continue
+            if (!blocking && !future.isDone()) continue;
+
+            // remove the pending future from the map
+            iter.remove();
+
+            try {
+                try {
+                    // get the value to see if there was an exception
+                    future.get();
+
+                    // if we get here then increment the success count
+                    redoneCount++;
+
+                } catch (InterruptedException e) {
+                    // this could only happen if blocking is true, just
+                    // rethrow so the outer handler logs it and treats
+                    // the redo record as retryable
+                    throw e;
+
+                } catch (ExecutionException e) {
+                    // if execution failed with an exception then rethrow
+                    Throwable cause = e.getCause();
+                    if ((cause == null) || !(cause instanceof Exception)) {
+                        // rethrow the execution exception
+                        throw e;
+                    }
+                    // cast to an Exception and rethrow
+                    throw ((Exception) cause);
+                }
+
+            } catch (SzRetryableException|InterruptedException|CancellationException e) {
+                // handle thread interruption and cancellation as retries
+                logFailedRedo(WARNING, e, redoRecord);
+                errorCount++;   // increment the error count
+                retryCount++;   // increment the retry count
+
+                // track the retry record so it can be retried later
+                trackRetryRecord(redoRecord);
+
+            } catch (Exception e) {
+                // catch any other exception (incl. SzException) here
+                logFailedRedo(CRITICAL, e, redoRecord);
+                errorCount++;
+                throw e; // rethrow since exception is critical
+            }
+        }
+    }
+
+    /**
+     * Prints the current redo statistics to standard output and flushes
+     * any retry records written so far.
+     */
+    private static void outputRedoStatistics() {
+        System.out.println();
+        System.out.println("Redos successfully processed : " + redoneCount);
+        System.out.println("Total failed records/redos : " + errorCount);
+
+        // check on any retry records -- only FLUSH the writer here: this
+        // method is called every time the redo queue is found empty and
+        // processing resumes afterwards, so closing the writer would cause
+        // every later retry record to be silently dropped by PrintWriter
+        if (retryWriter != null) {
+            retryWriter.flush();
+        }
+        if (retryCount > 0) {
+            System.out.println(
+                retryCount + " records/redos to be retried in " + retryFile);
+        }
+        System.out.flush();
+    }
+
+    /**
+     * Example method for logging failed redo records.
+     *
+     * @param errorType The error type description.
+     * @param exception The exception itself.
+     * @param redoRecord The text of the redo record that failed.
+     */
+    private static void logFailedRedo(String    errorType,
+                                      Exception exception,
+                                      String    redoRecord)
+    {
+        System.err.println();
+        System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: ");
+        System.err.println(redoRecord);
+        System.err.println(exception);
+        System.err.flush();
+    }
+
+    /**
+     * Tracks the specified JSON record definition to be retried in a
+     * retry file.
+     *
+     * @param recordJson The JSON text defining the record to be retried.
+     *
+     * @throws IOException If a failure occurs in writing the record to the
+     *                     retry file.
+     */
+    private static void trackRetryRecord(String recordJson)
+        throws IOException
+    {
+        // track the retry record so it can be retried later
+        if (retryFile == null) {
+            retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX);
+            retryWriter = new PrintWriter(
+                new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8));
+        }
+        retryWriter.println(recordJson);
+    }
+}
\ No newline at end of file
diff --git a/java/snippets/redo/RedoWithInfoContinuous.java b/java/snippets/redo/RedoWithInfoContinuous.java
new file mode 100644
index 0000000..9b54e3d
--- /dev/null
+++ b/java/snippets/redo/RedoWithInfoContinuous.java
@@ -0,0 +1,224 @@
+package redo;
+
+import java.io.*;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonNumber;
+import javax.json.JsonObject;
+
+import com.senzing.sdk.*;
+import com.senzing.sdk.core.SzCoreEnvironment;
+
+import static com.senzing.sdk.SzFlag.*;
+
+/**
+ * Provides a simple example of continuously processing redo records from
+ * the Senzing repository while tracking the entities affected by each redo.
+ */
+public class RedoWithInfoContinuous {
+    private static final String UTF_8 = "UTF-8";
+
+    private static final String RETRY_PREFIX = "retry-";
+    private static final String RETRY_SUFFIX = ".jsonl";
+
+    private static final long REDO_PAUSE_TIMEOUT = 30000L;
+
+    private static final String REDO_PAUSE_DESCRIPTION = "30 seconds";
+
+    private static final String AFFECTED_ENTITIES = "AFFECTED_ENTITIES";
+    private static final String ENTITY_ID = "ENTITY_ID";
+
+    private static final String WARNING = "WARNING";
+    private static final String CRITICAL = "CRITICAL";
+
+    private static int errorCount = 0;
+    private static int redoneCount = 0;
+    private static int retryCount = 0;
+    private static File retryFile = null;
+    private static PrintWriter retryWriter = null;
+    private static final Set<Long> entityIdSet = new HashSet<>();
+
+    /**
+     * Loops forever fetching and processing redo records, pausing whenever
+     * no redo record is available.
+     *
+     * @param args The command-line arguments (not used).
+     */
+    public static void main(String[] args) {
+        // get the senzing repository settings
+        String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON");
+        if (settings == null) {
+            System.err.println("Unable to get settings.");
+            throw new IllegalArgumentException("Unable to get settings");
+        }
+
+        // create a descriptive instance name (can be anything)
+        String instanceName = RedoWithInfoContinuous.class.getSimpleName();
+
+        // initialize the Senzing environment
+        SzEnvironment env = SzCoreEnvironment.newBuilder()
+            .settings(settings)
+            .instanceName(instanceName)
+            .verboseLogging(false)
+            .build();
+
+        // make sure we cleanup if exiting by CTRL-C or due to an exception
+        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+            // IMPORTANT: make sure to destroy the environment
+            env.destroy();
+            outputRedoStatistics();
+        }));
+
+        try {
+            // get the engine from the environment
+            SzEngine engine = env.getEngine();
+
+            while (true) {
+                // get the next redo record
+                String redo = engine.getRedoRecord();
+
+                // check if no redo records are available
+                if (redo == null) {
+                    outputRedoStatistics();
+                    System.out.println();
+                    System.out.println(
+                        "No redo records to process. Pausing for "
+                        + REDO_PAUSE_DESCRIPTION + "....");
+                    System.out.println("Press CTRL-C to exit.");
+                    try {
+                        Thread.sleep(REDO_PAUSE_TIMEOUT);
+                    } catch (InterruptedException ignore) {
+                        // ignore the exception
+                    }
+                    continue;
+                }
+
+                try {
+                    // process the redo record
+                    String info = engine.processRedoRecord(redo, SZ_WITH_INFO_FLAGS);
+
+                    // increment the redone count
+                    redoneCount++;
+
+                    // process the info
+                    processInfo(engine, info);
+
+                } catch (SzRetryableException e) {
+                    logFailedRedo(WARNING, e, redo);
+                    errorCount++;
+                    retryCount++;
+                    trackRetryRecord(redo);
+
+                } catch (Exception e) {
+                    logFailedRedo(CRITICAL, e, redo);
+                    errorCount++;
+                    throw e;
+                }
+            }
+
+
+        } catch (Exception e) {
+            System.err.println();
+            System.err.println("*** Terminated due to critical error ***");
+            System.err.flush();
+            if (e instanceof RuntimeException) {
+                throw ((RuntimeException) e);
+            }
+            throw new RuntimeException(e);
+
+        } finally {
+            // normally we would call env.destroy() here, but we have registered
+            // a shutdown hook to do that since termination will typically occur
+            // via CTRL-C being pressed, and the shutdown hook will still run if
+            // we get an exception
+        }
+
+    }
+
+    /**
+     * Prints the current redo statistics to standard output and flushes
+     * any retry records written so far.
+     */
+    private static void outputRedoStatistics() {
+        System.out.println();
+        System.out.println("Redos successfully processed : " + redoneCount);
+        System.out.println("Total entities affected : " + entityIdSet.size());
+        System.out.println("Total failed records/redos : " + errorCount);
+
+        // check on any retry records -- only FLUSH the writer here: this
+        // method is called from the main loop on every idle pause (as well
+        // as from the shutdown hook) and processing resumes afterwards, so
+        // closing the writer would cause every later retry record to be
+        // silently dropped by PrintWriter
+        if (retryWriter != null) {
+            retryWriter.flush();
+        }
+        if (retryCount > 0) {
+            System.out.println(
+                retryCount + " records/redos to be retried in " + retryFile);
+        }
+        System.out.flush();
+    }
+
+    /**
+     * Example method for logging failed redo records.
+     *
+     * @param errorType The error type description.
+     * @param exception The exception itself.
+     * @param redoRecord The text of the redo record that failed.
+     */
+    private static void logFailedRedo(String    errorType,
+                                      Exception exception,
+                                      String    redoRecord)
+    {
+        System.err.println();
+        System.err.println("** " + errorType + " ** FAILED TO PROCESS REDO: ");
+        System.err.println(redoRecord);
+        System.err.println(exception);
+        System.err.flush();
+    }
+
+    /**
+     * Tracks the specified JSON record definition to be retried in a
+     * retry file.
+     *
+     * @param recordJson The JSON text defining the record to be retried.
+     *
+     * @throws IOException If a failure occurs in writing the record to the
+     *                     retry file.
+     */
+    private static void trackRetryRecord(String recordJson)
+        throws IOException
+    {
+        // track the retry record so it can be retried later
+        if (retryFile == null) {
+            retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX);
+            retryWriter = new PrintWriter(
+                new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8));
+        }
+        retryWriter.println(recordJson);
+    }
+
+    /**
+     * Example method for parsing and handling the INFO message (formatted
+     * as JSON).  This example implementation simply tracks all entity ID's
+     * that appear as <code>"AFFECTED_ENTITIES"</code> to count the number
+     * of entities created for the records -- essentially a contrived
+     * data mart.
+     *
+     * @param engine The {@link SzEngine} used to check whether each affected
+     *               entity still exists.
+     * @param info The info message.
+     */
+    private static void processInfo(SzEngine engine, String info) {
+        JsonObject jsonObject;
+        // close the reader once the INFO message has been parsed
+        try (var reader = Json.createReader(new StringReader(info))) {
+            jsonObject = reader.readObject();
+        }
+        if (!jsonObject.containsKey(AFFECTED_ENTITIES)) return;
+        JsonArray affectedArr = jsonObject.getJsonArray(AFFECTED_ENTITIES);
+        for (JsonObject affected : affectedArr.getValuesAs(JsonObject.class)) {
+            JsonNumber number = affected.getJsonNumber(ENTITY_ID);
+            long entityId = number.longValue();
+
+            try {
+                // an entity that can be retrieved is tracked, one that is
+                // no longer found is dropped from the tracked set
+                engine.getEntity(entityId, null);
+                entityIdSet.add(entityId);
+            } catch (SzNotFoundException e) {
+                entityIdSet.remove(entityId);
+            } catch (SzException e) {
+                // simply log the exception, do not rethrow
+                System.err.println();
+                System.err.println("**** FAILED TO RETRIEVE ENTITY: " + entityId);
+                System.err.println(e.toString());
+                System.err.flush();
+            }
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/java/snippets/searching/SearchRecords.java b/java/snippets/searching/SearchRecords.java
new file mode 100644
index 0000000..b151b25
--- /dev/null
+++ b/java/snippets/searching/SearchRecords.java
@@ -0,0 +1,122 @@
+package searching;
+
+import java.io.StringReader;
+import java.util.*;
+import javax.json.*;
+
+import com.senzing.sdk.*;
+import com.senzing.sdk.core.SzCoreEnvironment;
+
+import static com.senzing.sdk.SzFlag.*;
+
+/**
+ * Provides a simple example of searching the Senzing repository for
+ * entities by attributes.
+ */
+public class SearchRecords {
+    /**
+     * Searches with each of the example criteria and prints the ID and
+     * name of every entity found.
+     *
+     * @param args The command-line arguments (not used).
+     */
+    public static void main(String[] args) {
+        // get the senzing repository settings
+        String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON");
+        if (settings == null) {
+            System.err.println("Unable to get settings.");
+            throw new IllegalArgumentException("Unable to get settings");
+        }
+
+        // create a descriptive instance name (can be anything)
+        String instanceName = SearchRecords.class.getSimpleName();
+
+        // initialize the Senzing environment
+        SzEnvironment env = SzCoreEnvironment.newBuilder()
+            .settings(settings)
+            .instanceName(instanceName)
+            .verboseLogging(false)
+            .build();
+
+        try {
+            // get the engine from the environment
+            SzEngine engine = env.getEngine();
+
+            // loop through the example search criteria and search on each
+            for (String criteria : getSearchCriteria()) {
+                // call the searchByAttributes() function with default flags
+                String result = engine.searchByAttributes(
+                    criteria, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS);
+
+                JsonObject jsonObj = Json.createReader(
+                    new StringReader(result)).readObject();
+
+                System.out.println();
+                JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES");
+                // a missing array is treated the same as an empty one
+                if (jsonArr == null || jsonArr.size() == 0) {
+                    System.out.println("No results for criteria: " + criteria);
+                } else {
+                    System.out.println("Results for criteria: " + criteria);
+                    for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) {
+                        obj = obj.getJsonObject("ENTITY");
+                        obj = obj.getJsonObject("RESOLVED_ENTITY");
+                        long   entityId = obj.getJsonNumber("ENTITY_ID").longValue();
+                        String name     = obj.getString("ENTITY_NAME", null);
+                        System.out.println(entityId + ": " + name);
+                    }
+                }
+                System.out.flush();
+            }
+
+        } catch (SzException e) {
+            // handle any exception that may have occurred
+            System.err.println("Senzing Error Message : " + e.getMessage());
+            System.err.println("Senzing Error Code : " + e.getErrorCode());
+            e.printStackTrace();
+            throw new RuntimeException(e);
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            if (e instanceof RuntimeException) {
+                throw ((RuntimeException) e);
+            }
+            throw new RuntimeException(e);
+
+        } finally {
+            // IMPORTANT: make sure to destroy the environment
+            env.destroy();
+        }
+
+    }
+
+    /**
+     * This is a support method for providing a list of criteria to search on.
+     *
+     * @return A {@link List} of {@link String} JSON text values describing
+     *         the sets of criteria with which to search.
+     */
+    public static List<String> getSearchCriteria() {
+        List<String> records = new LinkedList<>();
+        records.add(
+            """
+            {
+                "NAME_FULL": "Susan Moony",
+                "DATE_OF_BIRTH": "15/6/1998",
+                "SSN_NUMBER": "521212123"
+            }
+            """);
+
+        records.add(
+            """
+            {
+                "NAME_FIRST": "Robert",
+                "NAME_LAST": "Smith",
+                "ADDR_FULL": "123 Main Street Las Vegas NV 89132"
+            }
+            """);
+
+        records.add(
+            """
+            {
+                "NAME_FIRST": "Makio",
+                "NAME_LAST": "Yamanaka",
+                "ADDR_FULL": "787 Rotary Drive Rotorville FL 78720"
+            }
+            """);
+
+        return records;
+    }
+}
\ No newline at end of file
diff --git a/java/snippets/searching/SearchViaFutures.java b/java/snippets/searching/SearchViaFutures.java
new file mode 100644
index 0000000..cb7d0fe
--- /dev/null
+++ b/java/snippets/searching/SearchViaFutures.java
@@ -0,0 +1,296 @@
+package searching;
+
+import java.io.*;
+import javax.json.*;
+import java.util.*;
+import java.util.concurrent.*;
+import com.senzing.sdk.*;
+import com.senzing.sdk.core.SzCoreEnvironment;
+
+import static com.senzing.sdk.SzFlag.*;
+
+/**
+ * Provides a simple example of searching the Senzing repository by
+ * attributes using futures executed on a fixed-size thread pool.
+ */
+public class SearchViaFutures {
+    private static final String DEFAULT_FILE_PATH = "../resources/data/search-5K.jsonl";
+
+    private static final String UTF_8 = "UTF-8";
+
+    private static final String RETRY_PREFIX = "retry-";
+    private static final String RETRY_SUFFIX = ".jsonl";
+
+    private static final int THREAD_COUNT = 8;
+
+    private static final int BACKLOG_FACTOR = 10;
+
+    private static final int MAXIMUM_BACKLOG = THREAD_COUNT * BACKLOG_FACTOR;
+
+    private static final long PAUSE_TIMEOUT = 100L;
+
+    private static final String DATA_SOURCE = "DATA_SOURCE";
+    private static final String RECORD_ID = "RECORD_ID";
+
+    private static final String ERROR = "ERROR";
+    private static final String WARNING = "WARNING";
+    private static final String CRITICAL = "CRITICAL";
+
+    /**
+     * Pairs the JSON text of a search criteria line with the line number
+     * it was read from in the input file.
+     */
+    public record Criteria(int lineNumber, String line) { }
+
+    private static int errorCount = 0;
+    private static int successCount = 0;
+    private static int retryCount = 0;
+    private static File retryFile = null;
+    private static PrintWriter retryWriter = null;
+
+    private static Set<Long> foundEntities = new HashSet<>();
+
+    /**
+     * Reads search criteria from a JSON-lines file, submits each search to
+     * the thread pool and prints summary statistics when done.
+     *
+     * @param args The optional first argument is the path to the criteria
+     *             file; when absent the default file path is used.
+     */
+    public static void main(String[] args) {
+        // get the senzing repository settings
+        String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON");
+        if (settings == null) {
+            System.err.println("Unable to get settings.");
+            throw new IllegalArgumentException("Unable to get settings");
+        }
+
+        // create a descriptive instance name (can be anything)
+        String instanceName = SearchViaFutures.class.getSimpleName();
+
+        // initialize the Senzing environment
+        SzEnvironment env = SzCoreEnvironment.newBuilder()
+            .settings(settings)
+            .instanceName(instanceName)
+            .verboseLogging(false)
+            .build();
+
+        String filePath = (args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
+
+        // create the thread pool and executor service
+        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
+
+        // keep track of pending futures and don't backlog too many for memory's sake
+        Map<Future<String>, Criteria> pendingFutures = new IdentityHashMap<>();
+
+        try (FileInputStream fis = new FileInputStream(filePath);
+             InputStreamReader isr = new InputStreamReader(fis, UTF_8);
+             BufferedReader br = new BufferedReader(isr))
+        {
+            // get the engine from the environment
+            SzEngine engine = env.getEngine();
+
+            int lineNumber = 0;
+            boolean eof = false;
+
+            while (!eof) {
+                // loop through the example records and queue them up so long
+                // as we have more records and backlog is not too large
+                while (pendingFutures.size() < MAXIMUM_BACKLOG) {
+                    // read the next line
+                    String line = br.readLine();
+                    lineNumber++;
+
+                    // check for EOF
+                    if (line == null) {
+                        eof = true;
+                        break;
+                    }
+
+                    // trim the line
+                    line = line.trim();
+
+                    // skip any blank lines
+                    if (line.length() == 0) continue;
+
+                    // skip any commented lines
+                    if (line.startsWith("#")) continue;
+
+                    // construct the Criteria instance
+                    Criteria criteria = new Criteria(lineNumber, line);
+
+                    try {
+                        Future<String> future = executor.submit(() -> {
+                            // call the searchByAttributes() function with default flags
+                            return engine.searchByAttributes(
+                                criteria.line, SZ_SEARCH_BY_ATTRIBUTES_DEFAULT_FLAGS);
+                        });
+
+                        // add the future to the pending future list
+                        pendingFutures.put(future, criteria);
+
+                    } catch (JsonException e) {
+                        logFailedSearch(ERROR, e, lineNumber, line);
+                        errorCount++;   // increment the error count
+                    }
+                }
+
+                do {
+                    // handle any pending futures WITHOUT blocking to reduce the backlog
+                    handlePendingFutures(pendingFutures, false);
+
+                    // if we still have exceeded the backlog size then pause
+                    // briefly before trying again
+                    if (pendingFutures.size() >= MAXIMUM_BACKLOG) {
+                        try {
+                            Thread.sleep(PAUSE_TIMEOUT);
+
+                        } catch (InterruptedException ignore) {
+                            // do nothing
+                        }
+                    }
+                } while (pendingFutures.size() >= MAXIMUM_BACKLOG);
+            }
+
+            // shutdown the executor service
+            executor.shutdown();
+
+            // after we have submitted all records we need to handle the remaining
+            // pending futures so this time we block on each future
+            handlePendingFutures(pendingFutures, true);
+
+        } catch (Exception e) {
+            System.err.println();
+            System.err.println("*** Terminated due to critical error ***");
+            System.err.flush();
+            if (e instanceof RuntimeException) {
+                throw ((RuntimeException) e);
+            }
+            throw new RuntimeException(e);
+
+        } finally {
+            // check if executor service is shutdown
+            if (!executor.isShutdown()) {
+                executor.shutdown();
+            }
+
+            // IMPORTANT: make sure to destroy the environment
+            env.destroy();
+
+            System.out.println();
+            System.out.println(
+                "Searches successfully completed : " + successCount);
+            System.out.println(
+                "Total entities found via searches : " + foundEntities.size());
+            System.out.println(
+                "Searches failed with errors : " + errorCount);
+
+            // check on any retry records
+            if (retryWriter != null) {
+                retryWriter.flush();
+                retryWriter.close();
+            }
+            if (retryCount > 0) {
+                System.out.println(retryCount + " records to be retried in " + retryFile);
+            }
+            System.out.flush();
+
+        }
+
+    }
+
+    /**
+     * Handles the futures in the specified map, removing each one that is
+     * handled, recording the entity ID's found by successful searches and
+     * updating the success, error and retry counters.
+     *
+     * @param pendingFutures The map of pending futures to the criteria
+     *                       being searched.
+     * @param blocking <code>true</code> to wait for every future to complete,
+     *                 or <code>false</code> to skip futures that are not done.
+     *
+     * @throws Exception If a search failed with a critical exception.
+     */
+    private static void handlePendingFutures(Map<Future<String>, Criteria> pendingFutures,
+                                             boolean blocking)
+        throws Exception
+    {
+        // check for completed futures
+        Iterator<Map.Entry<Future<String>,Criteria>> iter
+            = pendingFutures.entrySet().iterator();
+
+        // loop through the pending futures
+        while (iter.hasNext()) {
+            // get the next pending future
+            Map.Entry<Future<String>,Criteria> entry = iter.next();
+            Future<String> future = entry.getKey();
+            Criteria criteria = entry.getValue();
+
+            // if not blocking and this one is not done then continue
+            if (!blocking && !future.isDone()) continue;
+
+            // remove the pending future from the map
+            iter.remove();
+
+            try {
+                try {
+                    // get the value and check for an exception
+                    String results = future.get();
+
+                    // if we get here then increment the success count
+                    successCount++;
+
+                    // parse the results
+                    JsonObject jsonObj = Json.createReader(
+                        new StringReader(results)).readObject();
+
+                    JsonArray jsonArr = jsonObj.getJsonArray("RESOLVED_ENTITIES");
+                    // a missing array is treated as "no entities found"
+                    if (jsonArr != null) {
+                        for (JsonObject obj : jsonArr.getValuesAs(JsonObject.class)) {
+                            obj = obj.getJsonObject("ENTITY");
+                            obj = obj.getJsonObject("RESOLVED_ENTITY");
+                            long entityId = obj.getJsonNumber("ENTITY_ID").longValue();
+                            foundEntities.add(entityId);
+                        }
+                    }
+
+
+                } catch (InterruptedException e) {
+                    // this could only happen if blocking is true, just
+                    // rethrow so the outer handler logs it and treats
+                    // the criteria as retryable
+                    throw e;
+
+                } catch (ExecutionException e) {
+                    // if execution failed with an exception then rethrow
+                    Throwable cause = e.getCause();
+                    if ((cause == null) || !(cause instanceof Exception)) {
+                        // rethrow the execution exception
+                        throw e;
+                    }
+                    // cast to an Exception and rethrow
+                    throw ((Exception) cause);
+                }
+
+            } catch (SzBadInputException e) {
+                logFailedSearch(ERROR, e, criteria.lineNumber, criteria.line);
+                errorCount++;   // increment the error count
+
+            } catch (SzRetryableException|InterruptedException|CancellationException e) {
+                // handle thread interruption and cancellation as retries
+                logFailedSearch(WARNING, e, criteria.lineNumber, criteria.line);
+                errorCount++;   // increment the error count
+                retryCount++;   // increment the retry count
+
+                // track the retry record so it can be retried later
+                if (retryFile == null) {
+                    retryFile = File.createTempFile(RETRY_PREFIX, RETRY_SUFFIX);
+                    retryWriter = new PrintWriter(
+                        new OutputStreamWriter(new FileOutputStream(retryFile), UTF_8));
+                }
+                retryWriter.println(criteria.line);
+
+            } catch (Exception e) {
+                // catch any other exception (incl. SzException) here
+                logFailedSearch(CRITICAL, e, criteria.lineNumber, criteria.line);
+                errorCount++;
+                throw e; // rethrow since exception is critical
+            }
+        }
+    }
+
+    /**
+     * Example method for logging failed records.
+     *
+     * @param errorType The error type description.
+     * @param exception The exception itself.
+     * @param lineNumber The line number of the failed record in the JSON input file.
+     * @param criteriaJson The JSON text for the failed search criteria.
+     */
+    private static void logFailedSearch(String      errorType,
+                                        Exception   exception,
+                                        int         lineNumber,
+                                        String      criteriaJson)
+    {
+        System.err.println();
+        System.err.println(
+            "** " + errorType + " ** FAILED TO SEARCH CRITERIA AT LINE " + lineNumber + ": ");
+        System.err.println(criteriaJson);
+        System.err.println(exception);
+        System.err.flush();
+    }
+
+}
\ No newline at end of file
diff --git a/resources/data/truthset/customers.jsonl b/resources/data/truthset/customers.jsonl
new file mode 100644
index 0000000..a8a583e
--- /dev/null
+++ b/resources/data/truthset/customers.jsonl
@@ -0,0 +1,120 @@
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1001", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Robert", "DATE_OF_BIRTH": "12/11/1978", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "123 Main Street, Las Vegas NV 89132", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "702-919-1300", "EMAIL_ADDRESS": "bsmith@work.com", "DATE": "1/2/18", "STATUS": "Active", "AMOUNT": "100"}
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1002", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Bob", "DATE_OF_BIRTH": "11/12/1978", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1515 Adela Lane", "ADDR_CITY": "Las Vegas", "ADDR_STATE": "NV", "ADDR_POSTAL_CODE": "89111", "PHONE_TYPE": "MOBILE", "PHONE_NUMBER": "702-919-1300", "DATE": "3/10/17", "STATUS": "Inactive", "AMOUNT": "200"}
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1003", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Bob", "PRIMARY_NAME_MIDDLE": "J", "DATE_OF_BIRTH": "12/11/1978", "EMAIL_ADDRESS": "bsmith@work.com", "DATE": "4/9/16", "STATUS": "Inactive", "AMOUNT": "300"}
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1004", 
"RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "B", "DATE_OF_BIRTH": "11/12/1979", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1515 Adela Ln", "ADDR_CITY": "Las Vegas", "ADDR_STATE": "NV", "ADDR_POSTAL_CODE": "89132", "EMAIL_ADDRESS": "bsmith@work.com", "DATE": "1/5/15", "STATUS": "Inactive", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1005", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Robbie", "DRIVERS_LICENSE_NUMBER": "112233", "DRIVERS_LICENSE_STATE": "NV", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "123 E Main St", "ADDR_CITY": "Henderson", "ADDR_STATE": "NV", "ADDR_POSTAL_CODE": "89132", "DATE": "7/16/19", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1009", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Edward", "DATE_OF_BIRTH": "3/1/1970", "SSN_NUMBER": "294-66-9999", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_CITY": "Blacklick", "ADDR_STATE": "OH", "ADDR_POSTAL_CODE": "43004", "EMAIL_ADDRESS": "Kusha123@hmail.com", "DATE": "1/7/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1010", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Eddie", "DATE_OF_BIRTH": "Mar 1 1970", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_CITY": "Blacklick", "ADDR_STATE": "OHIO", "DATE": "1/8/16", "STATUS": "Inactive", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1011", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Knight", "PRIMARY_NAME_FIRST": "Ed", "DATE_OF_BIRTH": "3/1/70", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1602 Brenville Pl", "ADDR_CITY": "San Francisco", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "94105", "DATE": "10/9/15", "STATUS": "Terminated", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1015", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Mary ", "DATE_OF_BIRTH": 
"10/27/76", "SSN_NUMBER": "293-90-9090", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_CITY": "Blacklick", "ADDR_STATE": "OH", "ADDR_POSTAL_CODE": "43004", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "512-353-8633", "EMAIL_ADDRESS": "Kusha123@hmail.com", "DATE": "1/10/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1016", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Marie", "DATE_OF_BIRTH": "10/27/76", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_POSTAL_CODE": "43004", "DATE": "1/11/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1017", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Mary ", "SSN_NUMBER": "293-90-9090", "DATE": "1/12/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1018", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Marie", "DATE_OF_BIRTH": "10/28/76", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "512-353-8633", "DATE": "1/13/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1019", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Mark", "DATE_OF_BIRTH": "9/28/97", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_CITY": "Blacklick", "ADDR_STATE": "OH", "ADDR_POSTAL_CODE": "43004", "EMAIL_ADDRESS": "Kusha123@hmail.com", "DATE": "1/14/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1020", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Marsha", "DATE_OF_BIRTH": "9/28/97", "SSN_NUMBER": "201-77-7719", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", "ADDR_CITY": "Blacklick", "ADDR_STATE": "OH", "ADDR_POSTAL_CODE": "43004", "EMAIL_ADDRESS": "Kusha123@hmail.com", "DATE": "1/15/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1022", 
"RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Antoun", "PRIMARY_NAME_FIRST": "Mohamed", "DATE_OF_BIRTH": "1/7/80", "DATE": "1/16/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1023", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Antoun", "PRIMARY_NAME_FIRST": "Muhammed", "DATE_OF_BIRTH": "1/7/80", "DATE": "1/17/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1025", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Darla", "DATE_OF_BIRTH": "1/7/80", "DATE": "1/18/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1026", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Darlene", "DATE_OF_BIRTH": "1/7/80", "DATE": "1/19/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1028", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Dobbins Jr", "PRIMARY_NAME_FIRST": "David", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "1450 N City Rd Suite 900", "ADDR_CITY": "Arlington", "ADDR_STATE": "VA", "ADDR_POSTAL_CODE": "23208", "DATE": "1/20/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1030", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Garski", "PRIMARY_NAME_FIRST": "Luis", "DATE_OF_BIRTH": "3/25/89", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "445 Overpass Rd ", "ADDR_CITY": "San Ramon ", "ADDR_STATE": "CA ", "ADDR_POSTAL_CODE": "927230000", "DATE": "1/21/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1031", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Garsky", "PRIMARY_NAME_FIRST": "Louis", "DATE_OF_BIRTH": "3/25/89", "ADDR_TYPE": "HOME", "ADDR_LINE1": "445 Overpass Rd San Ramon", "DATE": "1/22/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1032", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Shaw", "PRIMARY_NAME_FIRST": "Daniella", "DATE_OF_BIRTH": "20/8/1991", 
"PHONE_TYPE": "HOME", "PHONE_NUMBER": "202-321-3212", "DATE": "1/23/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1033", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Daniella", "PRIMARY_NAME_FIRST": "Shaw", "DATE_OF_BIRTH": "8/20/91", "ADDR_TYPE": "HOME", "ADDR_LINE1": "80 Delaware Ave SE Washington DC 40040", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "321-3212", "DATE": "1/24/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1034", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Medina Sentosa", "PRIMARY_NAME_FIRST": "Maria Luis", "DATE_OF_BIRTH": "11/21/73", "ADDR_TYPE": "HOME", "ADDR_LINE1": "9304 W. 15th St La Blanca, FL 60527", "EMAIL_ADDRESS": "Maria Sentosa", "DATE": "1/25/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1035", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "M Sentosa", "PRIMARY_NAME_FIRST": "Maria Luis", "DATE_OF_BIRTH": "11/12/73", "ADDR_TYPE": "HOME", "ADDR_LINE1": "9304 W. 15th St La Blanca, FL 60527", "DATE": "1/26/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1036", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Sentosa", "PRIMARY_NAME_FIRST": "Maria Luis", "DATE_OF_BIRTH": "11/12/73", "ADDR_TYPE": "HOME", "ADDR_LINE1": "9304 W. 15th St La Blanca, FL 60527", "DATE": "1/27/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1039", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "John", "GENDER": "M", "DATE_OF_BIRTH": "10/10/70", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 32nd St Palm Harbor, FL 60527", "DATE": "1/28/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1040", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "John", "DATE_OF_BIRTH": "3/15/90", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 
32nd St Palm Harbor, FL 60527", "DATE": "1/29/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1043", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Patrick", "DATE_OF_BIRTH": "10/10/70", "PASSPORT_NUMBER": "10251111", "PASSPORT_COUNTRY": "US", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 32nd St Palm Harbor, FL 60527", "DATE": "1/30/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1044", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Patricia", "DATE_OF_BIRTH": "3/15/90", "PASSPORT_NUMBER": "10252222", "PASSPORT_COUNTRY": "US", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 32nd St Palm Harbor, FL 60527", "DATE": "1/31/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1045", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Pat", "PASSPORT_NUMBER": "10251111", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 32nd St Palm Harbor, FL 60527", "DATE": "1/2/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1046", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Pat", "PASSPORT_NUMBER": "10252222", "PASSPORT_COUNTRY": "USA", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 
32nd St Palm Harbor, FL 60527", "DATE": "1/3/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1047", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Thompson", "PRIMARY_NAME_FIRST": "Zara", "EMAIL_ADDRESS": "sthomp45@fmail.com", "DATE": "1/4/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1048", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Tompson", "PRIMARY_NAME_FIRST": "Sarah", "EMAIL_ADDRESS": "sthomp45@fmail.com", "DATE": "1/5/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1049", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Thompson", "PRIMARY_NAME_FIRST": "Sahra", "EMAIL_ADDRESS": "sthomp45@fmail.com", "DATE": "1/6/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1050", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Toulouse", "PRIMARY_NAME_FIRST": "Lee", "DATE_OF_BIRTH": "2/1/85", "PASSPORT_NUMBER": "483290175", "PASSPORT_COUNTRY": "USA", "DATE": "1/7/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1051", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Toulouse", "PRIMARY_NAME_FIRST": "Leigh", "DATE_OF_BIRTH": "1/2/85", "PASSPORT_NUMBER": "483290175", "PASSPORT_COUNTRY": "US", "DATE": "1/8/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1052", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Toulouse", "PRIMARY_NAME_FIRST": "Lea", "PASSPORT_NUMBER": "483290175", "PASSPORT_COUNTRY": "US", "DATE": "1/9/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1053", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Beau", "PASSPORT_NUMBER": "72129291", "PASSPORT_COUNTRY": "CA", "ADDR_TYPE": "HOME", "ADDR_LINE1": "6371 E Foothill Dr, Orroville, CA ", "DATE": "1/10/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1054", "RECORD_TYPE": "PERSON", 
"PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Magdalena", "DATE_OF_BIRTH": "24-May-11", "DRIVERS_LICENSE_NUMBER": "93939211", "DRIVERS_LICENSE_STATE": "CA", "ADDR_TYPE": "HOME", "ADDR_LINE1": "6371 E Foothill Dr, Orroville, CA 95915", "DATE": "1/11/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1055", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FIRST": "Beau", "PASSPORT_NUMBER": "72129291", "PASSPORT_COUNTRY": "CAN", "ADDR_TYPE": "HOME", "ADDR_LINE1": "6371 E Foothill Dr, 95915", "DATE": "1/12/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1056", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Jones", "PRIMARY_NAME_FIRST": "Magdalena", "DATE_OF_BIRTH": "5/24/11", "DRIVERS_LICENSE_NUMBER": "93939211", "DRIVERS_LICENSE_STATE": "CA", "ADDR_TYPE": "HOME", "ADDR_LINE1": "6371 E Foothill Dr, Orroville, CA ", "DATE": "1/13/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1057", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Jones", "PRIMARY_NAME_FIRST": "Jay", "EMAIL_ADDRESS": "jjones@jones.com", "DATE": "1/14/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1058", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Jay", "PRIMARY_NAME_FIRST": "Jones", "EMAIL_ADDRESS": "\"Jay Jones\" ", "DATE": "1/15/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1059", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Roderick", "PRIMARY_NAME_FIRST": "Ray", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "971-421-8250", "DATE": "1/16/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1060", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Roderick", "PRIMARY_NAME_FIRST": "R", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "9714218250", "DATE": "1/17/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1061", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": 
"Andreason", "PHONE_TYPE": "MOBILE", "PHONE_NUMBER": "(807) 422-9031", "DATE": "1/18/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1062", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Andreason", "PHONE_NUMBER": "807-422-9031", "DATE": "1/19/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1063", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Mooney", "PRIMARY_NAME_FIRST": "Susan", "DATE_OF_BIRTH": "6/15/98", "DATE": "1/20/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1064", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Mooney", "PRIMARY_NAME_FIRST": "Susanne", "DATE_OF_BIRTH": "6/15/98", "PASSPORT_NUMBER": "1231345345", "PASSPORT_COUNTRY": "US", "DATE": "1/21/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1065", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Mooney", "PRIMARY_NAME_FIRST": "Susan", "PASSPORT_NUMBER": "1231345345", "PASSPORT_COUNTRY": "US", "DRIVERS_LICENSE_NUMBER": "8923322", "DRIVERS_LICENSE_STATE": "OR", "DATE": "1/22/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1066", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Mooney", "PRIMARY_NAME_FIRST": "Susan", "DRIVERS_LICENSE_NUMBER": "8923322", "DRIVERS_LICENSE_STATE": "OR", "SSN_NUMBER": "521-21-2123", "DATE": "1/23/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1067", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Moonie", "PRIMARY_NAME_FIRST": "Susan", "SSN_NUMBER": "521212123", "ADDR_TYPE": "HOME", "ADDR_LINE1": "638 Downey St, Salem, OR", "DATE": "1/24/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1068", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Moony", "PRIMARY_NAME_FIRST": "Susan", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "Adventura Aparments 638 Downey St, Salem, OR", "DATE": "1/25/18", "STATUS": "Active", "AMOUNT": 
"900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1069", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u738b\u6770", "GENDER": "M", "DATE_OF_BIRTH": "9/14/93", "NATIONAL_ID_NUMBER": "832721", "ADDR_TYPE": "HOME", "ADDR_LINE1": "12 Constitution Street ", "DATE": "1/26/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1070", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Wang", "PRIMARY_NAME_FIRST": "Jie", "GENDER": "Male", "DATE_OF_BIRTH": "9/14/93", "NATIONAL_ID_NUMBER": "832721", "NATIONAL_ID_COUNTRY": "Hong Kong", "ADDR_TYPE": "HOME", "ADDR_LINE1": "12 Constitution Street ", "DATE": "1/27/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1071", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u738b\u4f1f", "GENDER": "F", "DATE_OF_BIRTH": "9/14/97", "NATIONAL_ID_NUMBER": "7123833", "NATIONAL_ID_COUNTRY": "China", "ADDR_TYPE": "HOME", "ADDR_LINE1": "169 3rd Ave. Camden, NJ 08030", "DATE": "1/28/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1072", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Wang", "PRIMARY_NAME_FIRST": "Wei", "GENDER": "Female", "DATE_OF_BIRTH": "9/14/97", "NATIONAL_ID_NUMBER": "7123833", "NATIONAL_ID_COUNTRY": "China", "ADDR_TYPE": "HOME", "ADDR_LINE1": "169 3rd Ave. 
Camden, NJ 08030", "DATE": "1/29/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1073", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u5f20\u4f1f", "GENDER": "M", "DATE_OF_BIRTH": "8/2/06", "ADDR_TYPE": "HOME", "ADDR_LINE1": "173 John Lane, Camden, NJ 08030", "DATE": "1/30/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1074", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Zhang", "PRIMARY_NAME_FIRST": "Wei", "GENDER": "Male", "DATE_OF_BIRTH": "2/8/06", "ADDR_TYPE": "HOME", "ADDR_LINE1": "173 John Lane, 08030", "DATE": "1/31/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1075", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u5f20\u79c0\u82f1", "GENDER": "F", "DATE_OF_BIRTH": "2/4/31", "ADDR_TYPE": "HOME", "ADDR_LINE1": "329 Leatherwood Street, Las Vegas, 89117", "DATE": "1/2/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1076", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Zhang", "PRIMARY_NAME_FIRST": "Xiu Ying", "GENDER": "Female", "DATE_OF_BIRTH": "4/2/31", "ADDR_TYPE": "HOME", "ADDR_LINE1": "329 Leatherwood Street, Las Vegas, NV", "DATE": "1/3/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1077", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u5218\u6770", "GENDER": "F", "DATE_OF_BIRTH": "6/25/08", "ADDR_TYPE": "HOME", "ADDR_LINE1": "37 Campfire St. ", "DATE": "1/4/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1078", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Liu", "PRIMARY_NAME_FIRST": "Jie", "GENDER": "Unknown", "DATE_OF_BIRTH": "25-Jun-08", "ADDR_TYPE": "HOME", "ADDR_LINE1": "37 Campfire St. 
", "DATE": "1/5/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1079", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Brown", "PRIMARY_NAME_FIRST": "Jeffrey", "GENDER": "U", "DATE_OF_BIRTH": "6/21/82", "SSN_NUMBER": "3241", "DATE": "1/6/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1080", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Brown Jr", "PRIMARY_NAME_FIRST": "Geoffrey", "GENDER": "M", "DATE_OF_BIRTH": "6/21/82", "SSN_NUMBER": "3241", "DATE": "1/7/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1081", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Frankens", "PRIMARY_NAME_FIRST": "George", "DATE_OF_BIRTH": "15-Mar-92", "PASSPORT_NUMBER": "234456456", "PASSPORT_COUNTRY": "DE", "ADDR_TYPE": "HOME", "ADDR_LINE1": "Ansbacher Strasse 23, 56422 Dusseldorf", "ADDR_POSTAL_CODE": "56244", "DATE": "1/8/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1082", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Frankens", "PRIMARY_NAME_FIRST": "Georg", "DATE_OF_BIRTH": "15-Mar-92", "PASSPORT_NUMBER": "234456456", "PASSPORT_COUNTRY": "Germany", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "23 Ansbacher Street", "ADDR_CITY": "Dusseldorf", "ADDR_POSTAL_CODE": "56244", "ADDR_COUNTRY": "Germany", "DATE": "1/9/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1083", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Klempski", "PRIMARY_NAME_FIRST": "Morris", "DATE_OF_BIRTH": "17-May-90", "PASSPORT_NUMBER": "34543555", "PASSPORT_COUNTRY": "CA", "ADDR_TYPE": "HOME", "ADDR_LINE1": "Skyline Apartments, 705 Sheppard Ave", "ADDR_CITY": "Toronto", "ADDR_POSTAL_CODE": "M1S 1T4", "DATE": "1/10/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1084", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Klempsky", "PRIMARY_NAME_FIRST": "Morrie", "DATE_OF_BIRTH": "17-May-90", 
"PASSPORT_NUMBER": "34543555", "PASSPORT_COUNTRY": "Canada", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "705 Sheppard Ave", "ADDR_CITY": "Toronto", "ADDR_POSTAL_CODE": "M1S 1T4", "ADDR_COUNTRY": "CAN", "DATE": "1/11/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1085", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "D'esquire", "PRIMARY_NAME_FIRST": "Ellie", "DATE_OF_BIRTH": "19-Feb-91", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "0352 6553537", "EMAIL_ADDRESS": "dellie@fmail.com", "DATE": "1/12/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1086", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Desqueir", "PRIMARY_NAME_FIRST": "Ellie", "DATE_OF_BIRTH": "19-Feb-91", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "+39 0352 6553537", "EMAIL_ADDRESS": "dellie@fmail.com", "DATE": "1/13/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1087", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Wiest", "PRIMARY_NAME_FIRST": "George", "GENDER": "M", "DATE_OF_BIRTH": "3/12/87", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "702-221-2412", "EMAIL_ADDRESS": "pfranks@ishmail.com", "DATE": "1/14/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1088", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Weest", "PRIMARY_NAME_FIRST": "George", "GENDER": "F", "DATE_OF_BIRTH": "3/12/87", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "221-2412", "EMAIL_ADDRESS": "pfranks@ishmail.com", "DATE": "1/15/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1089", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Klein", "PRIMARY_NAME_FIRST": "Morris I", "DATE_OF_BIRTH": "4/12/82", "DATE": "1/16/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1090", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Klein", "PRIMARY_NAME_FIRST": "Morris II", "DATE_OF_BIRTH": "4/12/82", "DATE": "1/17/18", "STATUS": "Active", "AMOUNT": "400"} 
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1091", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Ohare", "PRIMARY_NAME_FIRST": "Ellie", "DATE_OF_BIRTH": "8/15/67", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "0352 6553537", "EMAIL_ADDRESS": "ellie.ohare@fmail.com", "DATE": "1/18/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1092", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "O'hare", "PRIMARY_NAME_FIRST": "Ellie", "DATE_OF_BIRTH": "8/15/67", "PHONE_TYPE": "HOME", "PHONE_NUMBER": "+39 0352 6553537", "EMAIL_ADDRESS": "ellie.ohare@fmail.com", "DATE": "1/19/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1093", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Amanda", "DATE_OF_BIRTH": "3/12/87", "DRIVERS_LICENSE_NUMBER": "73423499", "DRIVERS_LICENSE_STATE": "MN", "DATE": "1/20/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1094", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Jones", "PRIMARY_NAME_FIRST": "Amanda", "DATE_OF_BIRTH": "3/12/87", "DRIVERS_LICENSE_NUMBER": "73423499", "DRIVERS_LICENSE_STATE": "MN", "DATE": "1/21/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1095", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Aguilar", "PRIMARY_NAME_FIRST": "Juan", "GENDER": "Male", "DATE_OF_BIRTH": "4/12/82", "DRIVERS_LICENSE_NUMBER": "234234455", "DRIVERS_LICENSE_STATE": "MN", "DATE": "1/22/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1096", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Aguilar", "PRIMARY_NAME_FIRST": "Juann", "DATE_OF_BIRTH": "4/12/82", "DRIVERS_LICENSE_NUMBER": "234234455", "DRIVERS_LICENSE_STATE": "MN", "DATE": "1/23/18", "STATUS": "Active", "AMOUNT": "100"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1097", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Sanchez", "PRIMARY_NAME_FIRST": "Marie", "ADDR_TYPE": "MAILING", 
"ADDR_LINE1": "P.O. Box 12987", "ADDR_CITY": "Andersonville", "ADDR_STATE": "IL", "ADDR_POSTAL_CODE": "60611", "PHONE_TYPE": "MOBILE", "EMAIL_ADDRESS": "mickey@mmail.com", "DATE": "1/24/18", "STATUS": "Active", "AMOUNT": "200"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1098", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Sanchez Mendoza", "PRIMARY_NAME_FIRST": "Marie", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "PO BOX 12987", "ADDR_CITY": "Chicago", "ADDR_STATE": "IL", "ADDR_POSTAL_CODE": "60611", "PHONE_TYPE": "MOBILE", "EMAIL_ADDRESS": "mickey@mmail.com", "DATE": "1/25/18", "STATUS": "Active", "AMOUNT": "300"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1099", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Aguilar", "PRIMARY_NAME_FIRST": "Anna Maria", "GENDER": "Female", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1812 Overture way", "ADDR_CITY": "Chicago", "ADDR_STATE": "IL", "PHONE_TYPE": "MOBILE", "EMAIL_ADDRESS": "mouse@mmail.com", "DATE": "1/26/18", "STATUS": "Active", "AMOUNT": "400"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1100", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Aguilar", "PRIMARY_NAME_FIRST": "Anna", "PRIMARY_NAME_MIDDLE": "Marie", "GENDER": "Unknown", "ADDR_TYPE": "HOME", "ADDR_LINE1": "9881 Freedom way", "ADDR_CITY": "Chicago", "ADDR_STATE": "IL", "PHONE_TYPE": "MOBILE", "EMAIL_ADDRESS": "mouse@mmail.com", "DATE": "1/27/18", "STATUS": "Active", "AMOUNT": "500"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1101", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Miller", "PRIMARY_NAME_FIRST": "Mark", "EMAIL_ADDRESS": "mark@marksfoods.com", "DATE": "1/28/18", "STATUS": "Active", "AMOUNT": "600"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1102", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Milner", "PRIMARY_NAME_FIRST": "Mark", "EMAIL_ADDRESS": "mark@marksfoods.com", "DATE": "1/29/18", "STATUS": "Active", "AMOUNT": "700"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1103", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Becker", "PRIMARY_NAME_FIRST": 
"Anabella", "GENDER": "U", "DRIVERS_LICENSE_NUMBER": "823123", "DRIVERS_LICENSE_STATE": "TX", "DATE": "1/30/18", "STATUS": "Active", "AMOUNT": "800"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "1104", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Becker", "PRIMARY_NAME_FIRST": "Annabelle", "GENDER": "F", "DRIVERS_LICENSE_NUMBER": "823123", "DRIVERS_LICENSE_STATE": "Texas", "DATE": "1/31/18", "STATUS": "Active", "AMOUNT": "900"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2011", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Hajah Mamunah (Jln Pisang)", "ADDR_TYPE": "BUSINESS", "ADDR_FULL": "#01-11, HillV2, 4 Hillview Rise, 667979", "ADDR_COUNTRY": "Singapore", "DATE": "1/31/18", "STATUS": "Inactive", "CATEGORY": "Platinum"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2031", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "\u1782\u17b9\u1798", "PRIMARY_NAME_FIRST": "\u178f\u17b6\u179a\u17b6", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "Street 128 Phnom Penh Cambodia", "DATE": "3/15/1992", "STATUS": "Active", "CATEGORY": "Gold"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2032", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kim", "PRIMARY_NAME_FIRST": "Dara", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "Street 128 ", "ADDR_CITY": "Phnom Penh", "ADDR_COUNTRY": "Cambodia", "DATE": "3/12/1998", "STATUS": "Active", "CATEGORY": "Silver"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2042", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Mullenkrants ", "SECONDARY_NAME_ORG": "Autoworks", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "Hardenbergstrasse 87", "ADDR_POSTAL_CODE": "66879", "ADDR_COUNTRY": "Germany", "DATE": "3/15/2019", "STATUS": "Terminated", "CATEGORY": "Platinum"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2063", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Alexander Pavlovich Vasiliev", "PHONE_NUMBER": "481-285-6234", "DATE": "1/15/2000", "STATUS": "Active", "CATEGORY": "Platinum"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2072", "RECORD_TYPE": "ORGANIZATION", 
"PRIMARY_NAME_ORG": "Univrsl Export Inc", "ADDR_TYPE": "BUSINESS", "ADDR_LINE1": "100 Howard Hughs Plaza", "ADDR_CITY": "Las Vegas", "ADDR_STATE": "NV", "ADDR_POSTAL_CODE": "89111", "PHONE_NUMBER": "800-111-1234", "DATE": "6/15/2005", "STATUS": "Active", "CATEGORY": "Silver"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2073", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Worldwide Exports ", "ADDR_TYPE": "REGISTERED", "ADDR_LINE1": "Chrysler Building, 405 Lexington Avenue", "ADDR_CITY": "New York", "ADDR_STATE": "NY", "ADDR_POSTAL_CODE": "10174", "DATE": "12/10/2020", "STATUS": "Active", "CATEGORY": "Platinum"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2142", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Singapore exports", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "133 New Bridge Road, Chinatown Point, Singapore 059413", "ADDR_COUNTRY": "Singapore", "DATE": "2/4/2012", "STATUS": "Active", "CATEGORY": "Silver"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2152", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "India Exports", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "Mullanpara Road, Old Vythiri, Vythiri, Wayanad, 673576, India", "DATE": "3/1/2010", "STATUS": "Active", "CATEGORY": "Gold"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2171", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Andrew", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2172", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Andy ", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2181", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Anna", "PHONE_NUMBER": "702-221-2211", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2182", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Annabelle", "PHONE_NUMBER": "702-221-2211", "EMAIL_ADDRESS": "info@ca-state.gov"} 
+{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2191", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Muir", "PRIMARY_NAME_FIRST": "Jim", "DATE_OF_BIRTH": "1997-11-12", "ADDR_LINE1": "12396 Austin Rd", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2192", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Muir", "PRIMARY_NAME_FIRST": "Jane", "DATE_OF_BIRTH": "1999-12-10", "ADDR_LINE1": "12396 Austin Rd", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2193", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Muir", "PRIMARY_NAME_FIRST": "J", "DATE_OF_BIRTH": "1999-12-10", "ADDR_LINE1": "12396 Austin Rd", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2201", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Swarm", "PRIMARY_NAME_FIRST": "Jorg", "ADDR_LINE1": "127 14th Ave", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2202", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Barge", "PRIMARY_NAME_FIRST": "Jorge", "ADDR_LINE1": "4362 Belmont Lane", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2203", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Gray", "PRIMARY_NAME_FIRST": "Gaston", "ADDR_LINE1": "1376 BlueBell Road", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2204", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Howard", "PRIMARY_NAME_FIRST": "Henry", "ADDR_LINE1": "538 Blanco St", "ADDR_CITY": "Sacramento", 
"ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2205", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Inverness", "PRIMARY_NAME_FIRST": "Inez", "ADDR_LINE1": "2516 BentTree Ln", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2206", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Jackson", "PRIMARY_NAME_FIRST": "Julia", "ADDR_LINE1": "319 Cody Road", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2207", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kellar", "PRIMARY_NAME_FIRST": "Kandace", "ADDR_LINE1": "1824 AspenOak Way", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2208", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Leonard", "PRIMARY_NAME_FIRST": "Leslie", "ADDR_LINE1": "4362 Belmont Lane", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2209", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Miller", "PRIMARY_NAME_FIRST": "Millie", "ADDR_LINE1": "1376 BlueBell Road", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2210", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Nice", "PRIMARY_NAME_FIRST": "Nelson", "ADDR_LINE1": "319 Cody Road", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2211", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Otter", "PRIMARY_NAME_FIRST": "Otto", "ADDR_LINE1": "1824 AspenOak Way", "ADDR_CITY": 
"Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2212", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Pemberton", "PRIMARY_NAME_FIRST": "Penny", "ADDR_LINE1": "1824 AspenOak Way", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2213", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kellar", "PRIMARY_NAME_FIRST": "Candace", "ADDR_LINE1": "1824 AspenOak Way", "ADDR_CITY": "Elmwood Park", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95865", "EMAIL_ADDRESS": "info@ca-state.gov"} +{"DATA_SOURCE": "CUSTOMERS", "RECORD_ID": "2214", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Sanders", "PRIMARY_NAME_FIRST": "Sandy", "ADDR_LINE1": "1376 BlueBell Rd", "ADDR_CITY": "Sacramento", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "95823", "EMAIL_ADDRESS": "info@ca-state.gov"} diff --git a/resources/data/truthset/reference.jsonl b/resources/data/truthset/reference.jsonl new file mode 100644 index 0000000..4c18e85 --- /dev/null +++ b/resources/data/truthset/reference.jsonl @@ -0,0 +1,22 @@ +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2012", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Hajah Maimunah", "ADDR_TYPE": "REGISTERED", "ADDR_LINE1": "4 Hillview Rise", "ADDR_CITY": "SINGAPORE", "ADDR_POSTAL_CODE": "667979", "ADDR_COUNTRY": "Singapore", "REL_ANCHOR_KEY": "2011", "DATE": "2010", "STATUS": "Active", "CATEGORY": "Proprietorship"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2013", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Wang Jie", "DATE_OF_BIRTH": "1993-09-14", "REL_POINTER_KEY": "2011", "REL_POINTER_ROLE": "Owns 60%", "STATUS": "Current", "CATEGORY": "Owner"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2014", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Wang Wei", "DATE_OF_BIRTH": "1997-09-14", "REL_POINTER_KEY": "2011", "REL_POINTER_ROLE": "Owns 40%", "STATUS": 
"Current", "CATEGORY": "Owner"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2041", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "M\u00fcllenkranz ", "SECONDARY_NAME_ORG": "Autowerkz", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "Hardenbergstra\u00dfe 87", "ADDR_CITY": "Rheinland-Pfalz", "ADDR_POSTAL_CODE": "66879", "ADDR_COUNTRY": "Germany", "REL_ANCHOR_KEY": "2041", "DATE": "2009", "STATUS": "Inactive", "CATEGORY": "Partnership"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2051", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "M\u00fcllenkranz", "PRIMARY_NAME_FULL": "Faisal Siddiqui", "ADDR_TYPE": "HOME", "ADDR_LINE1": "Jia Musa Shahdara Sheikhupura Road", "ADDR_CITY": "Lahore", "ADDR_COUNTRY": "Pakistan", "PHONE_NUMBER": "+92 42-7925774", "REL_POINTER_KEY": "2041", "REL_POINTER_ROLE": "President", "STATUS": "Current", "CATEGORY": "President"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2061", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "M\u00fcllenkranz", "NATIVE_NAME_FULL": "\u0412\u0410\u0421\u0406\u041b\u042c\u0415\u040e, \u0410\u043b\u044f\u043a\u0441\u0430\u043d\u0434\u0440 \u041f\u0430\u045e\u043b\u0430\u0432\u0456\u0447", "ADDR_FULL": "Tolmacheva Ul., bld. 8, appt. 
71 Smolensk", "ADDR_COUNTRY": "RUS", "PHONE_TYPE": "PRIMARY", "PHONE_NUMBER": "+7(4812)85-62-34", "REL_POINTER_KEY": "2041", "REL_POINTER_ROLE": "Owns 100%", "STATUS": "Current", "CATEGORY": "Owner"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2071", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Universal Exports, USA", "SECONDARY_NAME_ORG": "Universal Exports", "ADDR_TYPE": "BUSINESS", "ADDR_FULL": "Hughes Plaza, 100 Howard Hughes Way, Las Vegas, NV 89111", "PHONE_NUMBER": "800-111-1234", "REL_ANCHOR_KEY": "2071", "REL_POINTER_KEY": "2074", "REL_POINTER_ROLE": "Global Parent", "DATE": "1990", "STATUS": "Active", "CATEGORY": "Corporation"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2074", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Universal Exports Worldwide", "SECONDARY_NAME_ORG": "Universal Exports", "ADDR_TYPE": "REGISTERED", "ADDR_LINE1": "405 Lexington Avenue", "ADDR_CITY": "Manhattan", "ADDR_STATE": "NY", "ADDR_POSTAL_CODE": "10174", "REL_ANCHOR_KEY": "2074", "DATE": "1990", "STATUS": "Active", "CATEGORY": "Corporation"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2081", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports Worldwide", "PRIMARY_NAME_FULL": "Howard Hughess", "REL_POINTER_KEY": "2074", "REL_POINTER_ROLE": "Owns 50%", "STATUS": "Current", "CATEGORY": "Owner"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2091", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports Worldwide", "PRIMARY_NAME_FULL": "Margaret Charney", "REL_POINTER_KEY": "2074", "REL_POINTER_ROLE": "Owns 50%", "STATUS": "Current", "CATEGORY": "Owner"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2101", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports, USA", "PRIMARY_NAME_FULL": "Keeley Jones", "PHONE_NUMBER": "818-892-2818", "EMAIL_ADDRESS": "kjones@universal.com", "REL_POINTER_KEY": "2071", "REL_POINTER_ROLE": "Principal", "STATUS": "Current", "CATEGORY": "Director"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2102", "RECORD_TYPE": "PERSON", 
"PRIMARY_NAME_FULL": "Jones, Kaylee", "PHONE_NUMBER": "18188922818", "EMAIL_ADDRESS": "kjones@universal.com", "STATUS": "Active", "CATEGORY": "Contact"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2111", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports, USA", "PRIMARY_NAME_FULL": "Susan Meyer", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "Fieldstrasse 10, FL-2198 Triesen", "ADDR_COUNTRY": "Lichtenstein", "REL_POINTER_KEY": "2071", "REL_POINTER_ROLE": "Principal", "STATUS": "Current", "CATEGORY": "Director"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2112", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Susan Meyer Thomas", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "Fieldstrasse 10, FL-2198 Triesen, Lichtenstein", "STATUS": "Active", "CATEGORY": "Contact"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2121", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports, USA", "PRIMARY_NAME_FULL": "Kristen Salinger", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "123 King street", "ADDR_CITY": "New York", "ADDR_STATE": "NY", "ADDR_POSTAL_CODE": "10012", "PHONE_TYPE": "MOBILE", "PHONE_NUMBER": "320-392-2137", "REL_POINTER_KEY": "2071", "REL_POINTER_ROLE": "Principal", "STATUS": "Current", "CATEGORY": "Director"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2122", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Salenger, Kristin", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "8321 Duke Street", "ADDR_CITY": "Los Angeles", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "90015", "PHONE_TYPE": "MOBILE", "PHONE_NUMBER": "(320) 392-2137", "STATUS": "Active", "CATEGORY": "Contact"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2131", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports, USA", "PRIMARY_NAME_FULL": "Rosemay A Thomas", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "18 Danver Place, Loughborough, Leicestershire, LE11 1UU, United Kingdom", "REL_POINTER_KEY": "2071", "REL_POINTER_ROLE": "Principal", "STATUS": "Current", "CATEGORY": "Director"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": 
"2132", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Thomas", "PRIMARY_NAME_FIRST": "Rosemary A", "ADDR_TYPE": "PRIMARY", "ADDR_FULL": "18 Danver Place, Loughborough, Leicestershire, United Kingdom, LE11 1UU", "STATUS": "Active", "CATEGORY": "Contact"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2141", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Universal Exports Singapore", "SECONDARY_NAME_ORG": "Universal Exports", "ADDR_TYPE": "REGISTERED", "ADDR_FULL": "Chinatown Point, 133 New Bridge Road, 059413 singapore", "ADDR_COUNTRY": "Singapore", "REL_POINTER_KEY": "2074", "REL_POINTER_ROLE": "Global Parent", "DATE": "1994", "STATUS": "Active", "CATEGORY": "Corporation"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2151", "RECORD_TYPE": "ORGANIZATION", "PRIMARY_NAME_ORG": "Universal Exports India", "SECONDARY_NAME_ORG": "Universal Exports", "ADDR_TYPE": "REGISTERED", "ADDR_FULL": "Mullanpara Road, Vythiri, 673576, India", "REL_POINTER_KEY": "2074", "REL_POINTER_ROLE": "Global Parent", "DATE": "1998", "STATUS": "Active", "CATEGORY": "Corporation"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2161", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports, USA", "PRIMARY_NAME_LAST": "Alexopoulos", "PRIMARY_NAME_FIRST": "Anastassia", "ADDR_TYPE": "HOME", "ADDR_LINE1": "6781 Metaxa Forest, Suite 296", "ADDR_CITY": "Athens", "ADDR_STATE": "GA", "ADDR_POSTAL_CODE": "30009", "EMAIL_ADDRESS": "Nastassia", "REL_POINTER_KEY": "2071", "REL_POINTER_ROLE": "Principal", "STATUS": "Current", "CATEGORY": "Director"} +{"DATA_SOURCE": "REFERENCE", "RECORD_ID": "2162", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_FULL": "Alexopoulos, Nastassia", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "6781 Metaxa Forest", "ADDR_CITY": "Athens", "ADDR_STATE": "GA", "ADDR_POSTAL_CODE": "30009", "EMAIL_ADDRESS": "patak@universal.com", "STATUS": "Active", "CATEGORY": "Contact"} diff --git a/resources/data/truthset/watchlist.jsonl b/resources/data/truthset/watchlist.jsonl new file mode 100644 index 
0000000..7a60309 --- /dev/null +++ b/resources/data/truthset/watchlist.jsonl @@ -0,0 +1,17 @@ +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1006", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith Sr", "PRIMARY_NAME_FIRST": "Robert", "PRIMARY_NAME_MIDDLE": "E", "DATE_OF_BIRTH": "3/31/1954", "DRIVERS_LICENSE_NUMBER": "112233", "DRIVERS_LICENSE_STATE": "NV", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "123 Main St, Las Vegas ", "DATE": "1/3/17", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1007", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Patricia", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1515 Adela Ln, LV, NV", "ADDR_POSTAL_CODE": "89132", "EMAIL_ADDRESS": "psmith@email.com", "DATE": "2/4/20", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1008", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "Robert", "EMAIL_ADDRESS": "robert.smith@email.com", "DATE": "3/5/19", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1012", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Kusha", "PRIMARY_NAME_FIRST": "Eddie", "DATE_OF_BIRTH": "3/1/1970", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "1602 Brenville Pl, San Francisco, CA 94105", "ADDR_CITY": "San Francisco", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "94105", "DATE": "4/6/19", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1014", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Knight", "PRIMARY_NAME_FIRST": "Eddie", "DATE_OF_BIRTH": "3/1/70", "SSN_NUMBER": "294-66-9999", "ADDR_TYPE": "MAILING", "ADDR_LINE1": "160 Brenville Pl", "ADDR_CITY": "San Francisco", "ADDR_STATE": "CA", "ADDR_POSTAL_CODE": "94105", "DATE": "5/7/15", "STATUS": "Inactive", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1021", "RECORD_TYPE": "PERSON", "SSN_NUMBER": "201-77-7719", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1304 Poppy Hills Dr", 
"ADDR_CITY": "Blacklick", "ADDR_STATE": "OH", "ADDR_POSTAL_CODE": "43004", "EMAIL_ADDRESS": "Kusha123@hmail.com", "DATE": "6/8/16", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1024", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Antoun", "PRIMARY_NAME_FIRST": "Mhd", "DATE_OF_BIRTH": "1/7/80", "DATE": "7/9/18", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1027", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Anderson", "PRIMARY_NAME_FIRST": "Darletta", "DATE_OF_BIRTH": "1/7/80", "DATE": "3/5/19", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1029", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Dobbins Senior", "PRIMARY_NAME_FIRST": "David", "ADDR_TYPE": "HOME", "ADDR_LINE1": "Suite 900, 1450 N City Rd", "ADDR_CITY": "Arlington", "ADDR_POSTAL_CODE": "23208", "DATE": "4/6/20", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1037", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Sentosa", "PRIMARY_NAME_FIRST": "Maria", "DATE_OF_BIRTH": "12/11/73", "DATE": "5/7/15", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1038", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Medina", "PRIMARY_NAME_FIRST": "Maria", "DATE_OF_BIRTH": "12/11/73", "ADDR_TYPE": "HOME", "ADDR_LINE1": "9304 W. 15th St La Blanca, FL 60527", "EMAIL_ADDRESS": "msentosa@fmail.com", "DATE": "1/3/19", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1041", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "John", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 32nd St Palm Harbor, FL 60527", "DATE": "2/4/19", "STATUS": "Active", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "1042", "RECORD_TYPE": "PERSON", "PRIMARY_NAME_LAST": "Smith", "PRIMARY_NAME_FIRST": "John", "GENDER": "M", "ADDR_TYPE": "HOME", "ADDR_LINE1": "3212 W. 
32nd St Palm Harbor, FL 60527", "DATE": "3/5/18", "STATUS": "Inactive", "CATEGORY": "Fraud"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "2052", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u0641\u064a\u0635\u0644 \u0635\u062f\u064a\u0642\u064a", "ADDR_TYPE": "PRIMARY", "ADDR_LINE1": "Jia Musa Shahdara Sheikhupura Road", "ADDR_CITY": "Lahore", "ADDR_COUNTRY": "Pakistan", "PHONE_NUMBER": "42-7925774", "DATE": "2/22/2002", "STATUS": "Current", "CATEGORY": "Sanctioned"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "2062", "RECORD_TYPE": "PERSON", "NATIVE_NAME_FULL": "\u0412\u0410\u0421\u0418\u041b\u042c\u0415\u0412, \u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440 \u041f\u0430\u0432\u043b\u043e\u0432\u0438\u0447", "ADDR_LINE1": "Tolmacheva Ul., bld. 8, appt. 71 ", "ADDR_CITY": "Smolensk", "PHONE_NUMBER": "(4812)85-62-34", "DATE": "4/14/2014", "STATUS": "Current", "CATEGORY": "Sanctioned"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "2082", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports", "PRIMARY_NAME_FULL": "Hughes, Howard", "ADDR_TYPE": "HOME", "ADDR_LINE1": "1231 Las Vegas Blvd, Penthouse Suite", "ADDR_CITY": "Las Vegas", "ADDR_STATE": "NV", "DATE": "4/3/2019", "STATUS": "Current", "CATEGORY": "PEP"} +{"DATA_SOURCE": "WATCHLIST", "RECORD_ID": "2092", "RECORD_TYPE": "PERSON", "EMPLOYER_NAME": "Universal Exports", "PRIMARY_NAME_FULL": "Charney, Peggie", "DATE": "4/3/2019", "STATUS": "Current", "CATEGORY": "PEP"} From 5ef6d3fdd2c9b0b77a05ea4ed4b2fcef48629327 Mon Sep 17 00:00:00 2001 From: "Barry M. 
Caceres" Date: Tue, 28 Jan 2025 09:53:34 -0800 Subject: [PATCH 8/9] Added stewardship snippets --- java/snippets/stewardship/ForceResolve.java | 172 ++++++++++++++++++ java/snippets/stewardship/ForceUnresolve.java | 172 ++++++++++++++++++ 2 files changed, 344 insertions(+) create mode 100644 java/snippets/stewardship/ForceResolve.java create mode 100644 java/snippets/stewardship/ForceUnresolve.java diff --git a/java/snippets/stewardship/ForceResolve.java b/java/snippets/stewardship/ForceResolve.java new file mode 100644 index 0000000..f57f44a --- /dev/null +++ b/java/snippets/stewardship/ForceResolve.java @@ -0,0 +1,172 @@ +package stewardship; + +import java.util.*; +import javax.json.*; +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of force-resolving records that + * otherwise will not resolve to one another. + */ +public class ForceResolve { + private static final String TEST = "TEST"; + + public static void main(String[] args) { + // get the Senzing repository settings from the environment + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = ForceResolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord()
function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID_NUMBER to force resolve..."); + SzRecordKey key1 = SzRecordKey.of(TEST, "1"); + SzRecordKey key3 = SzRecordKey.of(TEST, "3"); + + String record1 = engine.getRecord(key1, SZ_RECORD_DEFAULT_FLAGS); + String record3 = engine.getRecord(key3, SZ_RECORD_DEFAULT_FLAGS); + + JsonObject obj1 = Json.createReader(new StringReader(record1)).readObject(); + JsonObject obj3 = Json.createReader(new StringReader(record3)).readObject(); + + obj1 = obj1.getJsonObject("JSON_DATA"); + obj3 = obj3.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job1 = Json.createObjectBuilder(obj1); + JsonObjectBuilder job3 = Json.createObjectBuilder(obj3); + + for (JsonObjectBuilder job : List.of(job1, job3)) { + job.add("TRUSTED_ID_NUMBER", "TEST_R1-TEST_R3"); + job.add("TRUSTED_ID_TYPE", "FORCE_RESOLVE"); + } + + record1 = job1.build().toString(); + record3 = job3.build().toString(); + + engine.addRecord(key1, record1, SZ_NO_FLAGS); + engine.addRecord(key3, record3, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue();
+ System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // report the Senzing error message and code, then rethrow unchecked + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "1"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "1", + "PRIMARY_NAME_FULL": "Patrick Smith", + "AKA_NAME_FULL": "Paddy Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "2"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "2", + "PRIMARY_NAME_FULL": "Patricia Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688", + "DATE_OF_BIRTH": "5/4/1994" + } + """); + + records.put( + SzRecordKey.of("TEST", "3"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "3", + "PRIMARY_NAME_FULL": "Pat Smith", + "ADDR_FULL": "787 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "787-767-2688" + } + """); + + return records; + } +} \ No newline at end of file diff --git a/java/snippets/stewardship/ForceUnresolve.java b/java/snippets/stewardship/ForceUnresolve.java new file mode 100644 index 0000000..feff5c6 --- /dev/null +++
b/java/snippets/stewardship/ForceUnresolve.java @@ -0,0 +1,172 @@ +package stewardship; + +import java.util.*; +import javax.json.*; +import java.io.*; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +import static com.senzing.sdk.SzFlag.*; + +/** + * Provides a simple example of force-unresolving records that + * would otherwise resolve to one another. + */ +public class ForceUnresolve { + private static final String TEST = "TEST"; + + public static void main(String[] args) { + // get the Senzing repository settings from the environment + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = ForceUnresolve.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the engine from the environment + SzEngine engine = env.getEngine(); + + Map recordMap = getRecords(); + // loop through the example records and add them to the repository + for (Map.Entry entry : recordMap.entrySet()) { + SzRecordKey recordKey = entry.getKey(); + String recordDefinition = entry.getValue(); + + // call the addRecord() function with no flags + engine.addRecord(recordKey, recordDefinition, SZ_NO_FLAGS); + + System.out.println("Record " + recordKey.recordId() + " added"); + System.out.flush(); + } + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey
+ " originally resolves to entity " + entityId); + } + System.out.println(); + System.out.println("Updating records with TRUSTED_ID_NUMBER to force unresolve..."); + SzRecordKey key4 = SzRecordKey.of(TEST, "4"); + SzRecordKey key6 = SzRecordKey.of(TEST, "6"); + + String record4 = engine.getRecord(key4, SZ_RECORD_DEFAULT_FLAGS); + String record6 = engine.getRecord(key6, SZ_RECORD_DEFAULT_FLAGS); + + JsonObject obj4 = Json.createReader(new StringReader(record4)).readObject(); + JsonObject obj6 = Json.createReader(new StringReader(record6)).readObject(); + + obj4 = obj4.getJsonObject("JSON_DATA"); + obj6 = obj6.getJsonObject("JSON_DATA"); + + JsonObjectBuilder job4 = Json.createObjectBuilder(obj4); + JsonObjectBuilder job6 = Json.createObjectBuilder(obj6); + + job4.add("TRUSTED_ID_NUMBER", "TEST_R4-TEST_R6"); + job4.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + job6.add("TRUSTED_ID_NUMBER", "TEST_R6-TEST_R4"); + job6.add("TRUSTED_ID_TYPE", "FORCE_UNRESOLVE"); + + record4 = job4.build().toString(); + record6 = job6.build().toString(); + + engine.addRecord(key4, record4, SZ_NO_FLAGS); + engine.addRecord(key6, record6, SZ_NO_FLAGS); + + System.out.println(); + for (SzRecordKey recordKey : recordMap.keySet()) { + String result = engine.getEntity(recordKey, SZ_ENTITY_BRIEF_DEFAULT_FLAGS); + JsonObject jsonObj = Json.createReader(new StringReader(result)).readObject(); + long entityId = jsonObj.getJsonObject("RESOLVED_ENTITY") + .getJsonNumber("ENTITY_ID").longValue(); + System.out.println( + "Record " + recordKey + " now resolves to entity " + entityId); + } + System.out.println(); + + } catch (SzException e) { + // report the Senzing error message and code, then rethrow unchecked + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } +
throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } + + /** + * This is a support method for providing example records to add. + * + * @return A {@link Map} of {@link SzRecordKey} keys to {@link String} + * JSON text values describing the records to be added. + */ + public static Map getRecords() { + Map records = new LinkedHashMap<>(); + records.put( + SzRecordKey.of("TEST", "4"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "4", + "PRIMARY_NAME_FULL": "Elizabeth Jonas", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "SSN_NUMBER": "767-87-7678", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "5"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "5", + "PRIMARY_NAME_FULL": "Beth Jones", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "SSN_NUMBER": "767-87-7678", + "DATE_OF_BIRTH": "1/12/1990" + } + """); + + records.put( + SzRecordKey.of("TEST", "6"), + """ + { + "DATA_SOURCE": "TEST", + "RECORD_ID": "6", + "PRIMARY_NAME_FULL": "Betsey Jones", + "ADDR_FULL": "202 Rotary Dr, Rotorville, RI, 78720", + "PHONE_NUMBER": "202-787-7678" + } + """); + + return records; + } +} \ No newline at end of file From 93b8a5aeedf0ce48926b588f724c31e9877a6842 Mon Sep 17 00:00:00 2001 From: "Barry M. Caceres" Date: Tue, 28 Jan 2025 15:23:52 -0800 Subject: [PATCH 9/9] Added README.md files in multiple directories.
Added configuration/InitDefaultConfig.java Updated usage message for SnippetRunner.java --- java/README.md | 107 ++++++++++++++++++ .../com/senzing/runner/SnippetRunner.java | 6 +- .../configuration/InitDefaultConfig.java | 70 ++++++++++++ java/snippets/configuration/README.md | 11 ++ java/snippets/deleting/README.md | 15 +++ java/snippets/information/README.md | 14 +++ java/snippets/initialization/README.md | 11 ++ java/snippets/loading/README.md | 23 ++++ java/snippets/redo/README.md | 21 ++++ java/snippets/searching/README.md | 14 +++ java/snippets/stewardship/ForceResolve.java | 2 +- java/snippets/stewardship/ForceUnresolve.java | 2 +- java/snippets/stewardship/README.md | 29 +++++ 13 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 java/README.md create mode 100644 java/snippets/configuration/InitDefaultConfig.java create mode 100644 java/snippets/configuration/README.md create mode 100644 java/snippets/deleting/README.md create mode 100644 java/snippets/information/README.md create mode 100644 java/snippets/initialization/README.md create mode 100644 java/snippets/loading/README.md create mode 100644 java/snippets/redo/README.md create mode 100644 java/snippets/searching/README.md create mode 100644 java/snippets/stewardship/README.md diff --git a/java/README.md b/java/README.md new file mode 100644 index 0000000..febf653 --- /dev/null +++ b/java/README.md @@ -0,0 +1,107 @@ +# Java Snippets + +The Java snippets are contained in the `snippets` directory under various Java package directories. They can be built using the `pom.xml` in this directory using `mvn package`. The result will be the `sz-sdk-snippets.jar` file in the `target` directory. + +There are several ways to run the code snippets. + +## Run Directly + +You may run any individual Snippet class directly providing you have a Senzing repository to run it with and the `SENZING_ENGINE_CONFIGURATION_JSON` environment variable set for connecting to that repository.
Many of the snippets will find a default data file to run with if run from this directory, but also allow the caller to use a different data file if given by the first command-line argument. + +1. Run a snippet that takes no command-line arguments. + ``` + java -cp target/sz-sdk-snippets.jar loading.LoadRecords + ``` + +2. Run a snippet and override the input file using command-line arguments + ``` + java -cp target/sz-sdk-snippets.jar loading.LoadViaLoop ../../resources/data/load-500-with-errors.jsonl + ``` + +## Run Individually via Runner + +The `com.senzing.runner.SnippetRunner` class will run one or more snippets for you and create a temporary Senzing repository to run +them against. This is the `Main-Class` of the `sz-sdk-snippets.jar` file so it can be executed using `java -jar target/sz-sdk-snippets.jar`. + +**NOTE:** When code snippets are run this way you cannot specify command-line arguments for individual snippets, nor can you respond to command-line input requests (they will be answered automatically by the runner -- including forced termination of a snippet that is intended to run indefinitely). + +1. Execute all code snippets: + ``` + java -jar target/sz-sdk-snippets.jar all + ``` + +2. Execute all code snippets in a Java package: + ``` + java -jar target/sz-sdk-snippets.jar loading + ``` + +3. Execute all code snippets from multiple packages: + ``` + java -jar target/sz-sdk-snippets.jar loading redo + ``` +4. Execute specific code snippets: + ``` + java -jar target/sz-sdk-snippets.jar loading.LoadViaLoop loading.LoadViaQueue + ``` +5. Mix and match packages with individual snippets: + ``` + java -jar target/sz-sdk-snippets.jar redo loading.LoadViaLoop + ``` +6.
Generate a help message by specifying no arguments: + ``` + java -jar target/sz-sdk-snippets.jar + + java -jar sz-sdk-snippets.jar [ all | | ]* + + - Specifying no arguments will print this message + - Specifying "all" will run all snippets + - Specifying one or more groups will run all snippets in those groups + - Specifying one or more snippets will run those snippet + + Examples: + + java -jar sz-sdk-snippets.jar all + + java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures + + java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords + + Snippet Group Names: + - configuration + - deleting + - information + - initialization + - loading + - redo + - searching + - stewardship + + Snippet Names: + - configuration.AddDataSources + - configuration.InitDefaultConfig + - deleting.DeleteViaFutures + - deleting.DeleteViaLoop + - deleting.DeleteWithInfoViaFutures + - information.CheckDatastorePerformance + - information.GetDatastoreInfo + - information.GetLicense + - information.GetVersion + - initialization.EnginePriming + - initialization.EnvironmentAndHubs + - initialization.PurgeRepository + - loading.LoadRecords + - loading.LoadTruthSetWithInfoViaLoop + - loading.LoadViaFutures + - loading.LoadViaLoop + - loading.LoadViaQueue + - loading.LoadWithInfoViaFutures + - loading.LoadWithStatsViaLoop + - redo.LoadWithRedoViaLoop + - redo.RedoContinuous + - redo.RedoContinuousViaFutures + - redo.RedoWithInfoContinuous + - searching.SearchRecords + - searching.SearchViaFutures + - stewardship.ForceResolve + - stewardship.ForceUnresolve + ``` diff --git a/java/runner/java/com/senzing/runner/SnippetRunner.java b/java/runner/java/com/senzing/runner/SnippetRunner.java index 7898848..0516b71 100644 --- a/java/runner/java/com/senzing/runner/SnippetRunner.java +++ b/java/runner/java/com/senzing/runner/SnippetRunner.java @@ -340,7 +340,7 @@ private static void executeSnippet(String snippet, } private static void printUsage(SortedMap> snippetMap) { - 
System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]* ]"); + System.err.println("java -jar sz-sdk-snippets.jar [ all | | ]*"); System.err.println(); System.err.println(" - Specifying no arguments will print this message"); System.err.println(" - Specifying \"all\" will run all snippets"); @@ -351,9 +351,9 @@ private static void printUsage(SortedMap> snippetMap) System.err.println(); System.err.println(" java -jar sz-sdk-snippets.jar all"); System.err.println(); - System.err.println(" java -jar sz-sdk-snippets.jar loading.AddRecords loading.AddFutures"); + System.err.println(" java -jar sz-sdk-snippets.jar loading.LoadRecords loading.LoadViaFutures"); System.err.println(); - System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.AddRecords"); + System.err.println(" java -jar sz-sdk-snippets.jar initialization deleting loading.LoadRecords"); System.err.println(); System.err.println("Snippet Group Names:"); snippetMap.keySet().forEach(group -> { diff --git a/java/snippets/configuration/InitDefaultConfig.java b/java/snippets/configuration/InitDefaultConfig.java new file mode 100644 index 0000000..b28fd2a --- /dev/null +++ b/java/snippets/configuration/InitDefaultConfig.java @@ -0,0 +1,70 @@ +package configuration; + +import com.senzing.sdk.*; +import com.senzing.sdk.core.SzCoreEnvironment; + +/** + * Provides a simple example of initializing the default config of the Senzing repository.
+ */ +public class InitDefaultConfig { + public static void main(String[] args) { + // get the Senzing repository settings from the environment + String settings = System.getenv("SENZING_ENGINE_CONFIGURATION_JSON"); + if (settings == null) { + System.err.println("Unable to get settings."); + throw new IllegalArgumentException("Unable to get settings"); + } + + // create a descriptive instance name (can be anything) + String instanceName = InitDefaultConfig.class.getSimpleName(); + + // initialize the Senzing environment + SzEnvironment env = SzCoreEnvironment.newBuilder() + .settings(settings) + .instanceName(instanceName) + .verboseLogging(false) + .build(); + + try { + // get the config and config manager from the environment + SzConfig config = env.getConfig(); + SzConfigManager configMgr = env.getConfigManager(); + + // create a new in-memory config, export its definition and always close the handle + long configHandle = config.createConfig(); + String configDefinition = null; + try { + configDefinition = config.exportConfig(configHandle); + + } finally { + config.closeConfig(configHandle); + } + + // register the exported config definition with the repository with a comment + long configId = configMgr.addConfig( + configDefinition, "Initial configuration"); + + // set the newly registered config as the default config + configMgr.setDefaultConfigId(configId); + + } catch (SzException e) { + // report the Senzing error message and code, then rethrow unchecked + System.err.println("Senzing Error Message : " + e.getMessage()); + System.err.println("Senzing Error Code : " + e.getErrorCode()); + e.printStackTrace(); + throw new RuntimeException(e); + + } catch (Exception e) { + e.printStackTrace(); + if (e instanceof RuntimeException) { + throw ((RuntimeException) e); + } + throw new RuntimeException(e); + + } finally { + // IMPORTANT: make sure to destroy the environment + env.destroy(); + } + + } +} \ No newline at end of file diff --git 
a/java/snippets/configuration/README.md b/java/snippets/configuration/README.md new file mode 100644 index 0000000..5537d31 ---
/dev/null +++ b/java/snippets/configuration/README.md @@ -0,0 +1,11 @@ +# Configuration +The configuration snippets outline how to modify the Senzing configuration, register the modified configuration with a configuration ID and update the default configuration ID for the repository. + +You may use either `setDefaultConfigId()` or `replaceDefaultConfigId()`. Initially, the default config ID must be set since there is no existing config ID to replace. However, when updating you may use `replaceDefaultConfigId()` to guard against race conditions of multiple threads or processes updating at the same time. + +## Snippets +* **AddDataSources.java** + * Gets the current default config, creates a modified config with additional data sources, registers that modified config and then replaces the default config ID. +* **InitDefaultConfig.java** + * Initializes the repository with a default config ID using the template configuration provided by Senzing. + diff --git a/java/snippets/deleting/README.md b/java/snippets/deleting/README.md new file mode 100644 index 0000000..3afe0f7 --- /dev/null +++ b/java/snippets/deleting/README.md @@ -0,0 +1,15 @@ +# Deleting Data + +The deletion snippets outline deleting previously added source records. Deleting source records removes the previously added source record from the system, completes the entity resolution process and persists outcomes in the Senzing repository. + +Deleting a record only requires the data source code and record ID for the record to be deleted. + +## Snippets + +- **DeleteViaFutures.java** + - Read and delete source records from a file using multiple threads +- **DeleteViaLoop.java** + - Basic read and delete source records from a file +- **DeleteWithInfoViaFutures.java** + - Read and delete source records from a file using multiple threads + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `deleteRecord()` method and track the entity ID's.
diff --git a/java/snippets/information/README.md b/java/snippets/information/README.md new file mode 100644 index 0000000..11c2b95 --- /dev/null +++ b/java/snippets/information/README.md @@ -0,0 +1,14 @@ +# System & Repository Information + +The information snippets outline the retrieval of different informational aspects of a Senzing instance or engine. + +## Snippets + +- **CheckDatastorePerformance.java** + - Run an insert test against the Senzing repository to gauge performance +- **GetDatastoreInfo.java** + - Return basic information about the Senzing repository(s) +- **GetLicense.java** + - Return the currently in use license details +- **GetVersion.java** + - Return the current Senzing product version details diff --git a/java/snippets/initialization/README.md b/java/snippets/initialization/README.md new file mode 100644 index 0000000..316ff63 --- /dev/null +++ b/java/snippets/initialization/README.md @@ -0,0 +1,11 @@ +# Initialization + +## Snippets + +- **EnginePriming.java** + - Priming the Senzing engine before use loads resource intensive assets upfront. Without priming the first SDK call to the engine will appear slower than usual as it causes these assets to be loaded +- **EnvironmentsAndHubs.java** + - Basic example of how to create an abstract Senzing factory and each of the available engines +- **PurgeRepository.java** + - **WARNING** This script will remove all data from a Senzing repository, use with caution! **WARNING** + - It will prompt first, still use with caution! diff --git a/java/snippets/loading/README.md b/java/snippets/loading/README.md new file mode 100644 index 0000000..035b9a8 --- /dev/null +++ b/java/snippets/loading/README.md @@ -0,0 +1,23 @@ +# Loading Data + +The loading snippets outline adding new source records. 
Adding source records ingests [mapped](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) JSON data, completes the entity resolution process and persists outcomes in the Senzing repository. Adding a source record with the same data source code and record ID as an existing record will replace it. + +## Snippets + +- **LoadRecords.java** + - Basic iteration over a few records, adding each one +- **LoadTruthSetWithInfoViaLoop.java** + - Read and load from multiple source files, adding a sample truth set + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `addRecord()` method and track the entity ID's for the records. +- **LoadViaFutures.java** + - Read and load source records from a file using multiple threads +- **LoadViaLoop.java** + - Basic read and add source records from a file +- **LoadViaQueue.java** + - Read and load source records using a queue +- **LoadWithInfoViaFutures.java** + - Read and load source records from a file using multiple threads + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `addRecord()` method and track the entity ID's for the records. +- **LoadWithStatsViaLoop.java** + - Basic read and add source records from a file + - Periodically calls the `getStats()` method during load to track loading statistics. diff --git a/java/snippets/redo/README.md b/java/snippets/redo/README.md new file mode 100644 index 0000000..918a2b3 --- /dev/null +++ b/java/snippets/redo/README.md @@ -0,0 +1,21 @@ +# Redo Records + +The redo snippets outline processing redo records. During normal processing of loading, deleting and replacing data the Senzing engine may determine additional work needs to be completed for an entity. There are times the Senzing engine will decide to defer this additional work.
Examples of why this may happen include: + +- Records loaded in parallel are clustering around the same entities causing contention +- Automatic corrections +- Cleansing decisions made on attributes determined to no longer be useful for entity resolution + +When an entity requires additional work, a record is automatically created in the system indicating this requirement. These records are called redo records. Redo records need to be periodically or continuously checked for and processed. Periodic checking is suitable after manipulating smaller portions of data, for example, at the end of a batch load of data. In contrast, a continuous process checking for and processing redo records is suitable in a streaming system that is constantly manipulating data. In general, it is recommended to have a continuous redo process checking for any redo records to process and processing them. + +## Snippets + +- **LoadWithRedoViaLoop.java** + - Read and load source records from a file and then process any redo records +- **RedoContinuous.java** + - Basic example of continuously monitoring for redo records to process +- **RedoContinuousViaFutures.java** + - Continuously monitor for redo records to process using multiple threads +- **RedoWithInfoContinuous.java** + - Continuously monitor for redo records to process + - Collect the response using the [SZ_WITH_INFO flag](../../../README.md#with-info) on the `processRedoRecord()` method and track the entity IDs for the records. diff --git a/java/snippets/searching/README.md b/java/snippets/searching/README.md new file mode 100644 index 0000000..0525212 --- /dev/null +++ b/java/snippets/searching/README.md @@ -0,0 +1,14 @@ +# Searching for Entities + +The search snippets outline searching for entities in the system. 
Searching for entities uses the same mapped JSON data [specification](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) as SDK methods such as `addRecord()` to format the search request. + +There are [considerations](https://senzing.zendesk.com/hc/en-us/articles/360007880814-Guidelines-for-Successful-Entity-Searching) to be aware of when searching. + +## Snippets + +- **SearchRecords.java** + - Basic iteration over a few records, searching for each one + - To see results first load records with [LoadTruthSetWithInfoViaLoop.java](../loading/LoadTruthSetWithInfoViaLoop.java) +- **SearchViaFutures.java** + - Read and search for records from a file using multiple threads + - To see results first load records with [LoadViaFutures.java](../loading/LoadViaFutures.java) diff --git a/java/snippets/stewardship/ForceResolve.java b/java/snippets/stewardship/ForceResolve.java index f57f44a..21b9ff6 100644 --- a/java/snippets/stewardship/ForceResolve.java +++ b/java/snippets/stewardship/ForceResolve.java @@ -61,7 +61,7 @@ public static void main(String[] args) { "Record " + recordKey + " originally resolves to entity " + entityId); } System.out.println(); - System.out.println("Updating records with TRUSTED_ID_NUMBER to force resolve..."); + System.out.println("Updating records with TRUSTED_ID to force resolve..."); SzRecordKey key1 = SzRecordKey.of(TEST, "1"); SzRecordKey key3 = SzRecordKey.of(TEST, "3"); diff --git a/java/snippets/stewardship/ForceUnresolve.java b/java/snippets/stewardship/ForceUnresolve.java index feff5c6..6ff8475 100644 --- a/java/snippets/stewardship/ForceUnresolve.java +++ b/java/snippets/stewardship/ForceUnresolve.java @@ -61,7 +61,7 @@ public static void main(String[] args) { "Record " + recordKey + " originally resolves to entity " + entityId); } System.out.println(); - System.out.println("Updating records with TRUSTED_ID_NUMBER to force unresolve..."); + System.out.println("Updating records with TRUSTED_ID 
to force unresolve..."); SzRecordKey key4 = SzRecordKey.of(TEST, "4"); SzRecordKey key6 = SzRecordKey.of(TEST, "6"); diff --git a/java/snippets/stewardship/README.md b/java/snippets/stewardship/README.md new file mode 100644 index 0000000..993dcc5 --- /dev/null +++ b/java/snippets/stewardship/README.md @@ -0,0 +1,29 @@ +# Stewardship + +The stewardship snippets outline forced resolution and forced un-resolution of records from entities. Stewardship provides the ability to force records to resolve or un-resolve when, for example, Senzing doesn't have enough information at a point in time, but you may have knowledge outside of Senzing to override a decision Senzing has made. Basic stewardship utilizes the `TRUSTED_ID` feature to influence entity resolution. See the [Entity Specification](https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-JSON-CSV-Mapping) for additional details. + +In these examples, the current JSON data for a record is first retrieved and additional `TRUSTED_ID` attributes are appended before replacing the records and completing entity resolution, now taking into account the influence of the `TRUSTED_ID` attributes: + +- `TRUSTED_ID_NUMBER` - when the value is the same across records, the records resolve to the same entity. If the values used across records differ, the records will not resolve to the same entity. +- `TRUSTED_ID_TYPE` - an arbitrary value to indicate the use of the TRUSTED_ID_NUMBER. + +## Snippets + +- **ForceResolve.java** + - Force resolve records together to a single entity +- **ForceUnresolve.java** + - Force un-resolve a record from an entity into a new entity + +## Example Usage + +### Force Resolve + +Force resolve first adds 3 records and details which entity they each belong to. + +With additional knowledge not represented in Senzing you know record 3 "Pat Smith" represents the same person as record 1 "Patrick Smith". 
To force resolve these 2 records to the same entity, first fetch the current representation of each record with `getRecord()`. Next add `TRUSTED_ID_NUMBER` and `TRUSTED_ID_TYPE` attributes to each of the retrieved records. `TRUSTED_ID_NUMBER` uses the same value to indicate these records should always be considered the same entity and resolve together. In this example the data source of the records and their record IDs are used to create `TRUSTED_ID_NUMBER`. `TRUSTED_ID_TYPE` is set as FORCE_RESOLVE as an indicator they were forced together. + +### Force UnResolve + +Force UnResolve first adds 3 records and details all records resolved to the same entity. + +With additional knowledge not represented in Senzing you know record 6 "Betsey Jones" is not the same as records 4 and 5; Betsey is a twin to "Elizabeth Jones". To force unresolve "Betsey" from the "Elizabeth" entity, first fetch the current representation of each record with `getRecord()`. Next add `TRUSTED_ID_NUMBER` and `TRUSTED_ID_TYPE` attributes to each of the retrieved records. `TRUSTED_ID_NUMBER` uses a different value to indicate these records should always be considered different entities and not resolve together. In this example the data source of the records and their record IDs are used to create `TRUSTED_ID_NUMBER`. `TRUSTED_ID_TYPE` is set as FORCE_UNRESOLVE as an indicator they were forced apart.