Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
See #360
  • Loading branch information
fsteeg committed Mar 11, 2021
1 parent bc825ae commit dfe708b
Show file tree
Hide file tree
Showing 11 changed files with 1,105 additions and 5 deletions.
2 changes: 1 addition & 1 deletion metafacture-biblio/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ dependencies {
api project(':metafacture-framework')
implementation project(':metafacture-commons')
implementation project(':metafacture-flowcontrol')
implementation 'org.dspace:oclc-harvester2:0.1.12'
implementation ('xalan:xalan:2.7.0') {
exclude group: 'xalan', module: 'serializer'
exclude group: 'xercesImpl', module: 'xercesImpl'
exclude group: 'xml-apis', module: 'xml-apis'
}
implementation 'log4j:log4j:1.2.12'
implementation 'org.slf4j:slf4j-api:1.7.7'
testImplementation 'junit:junit:4.12'
testImplementation 'org.mockito:mockito-core:2.5.5'
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathException;

import org.metafacture.framework.MetafactureException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;
import org.oclc.oai.harvester2.app.RawWrite;
import org.xml.sax.SAXException;

import ORG.oclc.oai.harvester2.app.RawWrite;

/**
* Opens an OAI-PMH stream and passes a reader to the receiver.
*
Expand Down Expand Up @@ -111,10 +111,10 @@ public void process(final String baseUrl) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (TransformerException e) {
e.printStackTrace();
} catch (NoSuchFieldException e) {
e.printStackTrace();
} catch (XPathException e) {
e.printStackTrace();
}
try {
getReceiver().process(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/**
* Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package org.oclc.oai.harvester2.app;

import java.io.*;
import java.lang.NoSuchFieldException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.HashMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathExpressionException;
import org.oclc.oai.harvester2.verb.*;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Command-line harvester that copies the raw OAI-PMH responses of a repository to an output stream.
 *
 * <p>The responses are wrapped in a {@code <harvest>} element. Progress and error messages are
 * printed to {@code System.out}.
 */
public class RawWrite {

    /** Key under which {@link #getOptions(String[])} stores the list of positional arguments. */
    private static final String ROOT_ARGS = "rootArgs";

    /** Name of the charset used for every byte written to the output stream. */
    private static final String ENCODING = "UTF-8";

    /**
     * Command-line entry point.
     *
     * <p>Expects the base URL as the first positional argument; options are given as
     * {@code -name value} pairs ({@code -from}, {@code -until}, {@code -metadataPrefix},
     * {@code -setSpec}, {@code -resumptionToken}, {@code -out}). When {@code -metadataPrefix}
     * is absent, {@code oai_dc} is used. When {@code -resumptionToken} is given the output file
     * is opened in append mode and only the remaining ListRecords pages are harvested.
     *
     * <p>Malformed arguments print a usage line to {@code System.err}; any other failure prints
     * the stack trace and terminates the JVM with exit code -1.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        try {
            System.out.println(new Date());

            HashMap<String, Object> options = getOptions(args);
            List<?> rootArgs = (List<?>) options.get(ROOT_ARGS);
            if (rootArgs.isEmpty()) {
                throw new IllegalArgumentException();
            }
            String baseURL = (String) rootArgs.get(0);

            String outFileName = (String) options.get("-out");
            String from = (String) options.get("-from");
            String until = (String) options.get("-until");
            String metadataPrefix = (String) options.get("-metadataPrefix");
            if (metadataPrefix == null) {
                metadataPrefix = "oai_dc";
            }
            String resumptionToken = (String) options.get("-resumptionToken");
            String setSpec = (String) options.get("-setSpec");

            OutputStream out = System.out;
            try {
                if (resumptionToken != null) {
                    if (outFileName != null) {
                        // Resuming an earlier harvest: append to what is already there.
                        out = new FileOutputStream(outFileName, true);
                    }
                    run(baseURL, resumptionToken, out);
                } else {
                    if (outFileName != null) {
                        out = new FileOutputStream(outFileName);
                    }
                    run(baseURL, from, until, metadataPrefix, setSpec, out);
                }
            } finally {
                // Release the file handle even when the harvest fails; never close System.out.
                if (out != System.out) {
                    out.close();
                }
            }
            System.out.println(new Date());
        } catch (IllegalArgumentException e) {
            System.err.println("RawWrite <-from date> <-until date> <-metadataPrefix prefix> <-setSpec setName> <-resumptionToken token> <-out fileName> baseURL");
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Continues a harvest from a resumption token.
     *
     * <p>Writes every ListRecords response, starting with the one identified by
     * {@code resumptionToken}, followed by the closing {@code </harvest>} tag. No XML declaration
     * or opening tag is written by this overload.
     *
     * @param baseURL the base URL of the repository
     * @param resumptionToken the token identifying the first page to fetch
     * @param out the stream the UTF-8 encoded responses are written to; it is not closed
     * @throws IOException if a request or a write fails
     * @throws ParserConfigurationException if a response cannot be parsed
     * @throws SAXException if a response cannot be parsed
     * @throws XPathExpressionException if a response cannot be evaluated
     * @throws NoSuchFieldException if a response cannot be evaluated
     */
    public static void run(String baseURL, String resumptionToken,
            OutputStream out)
            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException,
            NoSuchFieldException {
        writeRecords(new ListRecords(baseURL, resumptionToken), baseURL, out);
        write(out, "</harvest>\n");
    }

    /**
     * Runs a complete harvest.
     *
     * <p>Writes, in this order: the XML declaration, the opening {@code <harvest>} tag, the
     * Identify response, the ListMetadataFormats response, every ListSets page, every
     * ListRecords page, and the closing {@code </harvest>} tag.
     *
     * @param baseURL the base URL of the repository
     * @param from passed to the ListRecords request as the {@code from} argument
     * @param until passed to the ListRecords request as the {@code until} argument
     * @param metadataPrefix passed to the ListRecords request as the metadata prefix
     * @param setSpec passed to the ListRecords request as the set specification
     * @param out the stream the UTF-8 encoded responses are written to; it is not closed
     * @throws IOException if a request or a write fails
     * @throws ParserConfigurationException if a response cannot be parsed
     * @throws SAXException if a response cannot be parsed
     * @throws XPathException if a response cannot be evaluated
     * @throws NoSuchFieldException if a response cannot be evaluated
     */
    public static void run(String baseURL, String from, String until,
            String metadataPrefix, String setSpec,
            OutputStream out)
            throws IOException, ParserConfigurationException, SAXException, XPathException,
            NoSuchFieldException {
        write(out, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        write(out, "<harvest>\n");
        write(out, new Identify(baseURL).toString());
        write(out, "\n");
        write(out, new ListMetadataFormats(baseURL).toString());
        write(out, "\n");

        ListSets listSets = new ListSets(baseURL);
        while (listSets != null) {
            write(out, listSets.toString());
            write(out, "\n");
            String resumptionToken = listSets.getResumptionToken();
            System.out.println("resumptionToken: " + resumptionToken);
            if (resumptionToken == null || resumptionToken.length() == 0) {
                listSets = null;
            } else {
                listSets = new ListSets(baseURL, resumptionToken);
            }
        }

        writeRecords(new ListRecords(baseURL, from, until, setSpec, metadataPrefix), baseURL, out);
        write(out, "</harvest>\n");
    }

    /**
     * Writes a ListRecords response and all pages that follow it via resumption tokens.
     *
     * <p>Stops without writing the current page as soon as a response reports errors; the errors
     * and the offending response are printed to {@code System.out} instead.
     */
    private static void writeRecords(ListRecords firstPage, String baseURL, OutputStream out)
            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException,
            NoSuchFieldException {
        ListRecords listRecords = firstPage;
        while (listRecords != null) {
            NodeList errors = listRecords.getErrors();
            if (errors != null && errors.getLength() > 0) {
                System.out.println("Found errors");
                int length = errors.getLength();
                for (int i = 0; i < length; ++i) {
                    Node item = errors.item(i);
                    System.out.println(item);
                }
                System.out.println("Error record: " + listRecords.toString());
                break;
            }
            write(out, listRecords.toString());
            write(out, "\n");
            String resumptionToken = listRecords.getResumptionToken();
            System.out.println("resumptionToken: " + resumptionToken);
            if (resumptionToken == null || resumptionToken.length() == 0) {
                listRecords = null;
            } else {
                listRecords = new ListRecords(baseURL, resumptionToken);
            }
        }
    }

    /** Writes {@code text} to {@code out} as UTF-8 bytes. */
    private static void write(OutputStream out, String text) throws IOException {
        out.write(text.getBytes(ENCODING));
    }

    /**
     * Splits the command line into positional arguments and {@code -name value} options.
     *
     * <p>An argument starting with {@code -} is an option name and consumes the following
     * argument as its value. Every other argument, including an empty string, is positional.
     *
     * @param args the command-line arguments
     * @return a map from option name (including the leading dash) to its value, plus the entry
     *         {@code "rootArgs"} holding the list of positional arguments in order
     * @throws IllegalArgumentException if the last argument is an option name without a value
     */
    private static HashMap<String, Object> getOptions(String[] args) {
        HashMap<String, Object> options = new HashMap<String, Object>();
        List<String> rootArgs = new ArrayList<String>();
        options.put(ROOT_ARGS, rootArgs);

        for (int i = 0; i < args.length; ++i) {
            // startsWith, unlike charAt(0), does not throw on an empty argument.
            if (!args[i].startsWith("-")) {
                rootArgs.add(args[i]);
            } else if (i + 1 < args.length) {
                options.put(args[i], args[++i]);
            } else {
                throw new IllegalArgumentException();
            }
        }
        return options;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is
* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package org.oclc.oai.harvester2.verb;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.xml.sax.SAXException;

/**
 * This class represents a GetRecord response on either the server or on the client.
 *
 * @author Jeffrey A. Young, OCLC Online Computer Library Center
 */
public class GetRecord extends HarvesterVerb {

    /** Name of the charset used to percent-encode query argument values. */
    private static final String URL_ENCODING = "UTF-8";

    /**
     * Mock object constructor (for unit testing purposes)
     */
    public GetRecord() {
        super();
    }

    /**
     * Client-side GetRecord verb constructor
     *
     * @param baseURL the baseURL of the server to be queried
     * @param identifier the identifier of the record to request; it is URL-encoded before use
     * @param metadataPrefix the metadata prefix to request; it is URL-encoded before use
     * @exception MalformedURLException the baseURL is bad
     * @exception SAXException the xml response is bad
     * @exception IOException an I/O error occurred
     * @throws ParserConfigurationException if the response cannot be parsed
     * @throws XPathExpressionException if the response cannot be evaluated
     */
    public GetRecord(String baseURL, String identifier, String metadataPrefix)
            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
        super(getRequestURL(baseURL, identifier, metadataPrefix));
    }

    /**
     * Get the oai:identifier from the oai:header
     *
     * @return the oai:identifier as a String
     * @throws XPathExpressionException if the identifier cannot be evaluated
     * @throws NoSuchFieldException if the schema location of the response is neither the
     *         OAI-PMH 2.0 nor the OAI 1.1 GetRecord schema location
     */
    public String getIdentifier() throws XPathExpressionException, NoSuchFieldException {
        final String schemaLocation = getSchemaLocation();
        if (SCHEMA_LOCATION_V2_0.equals(schemaLocation)) {
            return getSingleString("/oai20:OAI-PMH/oai20:GetRecord/oai20:record/oai20:header/oai20:identifier");
        }
        if (SCHEMA_LOCATION_V1_1_GET_RECORD.equals(schemaLocation)) {
            return getSingleString("/oai11_GetRecord:GetRecord/oai11_GetRecord:record/oai11_GetRecord:header/oai11_GetRecord:identifier");
        }
        throw new NoSuchFieldException(schemaLocation);
    }

    /**
     * Construct the complete URL of the http request
     *
     * <p>The argument values are percent-encoded so that reserved characters in an identifier
     * (for example {@code &}, {@code #}, {@code +} or a space) cannot alter the query string.
     *
     * @return a String containing the base URL followed by the query portion of the http request
     * @throws IOException if the UTF-8 encoding is not supported
     */
    private static String getRequestURL(String baseURL, String identifier, String metadataPrefix)
            throws IOException {
        StringBuilder requestURL = new StringBuilder(baseURL);
        requestURL.append("?verb=GetRecord");
        // String.valueOf keeps the previous rendering of a null argument as "null".
        requestURL.append("&identifier=")
                .append(URLEncoder.encode(String.valueOf(identifier), URL_ENCODING));
        requestURL.append("&metadataPrefix=")
                .append(URLEncoder.encode(String.valueOf(metadataPrefix), URL_ENCODING));
        return requestURL.toString();
    }
}
Loading

0 comments on commit dfe708b

Please sign in to comment.