From 4248d08687159c1b74e04e068ee6f6a56df8a803 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 25 Mar 2024 16:01:21 -0400 Subject: [PATCH 1/7] adding api for new mdc processing state db table --- .../iq/dataverse/api/MakeDataCountApi.java | 54 ++++++++++++++- .../MakeDataCountProcessState.java | 67 +++++++++++++++++++ .../MakeDataCountProcessStateServiceBean.java | 61 +++++++++++++++++ src/main/resources/db/migration/V6.1.0.8.sql | 10 +++ .../iq/dataverse/api/MakeDataCountApiIT.java | 40 +++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 13 ++++ 6 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java create mode 100644 src/main/resources/db/migration/V6.1.0.8.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 08e776a3eb8..38023327274 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -7,6 +7,8 @@ import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessStateServiceBean; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; @@ -29,6 +31,8 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; +import jakarta.ws.rs.DELETE; +import jakarta.ws.rs.GET; import 
jakarta.ws.rs.POST; import jakarta.ws.rs.Path; import jakarta.ws.rs.PathParam; @@ -47,6 +51,8 @@ public class MakeDataCountApi extends AbstractApiBean { @EJB DatasetMetricsServiceBean datasetMetricsService; @EJB + MakeDataCountProcessStateServiceBean makeDataCountProcessStateService; + @EJB DatasetExternalCitationsServiceBean datasetExternalCitationsService; @EJB DatasetServiceBean datasetService; @@ -110,7 +116,7 @@ public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @Quer @POST @Path("/addUsageMetricsFromSushiReport") - public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) { + public Response addUsageMetricsFromSushiReportAll(@QueryParam("reportOnDisk") String reportOnDisk) { try { JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk); @@ -200,5 +206,51 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE return wr.getResponse(); } } + @GET + @Path("{yearMonth}/processingState") + public Response getProcessingState(@PathParam("yearMonth") String yearMonth) { + MakeDataCountProcessState mdcps; + try { + mdcps = makeDataCountProcessStateService.getMakeDataCountProcessState(yearMonth); + } catch (IllegalArgumentException e) { + return error(Status.BAD_REQUEST,e.getMessage()); + } + if (mdcps != null) { + JsonObjectBuilder output = Json.createObjectBuilder(); + output.add("yearMonth", mdcps.getYearMonth()); + output.add("state", mdcps.getState().name()); + output.add("state-change-timestamp", mdcps.getStateChangeTime().toString()); + return ok(output); + } else { + return error(Status.NOT_FOUND, "Could not find an existing process state for " + yearMonth); + } + } + @POST + @Path("{yearMonth}/processingState") + public Response updateProcessingState(@PathParam("yearMonth") String yearMonth, @QueryParam("state") String state) { + MakeDataCountProcessState mdcps; + try { + mdcps = 
makeDataCountProcessStateService.setMakeDataCountProcessState(yearMonth, state); + } catch (IllegalArgumentException e) { + return error(Status.BAD_REQUEST,e.getMessage()); + } + + JsonObjectBuilder output = Json.createObjectBuilder(); + output.add("yearMonth", mdcps.getYearMonth()); + output.add("state", mdcps.getState().name()); + output.add("state-change-timestamp", mdcps.getStateChangeTime().toString()); + return ok(output); + } + + @DELETE + @Path("{yearMonth}/processingState") + public Response deleteProcessingState(@PathParam("yearMonth") String yearMonth) { + boolean deleted = makeDataCountProcessStateService.deleteMakeDataCountProcessState(yearMonth); + if (deleted) { + return ok("Processing State deleted for " + yearMonth); + } else { + return notFound("Processing State not found for " + yearMonth); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java new file mode 100644 index 00000000000..f49640214e9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java @@ -0,0 +1,67 @@ +package edu.harvard.iq.dataverse.makedatacount; + +import jakarta.persistence.*; + +import java.io.Serializable; +import java.sql.Timestamp; +import java.time.Instant; + +@Entity +public class MakeDataCountProcessState implements Serializable { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(nullable = false) + private Long id; + + public enum MDCProcessState { + NEW("new"), DONE("done"), SKIP("skip"), PROCESSING("processing"), FAILED("failed"); + private final String text; + private MDCProcessState(final String text) { + this.text = text; + } + @Override + public String toString() { + return text; + } + } + @Column(nullable = false) + private String yearMonth; + @Column(nullable = false) + private MDCProcessState state; + @Column(nullable = true) + private Timestamp 
state_change_time; + + public MakeDataCountProcessState() { } + public MakeDataCountProcessState (String yearMonth, String state) { + this.setYearMonth(yearMonth); + this.setState(state); + } + + public void setYearMonth(String yearMonth) throws IllegalArgumentException { + // Todo: add constraint + if (yearMonth == null || (!yearMonth.matches("\\d{4}-\\d{2}") && !yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"))) { + throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd"); + } + this.yearMonth = yearMonth; + } + public String getYearMonth() { + return this.yearMonth; + } + public void setState(MDCProcessState state) { + this.state = state; + this.state_change_time = Timestamp.from(Instant.now()); + } + public void setState(String state) throws IllegalArgumentException { + if (state != null) { + setState(MDCProcessState.valueOf(state.toUpperCase())); + } else { + throw new IllegalArgumentException("State is required and can not be null"); + } + } + public MDCProcessState getState() { + return this.state; + } + public Timestamp getStateChangeTime() { + return state_change_time; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java new file mode 100644 index 00000000000..5d7ec8ff047 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java @@ -0,0 +1,61 @@ +package edu.harvard.iq.dataverse.makedatacount; + +import jakarta.ejb.EJBException; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; + +import java.util.List; + +@Named +@Stateless +public class MakeDataCountProcessStateServiceBean { + + @PersistenceContext(unitName = "VDCNet-ejbPU") + protected EntityManager em; + + public 
MakeDataCountProcessState getMakeDataCountProcessState(String yearMonth) { + validateYearMonth(yearMonth); + MakeDataCountProcessState mdcps = null; + String queryStr = "SELECT d FROM MakeDataCountProcessState d WHERE d.yearMonth = '" + yearMonth + "' "; + Query query = em.createQuery(queryStr); + List resultList = query.getResultList(); + if (resultList.size() > 1) { + throw new EJBException("More than one MakeDataCount Process State record found for YearMonth " + yearMonth + "."); + } + if (resultList.size() == 1) { + mdcps = (MakeDataCountProcessState) resultList.get(0); + } + return mdcps; + } + + public MakeDataCountProcessState setMakeDataCountProcessState(String yearMonth, String state) { + MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth); + if (mdcps == null) { + mdcps = new MakeDataCountProcessState(yearMonth, state); + } else { + mdcps.setState(state); + } + return em.merge(mdcps); + } + + public boolean deleteMakeDataCountProcessState(String yearMonth) { + MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth); + if (mdcps == null) { + return false; + } else { + em.remove(mdcps); + em.flush(); + return true; + } + } + + private void validateYearMonth(String yearMonth) { + // Check yearMonth format. 
either yyyy-mm or yyyy-mm-dd + if (yearMonth == null || (!yearMonth.matches("\\d{4}-\\d{2}") && !yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"))) { + throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd"); + } + } +} diff --git a/src/main/resources/db/migration/V6.1.0.8.sql b/src/main/resources/db/migration/V6.1.0.8.sql new file mode 100644 index 00000000000..b8f466c0b73 --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.8.sql @@ -0,0 +1,10 @@ +CREATE TABLE IF NOT EXISTS makedatacountprocessstate ( + id SERIAL NOT NULL, + yearMonth VARCHAR(16) NOT NULL UNIQUE, + state ENUM('new', 'done', 'skip', 'processing', 'failed') NOT NULL, + state_change_time TIMESTAMP WITHOUT TIME ZONE DEFAULT now(), + PRIMARY KEY (ID) + ); + +CREATE INDEX IF NOT EXISTS INDEX_makedatacountprocessstate_yearMonth ON makedatacountprocessstate (yearMonth); + diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java index 7a113fd4caa..dbfd853edd1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState; +import io.restassured.path.json.JsonPath; import io.restassured.RestAssured; import io.restassured.response.Response; import java.io.File; @@ -7,8 +9,13 @@ import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; import org.apache.commons.io.FileUtils; +import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; + +import org.hamcrest.Matchers; import 
org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -171,6 +178,39 @@ public void testMakeDataCountGetMetric() throws IOException { } + @Test + public void testGetUpdateDeleteProcessingState() { + String yearMonth = "2000-01"; + // make sure it isn't in the DB + Response deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + deleteState.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404))); + + Response getState = UtilIT.makeDataCountGetProcessingState(yearMonth); + getState.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + Response updateState = UtilIT.makeDataCountUpdateProcessingState(yearMonth, MakeDataCountProcessState.MDCProcessState.PROCESSING.toString()); + updateState.then().assertThat().statusCode(OK.getStatusCode()); + getState = UtilIT.makeDataCountGetProcessingState(yearMonth); + getState.then().assertThat().statusCode(OK.getStatusCode()); + JsonPath stateJson = JsonPath.from(getState.body().asString()); + stateJson.prettyPrint(); + String state1 = stateJson.getString("data.state"); + assertThat(state1, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.PROCESSING.name())); + String updateTimestamp1 = stateJson.getString("data.state-change-timestamp"); + + updateState = UtilIT.makeDataCountUpdateProcessingState(yearMonth, MakeDataCountProcessState.MDCProcessState.DONE.toString()); + updateState.then().assertThat().statusCode(OK.getStatusCode()); + stateJson = JsonPath.from(updateState.body().asString()); + stateJson.prettyPrint(); + String state2 = stateJson.getString("data.state"); + String updateTimestamp2 = stateJson.getString("data.state-change-timestamp"); + assertThat(state2, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.DONE.name())); + + assertThat(updateTimestamp2, Matchers.is(Matchers.greaterThan(updateTimestamp1))); + + deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + 
deleteState.then().assertThat().statusCode(OK.getStatusCode()); + } + /** * Ignore is set on this test because it requires database edits to pass. * There are currently two citions for doi:10.7910/DVN/HQZOOB but you have diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 080ca0c43e9..ba36911ffae 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3135,6 +3135,19 @@ static Response makeDataCountUpdateCitationsForDataset(String idOrPersistentIdOf return requestSpecification.post("/api/admin/makeDataCount/" + idInPath + "/updateCitationsForDataset"+ optionalQueryParam); } + static Response makeDataCountGetProcessingState(String yearMonth) { + RequestSpecification requestSpecification = given(); + return requestSpecification.get("/api/admin/makeDataCount/" + yearMonth + "/processingState"); + } + static Response makeDataCountUpdateProcessingState(String yearMonth, String state) { + RequestSpecification requestSpecification = given(); + return requestSpecification.post("/api/admin/makeDataCount/" + yearMonth + "/processingState?state=" + state); + } + static Response makeDataCountDeleteProcessingState(String yearMonth) { + RequestSpecification requestSpecification = given(); + return requestSpecification.delete("/api/admin/makeDataCount/" + yearMonth + "/processingState"); + } + static Response editDDI(String body, String fileId, String apiToken) { if (apiToken == null) { apiToken = ""; From 8898d5367b34215c0991f300b072d2fe6fd4de91 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 26 Mar 2024 10:25:04 -0400 Subject: [PATCH 2/7] adding release note --- doc/release-notes/10424-new-api-for-mdc.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 doc/release-notes/10424-new-api-for-mdc.md diff --git a/doc/release-notes/10424-new-api-for-mdc.md b/doc/release-notes/10424-new-api-for-mdc.md 
new file mode 100644 index 00000000000..8fb1f6d9e3d --- /dev/null +++ b/doc/release-notes/10424-new-api-for-mdc.md @@ -0,0 +1,11 @@ +The API endpoint `api/admin/makeDataCount/{yearMonth}/processingState` has been added to Get, Create/Update(POST), and Delete a State for processing Make Data Count logged metrics +For Create/Update the 'state' is passed in through a query parameter. +Example +- `curl POST http://localhost:8080/api/admin/makeDataCount/2024-03/processingState?state=Skip` + +Valid values for state are [New, Done, Skip, Processing, and Failed] +'New' can be used to re-trigger the processing of the data for the year-month specified. +'Skip' will prevent the file from being processed. +'Processing' shows the state where the file is currently being processed. +'Failed' shows the state where the file has failed and will be re-processed in the next run. If you don't want the file to be re-processed set the state to 'Skip'. +'Done' is the state where the file has been successfully processed. 
From 243bafed1363c2edd67d8a3dcf75b6ae76b29bfd Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 26 Mar 2024 11:35:49 -0400 Subject: [PATCH 3/7] adding test for invalid state --- .../iq/dataverse/api/MakeDataCountApi.java | 4 ++-- .../MakeDataCountProcessState.java | 17 ++++++++++----- .../iq/dataverse/api/MakeDataCountApiIT.java | 21 +++++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 38023327274..d94ab42c516 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -232,8 +232,8 @@ public Response updateProcessingState(@PathParam("yearMonth") String yearMonth, MakeDataCountProcessState mdcps; try { mdcps = makeDataCountProcessStateService.setMakeDataCountProcessState(yearMonth, state); - } catch (IllegalArgumentException e) { - return error(Status.BAD_REQUEST,e.getMessage()); + } catch (Exception e) { + return badRequest(e.getMessage()); } JsonObjectBuilder output = Json.createObjectBuilder(); diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java index f49640214e9..bde705abf44 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java @@ -5,6 +5,7 @@ import java.io.Serializable; import java.sql.Timestamp; import java.time.Instant; +import java.util.Arrays; @Entity public class MakeDataCountProcessState implements Serializable { @@ -19,6 +20,16 @@ public enum MDCProcessState { private MDCProcessState(final String text) { this.text = text; } + public static MDCProcessState fromString(String text) { + if (text != null) { + for (MDCProcessState state 
: MDCProcessState.values()) { + if (text.equals(state.text)) { + return state; + } + } + } + throw new IllegalArgumentException("State must be one of these values: " + Arrays.asList(MDCProcessState.values()) + "."); + } @Override public String toString() { return text; @@ -52,11 +63,7 @@ public void setState(MDCProcessState state) { this.state_change_time = Timestamp.from(Instant.now()); } public void setState(String state) throws IllegalArgumentException { - if (state != null) { - setState(MDCProcessState.valueOf(state.toUpperCase())); - } else { - throw new IllegalArgumentException("State is required and can not be null"); - } + setState(MDCProcessState.fromString(state)); } public MDCProcessState getState() { return this.state; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java index dbfd853edd1..64856461703 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java @@ -211,6 +211,27 @@ public void testGetUpdateDeleteProcessingState() { deleteState.then().assertThat().statusCode(OK.getStatusCode()); } + @Test + public void testUpdateProcessingStateWithInvalidState() { + String yearMonth = "2000-02"; + // make sure it isn't in the DB + Response deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + deleteState.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404))); + + Response stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "InvalidState"); + stateResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "new"); + stateResponse.then().assertThat().statusCode(OK.getStatusCode()); + stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "InvalidState"); + stateResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + 
stateResponse = UtilIT.makeDataCountGetProcessingState(yearMonth); + stateResponse.then().assertThat().statusCode(OK.getStatusCode()); + JsonPath stateJson = JsonPath.from(stateResponse.body().asString()); + String state = stateJson.getString("data.state"); + assertThat(state, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.NEW.name())); + } + /** * Ignore is set on this test because it requires database edits to pass. * There are currently two citions for doi:10.7910/DVN/HQZOOB but you have From db46350a0278a66f990272058c0c0bebb96cf1c2 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 27 Mar 2024 12:46:21 -0400 Subject: [PATCH 4/7] remove sql table create in favor of automatic table creation by JPA --- .../makedatacount/MakeDataCountProcessState.java | 1 + src/main/resources/db/migration/V6.1.0.8.sql | 10 ---------- 2 files changed, 1 insertion(+), 10 deletions(-) delete mode 100644 src/main/resources/db/migration/V6.1.0.8.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java index bde705abf44..9b6ce457de9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java @@ -8,6 +8,7 @@ import java.util.Arrays; @Entity +@Table(indexes = {@Index(columnList="yearMonth")}) public class MakeDataCountProcessState implements Serializable { @Id @GeneratedValue(strategy = GenerationType.IDENTITY) diff --git a/src/main/resources/db/migration/V6.1.0.8.sql b/src/main/resources/db/migration/V6.1.0.8.sql deleted file mode 100644 index b8f466c0b73..00000000000 --- a/src/main/resources/db/migration/V6.1.0.8.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE TABLE IF NOT EXISTS makedatacountprocessstate ( - id SERIAL NOT NULL, - yearMonth VARCHAR(16) NOT NULL UNIQUE, - state ENUM('new', 'done', 'skip', 'processing', 
'failed') NOT NULL, - state_change_time TIMESTAMP WITHOUT TIME ZONE DEFAULT now(), - PRIMARY KEY (ID) - ); - -CREATE INDEX IF NOT EXISTS INDEX_makedatacountprocessstate_yearMonth ON makedatacountprocessstate (yearMonth); - From b974f14e45fc2dfc3e2db0dbc2fd8724775ef0ab Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 27 Mar 2024 14:32:26 -0400 Subject: [PATCH 5/7] review comments --- .../java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java | 4 ++-- .../dataverse/makedatacount/MakeDataCountProcessState.java | 6 +++--- .../edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index d94ab42c516..1f2f1039327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -219,7 +219,7 @@ public Response getProcessingState(@PathParam("yearMonth") String yearMonth) { JsonObjectBuilder output = Json.createObjectBuilder(); output.add("yearMonth", mdcps.getYearMonth()); output.add("state", mdcps.getState().name()); - output.add("state-change-timestamp", mdcps.getStateChangeTime().toString()); + output.add("stateChangeTimestamp", mdcps.getStateChangeTime().toString()); return ok(output); } else { return error(Status.NOT_FOUND, "Could not find an existing process state for " + yearMonth); @@ -239,7 +239,7 @@ public Response updateProcessingState(@PathParam("yearMonth") String yearMonth, JsonObjectBuilder output = Json.createObjectBuilder(); output.add("yearMonth", mdcps.getYearMonth()); output.add("state", mdcps.getState().name()); - output.add("state-change-timestamp", mdcps.getStateChangeTime().toString()); + output.add("stateChangeTimestamp", mdcps.getStateChangeTime().toString()); return ok(output); } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java index 9b6ce457de9..2241a2c4ca8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java @@ -41,7 +41,7 @@ public String toString() { @Column(nullable = false) private MDCProcessState state; @Column(nullable = true) - private Timestamp state_change_time; + private Timestamp stateChangeTimestamp; public MakeDataCountProcessState() { } public MakeDataCountProcessState (String yearMonth, String state) { @@ -61,7 +61,7 @@ public String getYearMonth() { } public void setState(MDCProcessState state) { this.state = state; - this.state_change_time = Timestamp.from(Instant.now()); + this.stateChangeTimestamp = Timestamp.from(Instant.now()); } public void setState(String state) throws IllegalArgumentException { setState(MDCProcessState.fromString(state)); @@ -70,6 +70,6 @@ public MDCProcessState getState() { return this.state; } public Timestamp getStateChangeTime() { - return state_change_time; + return stateChangeTimestamp; } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java index 64856461703..69bdd8ee515 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java @@ -195,14 +195,14 @@ public void testGetUpdateDeleteProcessingState() { stateJson.prettyPrint(); String state1 = stateJson.getString("data.state"); assertThat(state1, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.PROCESSING.name())); - String updateTimestamp1 = stateJson.getString("data.state-change-timestamp"); + String updateTimestamp1 = stateJson.getString("data.stateChangeTimestamp"); updateState = 
UtilIT.makeDataCountUpdateProcessingState(yearMonth, MakeDataCountProcessState.MDCProcessState.DONE.toString()); updateState.then().assertThat().statusCode(OK.getStatusCode()); stateJson = JsonPath.from(updateState.body().asString()); stateJson.prettyPrint(); String state2 = stateJson.getString("data.state"); - String updateTimestamp2 = stateJson.getString("data.state-change-timestamp"); + String updateTimestamp2 = stateJson.getString("data.stateChangeTimestamp"); assertThat(state2, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.DONE.name())); assertThat(updateTimestamp2, Matchers.is(Matchers.greaterThan(updateTimestamp1))); From 82e35b842958e7a272748c17a16bf8c3935124f7 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 27 Mar 2024 15:01:20 -0400 Subject: [PATCH 6/7] adding docs --- .../source/developers/make-data-count.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/sphinx-guides/source/developers/make-data-count.rst b/doc/sphinx-guides/source/developers/make-data-count.rst index 8eaa5c0d7f8..d64fff9ccc7 100644 --- a/doc/sphinx-guides/source/developers/make-data-count.rst +++ b/doc/sphinx-guides/source/developers/make-data-count.rst @@ -88,6 +88,23 @@ To read more about the Make Data Count api, see https://github.com/datacite/sash You can compare the MDC metrics display with the Dataverse installation's original by toggling the ``:DisplayMDCMetrics`` setting (true by default to display MDC metrics). +New Make Data Count Processing for Your Dataverse Installation +-------------------------------------------------------------- + +A new script (release date TBD) will be available for processing archived Dataverse log files. Monthly logs that are zipped, TARed, and copied to an archive can be processed by this script running nightly or weekly. +The script will keep track of the state of each tar file they are processed. Through the following APIs the state of each file can be checked or modified. 
+Setting the state to 'Skip' will prevent the file from being processed if the developer needs to analyze the contents. +'Failed' files will be re-tried in a later run. +'Done' files are successful and will be ignored going forward. +The file(s) currently being processed will have the state 'Processing'. +The states are [NEW, DONE, SKIP, PROCESSING, FAILED] +The script will process the newest set of log files (merging files from multiple nodes) and calling counter_processor. +The Admin APIs to manage the states include a GET, POST, and DELETE(For Testing). +yearMonth must be in the format yyyymm or yyyymmdd +``curl -X GET http://localhost:8080/api/admin/{yearMonth}/processingState`` +``curl -X POST http://localhost:8080/api/admin/{yearMonth}/processingState?state=done`` +``curl -X DELETE http://localhost:8080/api/admin/{yearMonth}/processingState`` + Resources --------- From 6cb9a4c76f970b5c60d30e2cb33b23af85e13911 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 27 Mar 2024 16:09:35 -0400 Subject: [PATCH 7/7] doc tweaks for MDC processingState API #10424 --- doc/release-notes/10424-new-api-for-mdc.md | 14 +++----- .../source/developers/make-data-count.rst | 34 +++++++++++++------ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/doc/release-notes/10424-new-api-for-mdc.md b/doc/release-notes/10424-new-api-for-mdc.md index 8fb1f6d9e3d..fef8ee2af22 100644 --- a/doc/release-notes/10424-new-api-for-mdc.md +++ b/doc/release-notes/10424-new-api-for-mdc.md @@ -1,11 +1,5 @@ -The API endpoint `api/admin/makeDataCount/{yearMonth}/processingState` has been added to Get, Create/Update(POST), and Delete a State for processing Make Data Count logged metrics -For Create/Update the 'state' is passed in through a query parameter. 
-Example -- `curl POST http://localhost:8080/api/admin/makeDataCount/2024-03/processingState?state=Skip` +(Please put at the bottom of the list under 🌐 API) -Valid values for state are [New, Done, Skip, Processing, and Failed] -'New' can be used to re-trigger the processing of the data for the year-month specified. -'Skip' will prevent the file from being processed. -'Processing' shows the state where the file is currently being processed. -'Failed' shows the state where the file has failed and will be re-processed in the next run. If you don't want the file to be re-processed set the state to 'Skip'. -'Done' is the state where the file has been successfully processed. +### Experimental Make Data Count processingState API + +An experimental Make Data Count processingState API has been added. For now it has been documented in the developer guide: https://guides.dataverse.org/en/6.2/developers/make-data-count.html#processing-archived-logs diff --git a/doc/sphinx-guides/source/developers/make-data-count.rst b/doc/sphinx-guides/source/developers/make-data-count.rst index d64fff9ccc7..43779c35f7c 100644 --- a/doc/sphinx-guides/source/developers/make-data-count.rst +++ b/doc/sphinx-guides/source/developers/make-data-count.rst @@ -88,21 +88,33 @@ To read more about the Make Data Count api, see https://github.com/datacite/sash You can compare the MDC metrics display with the Dataverse installation's original by toggling the ``:DisplayMDCMetrics`` setting (true by default to display MDC metrics). -New Make Data Count Processing for Your Dataverse Installation --------------------------------------------------------------- +Processing Archived Logs +------------------------ A new script (release date TBD) will be available for processing archived Dataverse log files. Monthly logs that are zipped, TARed, and copied to an archive can be processed by this script running nightly or weekly. -The script will keep track of the state of each tar file they are processed. 
Through the following APIs the state of each file can be checked or modified. -Setting the state to 'Skip' will prevent the file from being processed if the developer needs to analyze the contents. -'Failed' files will be re-tried in a later run. -'Done' files are successful and will be ignored going forward. -The file(s) currently being processed will have the state 'Processing'. -The states are [NEW, DONE, SKIP, PROCESSING, FAILED] -The script will process the newest set of log files (merging files from multiple nodes) and calling counter_processor. -The Admin APIs to manage the states include a GET, POST, and DELETE(For Testing). -yearMonth must be in the format yyyymm or yyyymmdd + +The script will keep track of the state of each tar file as they are processed and will make use of the following "processingState" API endpoints, which allow the state of each file to be checked or modified. + +The possible states are new, done, skip, processing, and failed. + +Setting the state to "skip" will prevent the file from being processed if the developer needs to analyze the contents. + +"failed" files will be re-tried in a later run. + +"done" files are successful and will be ignored going forward. + +The files currently being processed will have the state "processing". + +The script will process the newest set of log files (merging files from multiple nodes) and call Counter Processor. + +APIs to manage the states include GET, POST, and DELETE (for testing), as shown below. + +Note: ``yearMonth`` must be in the format ``yyyy-mm`` or ``yyyy-mm-dd``. + ``curl -X GET http://localhost:8080/api/admin/{yearMonth}/processingState`` + ``curl -X POST http://localhost:8080/api/admin/{yearMonth}/processingState?state=done`` + ``curl -X DELETE http://localhost:8080/api/admin/{yearMonth}/processingState`` Resources