Skip to content

Commit

Permalink
Remove Java W/ARC processing and replace it with Sparkling. (#533)
Browse files Browse the repository at this point in the history
- Remove old Java W/ARC processing code
- Remove Java formatter GitHub action
- Update pom.xml to remove Java dependencies and add Sparkling dependencies
- Remove shadowed AU tika-parsers dependency, and use org.apache.tika again
- Use Sparkling to extend ArchiveRecord
- Filter on response and revisit records
- Add domain column to webpages()
- Fix discardDate issue
- Add missing spreadsheet mimetype
- Documentation and formatting updates
- Update tests
- resolves #534 
- resolves #532
- resolves #494
- resolves #493
- resolves #492
- resolves #260
- resolves #247 
- resolves #182
- resolves #76
- resolves #74
- resolves #73
- resolves #23
- resolves #18
- Thank you @helgeho!!!

Co-authored-by: Helge Holzmann <helgeho@invelop.de>
  • Loading branch information
ruebot and helgeho authored May 24, 2022
1 parent d37cdbb commit c8fa256
Show file tree
Hide file tree
Showing 23 changed files with 288 additions and 1,609 deletions.
17 changes: 0 additions & 17 deletions .github/workflows/java-formatter.yml

This file was deleted.

1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# The Archives Unleashed Toolkit
[![codecov](https://codecov.io/gh/archivesunleashed/aut/branch/main/graph/badge.svg)](https://codecov.io/gh/archivesunleashed/aut)
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.archivesunleashed/aut/badge.svg)](https://maven-badges.herokuapp.com/maven-central/io.archivesunleashed/aut)
[![Javadoc](https://img.shields.io/badge/Javadoc-0.91.0-blue?style=flat)](https://api.docs.archivesunleashed.io/0.91.0/apidocs/index.html)
[![Scaladoc](https://img.shields.io/badge/Scaladoc-0.91.0-blue?style=flat)](https://api.docs.archivesunleashed.io/0.91.0/scaladocs/io/archivesunleashed/index.html)
[![UserDocs](https://img.shields.io/badge/UserDocs-0.91.0-blue?style=flat)](https://aut.docs.archivesunleashed.org/docs/home)
[![LICENSE](https://img.shields.io/badge/license-Apache-blue.svg?style=flat)](https://www.apache.org/licenses/LICENSE-2.0)
Expand Down
91 changes: 39 additions & 52 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
<scala.binary.version>2.12</scala.binary.version>
<hadoop.version>2.7.4</hadoop.version>
<spark.version>3.0.1</spark.version>
<guava.version>29.0-jre</guava.version>
<github.global.server>github</github.global.server>
<license.plugin.version>3.0</license.plugin.version>
<release.plugin.version>2.5.2</release.plugin.version>
<scm-provider-gitexe.plugin.version>1.9.5</scm-provider-gitexe.plugin.version>
<changelog.plugin.version>2.3</changelog.plugin.version>
<github-site.plugin.version>0.12</github-site.plugin.version>
<javadoc.plugin.version>3.1.1</javadoc.plugin.version>
<gpg.plugin.version>1.6</gpg.plugin.version>
<build-helper.plugin.version>3.0.0</build-helper.plugin.version>
<deploy.plugin.version>2.8.2</deploy.plugin.version>
Expand Down Expand Up @@ -67,10 +67,6 @@
<id>maven</id>
<url>https://repo.maven.apache.org/maven2/</url>
</repository>
<repository>
<id>mvn-repo</id>
<url>https://raw.githubusercontent.com/archivesunleashed/aut-resources/master/mvn-repo</url>
</repository>
<repository>
<id>jitpack.io</id>
<url>https://jitpack.io</url>
Expand Down Expand Up @@ -107,7 +103,12 @@
<resource>META-INF/services/org.apache.lucene.codecs.Codec</resource>
</transformer>
</transformers>

<relocations>
<relocation>
<pattern>com.google.common.</pattern>
<shadedPattern>com.google.common.shaded.</shadedPattern>
</relocation>
</relocations>
<!-- This fixes the issue "Invalid signature file digest for Manifest main attributes"
cf. http://zhentao-li.blogspot.com/2012/06/maven-shade-plugin-invalid-signature.html -->
<filters>
Expand Down Expand Up @@ -192,12 +193,9 @@
<configuration>
<header>config/LICENSE_HEADER.txt</header>
<mapping>
<java>SLASHSTAR_STYLE</java>
<scala>SLASHSTAR_STYLE</scala>
</mapping>
<includes>
<include>src/main/java/**</include>
<include>src/test/java/**</include>
<include>src/main/scala/**</include>
<include>src/test/scala/**</include>
</includes>
Expand Down Expand Up @@ -281,34 +279,6 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>${javadoc.plugin.version}</version>
<configuration>
<javadocExecutable>${java.home}/bin/javadoc</javadocExecutable>
<linksource>true</linksource>
<quiet>true</quiet>
<source>11</source>
</configuration>
<executions>
<execution>
<id>verify-javadocs</id>
<goals>
<goal>jar</goal>
<goal>test-jar</goal>
</goals>
</execution>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
<goal>javadoc</goal>
</goals>
<phase>site</phase>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-changelog-plugin</artifactId>
<version>${changelog.plugin.version}</version>
Expand Down Expand Up @@ -371,13 +341,6 @@
<argLine>--illegal-access=permit</argLine>
</configuration>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>${javadoc.plugin.version}</version>
<configuration>
<linksource>true</linksource>
</configuration>
</plugin>
<plugin>
<artifactId>maven-jxr-plugin</artifactId>
<version>${jxr.plugin.version}</version>
Expand Down Expand Up @@ -454,6 +417,21 @@
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.12</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.14</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
Expand Down Expand Up @@ -505,6 +483,18 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand All @@ -518,10 +508,7 @@
<version>${tika.version}</version>
</dependency>
<dependency>
<!-- See issue #302.
<groupId>org.apache.tika</groupId>
-->
<groupId>com.github.archivesunleashed.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>${tika.version}</version>
</dependency>
Expand Down Expand Up @@ -576,6 +563,11 @@
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>com.github.internetarchive</groupId>
<artifactId>Sparkling</artifactId>
<version>main-SNAPSHOT</version>
</dependency>
</dependencies>

<developers>
Expand Down Expand Up @@ -622,11 +614,6 @@
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>${javadoc.plugin.version}</version>
</plugin>
<plugin>
<artifactId>maven-gpg-plugin</artifactId>
<version>${gpg.plugin.version}</version>
Expand Down
180 changes: 0 additions & 180 deletions src/main/java/io/archivesunleashed/data/ArcRecordUtils.java

This file was deleted.

Loading

0 comments on commit c8fa256

Please sign in to comment.