Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for LazyList for all the harvesters. #616

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,15 @@ class CommunityWebsHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
for (result <- iter(inputStream)) {
iter(inputStream).foreach(result => {
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
.getLogger(this.getClass)
.error(s"Caught exception on $inFile.", exception)
case _ => // do nothing
}
}
})
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,15 @@ class DplaJsonlFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
for (result <- iter(inputStream)) yield {
iter(inputStream).foreach( result => {
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
.getLogger(this.getClass)
.error(s"Caught exception on $inFile.", exception)
case _ => //do nothing
}
}
})
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ object FileFilters {
val gzFilter: FileFilter = newFilter("gz")
val xmlFilter: FileFilter = newFilter("xml")
val zipFilter: FileFilter = newFilter("zip")
val txtFilter: FileFilter = newFilter("txt")
}
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,13 @@ class FlFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
for (result <- iter(inputStream)) yield {
iter(inputStream).foreach(result => {
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager.getLogger(this.getClass).error(s"Caught exception on $inFile.", exception)
case _ => //do nothing
}
}
})
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class HathiFileHarvester(
)
)

for (tarResult <- iter(inputStream)) yield {
iter(inputStream).foreach(tarResult => {
handleFile(tarResult, unixEpoch) match {
case Failure(exception) =>
logger
Expand All @@ -139,7 +139,7 @@ class HathiFileHarvester(
)
case _ => //do nothing
}
}
})

IOUtils.closeQuietly(inputStream)
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,15 @@ class HeartlandFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
for (result <- iter(inputStream)) yield {
iter(inputStream).foreach(result =>
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
.getLogger(this.getClass)
.error(s"Caught exception on $inFile.", exception)
case _ => //do nothing
}
}
)
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,13 @@ class NYPLFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
for (result <- iter(inputStream)) {
iter(inputStream).foreach( result =>
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager.getLogger(this.getClass).error(s"Caught exception on $inFile.", exception)
case Success(_) => // do nothing
}
}
)
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ class NaraDeltaHarvester(
)
)

val recordCount = (for (tarResult <- iter(inputStream)) yield {
val recordCount = iter(inputStream).map(tarResult =>
handleFile(tarResult, unixEpoch, file.getName) match {
case Failure(exception) =>
val logger = LogManager.getLogger(this.getClass)
Expand All @@ -266,7 +266,7 @@ class NaraDeltaHarvester(
case Success(count) =>
count
}
}).sum
).sum

logger.info(s"Harvested $recordCount records from ${file.getName}")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ class NaraFileHarvester(
)
)

val recordCount = (for (tarResult <- iter(inputStream)) yield {
val recordCount = iter(inputStream).map(tarResult =>
handleFile(tarResult, unixEpoch, file.getName) match {
case Failure(exception) =>
val logger = LogManager.getLogger(this.getClass)
Expand All @@ -319,8 +319,7 @@ class NaraFileHarvester(
0
case Success(count) =>
count
}
}).sum
}).sum

val logger = LogManager.getLogger(this.getClass)
logger.info(s"Harvested $recordCount records from ${file.getName}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ class NorthwestHeritageFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
val recordCount = (for (result <- iter(inputStream)) yield {
val recordCount = iter(inputStream).map(result =>
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
Expand All @@ -160,7 +160,7 @@ class NorthwestHeritageFileHarvester(
case Success(count) =>
count
}
}).sum
).sum
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,12 @@ class OaiFileHarvester(
* already be filtered by the FilenameFilter, I wonder if we even need the
* match statement here.
*/
def getInputStream(file: File): Option[ZipInputStream] = {
def getInputStream(file: File): Option[ZipInputStream] =
file.getName match {
case zipName if zipName.endsWith("zip") =>
Some(new ZipInputStream(new FileInputStream(file)))
case _ => None
}
}

/** Main logic for handling individual entries in the zip.
*
Expand Down Expand Up @@ -151,13 +150,13 @@ class OaiFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
iter(inputStream).foreach(result => {
iter(inputStream).foreach(result =>
handleFile(result, unixEpoch) match {
case Failure(exception) =>
logger.error(s"Caught exception on $inFile.", exception)
case Success(_) => //do nothing
}
})
)
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class SiFileHarvester(
private def getExpectedFileCounts(inFiles: File): Map[String, String] = {
var loadCounts = Map[String, String]()
inFiles
.listFiles(new TxtFileFilter)
.listFiles(FileFilters.txtFilter)
.foreach(file => {
Using(Source
.fromFile(file)) { source =>
Expand Down Expand Up @@ -234,7 +234,3 @@ class SiFileHarvester(

}

class TxtFileFilter extends FileFilter {
override def accept(pathname: File): Boolean =
pathname.getName.endsWith("txt")
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,12 @@ class VaFileHarvester(
* already be filtered by the FilenameFilter, I wonder if we even need the
* match statement here.
*/
def getInputStream(file: File): Option[ZipInputStream] = {
def getInputStream(file: File): Option[ZipInputStream] =
file.getName match {
case zipName if zipName.endsWith("zip") =>
Some(new ZipInputStream(new FileInputStream(file)))
case _ => None
}
}

/** Main logic for handling individual entries in the zip.
*
Expand Down Expand Up @@ -113,17 +112,16 @@ class VaFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
val recordCount = (for (result <- iter(inputStream)) yield {
iter(inputStream).foreach(result => {
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
.getLogger(this.getClass)
.error(s"Caught exception on $inFile.", exception)
0
case Success(count) =>
count
}
}).sum
})
IOUtils.closeQuietly(inputStream)
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,12 @@ class VtFileHarvester(
* already be filtered by the FilenameFilter, I wonder if we even need the
* match statement here.
*/
def getInputStream(file: File): Option[ZipInputStream] = {
def getInputStream(file: File): Option[ZipInputStream] =
file.getName match {
case zipName if zipName.endsWith("zip") =>
Some(new ZipInputStream(new FileInputStream(file)))
case _ => None
}
}

/** Main logic for handling individual entries in the zip.
*
Expand Down Expand Up @@ -109,17 +108,16 @@ class VtFileHarvester(
.getOrElse(
throw new IllegalArgumentException("Couldn't load ZIP files.")
)
(for (result <- iter(inputStream)) yield {
iter(inputStream).foreach(result =>
handleFile(result, unixEpoch) match {
case Failure(exception) =>
LogManager
.getLogger(this.getClass)
.error(s"Caught exception on $inFile.", exception)
0
case Success(count) =>
count

case Success(count) => ()
}
}).sum
)
IOUtils.closeQuietly(inputStream)
})

Expand Down