Skip to content

Commit

Permalink
chore: clean up old/missed search indexes in SearchWriterSuite (#1901)
Browse files Browse the repository at this point in the history
  • Loading branch information
niehaus59 authored Mar 31, 2023
1 parent 9a6cf03 commit 7ffa970
Showing 1 changed file with 45 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ import org.apache.http.client.methods.HttpDelete
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.DataFrame

import java.time.LocalDateTime
import java.time.format.{DateTimeFormatterBuilder, DateTimeParseException, SignStyle}
import java.time.temporal.ChronoField
import java.util.UUID
import scala.collection.mutable
import scala.concurrent.blocking
Expand All @@ -29,6 +32,12 @@ class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister

private val testServiceName = "mmlspark-azure-search"

// Timestamp format used as the trailing part of generated index names: a 4-digit year followed by
// MMddHHmmssSSS, i.e. 17 digits with no separators.
// The year is added via appendValue with a fixed width of 4 rather than a leading 'yyyy' pattern:
// with no separator after the year, a 'yyyy' pattern can let the parser consume the whole string
// as the year and fail with an exception.
val formatter: java.time.format.DateTimeFormatter = {
  val fixedWidthYear = new DateTimeFormatterBuilder()
    .appendValue(ChronoField.YEAR_OF_ERA, 4, 4, SignStyle.EXCEEDS_PAD)
  fixedWidthYear.appendPattern("MMddHHmmssSSS").toFormatter()
}

private def createTestData(numDocs: Int): DataFrame = {
(0 until numDocs)
.map(i => ("upload", s"$i", s"file$i", s"text$i"))
Expand Down Expand Up @@ -68,7 +77,8 @@ class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister
// Names of the indexes handed out by generateIndexName; afterAll intersects this list with the
// indexes that exist on the service to decide which ones to delete.
private val createdIndexes: mutable.ListBuffer[String] = mutable.ListBuffer.empty[String]

// Builds a fresh index name, records it in createdIndexes, and returns it.
// The name is "test-" followed by the hashCode of a random UUID and, in the second `val name`
// line below, a "-" plus the current local date-time rendered with `formatter`.
// NOTE(review): this span comes from a rendered diff, so `val name` appears twice; the first
// definition looks like the pre-change line that the two lines after it replace - confirm
// against the actual file before compiling.
private def generateIndexName(): String = {
val name = s"test-${UUID.randomUUID().hashCode()}"
// Timestamp suffix; cleanOldIndexes parses a trailing 17-digit suffix with the same formatter.
val date = formatter.format(LocalDateTime.now())
val name = s"test-${UUID.randomUUID().hashCode()}-${date}"
// Remember the name so afterAll can delete the index.
createdIndexes.append(name)
name
}
Expand All @@ -82,22 +92,50 @@ class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister
createSimpleIndexJson(indexName))
}

/** Sends an HTTP DELETE for the named index on the test search service.
  *
  * @param indexName name of the index to delete
  * @return the HTTP status code of the response
  */
def deleteIndex(indexName: String): Int = {
  val request = new HttpDelete(
    s"https://$testServiceName.search.windows.net/indexes/$indexName?api-version=2017-11-11")
  // Authenticate with the service key.
  request.setHeader("api-key", azureSearchKey)
  safeSend(request).getStatusLine.getStatusCode
}

// Suite teardown: deletes the indexes this suite created, then sweeps old timestamped indexes,
// then runs the parent teardown, and finally asserts that the deletions succeeded.
override def afterAll(): Unit = {
//TODO make this existing search indices when multiple builds are allowed
println("Cleaning up services")
// Only indexes that both exist on the service and are recorded in createdIndexes are deleted.
// Cleanup counts as successful when every resulting status code equals 204.
// NOTE(review): this span comes from a rendered diff; the inline HttpDelete lines inside the
// lambda look like the pre-change code that the deleteIndex(n) call replaces - confirm against
// the actual file. As written here, the lambda's value is the result of deleteIndex(n).
val successfulCleanup = getExisting(azureSearchKey, testServiceName)
.intersect(createdIndexes).map { n =>
val deleteRequest = new HttpDelete(
s"https://$testServiceName.search.windows.net/indexes/$n?api-version=2017-11-11")
deleteRequest.setHeader("api-key", azureSearchKey)
val response = safeSend(deleteRequest)
response.getStatusLine.getStatusCode
deleteIndex(n)
}.forall(_ == 204)
// Sweep indexes whose names end in a timestamp older than two days.
// NOTE(review): if this call throws, super.afterAll() and the assert below are not reached.
cleanOldIndexes()
super.afterAll()
// The assertion runs after the parent teardown, so a failed cleanup does not skip super.afterAll().
assert(successfulCleanup)
()
}

/** Deletes indexes on the test service whose names end in a timestamp older than two days.
  *
  * Only names ending in "-" followed by 17 digits are considered. Suffixes that `formatter`
  * cannot parse are skipped. The status code returned by each delete is not checked.
  */
def cleanOldIndexes(): Unit = {
  import scala.util.matching.Regex

  val timestampSuffix: Regex = "^.*-(\\d{17})$".r
  val cutoff = LocalDateTime.now().minusDays(2)

  // True when the suffix parses to a date-time strictly before the cutoff; an unparseable
  // suffix is treated as "not stale" so the index is left alone.
  def isStale(stamp: String): Boolean =
    try {
      LocalDateTime.parse(stamp, formatter).isBefore(cutoff)
    } catch {
      case _: DateTimeParseException => false
    }

  getExisting(azureSearchKey, testServiceName).foreach {
    case indexName @ timestampSuffix(stamp) if isStale(stamp) => deleteIndex(indexName)
    case _ => ()
  }
}

private def retryWithBackoff[T](f: => T,
timeouts: List[Long] =
List(5000, 10000, 50000, 100000, 200000, 200000)): T = {
Expand Down Expand Up @@ -147,7 +185,7 @@ class SearchWriterSuite extends TestBase with AzureSearchKey with IndexLister
assert(SearchIndex.getStatistics(indexName, azureSearchKey, testServiceName)._1 == size)
()
}

ignore("clean up all search indexes"){
getExisting(azureSearchKey, testServiceName)
.foreach { n =>
Expand Down

0 comments on commit 7ffa970

Please sign in to comment.