Commit 63563d7

Add transformer to deduplicate identical files based on content
1 parent a99ca9c commit 63563d7

File tree

1 file changed

+110
-0
lines changed

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
package com.github.jengelman.gradle.plugins.shadow.transformers

import java.io.File
import java.nio.ByteBuffer
import java.security.MessageDigest
import org.apache.tools.zip.ZipOutputStream
import org.gradle.api.GradleException
import org.gradle.api.file.FileTreeElement
import org.gradle.api.model.ObjectFactory
import org.gradle.api.tasks.util.PatternSet

/**
 * Transformer to include files with identical content only once in the shadow JAR.
 *
 * Multiple files with the same path but different content lead to an error.
 *
 * Some scenarios for duplicate resources in a shadow jar:
 * * Duplicate `.class` files.
 *   Having duplicate `.class` files with different content indicates that the resulting jar is
 *   built with _incompatible_ classes, likely leading to issues at runtime.
 *   This situation can happen when one dependency is (also) included in an uber jar.
 * * Duplicate `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml` files.
 *   Some dependencies contain shaded variants of other dependencies.
 *   Tools that inspect jar files to extract the included dependencies, for example for license auditing
 *   use cases, or tools that collect information about all included dependencies, may rely on these files.
 *   Hence, it is desirable to retain these duplicate `pom.properties`/`xml` resources.
 *
 * `DeduplicatingResourceTransformer` checks all entries in the resulting jar.
 * It is generally not recommended to use any of the [include] configuration functions.
 *
 * There are reasons to retain duplicate resources with different contents in the resulting jar.
 * This can be achieved with the [exclude] configuration functions.
 *
 * To exclude a path or pattern from being deduplicated, for example legitimate
 * `META-INF/<group-id>/<artifact-id>/pom.properties`/`xml` files, configure the transformer with an exclusion
 * like the following:
 * ```kotlin
 * tasks.named<ShadowJar>("shadowJar").configure {
 *   // Keep pom.* files from different Guava versions in the jar.
 *   exclude("META-INF/maven/com.google.guava/guava/pom.*")
 *   // Duplicates with different content for all other resource paths will raise an error.
 * }
 * ```
 *
 * *Tip*: the [com.github.jengelman.gradle.plugins.shadow.tasks.FindResourceInClasspath] convenience task
 * can be used to find resources in a Gradle classpath/configuration.
 *
 * *Warning*: Do **not** combine [PreserveFirstFoundResourceTransformer] with this transformer.
 */
@CacheableTransformer
public open class DeduplicatingResourceTransformer(
  final override val objectFactory: ObjectFactory,
  patternSet: PatternSet,
) : PatternFilterableResourceTransformer(patternSet) {
  // Per resource path, the observed files grouped by their content hash. A path that maps to
  // more than one hash has duplicates with different content.
  private val sources: MutableMap<String, MutableMap<Long, MutableList<File>>> = LinkedHashMap()

  override fun canTransformResource(element: FileTreeElement): Boolean {
    if (!patternSpec.isSatisfiedBy(element)) {
      return false
    }

    val perPathPerHashFiles = sources.computeIfAbsent(element.path) { LinkedHashMap() }

    val file = element.file
    val hash = hashForFile(file)
    val withSameContent = perPathPerHashFiles.computeIfAbsent(hash) { mutableListOf() }
    withSameContent.add(file)

    // The first file seen for a path is not claimed and is copied into the jar as usual;
    // every later file for the same path is claimed so it is not copied a second time.
    return perPathPerHashFiles.size > 1 || withSameContent.size > 1
  }

  override fun hasTransformedResource(): Boolean = true

  override fun modifyOutputStream(os: ZipOutputStream, preserveFileTimestamps: Boolean) {
    // Paths that were seen with more than one distinct content hash are real conflicts.
    val duplicatePaths = sources.filter { (_, filesByHash) -> filesByHash.size > 1 }

    if (duplicatePaths.isNotEmpty()) {
      val message =
        "Found ${duplicatePaths.size} path duplicate(s) with different content in the shadow JAR:" +
          duplicatePaths
            .map { (path, filesByHash) ->
              " * $path${filesByHash.map { (hash, files) ->
                files.joinToString { file -> " * ${file.path} (Hash: $hash)" }
              }.joinToString("\n", "\n", "")}"
            }
            .joinToString("\n", "\n", "")
      throw GradleException(message)
    }
  }

  private val digest: MessageDigest by lazy { MessageDigest.getInstance("SHA-256") }

  private fun hashForFile(file: File): Long {
    try {
      file.inputStream().use {
        val buffer = ByteArray(8192)
        while (true) {
          val rd = it.read(buffer)
          if (rd == -1) {
            break
          }
          digest.update(buffer, 0, rd)
        }
      }
      // digest() resets the MessageDigest, so the shared instance can be reused for the next file.
      return ByteBuffer.wrap(digest.digest()).getLong(0)
    } catch (e: Exception) {
      throw RuntimeException("Failed to read data or calculate hash for $file", e)
    }
  }
}
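For context, here is a minimal registration sketch for a consuming build script. The `transform<...>()` call and the per-transformer `exclude` configuration are assumptions about how this transformer would typically be wired into a `ShadowJar` task, mirroring how other Shadow transformers are registered; they are not part of this commit.

```kotlin
// build.gradle.kts — hypothetical usage sketch, not part of this commit.
import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
import com.github.jengelman.gradle.plugins.shadow.transformers.DeduplicatingResourceTransformer

tasks.named<ShadowJar>("shadowJar").configure {
  // Assumed registration call: claim duplicate entries and fail the build on conflicting content.
  transform<DeduplicatingResourceTransformer> {
    // Keep legitimate per-artifact pom.* duplicates, as in the KDoc example above.
    exclude("META-INF/maven/com.google.guava/guava/pom.*")
  }
}
```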

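Separately, a self-contained sketch of the content-identity check that `hashForFile` relies on: the SHA-256 digest of a file is truncated to its first 8 bytes and read as a `Long`. The helper name `contentHash`, the `main` function, and the temporary files are hypothetical and for illustration only.

```kotlin
import java.io.File
import java.nio.ByteBuffer
import java.security.MessageDigest

// Hypothetical helper mirroring hashForFile: SHA-256 over the file bytes,
// truncated to the first 8 bytes and read as a Long.
fun contentHash(file: File): Long {
  val digest = MessageDigest.getInstance("SHA-256")
  file.inputStream().use { input ->
    val buffer = ByteArray(8192)
    while (true) {
      val read = input.read(buffer)
      if (read == -1) break
      digest.update(buffer, 0, read)
    }
  }
  return ByteBuffer.wrap(digest.digest()).getLong(0)
}

fun main() {
  val a = File.createTempFile("dup", ".txt").apply { writeText("same bytes") }
  val b = File.createTempFile("dup", ".txt").apply { writeText("same bytes") }
  val c = File.createTempFile("dup", ".txt").apply { writeText("different bytes") }
  println(contentHash(a) == contentHash(b)) // true: identical content, identical hash
  println(contentHash(a) == contentHash(c)) // false: different content, reported as a conflict
}
```

Truncating the digest to 64 bits keeps the per-path map keys cheap to store and compare; a collision would make two different files look identical, but for the number of entries in a typical jar that risk is negligible.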