|
37 | 37 | the Jars. |
38 | 38 |
|
39 | 39 | By default, while hadoop-* artifacts are all included, dependencies |
40 | | - are omitted for nearly everything. |
41 | | - * keeps size down |
42 | | - * keeps CVE attack surface down |
| 40 | + are omitted for all cloud connectors except hadoop-azure and |
| 41 | + possibly hadoop-gcp and hadoop-tos modules. |
| 42 | + For hadoop-aws the AWS SDK bundle.jar is omitted, but everything else is included. |
| 43 | +
|
| 44 | + * This keeps binary release size below the limit of Apache distributions |
| 45 | + * Reduces download and size overhead in Docker usage. |
| 46 | + * Reduces the CVE attack surface |
| 47 | + * Reduces the risk of classpath conflict. |
| 48 | +
|
43 | 49 | To produce a build with the specific desired dependencies, the build must be executed |
44 | | - with the relevant profile of ${module}-dependencies. |
| 50 | + with the relevant profile of ${module}-package. |
45 | 51 |
|
46 | 52 | For example, a build with the hadoop-aws and hadoop-azure-datalake dependencies, |
47 | | - run with |
48 | | - -Phadoop-aws-dependencies -Phadoop-azure-datalake-dependencies |
| 53 | + build with -Dhadoop-aws-package -Dhadoop-azure-datalake-package |
| 54 | +
|
| 55 | + Available package profiles: |
| 56 | + hadoop-aliyun-package |
| 57 | + hadoop-aws-package |
| 58 | + hadoop-azure-datalake-package |
| 59 | + hadoop-cos-package |
| 60 | + hadoop-gcp-package |
| 61 | + hadoop-huaweicloud-package |
| 62 | + hadoop-tos-package |
| 63 | +
|
| 64 | + To build a complete distribution, run: |
| 65 | + mvn package -Pdist -DskipTests -Phadoop-aliyun-package,hadoop-aws-package,hadoop-azure-datalake-package\ |
| 66 | + -Phadoop-cos-package,hadoop-gcp-package,hadoop-huaweicloud-package,hadoop-tos-package |
| 67 | +
|
49 | 68 | --> |
50 | 69 | <properties> |
51 | 70 | <hadoop.component>cloud-storage</hadoop.component> |
|
130 | 149 | <groupId>org.apache.hadoop</groupId> |
131 | 150 | <artifactId>hadoop-gcp</artifactId> |
132 | 151 | <scope>compile</scope> |
133 | | - <!-- |
134 | | - Exclude transitive dependencies to prevent dependency convergence |
135 | | - problems. hadoop-gcp is a self-contained shaded jar. |
136 | | - --> |
137 | 152 | <exclusions> |
138 | 153 | <exclusion> |
139 | 154 | <groupId>*</groupId> |
|
202 | 217 | </build> |
203 | 218 | </profile> |
204 | 219 |
|
205 | | - |
206 | 220 | <!-- Pull in aliyun --> |
207 | 221 | <profile> |
208 | | - <id>hadoop-aliyun-dependencies</id> |
| 222 | + <id>hadoop-aliyun-package</id> |
209 | 223 | <activation> |
210 | | - <activeByDefault>false</activeByDefault> |
| 224 | + <property><name>hadoop-aliyun-package</name></property> |
211 | 225 | </activation> |
212 | 226 | <dependencies> |
213 | 227 | <dependency> |
|
220 | 234 |
|
221 | 235 | <!-- Pull in the AWS SDK --> |
222 | 236 | <profile> |
223 | | - <id>hadoop-aws-dependencies</id> |
| 237 | + <id>hadoop-aws-package</id> |
224 | 238 | <activation> |
225 | | - <activeByDefault>false</activeByDefault> |
| 239 | + <property><name>hadoop-aws-package</name></property> |
226 | 240 | </activation> |
227 | 241 | <dependencies> |
228 | 242 | <dependency> |
|
233 | 247 | </dependencies> |
234 | 248 | </profile> |
235 | 249 |
|
236 | | - <!-- Pull in all the cos --> |
| 250 | + <!-- Pull in ADLS gen1 support --> |
237 | 251 | <profile> |
238 | | - <id>hadoop-cos-dependencies</id> |
| 252 | + <id>hadoop-azure-datalake-package</id> |
239 | 253 | <activation> |
240 | | - <activeByDefault>false</activeByDefault> |
| 254 | + <property><name>hadoop-azure-datalake-package</name></property> |
241 | 255 | </activation> |
242 | 256 | <dependencies> |
243 | 257 | <dependency> |
244 | 258 | <groupId>org.apache.hadoop</groupId> |
245 | | - <artifactId>hadoop-cos</artifactId> |
| 259 | + <artifactId>hadoop-azure-datalake</artifactId> |
246 | 260 | <scope>compile</scope> |
247 | 261 | </dependency> |
248 | 262 | </dependencies> |
249 | 263 | </profile> |
250 | 264 |
|
251 | | - <!-- Pull in ADLS gen1 --> |
| 265 | + <!-- Pull in all the hadoop-cos dependencies --> |
252 | 266 | <profile> |
253 | | - <id>hadoop-azure-datalake-dependencies</id> |
| 267 | + <id>hadoop-cos-package</id> |
254 | 268 | <activation> |
255 | | - <activeByDefault>false</activeByDefault> |
| 269 | + <property><name>hadoop-cos-package</name></property> |
256 | 270 | </activation> |
257 | 271 | <dependencies> |
258 | 272 | <dependency> |
259 | 273 | <groupId>org.apache.hadoop</groupId> |
260 | | - <artifactId>hadoop-azure-datalake</artifactId> |
| 274 | + <artifactId>hadoop-cos</artifactId> |
261 | 275 | <scope>compile</scope> |
262 | 276 | </dependency> |
263 | 277 | </dependencies> |
264 | 278 | </profile> |
265 | 279 |
|
266 | 280 | <!-- Pull in the huaweicloud dependencies --> |
267 | 281 | <profile> |
268 | | - <id>hadoop-huaweicloud-dependencies</id> |
| 282 | + <id>hadoop-huaweicloud-package</id> |
269 | 283 | <activation> |
270 | | - <activeByDefault>false</activeByDefault> |
| 284 | + <property><name>hadoop-huaweicloud-package</name></property> |
271 | 285 | </activation> |
272 | 286 | <dependencies> |
273 | 287 | <dependency> |
|
284 | 298 | </dependencies> |
285 | 299 | </profile> |
286 | 300 |
|
| 301 | + <!-- Pull in the gcp dependencies --> |
| 302 | + <profile> |
| 303 | + <id>hadoop-gcp-package</id> |
| 304 | + <activation> |
| 305 | + <property><name>hadoop-gcp-package</name></property> |
| 306 | + </activation> |
| 307 | + <dependencies> |
| 308 | + <dependency> |
| 309 | + <groupId>org.apache.hadoop</groupId> |
| 310 | + <artifactId>hadoop-gcp</artifactId> |
| 311 | + <scope>compile</scope> |
| 312 | + </dependency> |
| 313 | + </dependencies> |
| 314 | + </profile> |
| 315 | + |
287 | 316 | <!-- Pull in Volcano TOS --> |
288 | 317 | <profile> |
289 | | - <id>hadoop-tos-dependencies</id> |
| 318 | + <id>hadoop-tos-package</id> |
290 | 319 | <activation> |
291 | | - <activeByDefault>false</activeByDefault> |
| 320 | + <property><name>hadoop-tos-package</name></property> |
292 | 321 | </activation> |
293 | 322 | <dependencies> |
294 | 323 | <dependency> |
|
0 commit comments