Skip to content

Commit

Permalink
Merge pull request mercedes-benz#2572 from mercedes-benz/feature-2554…
Browse files Browse the repository at this point in the history
…-add-wildcards-to-webscan-includes-excludes

Extend webscan include/exclude feature mercedes-benz#2554
  • Loading branch information
winzj authored Oct 13, 2023
2 parents 363e86b + 723c62f commit f4adb8e
Show file tree
Hide file tree
Showing 10 changed files with 323 additions and 287 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ public enum SecHubConfigurationModelValidationError {

WEB_SCAN_NO_HEADER_VALUE_DEFINED("The value for a HTTP header is not defined!"),

WEB_SCAN_INCLUDE_INVALID("The value of an include is invalid!"),

WEB_SCAN_EXCLUDE_INVALID("The value of an exclude is invalid!"),

WEB_SCAN_HTTP_HEADER_ONLY_FOR_URL_IS_NOT_A_VALID_URL("The URL for a HTTP header is not a valid URL!"),

WEB_SCAN_HTTP_HEADER_ONLY_FOR_URL_DOES_NOT_CONTAIN_TARGET_URL("The URL for a HTTP header does not contain the base URL that shall be scanned!"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
Expand All @@ -35,6 +36,10 @@ public class SecHubConfigurationModelValidator {
private static final int MAX_METADATA_LABEL_VALUE_LENGTH = 150;
private static final int MAX_METADATA_LABEL_AMOUNT = 20;

// Upper bound for the number of entries accepted in a web scan "includes" list.
private static final int MAX_LIST_SIZE_INCLUDES = 500;
// Upper bound for the number of entries accepted in a web scan "excludes" list.
private static final int MAX_LIST_SIZE_EXCLUDES = 500;
// Maximum number of characters allowed for a single include/exclude entry.
private static final int MAX_LENGTH_PATH_SIZE = 2048;

SecHubConfigurationModelSupport modelSupport = new SecHubConfigurationModelSupport();

private List<String> supportedVersions;
Expand Down Expand Up @@ -262,18 +267,36 @@ private void handleWebScanConfiguration(InternalValidationContext context) {
if (SimpleNetworkUtils.isURINullOrEmpty(uri)) {

context.result.addError(WEB_SCAN_HAS_NO_URL_DEFINED);
return;

} else if (!SimpleNetworkUtils.isHttpProtocol(uri)) {

String schema = SimpleStringUtils.truncateWhenTooLong(uri.getScheme(), 5);
context.result.addError(WEB_SCAN_URL_HAS_UNSUPPORTED_SCHEMA, "Schema was: " + schema + " but supported is only HTTP/HTTPS");
return;
}

handleIncludesAndExcludes(context, webScan);
handleApi(context, webScan);
handleHTTPHeaders(context, webScan);

}

/**
 * Validates the optional exclude and include lists of a web scan
 * configuration. Excludes are validated first, includes afterwards. Lists
 * which are not present are ignored.
 *
 * @param context the validation context which is wrapped into a web scan
 *                specific validation context
 * @param webScan the web scan configuration providing the target URL and the
 *                optional include/exclude lists
 */
private void handleIncludesAndExcludes(InternalValidationContext context, SecHubWebScanConfiguration webScan) {
    // lower cased target URL which always ends with a slash, so patterns can
    // simply be appended
    String sanitizedTargetUrl = addTrailingSlashToUrlWhenMissingAndLowerCase(webScan.getUrl().toString());
    WebScanConfigurationModelValidationContext webScanContext = new WebScanConfigurationModelValidationContext(context, sanitizedTargetUrl,
            Collections.emptyList());

    webScan.getExcludes().ifPresent(excludes -> validateExcludesOrIncludes(webScanContext, excludes, false));
    webScan.getIncludes().ifPresent(includes -> validateExcludesOrIncludes(webScanContext, includes, true));
}

private void handleApi(InternalValidationContext context, SecHubWebScanConfiguration webScan) {
Optional<SecHubWebScanApiConfiguration> apiOpt = webScan.getApi();
if (!apiOpt.isPresent()) {
Expand All @@ -300,6 +323,66 @@ private void handleHTTPHeaders(InternalValidationContext context, SecHubWebScanC
validateHeaderOnlyForUrlNotDuplicated(webScanContext);
}

/**
 * Validates a list of include or exclude patterns of a web scan.
 * <p>
 * The list is rejected as a whole when it has more entries than allowed. An
 * entry which is <code>null</code> is reported as invalid. Validation stops
 * at the first entry which exceeds the maximum length. All other entries are
 * validated one by one, so every invalid pattern gets reported.
 *
 * @param webScanContext the web scan validation context which collects the
 *                       failures
 * @param urlList        the include or exclude patterns to validate
 * @param include        <code>true</code> when the list contains includes,
 *                       <code>false</code> when it contains excludes
 */
private void validateExcludesOrIncludes(WebScanConfigurationModelValidationContext webScanContext, List<String> urlList, boolean include) {
    String term = "excludes";
    SecHubConfigurationModelValidationError validationError = WEB_SCAN_EXCLUDE_INVALID;
    int maxListSize = MAX_LIST_SIZE_EXCLUDES;

    if (include) {
        term = "includes";
        validationError = WEB_SCAN_INCLUDE_INVALID;
        maxListSize = MAX_LIST_SIZE_INCLUDES;
    }

    if (urlList.size() > maxListSize) {
        webScanContext.markAsFailed(validationError, "A maximum of " + maxListSize + " " + term + " are allowed.");
        return;
    }

    for (String subUrlPattern : urlList) {
        if (subUrlPattern == null) {
            // without this guard a null entry would end in a NullPointerException
            // at the length check below instead of a validation error
            webScanContext.markAsFailed(validationError, "The " + term + " must not contain null entries.");
            continue;
        }
        if (subUrlPattern.length() > MAX_LENGTH_PATH_SIZE) {
            // only the allowed part of the entry is echoed back to the user
            String truncatedPattern = subUrlPattern.substring(0, MAX_LENGTH_PATH_SIZE);
            webScanContext.markAsFailed(validationError, "Maximum URL length is " + MAX_LENGTH_PATH_SIZE + " characters. The first " + MAX_LENGTH_PATH_SIZE
                    + " characters of the URL in question: " + truncatedPattern);
            // an entry which is too long ends the validation of this list
            return;
        }
        // we do not return if the pattern of one include/exclude was wrong,
        // to be able to tell the user all wrong includes and excludes
        validateIncludeOrExcludePattern(webScanContext, subUrlPattern, include);
    }
}

/**
 * Validates a single include or exclude pattern.
 * <p>
 * A pattern containing <code>//</code> is rejected directly. Otherwise the
 * pattern is appended to the sanitized target URL (a leading slash of the
 * pattern is dropped before), the wildcards are removed and the result must
 * be convertible into a URL.
 *
 * @param webScanContext the web scan validation context which collects the
 *                       failures and provides the sanitized target URL
 * @param subUrlPattern  the include or exclude pattern to validate
 * @param include        <code>true</code> when the pattern is an include,
 *                       <code>false</code> when it is an exclude
 */
private void validateIncludeOrExcludePattern(WebScanConfigurationModelValidationContext webScanContext, String subUrlPattern, boolean include) {
    SecHubConfigurationModelValidationError error = include ? WEB_SCAN_INCLUDE_INVALID : WEB_SCAN_EXCLUDE_INVALID;
    String messagePrefix = (include ? "The include: " : "The exclude: ") + subUrlPattern;

    if (subUrlPattern.contains("//")) {
        webScanContext.markAsFailed(error, messagePrefix + " contains '//'!");
        return;
    }

    // the sanitized target URL is extended by the pattern without its leading slash
    String relativePart = subUrlPattern.startsWith("/") ? subUrlPattern.substring(1) : subUrlPattern;
    String createdIncludeOrExcludeUrl = createUrlWithoutWildCards(webScanContext.sanatizedTargetUrl + relativePart);

    try {
        new URI(createdIncludeOrExcludeUrl).toURL();
    } catch (URISyntaxException | MalformedURLException e) {
        webScanContext.markAsFailed(error, messagePrefix + " does create an invalid URL without the wild cards : " + createdIncludeOrExcludeUrl);
    }
}

private void validateHeaderOnlyForUrlNotDuplicated(WebScanConfigurationModelValidationContext webScanContext) {
if (webScanContext.failed) {
return;
Expand Down Expand Up @@ -375,14 +458,14 @@ private String addTrailingSlashToUrlWhenMissingAndLowerCase(String url) {
return resultUrl.toLowerCase();
}

private String createLowerCasedUrlAndAddTrailingSlashIfMissing(String onlyForUrl) {
private String createLowerCasedUrlAndAddTrailingSlashIfMissing(String url) {
// ensure "https://mywebapp.com/" and "https://mywebapp.com" are accepted as the
// same. This way we can check if this URL contains our scan target URL.
return addTrailingSlashToUrlWhenMissingAndLowerCase(onlyForUrl);
return addTrailingSlashToUrlWhenMissingAndLowerCase(url);
}

private String createUrlWithoutWildCards(String onlyForUrl) {
return PATTERN_QUOTED_WEBSCAN_URL_WILDCARD_SYMBOL.matcher(onlyForUrl).replaceAll("");
private String createUrlWithoutWildCards(String url) {
return PATTERN_QUOTED_WEBSCAN_URL_WILDCARD_SYMBOL.matcher(url).replaceAll("");
}

private void handleInfraScanConfiguration(InternalValidationContext context) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
// SPDX-License-Identifier: MIT
package com.mercedesbenz.sechub.commons.model;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.net.InetAddress;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
Expand Down Expand Up @@ -1320,6 +1327,176 @@ void explicit_definitions_for_the_same_header_for_certain_urls_but_list_of_urls_
assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_NON_UNIQUE_HEADER_CONFIGURATION);
}

// Every entry is used as the only include AND the only exclude of the web
// scan; none of them may lead to a validation error.
@ParameterizedTest
@EmptySource
@ValueSource(strings = { "/", "<*>", "/<*>", "<*>/<*>", "/en/contacts", "/en/contacts/<*>", "<*>/en/contacts/<*>", "<*>/en/<*>/contacts/<*>",
        "<*>/en/<*>/<*>/contacts/<*>", "<*>/en<*><*>contacts/<*>", "en/contacts/<*>", "en/contacts", "en/contacts/" })
void valid_include_and_exclude_has_no_errors(String includeExcludeEntry) {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();

    List<String> singleEntry = Arrays.asList(includeExcludeEntry);
    webScan.excludes = Optional.of(singleEntry);
    webScan.includes = Optional.of(singleEntry);

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasNoErrors(result);
}

// Only entries containing "//" belong here. Entries with spaces are handled
// by the dedicated test for spaces in includes and excludes.
@ParameterizedTest
@ValueSource(strings = { "//en/contacts", "/en//contacts", "/en/contacts//" })
void double_slashes_include_exclude_has_errors(String includeExcludeEntry) {
    /* prepare */
    List<String> entryAsList = Arrays.asList(includeExcludeEntry);
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    sechubConfiguration.getWebScan().get().excludes = Optional.of(entryAsList);
    sechubConfiguration.getWebScan().get().includes = Optional.of(entryAsList);

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_EXCLUDE_INVALID);
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_INCLUDE_INVALID);
}

// Every entry contains at least one space and is used as the only include
// AND the only exclude; both must be reported as invalid.
@ParameterizedTest
@ValueSource(strings = { " ", " /en/contacts", "/en/ contacts/", "/en/contacts " })
void spaces_in_include_exclude_has_errors(String includeExcludeEntry) {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();

    List<String> singleEntry = Arrays.asList(includeExcludeEntry);
    webScan.excludes = Optional.of(singleEntry);
    webScan.includes = Optional.of(singleEntry);

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_EXCLUDE_INVALID);
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_INCLUDE_INVALID);
}

// An exclude list with more entries than allowed must be reported as invalid.
@Test
void too_many_excludes_results_in_error() {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();
    webScan.excludes = Optional.of(createListWithTooManyIncludesOrExcludes());

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_EXCLUDE_INVALID);
}

// An include list with more entries than allowed must be reported as invalid.
@Test
void too_many_includes_results_in_error() {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();
    webScan.includes = Optional.of(createListWithTooManyIncludesOrExcludes());

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_INCLUDE_INVALID);
}

// A single exclude entry which is too long must be reported as invalid.
@Test
void exclude_too_long_results_in_error() {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();
    webScan.excludes = Optional.of(createTooLongIncludeOrExcludeEntry());

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_EXCLUDE_INVALID);
}

// A single include entry which is too long must be reported as invalid.
@Test
void include_too_long_results_in_error() {
    /* prepare */
    SecHubScanConfiguration sechubConfiguration = createSecHubConfigurationWithWebScanPart();
    SecHubWebScanConfiguration webScan = sechubConfiguration.getWebScan().get();
    webScan.includes = Optional.of(createTooLongIncludeOrExcludeEntry());

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasError(result, SecHubConfigurationModelValidationError.WEB_SCAN_INCLUDE_INVALID);
}

// Loads a web scan configuration with wildcard includes and excludes from
// the test resources and expects the validation to report no errors.
@Test
void can_read_sechub_web_scan_config_with_wildcards() {
    /* prepare */
    String configFilePath = "src/test/resources/sechub_config_web_scan_includes_excludes_with_wildcards.json";
    SecHubScanConfiguration sechubConfiguration = SecHubScanConfiguration.createFromJSON(TestFileReader.loadTextFile(configFilePath));

    modelSupportCollectedScanTypes.add(ScanType.WEB_SCAN);

    /* execute */
    SecHubConfigurationModelValidationResult result = validatorToTest.validate(sechubConfiguration);

    /* test */
    assertHasNoErrors(result);
}

// Creates a configuration with API version "1.0" which contains a web scan
// having only the target URL set.
private SecHubScanConfiguration createSecHubConfigurationWithWebScanPart() {
    SecHubScanConfiguration configuration = new SecHubScanConfiguration();
    configuration.setApiVersion("1.0");

    SecHubWebScanConfiguration webScan = new SecHubWebScanConfiguration();
    webScan.url = URI.create("https://www.gamechanger.example.org/");
    configuration.setWebScan(webScan);

    return configuration;
}

// Creates a list with 501 entries: "/myapp1" up to "/myapp501".
private List<String> createListWithTooManyIncludesOrExcludes() {
    List<String> tooManyEntries = new LinkedList<>();
    int index = 1;
    while (index <= 501) {
        tooManyEntries.add("/myapp" + index);
        index++;
    }
    return tooManyEntries;
}

// Creates a list with exactly one entry: a leading slash followed by 64
// repetitions of a 32 character chunk, which results in 2049 characters.
private List<String> createTooLongIncludeOrExcludeEntry() {
    StringBuilder tooLongEntry = new StringBuilder("/");
    int remainingChunks = 64;
    while (remainingChunks > 0) {
        tooLongEntry.append("abcdefghijklmnopqrstuvwxyz012345");
        remainingChunks--;
    }

    List<String> singleEntryList = new LinkedList<>();
    singleEntryList.add(tooLongEntry.toString());
    return singleEntryList;
}

private SecHubWebScanConfiguration createWebScanConfigurationWithHeader(String targetUrl, String onlyForUrl) {
String headerName = "Authorization";
String headerValue = "secret-key";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"apiVersion" : "1.0",
"webScan" : {
"url" : "https://www.gamechanger.example.org",
"includes" : [ "/special/include", "/special/include/<*>", "<*>/special/<*>/include/<*>", "<*>/special/include/<*>", "special/include/<*>", "special/include", "special/include/" ],
"excludes" : [ "/en/contact", "/en/contacts/<*>", "<*>/en/<*>/contacts/<*>", "<*>/en/contacts/<*>", "en/contacts/<*>", "en/contacts", "en/contacts/" ]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,27 @@ include::sechub_config_example2_webscan_anonyous.json[]
<1> Define web scan
<2> The `URL` to scan. This `URL` must be whitelisted in `{sechub}` project. Normally without a slash `/` at the end.
<3> *Optional*: Define includes, if you have a special path that is linked nowhere,
so the scanner can not detect it automatically while crawling the application.
Always use them starting with a slash (`/`) because they are interpreted relative to the `URL` provided before.
so the scanner can not detect it automatically while crawling the application. You can use wildcards by using the symbol `<*>` like in the example above.
To make the scan work, the target URL will always be implicitly included with `"https://www.gamechanger.example.org<*>"` if no includes are specified. If includes are specified, the scan is limited to these includes.
- Includes starting with a slash (`/`) like `"includes": [ "/special/include","/special/include/<*>"]` are interpreted relative to the scan target `URL` provided before.
- Includes not starting with a slash (`/`) like `"includes": [ "<*>/en/contacts/<*>","en/contacts/<*>","en/contacts","en/contacts/"]` are interpreted as enclosed by wildcards like the first include in the list example: `"<*>/en/contacts/<*>"`.
<4> *Optional*: Define excludes, if you have a special path you want to exclude, from the scan.
Always use them starting with a slash (`/`) because they are interpreted relative to the `URL` provided before.
You can use excludes the same way you can use the includes.
Excludes do always overwrite includes if the provided patterns for includes and excludes do have intersections.
<5> *Optional*: Define the maximum duration the scan can take.
Scanning a "large" web page/application can take several hours or even days.
This option ensures the scan will only run for a limited time.
<6> Define the unit of time.
The unit can be: `millisecond`, `second`, `minute`, `hour`, `day`.

[CAUTION]
====
Includes behave differently from excludes with regard to wildcards: in includes, wildcards might not be resolved properly if the pages behind the wildcards cannot be detected by a web crawler.
If you only want to scan a specific part of your application, e.g. only the customers section `https://my-application.com/customer/`,
you can specify the target URL: `"url": "https://my-application.com"` and the wanted include starting with a slash like this: `"includes": [ "/customer/<*>"]`.
====

===== Login
A web scan does work much better if it has got access to all content
- so a login is necessary most time. If you do not define a login configuration
Expand Down
Loading

0 comments on commit f4adb8e

Please sign in to comment.