diff --git a/src/sec_certs/dataset/cc.py b/src/sec_certs/dataset/cc.py index 398f2435..44c56afa 100644 --- a/src/sec_certs/dataset/cc.py +++ b/src/sec_certs/dataset/cc.py @@ -143,6 +143,27 @@ def targets_txt_dir(self) -> Path: """ return self.targets_dir / "txt" + @property + def certificates_dir(self) -> Path: + """ + Returns directory that holds files associated with the certificates + """ + return self.certs_dir / "certificates" + + @property + def certificates_pdf_dir(self) -> Path: + """ + Returns directory that holds PDFs associated with certificates + """ + return self.certificates_dir / "pdf" + + @property + def certificates_txt_dir(self) -> Path: + """ + Returns directory that holds TXTs associated with certificates + """ + return self.certificates_dir / "txt" + @property def pp_dataset_path(self) -> Path: """ @@ -242,7 +263,14 @@ def _set_local_paths(self): self.auxiliary_datasets.mu_dset.root_dir = self.mu_dataset_dir for cert in self: - cert.set_local_paths(self.reports_pdf_dir, self.targets_pdf_dir, self.reports_txt_dir, self.targets_txt_dir) + cert.set_local_paths( + self.reports_pdf_dir, + self.targets_pdf_dir, + self.certificates_pdf_dir, + self.reports_txt_dir, + self.targets_txt_dir, + self.certificates_txt_dir, + ) # TODO: This forgets to set local paths for other auxiliary datasets def _merge_certs(self, certs: dict[str, CCCertificate], cert_source: str | None = None) -> None: @@ -531,11 +559,12 @@ def _parse_table( def _download_all_artifacts_body(self, fresh: bool = True) -> None: self._download_reports(fresh) self._download_targets(fresh) + self._download_certs(fresh) @staged(logger, "Downloading PDFs of CC certification reports.") def _download_reports(self, fresh: bool = True) -> None: self.reports_pdf_dir.mkdir(parents=True, exist_ok=True) - certs_to_process = [x for x in self if x.state.report_is_ok_to_download(fresh) and x.report_link] + certs_to_process = [x for x in self if x.state.report.is_ok_to_download(fresh) and x.report_link] if 
not fresh and certs_to_process: logger.info( @@ -551,7 +580,7 @@ def _download_reports(self, fresh: bool = True) -> None: @staged(logger, "Downloading PDFs of CC security targets.") def _download_targets(self, fresh: bool = True) -> None: self.targets_pdf_dir.mkdir(parents=True, exist_ok=True) - certs_to_process = [x for x in self if x.state.report_is_ok_to_download(fresh)] + certs_to_process = [x for x in self if x.state.st.is_ok_to_download(fresh)] if not fresh and certs_to_process: logger.info( @@ -564,10 +593,26 @@ def _download_targets(self, fresh: bool = True) -> None: progress_bar_desc="Downloading PDFs of CC security targets", ) + @staged(logger, "Downloading PDFs of CC certificates.") + def _download_certs(self, fresh: bool = True) -> None: + self.certificates_pdf_dir.mkdir(parents=True, exist_ok=True) + certs_to_process = [x for x in self if x.state.cert.is_ok_to_download(fresh)] + + if not fresh and certs_to_process: + logger.info( + f"Downloading {len(certs_to_process)} PDFs of CC certificates for which previous download failed.." 
+ ) + + cert_processing.process_parallel( + CCCertificate.download_pdf_cert, + certs_to_process, + progress_bar_desc="Downloading PDFs of CC certificates", + ) + @staged(logger, "Converting PDFs of certification reports to txt.") def _convert_reports_to_txt(self, fresh: bool = True) -> None: self.reports_txt_dir.mkdir(parents=True, exist_ok=True) - certs_to_process = [x for x in self if x.state.report_is_ok_to_convert(fresh)] + certs_to_process = [x for x in self if x.state.report.is_ok_to_convert(fresh)] if not fresh and certs_to_process: logger.info( @@ -583,7 +628,7 @@ def _convert_reports_to_txt(self, fresh: bool = True) -> None: @staged(logger, "Converting PDFs of security targets to txt.") def _convert_targets_to_txt(self, fresh: bool = True) -> None: self.targets_txt_dir.mkdir(parents=True, exist_ok=True) - certs_to_process = [x for x in self if x.state.st_is_ok_to_convert(fresh)] + certs_to_process = [x for x in self if x.state.st.is_ok_to_convert(fresh)] if fresh: logger.info("Converting PDFs of security targets to txt.") @@ -598,13 +643,32 @@ def _convert_targets_to_txt(self, fresh: bool = True) -> None: progress_bar_desc="Converting PDFs of security targets to txt", ) + @staged(logger, "Converting PDFs of certificates to txt.") + def _convert_certs_to_txt(self, fresh: bool = True) -> None: + self.certificates_txt_dir.mkdir(parents=True, exist_ok=True) + certs_to_process = [x for x in self if x.state.cert.is_ok_to_convert(fresh)] + + if fresh: + logger.info("Converting PDFs of certificates to txt.") + if not fresh and certs_to_process: + logger.info( + f"Converting {len(certs_to_process)} PDFs of certificates to txt for which previous conversion failed." 
+ ) + + cert_processing.process_parallel( + CCCertificate.convert_cert_pdf, + certs_to_process, + progress_bar_desc="Converting PDFs of certificates to txt", + ) + def _convert_all_pdfs_body(self, fresh: bool = True) -> None: self._convert_reports_to_txt(fresh) self._convert_targets_to_txt(fresh) + self._convert_certs_to_txt(fresh) @staged(logger, "Extracting report metadata") def _extract_report_metadata(self) -> None: - certs_to_process = [x for x in self if x.state.report_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.report.is_ok_to_analyze()] processed_certs = cert_processing.process_parallel( CCCertificate.extract_report_pdf_metadata, certs_to_process, @@ -615,7 +679,7 @@ def _extract_report_metadata(self) -> None: @staged(logger, "Extracting target metadata") def _extract_target_metadata(self) -> None: - certs_to_process = [x for x in self if x.state.st_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.st.is_ok_to_analyze()] processed_certs = cert_processing.process_parallel( CCCertificate.extract_st_pdf_metadata, certs_to_process, @@ -624,13 +688,25 @@ def _extract_target_metadata(self) -> None: ) self.update_with_certs(processed_certs) + @staged(logger, "Extracting cert metadata") + def _extract_cert_metadata(self) -> None: + certs_to_process = [x for x in self if x.state.cert.is_ok_to_analyze()] + processed_certs = cert_processing.process_parallel( + CCCertificate.extract_cert_pdf_metadata, + certs_to_process, + use_threading=False, + progress_bar_desc="Extracting cert metadata", + ) + self.update_with_certs(processed_certs) + def _extract_pdf_metadata(self) -> None: self._extract_report_metadata() self._extract_target_metadata() + self._extract_cert_metadata() @staged(logger, "Extracting report frontpages") def _extract_report_frontpage(self) -> None: - certs_to_process = [x for x in self if x.state.report_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.report.is_ok_to_analyze()] 
processed_certs = cert_processing.process_parallel( CCCertificate.extract_report_pdf_frontpage, certs_to_process, @@ -639,24 +715,13 @@ def _extract_report_frontpage(self) -> None: ) self.update_with_certs(processed_certs) - @staged(logger, "Extracting target frontpages") - def _extract_target_frontpage(self) -> None: - certs_to_process = [x for x in self if x.state.st_is_ok_to_analyze()] - processed_certs = cert_processing.process_parallel( - CCCertificate.extract_st_pdf_frontpage, - certs_to_process, - use_threading=False, - progress_bar_desc="Extracting target frontpages", - ) - self.update_with_certs(processed_certs) - def _extract_pdf_frontpage(self) -> None: self._extract_report_frontpage() - self._extract_target_frontpage() + # We have no frontpage extraction for targets or certificates themselves, only for the reports. @staged(logger, "Extracting report keywords") def _extract_report_keywords(self) -> None: - certs_to_process = [x for x in self if x.state.report_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.report.is_ok_to_analyze()] processed_certs = cert_processing.process_parallel( CCCertificate.extract_report_pdf_keywords, certs_to_process, @@ -667,7 +732,7 @@ def _extract_report_keywords(self) -> None: @staged(logger, "Extracting target keywords") def _extract_target_keywords(self) -> None: - certs_to_process = [x for x in self if x.state.st_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.st.is_ok_to_analyze()] processed_certs = cert_processing.process_parallel( CCCertificate.extract_st_pdf_keywords, certs_to_process, @@ -676,9 +741,21 @@ def _extract_target_keywords(self) -> None: ) self.update_with_certs(processed_certs) + @staged(logger, "Extracting cert keywords") + def _extract_cert_keywords(self) -> None: + certs_to_process = [x for x in self if x.state.cert.is_ok_to_analyze()] + processed_certs = cert_processing.process_parallel( + CCCertificate.extract_cert_pdf_keywords, + certs_to_process, + 
use_threading=False, + progress_bar_desc="Extracting cert keywords", + ) + self.update_with_certs(processed_certs) + def _extract_pdf_keywords(self) -> None: self._extract_report_keywords() self._extract_target_keywords() + self._extract_cert_keywords() def extract_data(self) -> None: logger.info("Extracting various data from certification artifacts") @@ -688,7 +765,7 @@ def extract_data(self) -> None: @staged(logger, "Computing heuristics: Deriving information about laboratories involved in certification.") def _compute_cert_labs(self) -> None: - certs_to_process = [x for x in self if x.state.report_is_ok_to_analyze()] + certs_to_process = [x for x in self if x.state.report.is_ok_to_analyze()] for cert in certs_to_process: cert.compute_heuristics_cert_lab() diff --git a/src/sec_certs/model/cc_matching.py b/src/sec_certs/model/cc_matching.py index 4e6f5735..21d94b74 100644 --- a/src/sec_certs/model/cc_matching.py +++ b/src/sec_certs/model/cc_matching.py @@ -75,10 +75,10 @@ def match(self, cert: CCCertificate) -> float: if self._product == cert.name and self._vendor == cert.manufacturer: return 99 # If we match the report hash, return early. - if cert.state.report_pdf_hash == self._report_hash and self._report_hash is not None: + if cert.state.report.pdf_hash == self._report_hash and self._report_hash is not None: return 95 # If we match the target hash, return early. - if cert.state.st_pdf_hash == self._target_hash and self._target_hash is not None: + if cert.state.st.pdf_hash == self._target_hash and self._target_hash is not None: return 93 # Fuzzy match at the end with some penalization. 
diff --git a/src/sec_certs/model/references_nlp/segment_extractor.py b/src/sec_certs/model/references_nlp/segment_extractor.py index d63208ec..f77c8f5b 100644 --- a/src/sec_certs/model/references_nlp/segment_extractor.py +++ b/src/sec_certs/model/references_nlp/segment_extractor.py @@ -173,9 +173,9 @@ def _prepare_df_from_cc_dset(self, certs: Iterable[CCCertificate]) -> pd.DataFra - Loads manually annotated samples - Combines all of that into single dataframe """ - target_certs = [x for x in certs if x.heuristics.st_references.directly_referencing and x.state.st_txt_path] + target_certs = [x for x in certs if x.heuristics.st_references.directly_referencing and x.state.st.txt_path] report_certs = [ - x for x in certs if x.heuristics.report_references.directly_referencing and x.state.report_txt_path + x for x in certs if x.heuristics.report_references.directly_referencing and x.state.report.txt_path ] df_targets = self._build_df(target_certs, "target") df_reports = self._build_df(report_certs, "report") @@ -217,8 +217,8 @@ def get_cert_records(cert: CCCertificate, source: Literal["target", "report"]) - for key, val in actual_references.items() ] - (certs[0].state.report_txt_path.parent.parent / "txt_processed").mkdir(exist_ok=True, parents=True) - (certs[0].state.st_txt_path.parent.parent / "txt_processed").mkdir(exist_ok=True, parents=True) + (certs[0].state.report.txt_path.parent.parent / "txt_processed").mkdir(exist_ok=True, parents=True) + (certs[0].state.st.txt_path.parent.parent / "txt_processed").mkdir(exist_ok=True, parents=True) return list(itertools.chain.from_iterable(get_cert_records(cert, source) for cert in certs)) def _build_df(self, certs: list[CCCertificate], source: Literal["target", "report"]) -> pd.DataFrame: diff --git a/src/sec_certs/sample/cc.py b/src/sec_certs/sample/cc.py index 904c794d..28099f58 100644 --- a/src/sec_certs/sample/cc.py +++ b/src/sec_certs/sample/cc.py @@ -5,7 +5,6 @@ from collections import Counter, defaultdict from 
dataclasses import dataclass, field from datetime import date, datetime -from enum import Enum from pathlib import Path from typing import Any, ClassVar from urllib.parse import unquote_plus, urlparse @@ -28,20 +27,7 @@ from sec_certs.serialization.json import ComplexSerializableType from sec_certs.serialization.pandas import PandasSerializableType from sec_certs.utils import helpers -from sec_certs.utils.extract import normalize_match_string - -HEADERS = { - "anssi": sec_certs.utils.extract.search_only_headers_anssi, - "bsi": sec_certs.utils.extract.search_only_headers_bsi, - "nscib": sec_certs.utils.extract.search_only_headers_nscib, - "niap": sec_certs.utils.extract.search_only_headers_niap, - "canada": sec_certs.utils.extract.search_only_headers_canada, -} - - -class ReferenceType(Enum): - DIRECT = "direct" - INDIRECT = "indirect" +from sec_certs.utils.extract import normalize_match_string, scheme_frontpage_functions class CCCertificate( @@ -95,141 +81,87 @@ def from_dict(cls, dct: dict) -> CCCertificate.MaintenanceReport: def __lt__(self, other): return self.maintenance_date < other.maintenance_date - @dataclass(init=False) - class InternalState(ComplexSerializableType): - """ - Holds internal state of the certificate, whether downloads and converts of individual components succeeded. Also - holds information about errors and paths to the files. 
- """ - - st_download_ok: bool # Whether target download went OK - report_download_ok: bool # Whether report download went OK - st_convert_garbage: bool # Whether initial target conversion resulted in garbage - report_convert_garbage: bool # Whether initial report conversion resulted in garbage - st_convert_ok: bool # Whether overall target conversion went OK (either pdftotext or via OCR) - report_convert_ok: bool # Whether overall report conversion went OK (either pdftotext or via OCR) - st_extract_ok: bool # Whether target extraction went OK - report_extract_ok: bool # Whether report extraction went OK - - st_pdf_hash: str | None - report_pdf_hash: str | None - st_txt_hash: str | None - report_txt_hash: str | None - - _st_pdf_path: Path | None = None - _report_pdf_path: Path | None = None - _st_txt_path: Path | None = None - _report_txt_path: Path | None = None + @dataclass + class DocumentState(ComplexSerializableType): + download_ok: bool = False # Whether download went OK + convert_garbage: bool = False # Whether initial conversion resulted in garbage + convert_ok: bool = False # Whether overall conversion went OK (either pdftotext or via OCR) + extract_ok: bool = False # Whether extraction went OK - def __init__( - self, - st_download_ok: bool = False, - report_download_ok: bool = False, - st_convert_garbage: bool = False, - report_convert_garbage: bool = False, - st_convert_ok: bool = False, - report_convert_ok: bool = False, - st_extract_ok: bool = False, - report_extract_ok: bool = False, - st_pdf_hash: str | None = None, - report_pdf_hash: str | None = None, - st_txt_hash: str | None = None, - report_txt_hash: str | None = None, - ): - super().__init__() - self.st_download_ok = st_download_ok - self.report_download_ok = report_download_ok - self.st_convert_garbage = st_convert_garbage - self.report_convert_garbage = report_convert_garbage - self.st_convert_ok = st_convert_ok - self.report_convert_ok = report_convert_ok - self.st_extract_ok = st_extract_ok 
- self.report_extract_ok = report_extract_ok - self.st_pdf_hash = st_pdf_hash - self.report_pdf_hash = report_pdf_hash - self.st_txt_hash = st_txt_hash - self.report_txt_hash = report_txt_hash + pdf_hash: str | None = None + txt_hash: str | None = None - @property - def st_pdf_path(self) -> Path: - if not self._st_pdf_path: - raise ValueError(f"st_pdf_path not set on {type(self)}") - return self._st_pdf_path + _pdf_path: Path | None = None + _txt_path: Path | None = None - @st_pdf_path.setter - def st_pdf_path(self, pth: str | Path | None) -> None: - self._st_pdf_path = Path(pth) if pth else None + def is_ok_to_download(self, fresh: bool = True) -> bool: + return True if fresh else not self.download_ok - @property - def report_pdf_path(self) -> Path: - if not self._report_pdf_path: - raise ValueError(f"report_pdf_path not set on {type(self)}") - return self._report_pdf_path + def is_ok_to_convert(self, fresh: bool = True) -> bool: + return self.download_ok if fresh else self.download_ok and not self.convert_ok - @report_pdf_path.setter - def report_pdf_path(self, pth: str | Path | None) -> None: - self._report_pdf_path = Path(pth) if pth else None + def is_ok_to_analyze(self, fresh: bool = True) -> bool: + if fresh: + return self.download_ok and self.convert_ok + else: + return self.download_ok and self.convert_ok and not self.extract_ok @property - def st_txt_path(self) -> Path: - if not self._st_txt_path: - raise ValueError(f"st_txt_path not set on {type(self)}") - return self._st_txt_path + def pdf_path(self) -> Path: + if not self._pdf_path: + raise ValueError(f"pdf_path not set on {type(self)}") + return self._pdf_path - @st_txt_path.setter - def st_txt_path(self, pth: str | Path | None) -> None: - self._st_txt_path = Path(pth) if pth else None + @pdf_path.setter + def pdf_path(self, pth: str | Path | None) -> None: + self._pdf_path = Path(pth) if pth else None @property - def report_txt_path(self) -> Path: - if not self._report_txt_path: - raise 
ValueError(f"report_txt_path not set on {type(self)}") - return self._report_txt_path + def txt_path(self) -> Path: + if not self._txt_path: + raise ValueError(f"txt_path not set on {type(self)}") + return self._txt_path - @report_txt_path.setter - def report_txt_path(self, pth: str | Path | None) -> None: - self._report_txt_path = Path(pth) if pth else None + @txt_path.setter + def txt_path(self, pth: str | Path | None) -> None: + self._txt_path = Path(pth) if pth else None @property def serialized_attributes(self) -> list[str]: return [ - "st_download_ok", - "report_download_ok", - "st_convert_garbage", - "report_convert_garbage", - "st_convert_ok", - "report_convert_ok", - "st_extract_ok", - "report_extract_ok", - "st_pdf_hash", - "report_pdf_hash", - "st_txt_hash", - "report_txt_hash", + "download_ok", + "convert_garbage", + "convert_ok", + "extract_ok", + "pdf_hash", + "txt_hash", ] - def report_is_ok_to_download(self, fresh: bool = True) -> bool: - return True if fresh else not self.report_download_ok - - def st_is_ok_to_download(self, fresh: bool = True) -> bool: - return True if fresh else not self.st_download_ok - - def report_is_ok_to_convert(self, fresh: bool = True) -> bool: - return self.report_download_ok if fresh else self.report_download_ok and not self.report_convert_ok + @dataclass(init=False) + class InternalState(ComplexSerializableType): + """ + Holds internal state of the certificate, whether downloads and converts of individual components succeeded. Also + holds information about errors and paths to the files. 
+ """ - def st_is_ok_to_convert(self, fresh: bool = True) -> bool: - return self.st_download_ok if fresh else self.st_download_ok and not self.st_convert_ok + report: CCCertificate.DocumentState + st: CCCertificate.DocumentState + cert: CCCertificate.DocumentState - def report_is_ok_to_analyze(self, fresh: bool = True) -> bool: - if fresh is True: - return self.report_download_ok and self.report_convert_ok - else: - return self.report_download_ok and self.report_convert_ok and not self.report_extract_ok + def __init__( + self, + report: CCCertificate.DocumentState | None = None, + st: CCCertificate.DocumentState | None = None, + cert: CCCertificate.DocumentState | None = None, + ): + super().__init__() + self.report = report if report is not None else CCCertificate.DocumentState() + self.st = st if st is not None else CCCertificate.DocumentState() + self.cert = cert if cert is not None else CCCertificate.DocumentState() - def st_is_ok_to_analyze(self, fresh: bool = True) -> bool: - if fresh is True: - return self.st_download_ok and self.st_convert_ok - else: - return self.st_download_ok and self.st_convert_ok and not self.st_extract_ok + @property + def serialized_attributes(self) -> list[str]: + return ["report", "st", "cert"] @dataclass class PdfData(BasePdfData, ComplexSerializableType): @@ -239,146 +171,108 @@ class PdfData(BasePdfData, ComplexSerializableType): report_metadata: dict[str, Any] | None = field(default=None) st_metadata: dict[str, Any] | None = field(default=None) + cert_metadata: dict[str, Any] | None = field(default=None) report_frontpage: dict[str, dict[str, Any]] | None = field(default=None) - st_frontpage: dict[str, dict[str, Any]] | None = field(default=None) + st_frontpage: dict[str, dict[str, Any]] | None = field( + default=None + ) # TODO: Unused, we have no frontpage matching for targets + cert_frontpage: dict[str, dict[str, Any]] | None = field( + default=None + ) # TODO: Unused, we have no frontpage matching for certs report_keywords: 
dict[str, Any] | None = field(default=None) st_keywords: dict[str, Any] | None = field(default=None) + cert_keywords: dict[str, Any] | None = field(default=None) report_filename: str | None = field(default=None) st_filename: str | None = field(default=None) + cert_filename: str | None = field(default=None) def __bool__(self) -> bool: return any(x is not None for x in vars(self)) - @property - def bsi_data(self) -> dict[str, Any] | None: - """ - Returns frontpage data related to BSI-provided information - """ - return self.report_frontpage.get("bsi", None) if self.report_frontpage else None - - @property - def niap_data(self) -> dict[str, Any] | None: - """ - Returns frontpage data related to niap-provided information - """ - return self.report_frontpage.get("niap", None) if self.report_frontpage else None - - @property - def nscib_data(self) -> dict[str, Any] | None: - """ - Returns frontpage data related to nscib-provided information - """ - return self.report_frontpage.get("nscib", None) if self.report_frontpage else None - - @property - def canada_data(self) -> dict[str, Any] | None: - """ - Returns frontpage data related to canada-provided information - """ - return self.report_frontpage.get("canada", None) if self.report_frontpage else None - - @property - def anssi_data(self) -> dict[str, Any] | None: - """ - Returns frontpage data related to ANSSI-provided information - """ - return self.report_frontpage.get("anssi", None) if self.report_frontpage else None - @property def cert_lab(self) -> list[str] | None: """ Returns labs for which certificate data was parsed. 
""" + if not self.report_frontpage: + return None labs = [ data["cert_lab"].split(" ")[0].upper() - for data in [self.bsi_data, self.anssi_data, self.niap_data, self.nscib_data, self.canada_data] + for scheme, data in self.report_frontpage.items() if data and "cert_lab" in data ] return labs if labs else None - @property - def bsi_cert_id(self) -> str | None: - return self.bsi_data.get("cert_id", None) if self.bsi_data else None - - @property - def niap_cert_id(self) -> str | None: - return self.niap_data.get("cert_id", None) if self.niap_data else None - - @property - def nscib_cert_id(self) -> str | None: - return self.nscib_data.get("cert_id", None) if self.nscib_data else None - - @property - def canada_cert_id(self) -> str | None: - return self.canada_data.get("cert_id", None) if self.canada_data else None - - @property - def anssi_cert_id(self) -> str | None: - return self.anssi_data.get("cert_id", None) if self.anssi_data else None - def frontpage_cert_id(self, scheme: str) -> dict[str, float]: """ Get cert_id candidate from the frontpage of the report. """ - scheme_map = { - "DE": self.bsi_cert_id, - "US": self.niap_cert_id, - "NL": self.nscib_cert_id, - "CA": self.canada_cert_id, - "FR": self.anssi_cert_id, - } - if scheme in scheme_map and (candidate := scheme_map[scheme]): - return {candidate: 1.0} - return {} + if not self.report_frontpage: + return {} + data = self.report_frontpage.get(scheme) + if not data: + return {} + cert_id = data.get("cert_id") + if not cert_id: + return {} + else: + return {cert_id: 1.0} def filename_cert_id(self, scheme: str) -> dict[str, float]: """ - Get cert_id candidates from the matches in the report filename. + Get cert_id candidates from the matches in the report filename and cert filename. 
""" - if not self.report_filename: - return {} scheme_filename_rules = rules["cc_filename_cert_id"][scheme] if not scheme_filename_rules: return {} scheme_meta = schemes[scheme] - matches: Counter = Counter() - for rule in scheme_filename_rules: - match = re.search(rule, self.report_filename) - if match: - try: - meta = match.groupdict() - cert_id = scheme_meta(meta) - matches[cert_id] += 1 - except Exception: - continue - if not matches: - return {} - total = max(matches.values()) - results = {} - for candidate, count in matches.items(): - results[candidate] = count / total + results: dict[str, float] = {} + for fname in (self.report_filename, self.cert_filename): + if not fname: + continue + + matches: Counter = Counter() + for rule in scheme_filename_rules: + match = re.search(rule, fname) + if match: + try: + meta = match.groupdict() + cert_id = scheme_meta(meta) + matches[cert_id] += 1 + except Exception: + continue + if not matches: + continue + total = max(matches.values()) + + for candidate, count in matches.items(): + results.setdefault(candidate, 0) + results[candidate] += count / total # TODO count length in weight return results def keywords_cert_id(self, scheme: str) -> dict[str, float]: """ - Get cert_id candidates from the keywords matches in the report. + Get cert_id candidates from the keywords matches in the report and cert. 
""" - if not self.report_keywords: - return {} - cert_id_matches = self.report_keywords.get("cc_cert_id") - if not cert_id_matches: - return {} + results: dict[str, float] = {} + for keywords in (self.report_keywords, self.cert_keywords): + if not keywords: + continue + cert_id_matches = keywords.get("cc_cert_id") + if not cert_id_matches: + continue - if scheme not in cert_id_matches: - return {} - matches: Counter = Counter(cert_id_matches[scheme]) - if not matches: - return {} - total = max(matches.values()) - results = {} - for candidate, count in matches.items(): - results[candidate] = count / total + if scheme not in cert_id_matches: + continue + matches: Counter = Counter(cert_id_matches[scheme]) + if not matches: + continue + total = max(matches.values()) + + for candidate, count in matches.items(): + results.setdefault(candidate, 0) + results[candidate] += count / total # TODO count length in weight return results @@ -388,22 +282,27 @@ def metadata_cert_id(self, scheme: str) -> dict[str, float]: """ scheme_rules = rules["cc_cert_id"][scheme] fields = ("/Title", "/Subject") - matches: Counter = Counter() - for meta_field in fields: - field_val = self.report_metadata.get(meta_field) if self.report_metadata else None - if not field_val: + results: dict[str, float] = {} + for metadata in (self.report_metadata, self.cert_metadata): + if not metadata: continue - for rule in scheme_rules: - match = re.search(rule, field_val) - if match: - cert_id = normalize_match_string(match.group()) - matches[cert_id] += 1 - if not matches: - return {} - total = max(matches.values()) - results = {} - for candidate, count in matches.items(): - results[candidate] = count / total + matches: Counter = Counter() + for meta_field in fields: + field_val = metadata.get(meta_field) + if not field_val: + continue + for rule in scheme_rules: + match = re.search(rule, field_val) + if match: + cert_id = normalize_match_string(match.group()) + matches[cert_id] += 1 + if not matches: + 
continue + total = max(matches.values()) + + for candidate, count in matches.items(): + results.setdefault(candidate, 0) + results[candidate] += count / total # TODO count length in weight return results @@ -814,25 +713,33 @@ def set_local_paths( self, report_pdf_dir: str | Path | None, st_pdf_dir: str | Path | None, + cert_pdf_dir: str | Path | None, report_txt_dir: str | Path | None, st_txt_dir: str | Path | None, + cert_txt_dir: str | Path | None, ) -> None: """ Sets paths to files given the requested directories :param Optional[Union[str, Path]] report_pdf_dir: Directory where pdf reports shall be stored :param Optional[Union[str, Path]] st_pdf_dir: Directory where pdf security targets shall be stored + :param Optional[Union[str, Path]] cert_pdf_dir: Directory where pdf certificates shall be stored :param Optional[Union[str, Path]] report_txt_dir: Directory where txt reports shall be stored :param Optional[Union[str, Path]] st_txt_dir: Directory where txt security targets shall be stored + :param Optional[Union[str, Path]] cert_txt_dir: Directory where txt certificates shall be stored """ if report_pdf_dir: - self.state.report_pdf_path = Path(report_pdf_dir) / (self.dgst + ".pdf") + self.state.report.pdf_path = Path(report_pdf_dir) / (self.dgst + ".pdf") if st_pdf_dir: - self.state.st_pdf_path = Path(st_pdf_dir) / (self.dgst + ".pdf") + self.state.st.pdf_path = Path(st_pdf_dir) / (self.dgst + ".pdf") + if cert_pdf_dir: + self.state.cert.pdf_path = Path(cert_pdf_dir) / (self.dgst + ".pdf") if report_txt_dir: - self.state.report_txt_path = Path(report_txt_dir) / (self.dgst + ".txt") + self.state.report.txt_path = Path(report_txt_dir) / (self.dgst + ".txt") if st_txt_dir: - self.state.st_txt_path = Path(st_txt_dir) / (self.dgst + ".txt") + self.state.st.txt_path = Path(st_txt_dir) / (self.dgst + ".txt") + if cert_txt_dir: + self.state.cert.txt_path = Path(cert_txt_dir) / (self.dgst + ".txt") @staticmethod def download_pdf_report(cert: CCCertificate) -> 
CCCertificate: @@ -846,14 +753,14 @@ def download_pdf_report(cert: CCCertificate) -> CCCertificate: if not cert.report_link: exit_code = "No link" else: - exit_code = helpers.download_file(cert.report_link, cert.state.report_pdf_path) + exit_code = helpers.download_file(cert.report_link, cert.state.report.pdf_path) if exit_code != requests.codes.ok: error_msg = f"failed to download report from {cert.report_link}, code: {exit_code}" logger.error(f"Cert dgst: {cert.dgst} " + error_msg) - cert.state.report_download_ok = False + cert.state.report.download_ok = False else: - cert.state.report_download_ok = True - cert.state.report_pdf_hash = helpers.get_sha256_filepath(cert.state.report_pdf_path) + cert.state.report.download_ok = True + cert.state.report.pdf_hash = helpers.get_sha256_filepath(cert.state.report.pdf_path) cert.pdf_data.report_filename = unquote_plus(str(urlparse(cert.report_link).path).split("/")[-1]) return cert @@ -866,39 +773,61 @@ def download_pdf_st(cert: CCCertificate) -> CCCertificate: :return CCCertificate: returns the modified certificate with updated state """ exit_code: str | int = ( - helpers.download_file(cert.st_link, cert.state.st_pdf_path) if cert.st_link else "No link" + helpers.download_file(cert.st_link, cert.state.st.pdf_path) if cert.st_link else "No link" ) if exit_code != requests.codes.ok: error_msg = f"failed to download ST from {cert.st_link}, code: {exit_code}" logger.error(f"Cert dgst: {cert.dgst} " + error_msg) - cert.state.st_download_ok = False + cert.state.st.download_ok = False else: - cert.state.st_download_ok = True - cert.state.st_pdf_hash = helpers.get_sha256_filepath(cert.state.st_pdf_path) + cert.state.st.download_ok = True + cert.state.st.pdf_hash = helpers.get_sha256_filepath(cert.state.st.pdf_path) cert.pdf_data.st_filename = unquote_plus(str(urlparse(cert.st_link).path).split("/")[-1]) return cert + @staticmethod + def download_pdf_cert(cert: CCCertificate) -> CCCertificate: + """ + Downloads pdf of the 
certificate. Staticmethod to allow for parallelization. + + :param CCCertificate cert: cert to download the pdf of + :return CCCertificate: returns the modified certificate with updated state + """ + exit_code: str | int = ( + helpers.download_file(cert.cert_link, cert.state.cert.pdf_path) if cert.cert_link else "No link" + ) + + if exit_code != requests.codes.ok: + error_msg = f"failed to download certificate from {cert.cert_link}, code: {exit_code}" + logger.error(f"Cert dgst: {cert.dgst} " + error_msg) + cert.state.cert.download_ok = False + else: + cert.state.cert.download_ok = True + cert.state.cert.pdf_hash = helpers.get_sha256_filepath(cert.state.cert.pdf_path) + cert.pdf_data.cert_filename = unquote_plus(str(urlparse(cert.cert_link).path).split("/")[-1]) + return cert + @staticmethod def convert_report_pdf(cert: CCCertificate) -> CCCertificate: """ Converts the pdf certification report to txt, given the certificate. Staticmethod to allow for parallelization. - :param CCCertificate cert: cert to download the pdf report for + :param CCCertificate cert: cert to convert the pdf report for :return CCCertificate: the modified certificate with updated state """ ocr_done, ok_result = sec_certs.utils.pdf.convert_pdf_file( - cert.state.report_pdf_path, cert.state.report_txt_path + cert.state.report.pdf_path, cert.state.report.txt_path ) # If OCR was done the result was garbage - cert.state.report_convert_garbage = ocr_done + cert.state.report.convert_garbage = ocr_done # And put the whole result into convert_ok - cert.state.report_convert_ok = ok_result + cert.state.report.convert_ok = ok_result if not ok_result: error_msg = "failed to convert report pdf->txt" logger.error(f"Cert dgst: {cert.dgst} " + error_msg) else: - cert.state.report_txt_hash = helpers.get_sha256_filepath(cert.state.report_txt_path) + cert.state.report.txt_hash = helpers.get_sha256_filepath(cert.state.report.txt_path) return cert @staticmethod @@ -906,34 +835,39 @@ def convert_st_pdf(cert: 
CCCertificate) -> CCCertificate: """ Converts the pdf security target to txt, given the certificate. Staticmethod to allow for parallelization. - :param CCCertificate cert: cert to download the pdf security target for + :param CCCertificate cert: cert to convert the pdf security target for :return CCCertificate: the modified certificate with updated state """ - ocr_done, ok_result = sec_certs.utils.pdf.convert_pdf_file(cert.state.st_pdf_path, cert.state.st_txt_path) + ocr_done, ok_result = sec_certs.utils.pdf.convert_pdf_file(cert.state.st.pdf_path, cert.state.st.txt_path) # If OCR was done the result was garbage - cert.state.st_convert_garbage = ocr_done + cert.state.st.convert_garbage = ocr_done # And put the whole result into convert_ok - cert.state.st_convert_ok = ok_result + cert.state.st.convert_ok = ok_result if not ok_result: error_msg = "failed to convert security target pdf->txt" logger.error(f"Cert dgst: {cert.dgst} " + error_msg) else: - cert.state.st_txt_hash = helpers.get_sha256_filepath(cert.state.st_txt_path) + cert.state.st.txt_hash = helpers.get_sha256_filepath(cert.state.st.txt_path) return cert @staticmethod - def extract_st_pdf_metadata(cert: CCCertificate) -> CCCertificate: + def convert_cert_pdf(cert: CCCertificate) -> CCCertificate: """ - Extracts metadata from security target pdf given the certificate. Staticmethod to allow for parallelization. + Converts the pdf certificate to txt, given the certificate. Staticmethod to allow for parallelization. - :param CCCertificate cert: cert to extract the metadata for. 
+ :param CCCertificate cert: cert to convert the certificate for :return CCCertificate: the modified certificate with updated state """ - response, cert.pdf_data.st_metadata = sec_certs.utils.pdf.extract_pdf_metadata(cert.state.st_pdf_path) - if response != constants.RETURNCODE_OK: - cert.state.st_extract_ok = False + ocr_done, ok_result = sec_certs.utils.pdf.convert_pdf_file(cert.state.cert.pdf_path, cert.state.cert.txt_path) + # If OCR was done the result was garbage + cert.state.cert.convert_garbage = ocr_done + # And put the whole result into convert_ok + cert.state.cert.convert_ok = ok_result + if not ok_result: + error_msg = "failed to convert certificate pdf->txt" + logger.error(f"Cert dgst: {cert.dgst} " + error_msg) else: - cert.state.st_extract_ok = True + cert.state.cert.txt_hash = helpers.get_sha256_filepath(cert.state.cert.txt_path) return cert @staticmethod @@ -944,28 +878,41 @@ def extract_report_pdf_metadata(cert: CCCertificate) -> CCCertificate: :param CCCertificate cert: cert to extract the metadata for. :return CCCertificate: the modified certificate with updated state """ - response, cert.pdf_data.report_metadata = sec_certs.utils.pdf.extract_pdf_metadata(cert.state.report_pdf_path) + response, cert.pdf_data.report_metadata = sec_certs.utils.pdf.extract_pdf_metadata(cert.state.report.pdf_path) if response != constants.RETURNCODE_OK: - cert.state.report_extract_ok = False + cert.state.report.extract_ok = False else: - cert.state.report_extract_ok = True + cert.state.report.extract_ok = True return cert @staticmethod - def extract_st_pdf_frontpage(cert: CCCertificate) -> CCCertificate: + def extract_st_pdf_metadata(cert: CCCertificate) -> CCCertificate: """ - Extracts data from security target pdf frontpage given the certificate. Staticmethod to allow for parallelization. + Extracts metadata from security target pdf given the certificate. Staticmethod to allow for parallelization.
- :param CCCertificate cert: cert to extract the frontpage data for. + :param CCCertificate cert: cert to extract the metadata for. :return CCCertificate: the modified certificate with updated state """ - cert.pdf_data.st_frontpage = {} + response, cert.pdf_data.st_metadata = sec_certs.utils.pdf.extract_pdf_metadata(cert.state.st.pdf_path) + if response != constants.RETURNCODE_OK: + cert.state.st.extract_ok = False + else: + cert.state.st.extract_ok = True + return cert - for header_type, associated_header_func in HEADERS.items(): - response, cert.pdf_data.st_frontpage[header_type] = associated_header_func(cert.state.st_txt_path) + @staticmethod + def extract_cert_pdf_metadata(cert: CCCertificate) -> CCCertificate: + """ + Extracts metadata from certificate pdf given the certificate. Staticmethod to allow for parallelization. - if response != constants.RETURNCODE_OK: - cert.state.st_extract_ok = False + :param CCCertificate cert: cert to extract the metadata for. + :return CCCertificate: the modified certificate with updated state + """ + response, cert.pdf_data.cert_metadata = sec_certs.utils.pdf.extract_pdf_metadata(cert.state.cert.pdf_path) + if response != constants.RETURNCODE_OK: + cert.state.cert.extract_ok = False + else: + cert.state.cert.extract_ok = True return cert @staticmethod @@ -978,25 +925,25 @@ def extract_report_pdf_frontpage(cert: CCCertificate) -> CCCertificate: """ cert.pdf_data.report_frontpage = {} - for header_type, associated_header_func in HEADERS.items(): - response, cert.pdf_data.report_frontpage[header_type] = associated_header_func(cert.state.report_txt_path) - + if cert.scheme in scheme_frontpage_functions: + header_func = scheme_frontpage_functions[cert.scheme] + response, cert.pdf_data.report_frontpage[cert.scheme] = header_func(cert.state.report.txt_path) if response != constants.RETURNCODE_OK: - cert.state.report_extract_ok = False + cert.state.report.extract_ok = False return cert @staticmethod def 
extract_report_pdf_keywords(cert: CCCertificate) -> CCCertificate: """ - Matches regular expresions in txt obtained from certification report and extracts the matches into attribute. + Matches regular expressions in txt obtained from certification report and extracts the matches into attribute. Static method to allow for parallelization :param CCCertificate cert: certificate to extract the keywords for. :return CCCertificate: the modified certificate with extracted keywords. """ - report_keywords = sec_certs.utils.extract.extract_keywords(cert.state.report_txt_path, cc_rules) + report_keywords = sec_certs.utils.extract.extract_keywords(cert.state.report.txt_path, cc_rules) if report_keywords is None: - cert.state.report_extract_ok = False + cert.state.report.extract_ok = False else: cert.pdf_data.report_keywords = report_keywords return cert @@ -1004,19 +951,35 @@ def extract_report_pdf_keywords(cert: CCCertificate) -> CCCertificate: @staticmethod def extract_st_pdf_keywords(cert: CCCertificate) -> CCCertificate: """ - Matches regular expresions in txt obtained from security target and extracts the matches into attribute. + Matches regular expressions in txt obtained from security target and extracts the matches into attribute. Static method to allow for parallelization :param CCCertificate cert: certificate to extract the keywords for. :return CCCertificate: the modified certificate with extracted keywords. """ - st_keywords = sec_certs.utils.extract.extract_keywords(cert.state.st_txt_path, cc_rules) + st_keywords = sec_certs.utils.extract.extract_keywords(cert.state.st.txt_path, cc_rules) if st_keywords is None: - cert.state.st_extract_ok = False + cert.state.st.extract_ok = False else: cert.pdf_data.st_keywords = st_keywords return cert + @staticmethod + def extract_cert_pdf_keywords(cert: CCCertificate) -> CCCertificate: + """ + Matches regular expressions in txt obtained from the certificate and extracts the matches into attribute. 
+ Static method to allow for parallelization + + :param CCCertificate cert: certificate to extract the keywords for. + :return CCCertificate: the modified certificate with extracted keywords. + """ + cert_keywords = sec_certs.utils.extract.extract_keywords(cert.state.cert.txt_path, cc_rules) + if cert_keywords is None: + cert.state.cert.extract_ok = False + else: + cert.pdf_data.cert_keywords = cert_keywords + return cert + def compute_heuristics_version(self) -> None: """ Fills in the heuristically obtained version of certified product into attribute in heuristics class. diff --git a/src/sec_certs/utils/extract.py b/src/sec_certs/utils/extract.py index e6312b8d..669cbcfc 100644 --- a/src/sec_certs/utils/extract.py +++ b/src/sec_certs/utils/extract.py @@ -818,3 +818,12 @@ def get_sums_for_rules_subset(dct: dict | None, path: str) -> dict[str, float]: cc_rules_subset_to_search = rules_get_subset(path) paths_to_search = extract_key_paths(cc_rules_subset_to_search, path) return {x: get_sum_of_values_from_dict_path(dct, x, np.nan) for x in paths_to_search} + + +scheme_frontpage_functions = { + "FR": search_only_headers_anssi, + "DE": search_only_headers_bsi, + "NL": search_only_headers_nscib, + "US": search_only_headers_niap, + "CA": search_only_headers_canada, +} diff --git a/tests/cc/test_cc_certificate.py b/tests/cc/test_cc_certificate.py index bdc10f32..f90245e0 100644 --- a/tests/cc/test_cc_certificate.py +++ b/tests/cc/test_cc_certificate.py @@ -32,33 +32,29 @@ def vulnerable_certificate(tmp_path_factory) -> CCCertificate: def test_extract_metadata(vulnerable_certificate: CCCertificate): - vulnerable_certificate.state.st_extract_ok = True + vulnerable_certificate.state.st.extract_ok = True CCCertificate.extract_st_pdf_metadata(vulnerable_certificate) - assert vulnerable_certificate.state.st_extract_ok + assert vulnerable_certificate.state.st.extract_ok - vulnerable_certificate.state.report_extract_ok = True + vulnerable_certificate.state.report.extract_ok = True 
CCCertificate.extract_report_pdf_metadata(vulnerable_certificate) - assert vulnerable_certificate.state.report_extract_ok + assert vulnerable_certificate.state.report.extract_ok def test_extract_frontpage(vulnerable_certificate: CCCertificate): - vulnerable_certificate.state.st_extract_ok = True - CCCertificate.extract_st_pdf_frontpage(vulnerable_certificate) - assert vulnerable_certificate.state.st_extract_ok - - vulnerable_certificate.state.report_extract_ok = True + vulnerable_certificate.state.report.extract_ok = True CCCertificate.extract_report_pdf_frontpage(vulnerable_certificate) - assert vulnerable_certificate.state.report_extract_ok + assert vulnerable_certificate.state.report.extract_ok def test_keyword_extraction(vulnerable_certificate: CCCertificate): - vulnerable_certificate.state.st_extract_ok = True + vulnerable_certificate.state.st.extract_ok = True CCCertificate.extract_st_pdf_keywords(vulnerable_certificate) - assert vulnerable_certificate.state.st_extract_ok + assert vulnerable_certificate.state.st.extract_ok - vulnerable_certificate.state.report_extract_ok = True + vulnerable_certificate.state.report.extract_ok = True CCCertificate.extract_report_pdf_keywords(vulnerable_certificate) - assert vulnerable_certificate.state.report_extract_ok + assert vulnerable_certificate.state.report.extract_ok def test_cert_link_escaping(cert_one: CCCertificate): diff --git a/tests/cc/test_cc_dataset.py b/tests/cc/test_cc_dataset.py index 0701e678..9d1e4022 100644 --- a/tests/cc/test_cc_dataset.py +++ b/tests/cc/test_cc_dataset.py @@ -23,41 +23,52 @@ def test_download_and_convert_pdfs(toy_dataset: CCDataset, data_dir: Path): "8a5e6bcda602920c": "fcee91f09bb72a6526a1f94d0ab754a6db3fbe3ba5773cd372df19788bb25292", } + template_cert_pdf_hashes = { + "309ac2fd7f2dcf17": "9d38bca310c4d349cc39471e0b75d939cc275db9a75b07b8a365d719cfbedcc5", + "8cf86948f02f047d": None, + "8a5e6bcda602920c": "4ba78f26f505819183256ca5a6b404fa90c750fe160c41791e4c400f64e2f6d5", + } + with 
TemporaryDirectory() as td: toy_dataset.copy_dataset(td) toy_dataset.download_all_artifacts() if not ( - toy_dataset["309ac2fd7f2dcf17"].state.report_download_ok - or toy_dataset["309ac2fd7f2dcf17"].state.st_download_ok - or toy_dataset["8cf86948f02f047d"].state.report_download_ok - or toy_dataset["8cf86948f02f047d"].state.st_download_ok - or toy_dataset["8a5e6bcda602920c"].state.report_download_ok - or toy_dataset["8a5e6bcda602920c"].state.st_download_ok + toy_dataset["309ac2fd7f2dcf17"].state.report.download_ok + or toy_dataset["309ac2fd7f2dcf17"].state.st.download_ok + or toy_dataset["309ac2fd7f2dcf17"].state.cert.download_ok + or toy_dataset["8cf86948f02f047d"].state.report.download_ok + or toy_dataset["8cf86948f02f047d"].state.st.download_ok + or toy_dataset["8a5e6bcda602920c"].state.report.download_ok + or toy_dataset["8a5e6bcda602920c"].state.st.download_ok + or toy_dataset["8a5e6bcda602920c"].state.cert.download_ok ): pytest.xfail(reason="Fail due to error during download") toy_dataset.convert_all_pdfs() for cert in toy_dataset: - assert cert.state.report_pdf_hash == template_report_pdf_hashes[cert.dgst] - assert cert.state.st_pdf_hash == template_st_pdf_hashes[cert.dgst] - assert not cert.state.report_convert_garbage - assert not cert.state.st_convert_garbage - assert cert.state.report_convert_ok - assert cert.state.st_convert_ok - assert cert.state.report_txt_path.exists() - assert cert.state.st_txt_path.exists() + assert cert.state.report.pdf_hash == template_report_pdf_hashes[cert.dgst] + assert cert.state.st.pdf_hash == template_st_pdf_hashes[cert.dgst] + assert cert.state.cert.pdf_hash == template_cert_pdf_hashes[cert.dgst] + assert not cert.state.report.convert_garbage + assert not cert.state.st.convert_garbage + assert cert.state.report.convert_ok + assert cert.state.st.convert_ok + assert cert.state.report.txt_path.exists() + assert cert.state.st.txt_path.exists() + if cert.cert_link: + assert cert.state.cert.txt_path.exists() 
template_report_txt_path = data_dir / "report_309ac2fd7f2dcf17.txt" template_st_txt_path = data_dir / "target_309ac2fd7f2dcf17.txt" assert ( - abs(toy_dataset["309ac2fd7f2dcf17"].state.st_txt_path.stat().st_size - template_st_txt_path.stat().st_size) + abs(toy_dataset["309ac2fd7f2dcf17"].state.st.txt_path.stat().st_size - template_st_txt_path.stat().st_size) < 1000 ) assert ( abs( - toy_dataset["309ac2fd7f2dcf17"].state.report_txt_path.stat().st_size + toy_dataset["309ac2fd7f2dcf17"].state.report.txt_path.stat().st_size - template_report_txt_path.stat().st_size ) < 1000 diff --git a/tests/cc/test_cc_maintenance_updates.py b/tests/cc/test_cc_maintenance_updates.py index 1144d3e0..4e94d980 100644 --- a/tests/cc/test_cc_maintenance_updates.py +++ b/tests/cc/test_cc_maintenance_updates.py @@ -43,11 +43,11 @@ def test_download_artifacts(mu_dset: CCDatasetMaintenanceUpdates): mu_dset.download_all_artifacts() mu = mu_dset["cert_8a5e6bcda602920c_update_559ed93dd80320b5"] - if not (mu.state.report_download_ok or mu.state.st_download_ok): + if not (mu.state.report.download_ok or mu.state.st.download_ok): pytest.xfail(reason="Fail due to error on CC server.") - assert mu.state.report_pdf_hash == "80bada65614c1b037c13efa78996a8910700d0e05a3ca217286f76d7dacefe62" - assert mu.state.st_pdf_hash == "d42e4364d037ba742fcd4050a9a84d0e6300f93eb68bcfe8c61f72c429c9ceca" + assert mu.state.report.pdf_hash == "80bada65614c1b037c13efa78996a8910700d0e05a3ca217286f76d7dacefe62" + assert mu.state.st.pdf_hash == "d42e4364d037ba742fcd4050a9a84d0e6300f93eb68bcfe8c61f72c429c9ceca" def test_dataset_to_json(mu_dset: CCDatasetMaintenanceUpdates, data_dir: Path, tmp_path: Path): diff --git a/tests/data/cc/analysis/cc_full_dataset.json b/tests/data/cc/analysis/cc_full_dataset.json index 3b07f9c5..64ef38e4 100644 --- a/tests/data/cc/analysis/cc_full_dataset.json +++ b/tests/data/cc/analysis/cc_full_dataset.json @@ -55,18 +55,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", 
- "st_download_ok": true, - "report_download_ok": true, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "f4ab8c62a2325cc634eef164a9707d2375f2e3d5c5ab9791b7c00a6034e64b62", - "report_pdf_hash": "1954f42e857c02a156caf8fea7abd36ee0a5953fe0e319c4ec749af277fcfb4b", - "st_txt_hash": "c8b4c5667a3f60edc845051e5a31a2d17b9d9a11df9e56dd89681d25e727a622", - "report_txt_hash": "35627594d3806ac3926ec47f466503fe27781533da12beb6f8705882fccf125e" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "1954f42e857c02a156caf8fea7abd36ee0a5953fe0e319c4ec749af277fcfb4b", + "txt_hash": "35627594d3806ac3926ec47f466503fe27781533da12beb6f8705882fccf125e" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "f4ab8c62a2325cc634eef164a9707d2375f2e3d5c5ab9791b7c00a6034e64b62", + "txt_hash": "c8b4c5667a3f60edc845051e5a31a2d17b9d9a11df9e56dd89681d25e727a622" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -115,8 +130,7 @@ } }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) 
from (.*)" ], @@ -127,17 +141,9 @@ "ref_protection_profiles": "None", "cc_version": "Product specific Security Target Common Criteria Part 2 conformant", "cc_security_level": "Common Criteria Part 3 conformant EAL 3 augmented by ALC_FLR.1 SOGIS Recognition Agreement" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { diff --git a/tests/data/cc/analysis/reference_dataset.json b/tests/data/cc/analysis/reference_dataset.json index 00ab6674..38fb7fd0 100644 --- a/tests/data/cc/analysis/reference_dataset.json +++ b/tests/data/cc/analysis/reference_dataset.json @@ -44,16 +44,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "16f1c7e32517d22f6062edf439f8c67eb0d202fadaaf1c54c1f96af7f74ad7ac", - "report_pdf_hash": "2dc5ff15361621bb2bd7db79491b88a70049abe4ffbe8370de87f9c51f42fb50", - "st_txt_hash": "81c53d1e5b1c2fcb129ce1053d13cd1308f7a556921f0b9024cedf75c6b2efb7", - "report_txt_hash": "460e8010dbc8f5de5b87bf96fd45c71cfd9f3869f34ca6ac1ab02cbd70d2523f" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "2dc5ff15361621bb2bd7db79491b88a70049abe4ffbe8370de87f9c51f42fb50", + "txt_hash": "460e8010dbc8f5de5b87bf96fd45c71cfd9f3869f34ca6ac1ab02cbd70d2523f" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "16f1c7e32517d22f6062edf439f8c67eb0d202fadaaf1c54c1f96af7f74ad7ac", + "txt_hash": "81c53d1e5b1c2fcb129ce1053d13cd1308f7a556921f0b9024cedf75c6b2efb7" + }, + "cert": { + 
"_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -84,8 +101,7 @@ "/Title": "Security Target The Océ Digital Access Controller (DAC) R10.1.5, as used in the Océ VarioPrint 1055, 1055 BC, 1055 DP, 1065, 1075, 2062, 2075, 2075 DP printer/copier/scanner products" }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) from (.*)" ], @@ -93,17 +109,9 @@ "cert_item": "Océ Digital Access Controller (DAC) R10.1.5 for use in the Océ VarioPrint 1055, 1055 BC, 1055 DP, 1065, 1075, 2062, 2075, 2075 DP printer/copier/scanner products", "developer": "Océ Technologies B.V", "cert_lab": "BSI" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { @@ -604,16 +612,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "8f288bc6133855bba6b78ccaeff86b46b4ce1db86aa1b5141c1073a74e6d13fd", - "report_pdf_hash": "d77dffc3f2e4d95d6103de31b7ebbec54551c3b93f01415971ec3b9f5cb33e4f", - "st_txt_hash": "926668bea7c427a4fcf82857bfc63420f3597b6bff39699927a58f335620eaac", - "report_txt_hash": "0535df1c56fb4f87153cbffee51ba4d77fac47a6f17f024aa7d9df461028bc65" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "d77dffc3f2e4d95d6103de31b7ebbec54551c3b93f01415971ec3b9f5cb33e4f", + "txt_hash": "0535df1c56fb4f87153cbffee51ba4d77fac47a6f17f024aa7d9df461028bc65" + 
}, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "8f288bc6133855bba6b78ccaeff86b46b4ce1db86aa1b5141c1073a74e6d13fd", + "txt_hash": "926668bea7c427a4fcf82857bfc63420f3597b6bff39699927a58f335620eaac" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -644,8 +669,7 @@ "/Title": "Microsoft Word - Oce Venlo DAC Security Target 2.4.doc" }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) from (.*)" ], @@ -653,17 +677,9 @@ "cert_item": "Océ Digital Access Controller (DAC) R9.1.6", "developer": "Océ Technologies B.V", "cert_lab": "BSI" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { @@ -1242,16 +1258,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "c72b032f6119e6dd64270296713d5626b1473bf4925018ce4dd106d76953213f", - "report_pdf_hash": "eca6c3a665d8bd20394cf002bff0a8a1b451337f60c8184d458ffd5b31491085", - "st_txt_hash": "179b07b4fc7402066a884edea494b28e324315108a5e0820184031f2e2062ad5", - "report_txt_hash": "11e1262fd8f5df1b140f5e8813883b71447503781399427b35adbbecd00b4d63" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": 
"eca6c3a665d8bd20394cf002bff0a8a1b451337f60c8184d458ffd5b31491085", + "txt_hash": "11e1262fd8f5df1b140f5e8813883b71447503781399427b35adbbecd00b4d63" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "c72b032f6119e6dd64270296713d5626b1473bf4925018ce4dd106d76953213f", + "txt_hash": "179b07b4fc7402066a884edea494b28e324315108a5e0820184031f2e2062ad5" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -1282,8 +1315,7 @@ "/Title": "Microsoft Word - Oce Venlo DAC Security Target 1.9.doc" }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) from (.*)" ], @@ -1291,17 +1323,9 @@ "cert_item": "Océ Digital Access Controller (DAC) R 8.1.10", "developer": "Océ Technologies B.V", "cert_lab": "BSI" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { diff --git a/tests/data/cc/analysis/transitive_vulnerability_dataset.json b/tests/data/cc/analysis/transitive_vulnerability_dataset.json index fb45efde..abd4c7a3 100644 --- a/tests/data/cc/analysis/transitive_vulnerability_dataset.json +++ b/tests/data/cc/analysis/transitive_vulnerability_dataset.json @@ -54,16 +54,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "5bb93f7f3f08f30ba41abb003a2f8ce2609c385af82b863fbc0b19bd0c21a701", - "report_pdf_hash": 
"ee1ebada2c91c5a000c8d112e9e3742d09cad4c920d3f3e2a9beb01f16c69bb6", - "st_txt_hash": "66271d8bf0b581a2f189301438f2aee13ff3da0bb0bb180bcf518261eb695496", - "report_txt_hash": "9d360141a98e764b15855f519b456c4e4639f993c4f8b5ab67e9c8ae7fbfc9e4" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "ee1ebada2c91c5a000c8d112e9e3742d09cad4c920d3f3e2a9beb01f16c69bb6", + "txt_hash": "9d360141a98e764b15855f519b456c4e4639f993c4f8b5ab67e9c8ae7fbfc9e4" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "5bb93f7f3f08f30ba41abb003a2f8ce2609c385af82b863fbc0b19bd0c21a701", + "txt_hash": "66271d8bf0b581a2f189301438f2aee13ff3da0bb0bb180bcf518261eb695496" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -91,8 +108,7 @@ "/CreationDate": "D:20140828154014+02'00'" }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) 
from (.*)" ], @@ -100,17 +116,9 @@ "cert_item": "IBM z/OS Version 2 Release 1", "developer": "IBM Corporation", "cert_lab": "BSI" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { @@ -1360,16 +1368,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "4879ac5fdf9569ad88089df0122acd1c8a8f7252ed8f36aace14bbb0b805b758", - "report_pdf_hash": "63e6ac157e08ed37f9861458c66c015663e17fb8936746d7ae487963bdd455c7", - "st_txt_hash": "f7f7b8f31dddde3f0756cde8843061f01b606bdf266eca71dbcc56b3672d1db5", - "report_txt_hash": "dd120ba7667c2385839c96ee70c56f2a4d464fc95e3ea2818d31b3347d06fd4f" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "63e6ac157e08ed37f9861458c66c015663e17fb8936746d7ae487963bdd455c7", + "txt_hash": "dd120ba7667c2385839c96ee70c56f2a4d464fc95e3ea2818d31b3347d06fd4f" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "4879ac5fdf9569ad88089df0122acd1c8a8f7252ed8f36aace14bbb0b805b758", + "txt_hash": "f7f7b8f31dddde3f0756cde8843061f01b606bdf266eca71dbcc56b3672d1db5" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -1398,8 +1423,7 @@ "/CreationDate": "D:20150508083715+02'00'" }, "report_frontpage": { - "anssi": {}, - 
"bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) from (.*)" ], @@ -1407,17 +1431,9 @@ "cert_item": "RACF Element of z/OS Version 2, Release 1", "developer": "IBM Corporation", "cert_lab": "BSI" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { @@ -2311,16 +2327,33 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true, - "st_pdf_hash": "1a5e4c1382e284da88d93aa5557c7728c14b3fce73d81e4cf731ab24cee9ffdd", - "report_pdf_hash": "f10c85194adae814703781471f3a3de713383d8a9fbf5389fc7106958a8aaf90", - "st_txt_hash": "90b8e48add278faea4668eccba591d3992bf782669cca1b0a63bf6f21b514cd9", - "report_txt_hash": "0a7c65e3d11f082c8f75aba7de0079c0b1aa5e67bb28d4635cbcaa4cd200d1c2" + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "f10c85194adae814703781471f3a3de713383d8a9fbf5389fc7106958a8aaf90", + "txt_hash": "0a7c65e3d11f082c8f75aba7de0079c0b1aa5e67bb28d4635cbcaa4cd200d1c2" + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "1a5e4c1382e284da88d93aa5557c7728c14b3fce73d81e4cf731ab24cee9ffdd", + "txt_hash": "90b8e48add278faea4668eccba591d3992bf782669cca1b0a63bf6f21b514cd9" + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", @@ -2348,8 +2381,7 @@ 
"/CreationDate": "D:20170503171742+02'00'" }, "report_frontpage": { - "anssi": {}, - "bsi": { + "DE": { "match_rules": [ "(BSI-DSZ-CC-.+?) (?:for|For) (.+?) from (.*)" ], @@ -2360,17 +2392,9 @@ "ref_protection_profiles": "Operating System Protection Profile, Version 2.0, 01 June 2010, BSI-CC-PP-0067-2010, OSPP Extended Packages: Extended Identification and Authentication and Labeled Security, both Version 2.0, 28 May 2010", "cc_version": "PP conformant Common Criteria Part 2 extended", "cc_security_level": "Common Criteria Part 3 conformant EAL 4 augmented by ALC_FLR.3" - }, - "nscib": {}, - "niap": {}, - "canada": {} + } }, "st_frontpage": { - "anssi": {}, - "bsi": {}, - "nscib": {}, - "niap": {}, - "canada": {} }, "report_keywords": { "cc_cert_id": { diff --git a/tests/data/cc/analysis/vulnerable_dataset.json b/tests/data/cc/analysis/vulnerable_dataset.json index 1b23ef0d..7978cb5a 100644 --- a/tests/data/cc/analysis/vulnerable_dataset.json +++ b/tests/data/cc/analysis/vulnerable_dataset.json @@ -39,23 +39,48 @@ "maintenance_updates": [], "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": true, + "extract_ok": true, + "pdf_hash": null, + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": true, + "extract_ok": true, + "pdf_hash": null, + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": 
"sec_certs.sample.cc.CCCertificate.PdfData", "report_metadata": null, "st_metadata": null, + "cert_metadata": null, "report_frontpage": null, "st_frontpage": null, + "cert_frontpage": null, "report_keywords": null, "st_keywords": null, + "cert_keywords": null, "report_filename": null, - "st_filename": null + "st_filename": null, + "cert_filename": null }, "heuristics": { "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", @@ -95,23 +120,48 @@ "maintenance_updates": [], "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_ok": true, - "report_convert_ok": true, - "st_extract_ok": true, - "report_extract_ok": true + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": true, + "extract_ok": true, + "pdf_hash": null, + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": true, + "extract_ok": true, + "pdf_hash": null, + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", "report_metadata": null, "st_metadata": null, + "cert_metadata": null, "report_frontpage": null, "st_frontpage": null, + "cert_frontpage": null, "report_keywords": null, "st_keywords": null, + "cert_keywords": null, "report_filename": null, - "st_filename": null + "st_filename": null, + "cert_filename": null }, "heuristics": { "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", diff --git a/tests/data/cc/certificate/fictional_cert.json b/tests/data/cc/certificate/fictional_cert.json index ff95795c..54781dd5 100644 --- 
a/tests/data/cc/certificate/fictional_cert.json +++ b/tests/data/cc/certificate/fictional_cert.json @@ -41,29 +41,48 @@ }, "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": false, - "report_download_ok": false, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": false, - "report_convert_ok": false, - "st_extract_ok": false, - "report_extract_ok": false, - "st_pdf_hash": null, - "report_pdf_hash": null, - "st_txt_hash": null, - "report_txt_hash": null + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", "report_metadata": null, "st_metadata": null, + "cert_metadata": null, "report_frontpage": null, "st_frontpage": null, + "cert_frontpage": null, "report_keywords": null, "st_keywords": null, + "cert_keywords": null, "report_filename": null, - "st_filename": null + "st_filename": null, + "cert_filename": null }, "heuristics": { "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", diff --git a/tests/data/cc/dataset/auxiliary_datasets/maintenances/maintenance_updates.json b/tests/data/cc/dataset/auxiliary_datasets/maintenances/maintenance_updates.json index 4c1ff4c7..5596b597 100644 --- a/tests/data/cc/dataset/auxiliary_datasets/maintenances/maintenance_updates.json +++ 
b/tests/data/cc/dataset/auxiliary_datasets/maintenances/maintenance_updates.json @@ -22,29 +22,48 @@ "st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20ST%20v1.4%20CCRA.pdf", "state": { "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": true, - "report_download_ok": true, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": false, - "report_convert_ok": false, - "st_extract_ok": false, - "report_extract_ok": false, - "st_pdf_hash": "d42e4364d037ba742fcd4050a9a84d0e6300f93eb68bcfe8c61f72c429c9ceca", - "report_pdf_hash": "80bada65614c1b037c13efa78996a8910700d0e05a3ca217286f76d7dacefe62", - "st_txt_hash": null, - "report_txt_hash": null + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "80bada65614c1b037c13efa78996a8910700d0e05a3ca217286f76d7dacefe62", + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": true, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": "d42e4364d037ba742fcd4050a9a84d0e6300f93eb68bcfe8c61f72c429c9ceca", + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } }, "pdf_data": { "_type": "sec_certs.sample.cc.CCCertificate.PdfData", "report_metadata": null, "st_metadata": null, + "cert_metadata": null, "report_frontpage": null, "st_frontpage": null, + "cert_frontpage": null, "report_keywords": null, "st_keywords": null, + "cert_keywords": null, "report_filename": "383-7-159 MR v1.0e.pdf", - "st_filename": "383-7-159 ST v1.4 CCRA.pdf" + "st_filename": "383-7-159 ST v1.4 CCRA.pdf", + "cert_filename": null }, "heuristics": { "_type": 
"sec_certs.sample.cc.CCCertificate.Heuristics", diff --git a/tests/data/cc/dataset/toy_dataset.json b/tests/data/cc/dataset/toy_dataset.json index e32cffa4..dbf75078 100644 --- a/tests/data/cc/dataset/toy_dataset.json +++ b/tests/data/cc/dataset/toy_dataset.json @@ -1,288 +1,345 @@ { - "_type": "sec_certs.dataset.cc.CCDataset", - "state": { - "_type": "sec_certs.dataset.dataset.Dataset.DatasetInternalState", - "meta_sources_parsed": true, - "artifacts_downloaded": false, - "pdfs_converted": false, - "auxiliary_datasets_processed": false, - "certs_analyzed": false + "_type": "sec_certs.dataset.cc.CCDataset", + "state": { + "_type": "sec_certs.dataset.dataset.Dataset.DatasetInternalState", + "meta_sources_parsed": true, + "artifacts_downloaded": false, + "pdfs_converted": false, + "auxiliary_datasets_processed": false, + "certs_analyzed": false + }, + "timestamp": "2020-11-16 17:04:14.770153", + "sha256_digest": "not implemented", + "name": "toy dataset", + "description": "toy dataset description", + "n_certs": 3, + "certs": [ + { + "_type": "sec_certs.sample.cc.CCCertificate", + "dgst": "309ac2fd7f2dcf17", + "status": "active", + "category": "Access Control Devices and Systems", + "name": "NetIQ Identity Manager 4.7", + "manufacturer": "NetIQ Corporation", + "scheme": "SE", + "security_level": { + "_type": "Set", + "elements": [ + "ALC_FLR.2", + "EAL3+" + ] + }, + "not_valid_before": "2020-06-15", + "not_valid_after": "2025-06-15", + "report_link": "https://www.commoncriteriaportal.org/files/epfiles/Certification%20Report%20-%20NetIQ®%20Identity%20Manager%204.7.pdf", + "st_link": "https://www.commoncriteriaportal.org/files/epfiles/ST%20-%20NetIQ%20Identity%20Manager%204.7.pdf", + "cert_link": "https://www.commoncriteriaportal.org/files/epfiles/Certifikat%20CCRA%20-%20NetIQ%20Identity%20Manager%204.7_signed.pdf", + "manufacturer_web": "https://www.netiq.com/", + "protection_profiles": { + "_type": "Set", + "elements": [] + }, + "maintenance_updates": { + "_type": 
"Set", + "elements": [] + }, + "state": { + "_type": "sec_certs.sample.cc.CCCertificate.InternalState", + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } + }, + "pdf_data": { + "_type": "sec_certs.sample.cc.CCCertificate.PdfData", + "report_metadata": null, + "st_metadata": null, + "cert_metadata": null, + "report_frontpage": null, + "st_frontpage": null, + "cert_frontpage": null, + "report_keywords": null, + "st_keywords": null, + "cert_keywords": null, + "report_filename": null, + "st_filename": null, + "cert_filename": null + }, + "heuristics": { + "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", + "extracted_versions": null, + "cpe_matches": null, + "verified_cpe_matches": null, + "related_cves": null, + "cert_lab": null, + "cert_id": null, + "annotated_references": null, + "extracted_sars": null, + "direct_transitive_cves": null, + "indirect_transitive_cves": null, + "report_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "st_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "scheme_data": null + } }, - "timestamp": "2020-11-16 17:04:14.770153", - 
"sha256_digest": "not implemented", - "name": "toy dataset", - "description": "toy dataset description", - "n_certs": 3, - "certs": [ - { - "_type": "sec_certs.sample.cc.CCCertificate", - "dgst": "309ac2fd7f2dcf17", - "status": "active", - "category": "Access Control Devices and Systems", - "name": "NetIQ Identity Manager 4.7", - "manufacturer": "NetIQ Corporation", - "scheme": "SE", - "security_level": { - "_type": "Set", - "elements": [ - "ALC_FLR.2", - "EAL3+" - ] - }, - "not_valid_before": "2020-06-15", - "not_valid_after": "2025-06-15", - "report_link": "https://www.commoncriteriaportal.org/files/epfiles/Certification%20Report%20-%20NetIQ®%20Identity%20Manager%204.7.pdf", - "st_link": "https://www.commoncriteriaportal.org/files/epfiles/ST%20-%20NetIQ%20Identity%20Manager%204.7.pdf", - "cert_link": "https://www.commoncriteriaportal.org/files/epfiles/Certifikat%20CCRA%20-%20NetIQ%20Identity%20Manager%204.7_signed.pdf", - "manufacturer_web": "https://www.netiq.com/", - "protection_profiles": { - "_type": "Set", - "elements": [] - }, - "maintenance_updates": { - "_type": "Set", - "elements": [] - }, - "state": { - "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": false, - "report_download_ok": false, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": false, - "report_convert_ok": false, - "st_extract_ok": false, - "report_extract_ok": false, - "st_pdf_hash": null, - "report_pdf_hash": null, - "st_txt_hash": null, - "report_txt_hash": null - }, - "pdf_data": { - "_type": "sec_certs.sample.cc.CCCertificate.PdfData", - "report_metadata": null, - "st_metadata": null, - "report_frontpage": null, - "st_frontpage": null, - "report_keywords": null, - "st_keywords": null, - "report_filename": null, - "st_filename": null - }, - "heuristics": { - "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", - "extracted_versions": null, - "cpe_matches": null, - "verified_cpe_matches": null, - "related_cves": null, - 
"cert_lab": null, - "cert_id": null, - "annotated_references": null, - "extracted_sars": null, - "direct_transitive_cves": null, - "indirect_transitive_cves": null, - "report_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - "indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "st_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - "indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "scheme_data": null - } + { + "_type": "sec_certs.sample.cc.CCCertificate", + "dgst": "8cf86948f02f047d", + "status": "active", + "category": "Access Control Devices and Systems", + "name": "Magic SSO V4.0", + "manufacturer": "Dreamsecurity Co., Ltd.", + "scheme": "KR", + "security_level": { + "_type": "Set", + "elements": [] + }, + "not_valid_before": "2019-11-15", + "not_valid_after": "2024-11-15", + "report_link": "https://www.commoncriteriaportal.org/files/epfiles/KECS-CR-19-70%20Magic%20SSO%20V4.0(eng)%20V1.0.pdf", + "st_link": "https://www.commoncriteriaportal.org/files/epfiles/Magic_SSO_V4.0-ST-v1.4_EN.pdf", + "cert_link": null, + "manufacturer_web": "https://www.dreamsecurity.com/", + "protection_profiles": { + "_type": "Set", + "elements": [ + { + "_type": "sec_certs.sample.protection_profile.ProtectionProfile", + "pp_name": "Korean National Protection Profile for Single Sign On V1.0", + "pp_eal": "EAL1+", + "pp_link": "https://www.commoncriteriaportal.org/files/ppfiles/KECS-PP-0822-2017%20Korean%20National%20PP%20for%20Single%20Sign%20On%20V1.0(eng).pdf", + "pp_ids": null + } + ] + }, + "maintenance_updates": { + "_type": "Set", + "elements": [] + }, + "state": { + "_type": "sec_certs.sample.cc.CCCertificate.InternalState", + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": 
false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null }, - { - "_type": "sec_certs.sample.cc.CCCertificate", - "dgst": "8cf86948f02f047d", - "status": "active", - "category": "Access Control Devices and Systems", - "name": "Magic SSO V4.0", - "manufacturer": "Dreamsecurity Co., Ltd.", - "scheme": "KR", - "security_level": { - "_type": "Set", - "elements": [] - }, - "not_valid_before": "2019-11-15", - "not_valid_after": "2024-11-15", - "report_link": "https://www.commoncriteriaportal.org/files/epfiles/KECS-CR-19-70%20Magic%20SSO%20V4.0(eng)%20V1.0.pdf", - "st_link": "https://www.commoncriteriaportal.org/files/epfiles/Magic_SSO_V4.0-ST-v1.4_EN.pdf", - "cert_link": null, - "manufacturer_web": "https://www.dreamsecurity.com/", - "protection_profiles": { - "_type": "Set", - "elements": [ - { - "_type": "sec_certs.sample.protection_profile.ProtectionProfile", - "pp_name": "Korean National Protection Profile for Single Sign On V1.0", - "pp_eal": "EAL1+", - "pp_link": "https://www.commoncriteriaportal.org/files/ppfiles/KECS-PP-0822-2017%20Korean%20National%20PP%20for%20Single%20Sign%20On%20V1.0(eng).pdf", - "pp_ids": null - } - ] - }, - "maintenance_updates": { - "_type": "Set", - "elements": [] - }, - "state": { - "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": false, - "report_download_ok": false, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": false, - "report_convert_ok": false, - "st_extract_ok": false, - "report_extract_ok": false, - "st_pdf_hash": null, - "report_pdf_hash": null, - "st_txt_hash": null, - "report_txt_hash": null - }, - "pdf_data": { - "_type": "sec_certs.sample.cc.CCCertificate.PdfData", - "report_metadata": null, - "st_metadata": null, - "report_frontpage": null, - "st_frontpage": null, - "report_keywords": null, - "st_keywords": null, - "report_filename": null, - "st_filename": null - }, - "heuristics": { - "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", - 
"extracted_versions": null, - "cpe_matches": null, - "verified_cpe_matches": null, - "related_cves": null, - "cert_lab": null, - "cert_id": null, - "annotated_references": null, - "extracted_sars": null, - "direct_transitive_cves": null, - "indirect_transitive_cves": null, - "report_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - "indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "st_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - "indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "scheme_data": null - } + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null }, - { - "_type": "sec_certs.sample.cc.CCCertificate", - "dgst": "8a5e6bcda602920c", - "status": "active", - "category": "Boundary Protection Devices and Systems", - "name": "Fortinet FortiGate w/ FortiOS v5.6.7", - "manufacturer": "Fortinet, Inc.", - "scheme": "CA", - "security_level": { - "_type": "Set", - "elements": [] - }, - "not_valid_before": "2019-05-22", - "not_valid_after": "2024-05-24", - "report_link": "https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20CR%20v1.0a.pdf", - "st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20ST%20v1.3A.pdf", - "cert_link": "https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20CT%20v1.0a.pdf", - "manufacturer_web": "https://www.fortinet.com/", - "protection_profiles": { - "_type": "Set", - "elements": [ - { - "_type": "sec_certs.sample.protection_profile.ProtectionProfile", - "pp_name": "collaborative Protection Profile for Stateful Traffic Filter Firewalls v2.0 + Errata 20180314", - "pp_eal": null, - "pp_link": 
"https://www.commoncriteriaportal.org/files/ppfiles/CPP_FW_V2.0E.pdf", - "pp_ids": null - } - ] - }, - "maintenance_updates": { - "_type": "Set", - "elements": [ - { - "_type": "sec_certs.sample.cc.CCCertificate.MaintenanceReport", - "maintenance_date": "2019-08-26", - "maintenance_title": "Fortinet FortiGate w/ FortiOS v5.6.7 Build 6022", - "maintenance_report_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20MR%20v1.0e.pdf", - "maintenance_st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20ST%20v1.4%20CCRA.pdf" - } - ] - }, - "state": { - "_type": "sec_certs.sample.cc.CCCertificate.InternalState", - "st_download_ok": false, - "report_download_ok": false, - "st_convert_garbage": false, - "report_convert_garbage": false, - "st_convert_ok": false, - "report_convert_ok": false, - "st_extract_ok": false, - "report_extract_ok": false, - "st_pdf_hash": null, - "report_pdf_hash": null, - "st_txt_hash": null, - "report_txt_hash": null - }, - "pdf_data": { - "_type": "sec_certs.sample.cc.CCCertificate.PdfData", - "report_metadata": null, - "st_metadata": null, - "report_frontpage": null, - "st_frontpage": null, - "report_keywords": null, - "st_keywords": null, - "report_filename": null, - "st_filename": null - }, - "heuristics": { - "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", - "extracted_versions": null, - "cpe_matches": null, - "verified_cpe_matches": null, - "related_cves": null, - "cert_lab": null, - "cert_id": null, - "annotated_references": null, - "extracted_sars": null, - "direct_transitive_cves": null, - "indirect_transitive_cves": null, - "report_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - "indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "st_references": { - "_type": "sec_certs.sample.certificate.References", - "directly_referenced_by": null, - "directly_referencing": null, - 
"indirectly_referenced_by": null, - "indirectly_referencing": null - }, - "scheme_data": null - } + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null } - ] + }, + "pdf_data": { + "_type": "sec_certs.sample.cc.CCCertificate.PdfData", + "report_metadata": null, + "st_metadata": null, + "cert_metadata": null, + "report_frontpage": null, + "st_frontpage": null, + "cert_frontpage": null, + "report_keywords": null, + "st_keywords": null, + "cert_keywords": null, + "report_filename": null, + "st_filename": null, + "cert_filename": null + }, + "heuristics": { + "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", + "extracted_versions": null, + "cpe_matches": null, + "verified_cpe_matches": null, + "related_cves": null, + "cert_lab": null, + "cert_id": null, + "annotated_references": null, + "extracted_sars": null, + "direct_transitive_cves": null, + "indirect_transitive_cves": null, + "report_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "st_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "scheme_data": null + } + }, + { + "_type": "sec_certs.sample.cc.CCCertificate", + "dgst": "8a5e6bcda602920c", + "status": "active", + "category": "Boundary Protection Devices and Systems", + "name": "Fortinet FortiGate w/ FortiOS v5.6.7", + "manufacturer": "Fortinet, Inc.", + "scheme": "CA", + "security_level": { + "_type": "Set", + "elements": [] + }, + "not_valid_before": "2019-05-22", + "not_valid_after": "2024-05-24", + "report_link": 
"https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20CR%20v1.0a.pdf", + "st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20ST%20v1.3A.pdf", + "cert_link": "https://www.commoncriteriaportal.org/files/epfiles/383-4-450%20CT%20v1.0a.pdf", + "manufacturer_web": "https://www.fortinet.com/", + "protection_profiles": { + "_type": "Set", + "elements": [ + { + "_type": "sec_certs.sample.protection_profile.ProtectionProfile", + "pp_name": "collaborative Protection Profile for Stateful Traffic Filter Firewalls v2.0 + Errata 20180314", + "pp_eal": null, + "pp_link": "https://www.commoncriteriaportal.org/files/ppfiles/CPP_FW_V2.0E.pdf", + "pp_ids": null + } + ] + }, + "maintenance_updates": { + "_type": "Set", + "elements": [ + { + "_type": "sec_certs.sample.cc.CCCertificate.MaintenanceReport", + "maintenance_date": "2019-08-26", + "maintenance_title": "Fortinet FortiGate w/ FortiOS v5.6.7 Build 6022", + "maintenance_report_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20MR%20v1.0e.pdf", + "maintenance_st_link": "https://www.commoncriteriaportal.org/files/epfiles/383-7-159%20ST%20v1.4%20CCRA.pdf" + } + ] + }, + "state": { + "_type": "sec_certs.sample.cc.CCCertificate.InternalState", + "report": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "st": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + }, + "cert": { + "_type": "sec_certs.sample.cc.CCCertificate.DocumentState", + "download_ok": false, + "convert_garbage": false, + "convert_ok": false, + "extract_ok": false, + "pdf_hash": null, + "txt_hash": null + } + }, + "pdf_data": { + "_type": "sec_certs.sample.cc.CCCertificate.PdfData", + "report_metadata": null, + 
"st_metadata": null, + "cert_metadata": null, + "report_frontpage": null, + "st_frontpage": null, + "cert_frontpage": null, + "report_keywords": null, + "st_keywords": null, + "cert_keywords": null, + "report_filename": null, + "st_filename": null, + "cert_filename": null + }, + "heuristics": { + "_type": "sec_certs.sample.cc.CCCertificate.Heuristics", + "extracted_versions": null, + "cpe_matches": null, + "verified_cpe_matches": null, + "related_cves": null, + "cert_lab": null, + "cert_id": null, + "annotated_references": null, + "extracted_sars": null, + "direct_transitive_cves": null, + "indirect_transitive_cves": null, + "report_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "st_references": { + "_type": "sec_certs.sample.certificate.References", + "directly_referenced_by": null, + "directly_referencing": null, + "indirectly_referenced_by": null, + "indirectly_referencing": null + }, + "scheme_data": null + } + } + ] }