diff --git a/.gitignore b/.gitignore
index 1fd389c..8b3ac10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ snakejob.*
listings/
dist
+debug
diff --git a/setup.py b/setup.py
index 63f1a20..8305b59 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
long_description = fh.read()
-DEV_REQS = ['black', 'flake8', 'isort', 'mypy']
+DEV_REQS = ['black', 'flake8', 'isort', 'mypy', 'requests-cache']
TEST_REQS = ['biopython', 'snakemake', 'ftputil', 'requests', 'pytest', 'pytest-cov', 'hypothesis']
setup(
@@ -17,7 +17,7 @@
description='Convert between NCBI pubmed/PMC and BIOC formats',
long_description=long_description,
long_description_content_type='text/markdown',
- install_requires=['bioc>=2.0', 'typing_extensions'],
+ install_requires=['bioc>=2.0', 'typing_extensions', 'unidecode'],
extras_require={'dev': DEV_REQS + TEST_REQS, 'test': TEST_REQS},
python_requires='>=3.6',
author='Jake Lever',
diff --git a/src/bioconverters/constants.py b/src/bioconverters/constants.py
new file mode 100644
index 0000000..5d2f871
--- /dev/null
+++ b/src/bioconverters/constants.py
@@ -0,0 +1,50 @@
+GREEK_ALPHABET = {
+ '\u0391': 'Alpha',
+ '\u0392': 'Beta',
+ '\u0393': 'Gamma',
+ '\u0394': 'Delta',
+ '\u0395': 'Epsilon',
+ '\u0396': 'Zeta',
+ '\u0397': 'Eta',
+ '\u0398': 'Theta',
+ '\u0399': 'Iota',
+ '\u039A': 'Kappa',
+ '\u039B': 'Lambda',
+ '\u039C': 'Mu',
+ '\u039D': 'Nu',
+ '\u039E': 'Xi',
+ '\u039F': 'Omicron',
+ '\u03A0': 'Pi',
+ '\u03A1': 'Rho',
+ '\u03A3': 'Sigma',
+ '\u03A4': 'Tau',
+ '\u03A5': 'Upsilon',
+ '\u03A6': 'Phi',
+ '\u03A7': 'Chi',
+ '\u03A8': 'Psi',
+ '\u03A9': 'Omega',
+ '\u03B1': 'alpha',
+ '\u03B2': 'beta',
+ '\u03B3': 'gamma',
+ '\u03B4': 'delta',
+ '\u03B5': 'epsilon',
+ '\u03B6': 'zeta',
+ '\u03B7': 'eta',
+ '\u03B8': 'theta',
+ '\u03B9': 'iota',
+ '\u03BA': 'kappa',
+ '\u03BB': 'lambda',
+ '\u03BC': 'mu',
+ '\u03BD': 'nu',
+ '\u03BE': 'xi',
+ '\u03BF': 'omicron',
+ '\u03C0': 'pi',
+ '\u03C1': 'rho',
+ '\u03C3': 'sigma',
+ '\u03C4': 'tau',
+ '\u03C5': 'upsilon',
+ '\u03C6': 'phi',
+ '\u03C7': 'chi',
+ '\u03C8': 'psi',
+ '\u03C9': 'omega',
+}
diff --git a/src/bioconverters/pmcxml.py b/src/bioconverters/pmcxml.py
index 9bc1c91..64ed648 100644
--- a/src/bioconverters/pmcxml.py
+++ b/src/bioconverters/pmcxml.py
@@ -33,6 +33,7 @@
"authors’ contributions",
"background",
"case report",
+ "case presentation",
"competing interests",
"conclusion",
"conclusions",
@@ -41,6 +42,7 @@
"consent",
"data analysis",
"data collection",
+ "disclosure statement",
"discussion",
"ethics statement",
"funding",
@@ -90,7 +92,7 @@ class PmcArticle(TypedDict):
journal: str
journalISO: str
textSources: TextSource
- annotations: Dict[str, str] = {}
+ annotations: Dict[str, str]
def extract_article_content(
@@ -372,6 +374,7 @@ def pmcxml2bioc(
trim_sentences: bool = False,
all_xml_path_infon: bool = False,
mark_citations: bool = False,
+ sectioning_delimiter: str = "//",
) -> Iterator[Iterable[bioc.BioCDocument]]:
"""
Convert a PMC XML file into its Bioc equivalent
@@ -419,9 +422,16 @@ def pmcxml2bioc(
subsection_check = text_source.lower().strip("01234567890. ")
if subsection_check in allowed_subsections:
subsection = subsection_check
+ elif chunk.section:
+ subsection = re.sub(
+ r"^\s*\d+(\.\d+)*\s*\.\s*", "", chunk.section.lower()
+ )
passage.infons["section"] = group_name
passage.infons["subsection"] = subsection
+ passage.infons["sectioning"] = sectioning_delimiter.join(
+ chunk.sections
+ )
if chunk.xml_path:
if all_xml_path_infon or set(chunk.xml_path.split('/')) & {
diff --git a/src/bioconverters/utils.py b/src/bioconverters/utils.py
index 4e2b142..bd85c2a 100644
--- a/src/bioconverters/utils.py
+++ b/src/bioconverters/utils.py
@@ -1,10 +1,16 @@
+import logging
import re
import unicodedata
import uuid
import xml.etree.cElementTree as etree
+import xml.sax.saxutils as saxutils
+from copy import copy
from typing import Callable, Dict, Iterable, List, Optional, Tuple
import bioc
+from unidecode import unidecode
+
+from .constants import GREEK_ALPHABET
# XML elements to ignore the contents of
IGNORE_LIST = [
@@ -19,7 +25,7 @@
"tex-math",
"mml:math",
"object-id",
- "ext-link",
+ "ext-link", # TODO: should we keep URL content? some of these have text rather than the URL as inner content
]
# XML elements to separate text between (into different passages)
@@ -34,33 +40,43 @@
"label",
]
-TABLE_DELIMITER = '\t'
-TABLE_DELIMITED_TAGS = {'tr', 'th', 'td'}
+
+TABLE_DELIMITER = "\t"
+TABLE_DELIMITED_TAGS = {"tr", "th", "td"}
+# Tags that should be pre-pended with a space on merge
+PSEUDO_SPACE_TAGS = {"sup", "break", "AbstractText"}
+ANNOTATION_MARKER_PATTERN = (
+ r"ANN_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
+)
class TextChunk:
text: str
xml_node: str
xml_path: str
- non_separating: bool = False
is_tail: bool = False
is_annotation: bool = False
+ sections: List[str] = []
def __init__(
self,
text,
xml_node,
xml_path=None,
- non_separating=False,
is_tail=False,
is_annotation=False,
+ sections: Optional[List[str]] = None,
):
self.text = text
self.xml_node = xml_node
self.xml_path = xml_path
- self.non_separating = non_separating or is_annotation
self.is_tail = is_tail
self.is_annotation = is_annotation
+ self.sections = sections or []
+
+ @property
+ def section(self) -> str:
+ return self.sections[0] if self.sections else ""
def __str__(self) -> str:
return self.text
@@ -71,11 +87,11 @@ def __len__(self) -> int:
def __repr__(self):
tag = self.tag
if self.is_tail:
- tag = f'{tag}#'
- ns = '-ns' if self.non_separating else ''
- tag = f'{tag}{ns}'
+ tag = f"{tag}#"
if self.text:
- tag = f'{tag}+text[{len(self.text)}]'
+ tag = f"{tag}+text[{len(self.text)}]"
+ if self.is_annotation:
+ tag = f"{tag}@"
return tag
@property
@@ -92,9 +108,9 @@ def remove_brackets_without_words(text: str) -> str:
changed = True
previous_text = text
while changed:
- fixed = re.sub(r"\([^\w\t]*\)", "", previous_text)
- fixed = re.sub(r"\[[^\w\t]*\]", "", fixed)
- fixed = re.sub(r"\{[^\w\t]*\}", "", fixed)
+ fixed = re.sub(r"\([^\w\t-]*\)", "", previous_text)
+ fixed = re.sub(r"\[[^\w\t-]*\]", "", fixed)
+ fixed = re.sub(r"\{[^\w\t-]*\}", "", fixed)
changed = bool(previous_text != fixed)
previous_text = fixed
return fixed
@@ -114,30 +130,42 @@ def cleanup_text(text: str) -> str:
Clean up non-tab extra whitespace, remove control characters and extra leftover brackets etc
"""
# Remove some "control-like" characters (left/right separator)
- text = text.replace(u"\u2028", " ").replace(u"\u2029", " ")
- text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C" or ch == TABLE_DELIMITER)
+ text = text.replace("\u2028", " ").replace("\u2029", " ")
+ text = text.replace("°", " ° ")
+ # unidecode converts this to * by default, but . is more appropriate since that is how it is generally used in the XML articles
+ text = text.replace("·", ".")
+ text = "".join(
+ ch for ch in text if unicodedata.category(ch)[0] != "C" or ch == TABLE_DELIMITER
+ )
text = "".join(ch if unicodedata.category(ch)[0] != "Z" else " " for ch in text)
+ # replace greek letters with their long-form equivalent
+ for greek_letter, replacement in GREEK_ALPHABET.items():
+ text = text.replace(greek_letter, replacement)
+
+ text = unidecode(text, errors="preserve")
+
# Remove repeated commands and commas next to periods
text = re.sub(r",([^\S\t]*,)*", ",", text)
text = re.sub(r"(,[^\S\t]*)*\.", ".", text)
text = remove_brackets_without_words(text)
- # remove extra spaces from in-text figute/table citations
- text = re.sub(r'\([^\S\t]*([^)]*[^\s)])[^\S\t]*\)', r'(\1)', text)
+ # remove extra spaces from in-text figure/table citations
+ text = re.sub(r"\([^\S\t]*([^)]*[^\s)])[^\S\t]*\)", r"(\1)", text)
# remove trailing spaces before periods
- text = re.sub(r'[^\S\t]+\.(\s|$)', r'.\1', text)
+ text = re.sub(r"[^\S\t]+\.(\s|$)", r".\1", text)
# remove extra spaces around commas/semi-colons
- text = re.sub(r'[^\S\t]*([,;])[^\S\t]+', r'\1 ', text)
+ text = re.sub(r"[^\S\t]*([,;:])([^\S\t]+)", r"\1 ", text)
+ text = re.sub(r"[^\S\t]*([,;:])$", r"\1", text)
# trim leading and trailing non tab whitespace
- text = re.sub(r'(^|\t)([^\S\t]+)', r'\1', text)
- text = re.sub(r'([^\S\t]+)(\t|$)', r'\2', text)
+ text = re.sub(r"(^|\t)([^\S\t]+)", r"\1", text)
+ text = re.sub(r"([^\S\t]+)(\t|$)", r"\2", text)
# trim multiple non-tab spaces
- text = re.sub(r'[^\S\t][^\S\t]+', ' ', text)
+ text = re.sub(r"[^\S\t][^\S\t]+", " ", text)
return text
@@ -168,36 +196,254 @@ def merge_adjacent_xref_siblings(elem_list):
If two XML elements in a list are adjacent and both xrefs separated only by punctuation, merge them
"""
siblings = []
-
for elem in elem_list:
- if siblings and elem.tag == 'xref' and siblings[-1].tag == 'xref':
+ if siblings and elem.tag == "xref" and siblings[-1].tag == "xref":
# merge these 2 if the tail of the first element is a punctuation mark
- prev_tail = (siblings[-1].tail or '').strip()
+ prev_tail = (siblings[-1].tail or "").strip()
if (
- siblings[-1].tail
- and len(prev_tail) == 1
- and unicodedata.category(prev_tail)[0] == 'P'
- and elem.attrib.get('ref-type') == siblings[-1].attrib.get('ref-type')
- ):
-
- siblings[-1].text = (siblings[-1].text or '') + prev_tail + (elem.text or '')
+ not prev_tail
+ or (
+ len(prev_tail) == 1 and unicodedata.category(prev_tail[0])[0] == "P"
+ )
+ ) and elem.attrib.get("ref-type") == siblings[-1].attrib.get("ref-type"):
+
+ siblings[-1].text = (
+ (siblings[-1].text or "") + prev_tail + (elem.text or "")
+ )
siblings[-1].tail = elem.tail
continue
siblings.append(elem)
return siblings
-def get_tag_path(mapping: Dict[etree.Element, etree.Element], node: etree.Element) -> str:
+def drop_adjacent_sup_siblings(elem_list: List[etree.Element]) -> List[etree.Element]:
"""
- Get a string representing the path of the currentl XML node in the heirachry of the XML file
+ If there are 2 adjacent superscript tags, drop them and append their text to the preceding element
+ """
+ result: List[etree.Element] = []
+
+ for elem in elem_list:
+ if elem.tag == "sup" and len(result) > 1 and result[-1].tag == "sup":
+ # must have a non-sup element to append to the tail of
+ text = [result[-1].text, result[-1].tail, elem.text, elem.tail]
+ if result[-2].tail is None:
+ result[-2].tail = ""
+ result[-2].tail += "".join([(t or "") for t in text])
+ result.pop()
+ else:
+ result.append(elem)
+ return result
+
+
+def get_tag_path(
+ mapping: Dict[etree.Element, etree.Element], node: etree.Element
+) -> str:
+ """
+ Get a string representing the path of the current XML node in the hierarchy of the XML file
"""
path = []
- current_node = node
+ current_node: Optional[etree.Element] = node
while current_node is not None:
path.append(current_node.tag)
current_node = mapping.get(current_node)
- return '/'.join((path[::-1]))
+ return "/".join((path[::-1]))
+
+
+def get_tag_section(
+ mapping: Dict[etree.Element, etree.Element], node: etree.Element
+) -> List[str]:
+ """
+ Get the list of section/subsection titles (outermost first) that contain the current XML node in the hierarchy of the XML file
+
+ Args:
+ mapping: mapping of each XML node to their parent/containing XML node
+ node: the current XML node for which we are determining the section titles
+ """
+ current_node: Optional[etree.Element] = node
+ section_name = []
+ while current_node is not None:
+ if current_node.tag == "sec":
+ titles = [t for t in current_node if t.tag == "title"]
+ if len(titles) == 1 and titles[0].text:
+ section_name.append(titles[0].text.strip())
+
+ current_node = mapping.get(current_node)
+ return section_name[::-1]
+
+
+def first_empty_index(items, strict_none=False) -> int:
+ """
+ Return the index of the first falsy item (or, if strict_none is set, the first None item) in an iterable. Defaults to 0 if no such item is found
+ """
+ for i, item in enumerate(items):
+ if not item and not strict_none:
+ return i
+ elif strict_none and item is None:
+ return i
+ return 0
+
+
+def get_unique_child_element_index(elem: etree.Element, child_elem_type: str) -> int:
+ """
+ Get the index of the child element with the given tag in an XML parent node, ensuring that exactly 1 such child exists
+
+ Args:
+ elem: the element to search children of
+ child_elem_type: the tag type of the element in question
+ """
+ indices = []
+ for i, child in enumerate(elem):
+ if child.tag == child_elem_type:
+ indices.append(i)
+ if not indices:
+ raise KeyError(
+ f"unable to find child element with tag type = {child_elem_type}"
+ )
+ if len(indices) > 1:
+ raise ValueError(
+ f"found multiple child elements with tag type = {child_elem_type}"
+ )
+ return indices[0]
+
+
+def normalize_table_header(header) -> etree.Element:
+ """
+ Replace any multi-row table header with a single-row header by repeating col-spanning labels as prefixes on their sub-columns
+ """
+ header_cols = 0
+ header_rows = len(header)
+ for row in header:
+ for header_cell in row:
+ header_cols += int(header_cell.attrib.get("colspan", 1))
+ break
+
+ header_matrix = []
+ filled_cells = []
+ for _ in range(header_rows):
+ row = []
+ for _ in range(header_cols):
+ row.append("")
+ header_matrix.append(row)
+ filled_cells.append([0 for _ in row])
+
+ for i_row, row in enumerate(header):
+ i_col = 0
+ for header_cell in row:
+ text = str(merge_text_chunks(chunk for chunk in tag_handler(header_cell)))
+ row_cells = [
+ r + i_row for r in range(int(header_cell.attrib.get("rowspan", 1)))
+ ]
+ col_cells = [
+ r + first_empty_index(filled_cells[i_row])
+ for r in range(int(header_cell.attrib.get("colspan", 1)))
+ ]
+
+ for r in row_cells:
+ for c in col_cells:
+ header_matrix[r][c] = text
+ filled_cells[r][c] = 1
+
+ for col in range(header_cols):
+ for row in range(1, header_rows)[::-1]:
+ if header_matrix[row][col] == header_matrix[row - 1][col]:
+ header_matrix[row][col] = ""
+
+ # now flatten the header rows
+ for row in header_matrix[1:]:
+ for i_col, col in enumerate(row):
+ if col:
+ header_matrix[0][i_col] += " " + col
+
+ result = [re.sub(r"[\s\n]+", " ", col.strip()) for col in header_matrix[0]]
+ new_xml = []
+
+ for col in result:
+ new_xml.append(f"
KRAS mutational status has been shown to be a predictive biomarker of resistance to anti-EGFR monoclonal antibody (mAb) therapy in patients with metastatic colorectal cancer. We report the spectrum of KRAS mutation in 1506 patients with colorectal cancer and the identification and characterization of rare insertion mutations within the functional domain of KRAS. KRAS mutations are found in 44.5% (670/1506) of the patients. Two cases are found to harbor double mutations involving both codons 12 and 13. The frequencies of KRAS mutations at its codons 12, 13, 61, and 146 are 75.1%, 19.3%, 2.5%, and 2.7%, respectively. The most abundant mutation of codon 12 is G12D, followed by G12V and G12C while G13D is the predominant mutation in codon 13. Mutations in other codons are rare. The KRAS mutation rate is significantly higher in women (48%, 296/617) than in men (42.1%, 374/889, P = 0.023). Tumors on the right colon have a higher frequency of KRAS mutations than those on the left (57.3% vs. 40.4%, P < 0.0001). Two in-frame insertion mutations affect the phosphate-binding loop (codon 10–16) of KRAS are identified. One of them has never been reported before. Compared with wild-type protein, the insertion variants enhance the cellular accumulation of active RAS (RAS-GTP) and constitutively activate the downstream signaling pathway. NIH3T3 cells transfected with the insertion variants show enhanced anchorage-independent growth and in vivo tumorigenicity. Potentially these mutations contribute to primary resistance to anti-EGFR mAb therapy but the clinical implication requires further validation.
Colorectal cancer (CRC) is one of the most common lethal cancers worldwide. In 2008, more than 1.2 million new cases were diagnosed, with approximately 608 700 deaths estimated to have occurred.1 Epidermal growth factor receptor (EGFR), a critical molecule in CRC initiation and progression, is frequently overexpressed in metastatic CRC (mCRC) tumors.2,3 The phenomena lead to the development of molecular targeting therapy to inhibit the EGFR signaling pathway. Using anti-EGFR monoclonal antibodies (mAbs) such as cetuximab and panitumumab, have been approved in treating mCRC to inhibit EGFR activity and hence switching off downstream pathways.2,3
+
However, anti-EGFR therapy does not work on all CRCs, largely due to the resistance to the anti-EGFR mAbs.4 Different studies have reported the response and outcome of CRCs to the anti-EGFR mAbs was poor with KRAS mutation which accounting for 30–40% of non-responsive cases.4-7
+KRAS mutation status is now considered to be a predictive biomarker of resistance to anti-EGFR mAbs treatment for mCRC patients. KRAS is one of the RAS superfamily of proto-oncoproteins which is small signal switch molecule called GTPase, cycling between inactive GDP-bound (RAS-GDP) and active GTP-bound (RAS-GTP) forms, to regulate cellular growth and differentiation.8 Activating mutations of RAS proto-oncogenes continuously elevate the cytoplasmic RAS-GTP level. Oncogenic signaling pathways, such as Raf-MEK-ERK and PI3K/AKT cascades, are then constitutively activated in an EGFR activation-independent manner and therefore promote cell cycle progression.6,8
+KRAS mutation is found in 40% of CRCs and missense point mutation is the most common mutation. The majority of the point mutation sites of KRAS in CRC patients are located at codons 12 and 13 (~80% and ~17%, respectively), together with rare mutations at codons 61 and 146 (~1–4%).3,9-11
+
Most clinical studies of KRAS mutation in CRC were conducted in western countries. However, KRAS mutation rate or spectrum in CRCs may partially depends on the population studied.12 It has been reported that KRAS mutations were identified in CRC patients from the UK, Switzerland, and Spain, for 27.4%, 38%, and 41% respectively.12 This epidemiological variation indicates the essence of establishment of a local CRC KRAS mutation data in different populations. There has been a dramatic increase in reported incidence of colorectal cancer in Asian.12 It is of paramount importance to investigate the KRAS mutation spectrum in our locality in view of the implication in using anti-EGFR targeting therapy. We aim to analyze the KRAS mutation status and the clinical correlation in Chinese patients with CRC in Hong Kong. Here we report the spectrum of KRAS mutation in a large cohort of colorectal cancer and the identification and characterization of a novel insertion mutation within the function domain of KRAS.
+
+
+ Results
+
+ Clinical characteristics of the patients
+
We tested a total of 1506 patients with colorectal cancer. Of them 889 (59%) were males and 617 (41%) were females. The median age at presentation was 61 ± 11.3 y (range 21–89 y). The clinical characteristics were in keeping with other reported populations of colorectal cancer.11 The age of female patients were slightly younger than males (59 ± 12.1 vs 61 ± 11.2, P = 0.014). There was significantly higher frequency of left colon tumor (75.8%) than the right side (24.2%, P < 0.0001). However, the right side tumors were more common in females (28.7%) compared with males (21.1%, P = 0.001). When rectal tumor was considered a separate entity, female patients had a higher frequency of right side tumor whereas the rectal tumors were more commonly found in male patients (P < 0.0001). The clinical characteristics of the patients tested were summarized in Table 1.
+
+
+ Table 1. Clinical characteristics of 1506 patients tested for KRAS status
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Total
+
Female
+
Male
+
P value
+
+
+
+
+
n =
+
1506
+
617 (41%)
+
889 (59%)
+
+
+
+
Age
+
61 ± 11.3
+
59 ± 12.1
+
61 ± 11.2
+
0.014
+
+
+
Tumor site (right vs left)
+
+
+
0.001
+
+
+
Right
+
365 (24.2%)
+
177 (28.7%)
+
188 (21.1%)
+
+
+
+
Left
+
1141 (75.8%)
+
440 (71.3%)
+
701 (78.9%)
+
+
+
+
Tumor site (right vs left vs rectum)
+
+
+
< 0.0001
+
+
+
Right
+
365 (24.2%)
+
177 (28.7%)
+
188 (21.1%)
+
+
+
+
Left
+
538 (35.7%)
+
228 (40.0%)
+
310 (34.9%)
+
+
+
+
Rectum
+
603 (40.1%)
+
212 (34.3%)
+
391 (44.0%)
+
+
+
+
+
+
+
+ Status of KRAS mutation
+
KRAS mutations on codons 12, 13, 61 and 146 were analyzed by PCR-direct sequencing using microdissected FFPE tumor tissues from 1506 patients. A total of 672 KRAS mutations were identified from 670 patients (44.5%, 670 out of 1506, Table 2). Two cases were found to harbor double mutations. Both cases involved codon 12 and codon 13 of KRAS gene. One case harbored concomitant G12C and G13D, while the other had both G12V and G13D. Within 672 KRAS mutations identified, the frequencies of mutations at codons 12, 13, 61, and 146 were 75.1%, 19.3%, 2.5%, and 2.7%, respectively. Majority of the mutations occurred at codons 12 and 13 which accounted for more than 94% of all mutations identified. The most common mutation was glycine to aspartate on codon 12 (G12D), which accounted for 37.5% of all mutations (252 out of 672). Mutation from glycine to valine (G12V) was the second most common of all specified mutations (20.1%; 135 of 672). Mutation from glycine to aspartate on codon 13 (G13D) accounted for 19.0% (128 of 672) of specified mutations.
aA total of 672 KRAS mutations were detected from 670 colorectal tumors. Two tumors harbored double mutations.
+
+
+
+
The KRAS mutation rate was significantly higher in women (48%, 296 of 617, Table 3) than in men (42.1%, 374 of 889, P = 0.023). The mutation rate did not differ according to the primary tumor site if the tumor location was classified as either ascending, hepatic flexure, transverse, splenic flexure, descending, sigmoid, or rectum. If the tumors on the right side of the colon (ascending and transverse colon) were group together and compared with those on the left (splenic flexure to rectum), the frequency of KRAS mutations were significantly higher in the right colon (57.3% vs. 40.4%, P < 0.0001). The KRAS mutation was not associated with the age of the patient. In comparison of the most frequently mutated codons between left and right colon, codon 12 mutations were significantly more likely to occur in rectum (right colon 28.8%, left colon 29.7%, rectum 41.6%), while codon 13 mutations were slightly more frequent in the right colon (right colon 40%, left colon 30.8%, rectum 29.2%, P = 0.013)
+
+
+ Table 3. Correlation of KRAS mutation status with clinical features
+
In the pool of CRC cases, we identified two rare KRAS mutations which were defined as in-frame insertion mutations. The Insertion mutations in KRAS exon 2 of patient #286 and #833 were further validated by direct sequencing of the cloned PCR products (Fig. 1). In patient #286, an in-flame insertion of 3-nucleotide (GGA) between codons 10 and 11 was observed (c.30_31insGGA: p.G10_A11insG). This rare mutation, which suggested the insertion of a glycine residue between glycine (amino acid 10) and alanine (amino acid 11), was reported once in the patient with myeloid leukemia.8 In patient #833, a tandem repeat sequences of codon 10 and 11 (GGA GCT) was in-flame inserted after codons 11 and introduced extra glycine and alanine residues between alanine (codon 11) and glycine (codon 12). This insertion mutation (c.33_34insGGAGCT:p.A11_G12insGA) has never been reported before. These two mutations are named 10G11 and 11GA12 respectively.
+
+
+
Figure 1. Electropherogram for KRAS mutants. Tissue DNA from the patient with colorectal cancer were amplified and cloned for sequencing analysis. Two novel in-flame insertions (10G11 and 11GA12) in exon 2 of KRAS gene were identified.
To investigate whether the newly found 10G11 and 11GA12
+KRAS mutation activate RAS activity, we constructed expression plasmids and transiently transfected into 293FT and NIH3T3 cells. As a control, expression plasmids carrying wild-type KRAS (KRAS-WT) and a well-known active KRAS mutant (KRAS-G12V) were used for comparison during the basic functional assay.
+
Compared with the cells transfected with WT expression plasmid, overexpression of 10G11 and 11GA12
+KRAS mutants in cell lines resulted in elevated protein levels of both active RAS (Ras-GTP) and its downstream signaling molecule, phosphorylated extracellular signal-regulated kinase (p-ERK). The elevated levels of these two proteins are similar to the cells transfected with the KRAS-G12V mutant construct (Fig. 2). To further demonstrate the biological effect of 10G11 and 11GA12
+KRAS mutants, NIH3T3 cells which stably transfected with empty vector, KRAS-WT, KRAS-G12V, 10G11 or 11GA12 mutant were prepared. Although NIH3T3 stable transfectants showed similar proliferation rate in MTT assay (data not shown), they have apparent differences in anchorage-independent growth property. We demonstrated in soft-agar colony formation assay that only a few number of colonies of the cells transfected with either empty vector or KRAS-wild type expression vector were observed. In contrast, more colonies were counted in all three transfectants with mutant KRAS and the differences were statistically significant compared with cells transfected with KRAS-wild type (Fig. 3). Furthermore, the colony sizes of the mutant KRAS transfectants were, in general, bigger than that in KRAS-wild type transfectant. To assess the in vivo tumorigenicity of novel KRAS variants, NIH3T3 transfectants containing empty vector or different KRAS mutants were injected subcutaneously into the dorsal flank of Balb/c nude mice. Compared with KRAS wild type and empty vector controls, KRAS
+10G11 and 11GA12 significantly enhanced in vivo tumor growth as showed in Figure 4. Collectively, these observations suggested that both newly identified KRAS mutants could activate the Raf-MEK-ERK pathway by elevating RAS-GTP level and contribute in vitro and in vivo cell transformation.
+
+
+
Figure 2.KRAS insertion mutants activated RAS signaling by enhancing cellular accumulation of active RAS (RAS-GTP) and activating p-ERK. NIH3T3 and 293FT cells were transfected with KRAS mutants, and RAS-GTP protein in the cell extract were immunoprecipitated with agarose beads containing Ras binding domain of Raf-1. Protein levels in both whole cell extracts (pan-RAS and pERK) and precipitated samples (RAS-GTP) were analyzed by western blot analysis as indicated. Representative results from 3 independent experiments were shown.
+
+
+
+
+
+
Figure 3.KRAS insertion mutants promoted anchorage-independent growth in soft agar. NIH3T3 cells stably transfected with pcDNA3.1 empty vector (EV), wild-type KRAS (WT), G12V KRAS mutant (G12V), 10G11 and 11GA12 mutants were cultured in soft agar for analysis. Representative microscopic pictures of colony from each transfectant were taken (Magnification, 400×). The number of colony in each transfectant was plot in the bar chart and the results shown were mean and standard deviation from three independent experiments. The P value of < 0.05 and < 0.001 were denoted as * and ** respectively.
+
+
+
+
+
+
Figure 4.KRAS insertion mutants promoted in vivo growth of NIH3T3 cells. In vivo tumorgenic assay in nude mice showed that tumors formed in the sites implanted with NIH3T3 cells expressing KRAS mutants (G12V, 10G11, or 11GA12) were consistently larger than that implanted with wild-type KRAS (WT) and empty vector (EV) controls. By western blotting, the expression of KRAS protein in the NIH3T3 transfectants and tumors dissected from the xenografts (T1–T5) was detected.
+
+
+
+
+
+
+ Discussion
+
In the current study, we report the KRAS mutation frequency in a large cohort of patients with colorectal cancer in Hong Kong. KRAS mutation is found in 44.5% (670 out of 1506) of colorectal cancers. The mutation rate is similar to KRAS studies previously reported.13-20
+Table 4 summarized the KRAS mutation rates and the distribution of mutants in representative studies. Codon 12 is the most common KRAS mutation and the most frequently found mutation is G12D (35% of all mutations found). Our data demonstrate the predominance of KRAS-mutant carcinoma in right colon and in female patients. This is in keeping with some previous reports although other studies might not have demonstrated such relationship.21,22 The preference of site of KRAS mutation might be correlate with the different molecular pathways involved in right and left side colon CRCs. The right and left side colon cancers have been considered as distinct tumor entities because of their epidemiological, clinicopathologic, and molecular biologic features. Right side colon cancer was found to be associated with female, older age, advanced stage, and poorly differentiated mucinous histology.23-26 Higher rates of microsatellite instability and KRAS mutations were common molecular events found in right side colon cancer.27,28 Whereas the left side tumor were more common to be chromosomal instable and harbor more TP53 mutation.27-30 The reason for the observed differences between left and right side colon adenocarcinoma remains unclear. It is likely to be multifactorial and complex including embryologic origin, and the effect of chemical and bacterial luminal microenvironments. Moreover, we have reported the predominant KRAS mutations in left colon are located in codon 12 and right colon in codon 13. This finding is different from a large population-based study which found significantly more codon-12 mutation cases in proximal (right colon) than distal (left colon) tumors (29.1% vs 20.5%; P < 0.01).21 Another study also showed rectosigmoid tumor (left colon) had the highest frequency of codon 13 mutations.31 There is no consistent trend, further study is necessary.
+
+
+ Table 4. Comparison of KRAS mutation distribution in reported series
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Studies
+
Current study
+
COSMIC database
+
Rosty 201323
+
Imamura 201241
+
De Roock 201042
+
Chang 200943
+
Karapetis 200844
+
Amado 20087
+
Brink 200331
+
Samowitz 200021
+
Andreyev 199811
+
+
+
+
+
n =
+
1506
+
17316
+
776
+
1261
+
747
+
228
+
394
+
427
+
737
+
1416
+
2214
+
+
+
Mutation rate %
+
44.5
+
34.9
+
28
+
35.8
+
36.3
+
36.4
+
41.6
+
43.1
+
36.8
+
31.8
+
37.7
+
+
+
+ Relative mutation distribution (%) by codon
+
+
+
+
Codon 12
+
75.1
+
79.3
+
87
+
74.6
+
69.3
+
69.9
+
63.8
+
84.2
+
70
+
77.9
+
54
+
+
+
Codon 13
+
19.1
+
17.6
+
13
+
25.4
+
20.1
+
25.3
+
11.7
+
15.8
+
21.6
+
22.1
+
16.7
+
+
+
Codon 61
+
2.5
+
0.58
+
+
+
5.3
+
1.2
+
+
+
+
+
+
+
+
Codon 146
+
2.7
+
0.19
+
+
+
5
+
2.4
+
+
+
+
+
+
+
+
+ Relative mutation distribution (%) by nucleotide substitution
+
+
+
+
G12D
+
37.6
+
35
+
161
+
35.2
+
27.4
+
+
35.7
+
38
+
26.1
+
31.1
+
30.6
+
+
+
G12V
+
20.0
+
21.5
+
95
+
20.8
+
19.8
+
+
28.1
+
21.7
+
24.4
+
21.4
+
23.4
+
+
+
G12C
+
6.7
+
8.3
+
44
+
9.6
+
7.3
+
+
+
7.6
+
5.9
+
9.5
+
+
+
+
G12S
+
4.9
+
6.3
+
12
+
2.6
+
6.3
+
+
+
7.6
+
5.6
+
6.8
+
+
+
+
G12A
+
4.3
+
6.7
+
20
+
4.4
+
6.9
+
+
+
8.2
+
5.6
+
3.5
+
+
+
+
G12R
+
1.5
+
1.1
+
8
+
1.8
+
1.7
+
+
+
1.6
+
2.4
+
0.7
+
+
+
+
G13D
+
18.8
+
17.4
+
110
+
24.1
+
20.1
+
+
11.7
+
15.8
+
20.2
+
20.8
+
16.7
+
+
+
G13C
+
0.3
+
+
3
+
0.7
+
+
+
+
+
0.3
+
0.4
+
+
+
+
Q61H
+
1.3
+
0.3
+
+
+
2.3
+
+
+
+
+
+
+
+
+
Q61L
+
0.7
+
0.2
+
+
+
1
+
+
+
+
+
+
+
+
+
Q61R
+
0.3
+
0.1
+
+
+
1.3
+
+
+
+
+
+
+
+
+
A146T
+
2.7
+
0.2
+
+
+
5
+
+
+
+
+
+
+
+
+
+
+
We report two rare in-frame insertion mutations in this study, c.30_31insGGA: p.G10_A11insG (duplication of codon 10) and c.33_34insGGAGCT:p.A11_G12insGA (duplication of codon 10–11). In-frame Insertion mutations in KRAS are rarely reported. Almost all reported KRAS in-frame insertions are tandem duplications. Three-nucleotide insertions resulting in codon 9, codon 10, and codon 12 duplications have been reported in colorectal cancer and leukemia.8,32-34 A Netherland cohort study found a duplication of six nucleotides in a colorectal tumor, leading to two additional amino acids added in codon 9 of KRAS.31 A 15-bp insertion in exon 3 that resulted in tandem duplication of codons 62–66 has been found in a case of primary lung adenocarcinoma.35 Another study also reported the identical 15-bp in-frame insertion mutation in a colorectal carcinoma.36
+
Wild-type KRAS regulates cellular growth and differentiation by cycling between inactive GDP-bound form (Ras-GDP) and active GTP-bound form (Ras-GTP). Mutant KRAS is defective in intrinsic GTP hydrolysis. Therefore, it is accumulated in cells in active GTP-bound form, resulting in constitutive activation of downstream signaling through effector proteins. Both insertion mutations found in the current study (10G11 and 11GA12) affect the phosphate-binding loop (codon 10–16) of KRAS. Our in vitro functional analyses have confirmed that, similar to the KRAS mutant G12V, both rare mutants enhance the cellular accumulation of active RAS (Ras-GTP), and activate the Raf-MEK-ERK pathway. Using soft agar assays, we demonstrate the ability of both insertion variants to drive in vitro cell transformation. We also show that both insertion mutants demonstrate enhanced tumorigenicity in nude mice. Our finding is concordant with previous in vivo analysis of KRAS
+10Gly11 mutation in acute leukemia which showed duplication of amino acid residue in codon 12 could lead to the activation of KRAS.8 In addition, another RAS protein member, HRAS with an insertion mutation in codon 12 was reported to gain the ability in cell transformation.8 These results suggest that both point mutation and insertion mutation within codon 12 and sites nearby could activate RAS protein through interrupting the GTP binding site of RAS family protein.
+
In summary, this study has provided a KRAS mutation database in colorectal cancer of local Chinese population and the correlation between KRAS status with gender and primary site in the colon. Furthermore, we report the identification and characterization of two rare KRAS insertion mutations. In vitro and in vivo functional studies confirm the oncogenic properties of these insertion mutations. KRAS mutations beyond the “hotspots” can be oncogenic by conveying selective growth advantage to the cells. These mutations might potentially contribute to primary resistance for anti-EGFR mAb targeted therapy. The clinical implication for these mutations requires further validation.
+
+
+ Materials and Methods
+
+ Patient sample
+
A total of 1506 consecutive colorectal adenocarcinoma specimens sent for KRAS mutational analysis in Prince of Wales Hospital, Hong Kong between 2008 and 2012 were included in this study. The study protocol was approved by the Joint CUHK-NTE Clinical Research Ethics Committee, Hong Kong.
+
+
+ Tumor DNA extraction
+
The location of tumor cells in the formalin-fixed, paraffin-embedded (FFPE) tissue was first marked on the standard H&E-stained histological slides. Subsequently, the corresponding tumor tissues on the unstained glass slide were microdissected manually for DNA extraction using QIAamp DNA tissue mini kit with standard procedure (Qiagen).
+
+
+ Sequencing analysis
+
Mutational hot spots including KRAS codons 12, 13, 61, and 146 were investigated by PCR-direct sequencing. PCR reactions were performed using primers listed in Table 5. Cycling sequencing reaction of the PCR fragments was performed with BigDye Terminator system (Applied Biosystems) using primers from both directions. The sequencing results were analyzed with the ABI PRISM® 3130XL Genetic Analyzer (Applied Biosystems). The data was collected and analyzed using Applied Biosystems sequencing analysis software.
+
+
+ Table 5. The sequences of oligonucleotides used in this study
+
+
+
+
+
+
+
+
+
+
+
PCR primers
+
Forward sequence
+
Reverse sequence
+
+
+
+
+
KRAS codon 12/13
+
GTATTAACCT TATGTGTGAC A
+
GTCCTGCACC AGTAATATGC
+
+
+
KRAS codon 61
+
TGCACTGTAA TAATCCAGAC TGTG
+
TGCACTGTAA TAATCCAGAC TGTG
+
+
+
KRAS codon 146
+
TCTGAAGATG TACCTATGGT CCTAGT
+
AAGAAGCAAT GCCCTCTCAA
+
+
+
+ Mutagenesis primers
+
+
+
+
+
+
KRAS-WT
+
5′-GGTAGTTGGA GCTGGTGGCG TAGGCAAGA-3′
+
5′- TCTTGCCTAC GCCACCAGCT CCAACTACC-3′
+
+
+
KRAS-10G11
+
5′-GTGGTAGTTG GAGGAGCTGG TGGCGTAGGC AAG-3′
+
5′-CTTGCCTACG CCACCAGCTC CTCCAACTAC CAC-3′
+
+
+
KRAS-11GA12
+
5′-GGTAGTTGGA GCTGGAGCTG GTGGCGTAGG CAAG-3′
+
5′-CTTGCCTACG CCACCAGCTC CAGCTCCAAC TACC-3′
+
+
+
+
+
+
+ Detection of the precise sequence of the rare mutation
+
PCR product corresponding to KRAS exon 1 was amplified from the patient genomic DNA and subsequently cloned using the TOPO-TA Cloning kit (Invitrogen). Ten colonies of each transformation were randomly selected for sequencing analysis.
+
+
+ Cell culture and transfection
+
Human embryonic kidney cells (293FT) and mouse embryonic fibroblast cells (NIH3T3) were obtained from Invitrogen and American Type Culture Collection (ATCC) respectively. Both cell lines were cultured in Dulbecco modified Eagle medium plus 10% FBS (Gibco, Invitrogen). Transfection of 293FT and NIH3T3 cells was performed using Lipofectamine™ LTX reagent (Invitrogen) following the manufacturer’s protocol.
+
+
+ Site-directed mutagenesis and active RAS measurement
+
Full-length of KRAS cDNA was cut from pBabe K-Ras 12V vector (Addgene plasmid 12544)37 and cloned into pcDNA3.1 (+) expression vector (Invitrogen) via BamH1 and Xba1 restriction sites. Corresponding KRAS mutations were introduced into the expression vector using QuikChange® II Site-Directed Mutagenesis Kit according to the manufacturer’s recommendations (Stratagene). The desired mutations in each construct were finally confirmed by direct sequencing. The primer sequences for mutagenesis are listed in Table 5. Ras Activation Assay Kit (Millipore) was used to measure the level of active RAS (RAS-GTP) after transient transfection of corresponding plasmid into the cell lines. In brief, 0.5 mg of cell extract was immunoprecipitated with agarose beads containing human Ras Binding Domain (RBD, residues 1–149) of Raf-1. After washing, the beads were mixed with protein loading buffer and 10% of the mixture was electrophoresed by 12% SDS-PAGE for western blot analysis as previously described.38,39 The primary antibodies used were pan-RAS (RAS10, Millipore; 1:2000) and p-ERK1/2 (9102, Cell Signaling; 1:1000). HRP conjugated anti-mouse secondary antibody used was purchased from DAKO (1:20000 dilution).
+
+
+ Soft agar colony formation assay
+
NIH3T3 cells transfected with corresponding KRAS expression plasmids were selected in culture medium containing 400 μg/mL of G418 (Invitrogen) for one month before preparing colony formation assay. In the assay, culture medium containing 0.7% agarose was set as a bottom layer in 6-well dishes. A total of 3000 cells, which were mixed with culture medium containing 0.35% agarose, were added over the bottom layer. After 25 d of incubation, colonies were stained with 0.005% crystal violet overnight and were counted under dissection microscope. Each experiment was performed in triplicate.
+
+
+ In vivo tumorigenicity
+
NIH3T3 transfectants (1 × 10^6 cells suspended in 0.1 mL phosphate-buffered saline), containing empty vector or different KRAS mutant, were injected subcutaneously into the dorsal flank of five 5-wk-old male Balb/c nude mice. The tumor volume was determined as previously described.40 All experimental procedures were approved by the Animal Ethics Committee of the Chinese University of Hong Kong.
+
+
+ Statistical analysis
+
Statistical analysis of 2 × 2 contingency tables of categorical variables was performed using the Chi-square test or Fisher exact test, as appropriate. The t test was performed to compare continuous variables between two groups. All statistical analyses were performed by using statistical program SPSS version 16.0. A two-tailed P value of <0.05 was regarded as statistically significant.
+
+
+
+
+
+ Disclosure of Potential Conflicts of Interest
+
No potential conflicts of interest were disclosed.
+
+
+
+ Abbreviations:
+
+ mCRC
+
+
metastatic colorectal carcinoma
+
+
+
+ mAbs
+
+
monoclonal antibodies
+
+
+
+ EGFR
+
+
epidermal growth factor receptor
+
+
+
+ FFPE
+
+
formalin-fixed, paraffin-embedded
+
+
+
+
+
+
+10.4161/cbt.28550
+
+
+
+ References
+
+
+
+
+
+ Jemal
+ A
+
+
+ Bray
+ F
+
+
+ Center
+ MM
+
+
+ Ferlay
+ J
+
+
+ Ward
+ E
+
+
+ Forman
+ D
+
+
+ Global cancer statistics
+ CA Cancer J Clin
+ 2011
+ 61
+ 69
+ 90
+ 10.3322/caac.20107
+ 21296855
+
+
+
+
+
+
+
+ Markman
+ B
+
+
+ Javier Ramos
+ F
+
+
+ Capdevila
+ J
+
+
+ Tabernero
+ J
+
+
+ EGFR and KRAS in colorectal cancer
+ Adv Clin Chem
+ 2010
+ 51
+ 71
+ 119
+ 10.1016/S0065-2423(10)51004-7
+ 20857619
+
+
+
+
+
+
+
+ Brand
+ TM
+
+
+ Wheeler
+ DL
+
+
+ KRAS mutant colorectal tumors: past and present
+ Small GTPases
+ 2012
+ 3
+ 34
+ 9
+ 10.4161/sgtp.18751
+ 22714415
+
+
+
+
+
+
+
+ Benvenuti
+ S
+
+
+ Sartore-Bianchi
+ A
+
+
+ Di Nicolantonio
+ F
+
+
+ Zanon
+ C
+
+
+ Moroni
+ M
+
+
+ Veronese
+ S
+
+
+ Siena
+ S
+
+
+ Bardelli
+ A
+
+
+ Oncogenic activation of the RAS/RAF signaling pathway impairs the response of metastatic colorectal cancers to anti-epidermal growth factor receptor antibody therapies
+ Cancer Res
+ 2007
+ 67
+ 2643
+ 8
+ 10.1158/0008-5472.CAN-06-4158
+ 17363584
+
+
+
+
+
+
+
+ Lièvre
+ A
+
+
+ Bachet
+ JB
+
+
+ Boige
+ V
+
+
+ Cayre
+ A
+
+
+ Le Corre
+ D
+
+
+ Buc
+ E
+
+
+ Ychou
+ M
+
+
+ Bouché
+ O
+
+
+ Landi
+ B
+
+
+ Louvet
+ C
+
+
+
+ KRAS mutations as an independent prognostic factor in patients with advanced colorectal cancer treated with cetuximab
+ J Clin Oncol
+ 2008
+ 26
+ 374
+ 9
+ 10.1200/JCO.2007.12.5906
+ 18202412
+
+
+
+
+
+
+
+ Jimeno
+ A
+
+
+ Messersmith
+ WA
+
+
+ Hirsch
+ FR
+
+
+ Franklin
+ WA
+
+
+ Eckhardt
+ SG
+
+
+ KRAS mutations and sensitivity to epidermal growth factor receptor inhibitors in colorectal cancer: practical application of patient selection
+ J Clin Oncol
+ 2009
+ 27
+ 1130
+ 6
+ 10.1200/JCO.2008.19.8168
+ 19124802
+
+
+
+
+
+
+
+ Amado
+ RG
+
+
+ Wolf
+ M
+
+
+ Peeters
+ M
+
+
+ Van Cutsem
+ E
+
+
+ Siena
+ S
+
+
+ Freeman
+ DJ
+
+
+ Juan
+ T
+
+
+ Sikorski
+ R
+
+
+ Suggs
+ S
+
+
+ Radinsky
+ R
+
+
+
+ Wild-type KRAS is required for panitumumab efficacy in patients with metastatic colorectal cancer
+ J Clin Oncol
+ 2008
+ 26
+ 1626
+ 34
+ 10.1200/JCO.2007.14.7116
+ 18316791
+
+
+
+
+
+
+
+ Bollag
+ G
+
+
+ Adler
+ F
+
+
+ elMasry
+ N
+
+
+ McCabe
+ PC
+
+
+ Conner
+ E
+ Jr.
+
+
+ Thompson
+ P
+
+
+ McCormick
+ F
+
+
+ Shannon
+ K
+
+
+ Biochemical characterization of a novel KRAS insertion mutation from a human leukemia
+ J Biol Chem
+ 1996
+ 271
+ 32491
+ 4
+ 10.1074/jbc.271.51.32491
+ 8955068
+
+
+
+
+
+
+
+ Tejpar
+ S
+
+
+ Celik
+ I
+
+
+ Schlichting
+ M
+
+
+ Sartorius
+ U
+
+
+ Bokemeyer
+ C
+
+
+ Van Cutsem
+ E
+
+
+ Association of KRAS G13D tumor mutations with outcome in patients with metastatic colorectal cancer treated with first-line chemotherapy with or without cetuximab
+ J Clin Oncol
+ 2012
+ 30
+ 3570
+ 7
+ 10.1200/JCO.2012.42.2592
+ 22734028
+
+
+
+
+
+
+
+ Edkins
+ S
+
+
+ O’Meara
+ S
+
+
+ Parker
+ A
+
+
+ Stevens
+ C
+
+
+ Reis
+ M
+
+
+ Jones
+ S
+
+
+ Greenman
+ C
+
+
+ Davies
+ H
+
+
+ Dalgliesh
+ G
+
+
+ Forbes
+ S
+
+
+
+ Recurrent KRAS codon 146 mutations in human colorectal cancer
+ Cancer Biol Ther
+ 2006
+ 5
+ 928
+ 32
+ 10.4161/cbt.5.8.3251
+ 16969076
+
+
+
+
+
+
+
+ Andreyev
+ HJ
+
+
+ Norman
+ AR
+
+
+ Cunningham
+ D
+
+
+ Oates
+ JR
+
+
+ Clarke
+ PA
+
+
+ Kirsten ras mutations in patients with colorectal cancer: the multicenter “RASCAL” study
+ J Natl Cancer Inst
+ 1998
+ 90
+ 675
+ 84
+ 10.1093/jnci/90.9.675
+ 9586664
+
+
+
+
+
+
+
+ Zulhabri
+ O
+
+
+ Rahman
+ J
+
+
+ Ismail
+ S
+
+
+ Isa
+ MR
+
+
+ Wan Zurinah
+ WN
+
+
+ Predominance of G to A codon 12 mutation K-ras gene in Dukes’ B colorectal cancer
+ Singapore Med J
+ 2012
+ 53
+ 26
+ 31
+ 22252179
+
+
+
+
+
+
+
+ Russo
+ A
+
+
+ Bazan
+ V
+
+
+ Agnese
+ V
+
+
+ Rodolico
+ V
+
+
+ Gebbia
+ N
+
+
+ Prognostic and predictive factors in colorectal cancer: Kirsten Ras in CRC (RASCAL) and TP53CRC collaborative studies
+ Ann Oncol
+ 2005
+ 16
+ Suppl 4
+ iv44
+ 9
+ 10.1093/annonc/mdi907
+ 15923428
+
+
+
+
+
+
+
+ Richman
+ SD
+
+
+ Seymour
+ MT
+
+
+ Chambers
+ P
+
+
+ Elliott
+ F
+
+
+ Daly
+ CL
+
+
+ Meade
+ AM
+
+
+ Taylor
+ G
+
+
+ Barrett
+ JH
+
+
+ Quirke
+ P
+
+
+ KRAS and BRAF mutations in advanced colorectal cancer are associated with poor prognosis but do not preclude benefit from oxaliplatin or irinotecan: results from the MRC FOCUS trial
+ J Clin Oncol
+ 2009
+ 27
+ 5931
+ 7
+ 10.1200/JCO.2009.22.4295
+ 19884549
+
+
+
+
+
+
+
+ Kim
+ ST
+
+
+ Park
+ KH
+
+
+ Kim
+ JS
+
+
+ Shin
+ SW
+
+
+ Kim
+ YH
+
+
+ Impact of KRAS Mutation Status on Outcomes in Metastatic Colon Cancer Patients without Anti-Epidermal Growth Factor Receptor Therapy
+ Cancer Res Treat
+ 2013
+ 45
+ 55
+ 62
+ 10.4143/crt.2013.45.1.55
+ 23613671
+
+
+
+
+
+
+
+ Petrelli
+ F
+
+
+ Coinu
+ A
+
+
+ Cabiddu
+ M
+
+
+ Ghilardi
+ M
+
+
+ Barni
+ S
+
+
+ KRAS as prognostic biomarker in metastatic colorectal cancer patients treated with bevacizumab: a pooled analysis of 12 published trials
+ Med Oncol
+ 2013
+ 30
+ 650
+ 10.1007/s12032-013-0650-4
+ 23828442
+
+
+
+
+
+
+
+ Watanabe
+ T
+
+
+ Yoshino
+ T
+
+
+ Uetake
+ H
+
+
+ Yamazaki
+ K
+
+
+ Ishiguro
+ M
+
+
+ Kurokawa
+ T
+
+
+ Saijo
+ N
+
+
+ Ohashi
+ Y
+
+
+ Sugihara
+ K
+
+
+ KRAS mutational status in Japanese patients with colorectal cancer: results from a nationwide, multicenter, cross-sectional study
+ Jpn J Clin Oncol
+ 2013
+ 43
+ 706
+ 12
+ 10.1093/jjco/hyt062
+ 23657052
+
+
+
+
+
+
+
+ Adelstein
+ BA
+
+
+ Dobbins
+ TA
+
+
+ Harris
+ CA
+
+
+ Marschner
+ IC
+
+
+ Ward
+ RL
+
+
+ A systematic review and meta-analysis of KRAS status as the determinant of response to anti-EGFR antibodies and the impact of partner chemotherapy in metastatic colorectal cancer
+ Eur J Cancer
+ 2011
+ 47
+ 1343
+ 54
+ 10.1016/j.ejca.2011.03.031
+ 21550229
+
+
+
+
+
+
+
+ Mao
+ C
+
+
+ Zhou
+ J
+
+
+ Yang
+ Z
+
+
+ Huang
+ Y
+
+
+ Wu
+ X
+
+
+ Shen
+ H
+
+
+ Tang
+ J
+
+
+ Chen
+ Q
+
+
+ KRAS, BRAF and PIK3CA mutations and the loss of PTEN expression in Chinese patients with colorectal cancer
+ PLoS One
+ 2012
+ 7
+ e36653
+ 10.1371/journal.pone.0036653
+ 22586484
+
+
+
+
+
+
+
+ Yunxia
+ Z
+
+
+ Jun
+ C
+
+
+ Guanshan
+ Z
+
+
+ Yachao
+ L
+
+
+ Xueke
+ Z
+
+
+ Jin
+ L
+
+
+ Mutations in epidermal growth factor receptor and K-ras in Chinese patients with colorectal cancer
+ BMC Med Genet
+ 2010
+ 11
+ 34
+ 10.1186/1471-2350-11-34
+ 20184776
+
+
+
+
+
+
+
+ Samowitz
+ WS
+
+
+ Curtin
+ K
+
+
+ Schaffer
+ D
+
+
+ Robertson
+ M
+
+
+ Leppert
+ M
+
+
+ Slattery
+ ML
+
+
+ Relationship of Ki-ras mutations in colon cancers to tumor location, stage, and survival: a population-based study
+ Cancer Epidemiol Biomarkers Prev
+ 2000
+ 9
+ 1193
+ 7
+ 11097226
+
+
+
+
+
+
+
+ Elnatan
+ J
+
+
+ Goh
+ HS
+
+
+ Smith
+ DR
+
+
+ C-KI-RAS activation and the biological behaviour of proximal and distal colonic adenocarcinomas
+ Eur J Cancer
+ 1996
+ 32A
+ 491
+ 7
+ 10.1016/0959-8049(95)00567-6
+ 8814697
+
+
+
+
+
+
+
+ Rosty
+ C
+
+
+ Young
+ JP
+
+
+ Walsh
+ MD
+
+
+ Clendenning
+ M
+
+
+ Walters
+ RJ
+
+
+ Pearson
+ S
+
+
+ Pavluk
+ E
+
+
+ Nagler
+ B
+
+
+ Pakenas
+ D
+
+
+ Jass
+ JR
+
+
+
+ Colorectal carcinomas with KRAS mutation are associated with distinctive morphological and molecular features
+ Mod Pathol
+ 2013
+ 26
+ 825
+ 34
+ 10.1038/modpathol.2012.240
+ 23348904
+
+
+
+
+
+
+
+ Lin
+ JK
+
+
+ Chang
+ SC
+
+
+ Wang
+ HS
+
+
+ Yang
+ SH
+
+
+ Jiang
+ JK
+
+
+ Chen
+ WC
+
+
+ Lin
+ TC
+
+
+ Li
+ AF
+
+
+ Distinctive clinicopathological features of Ki-ras mutated colorectal cancers
+ J Surg Oncol
+ 2006
+ 94
+ 234
+ 41
+ 10.1002/jso.20438
+ 16900509
+
+
+
+
+
+
+
+ Pai
+ RK
+
+
+ Jayachandran
+ P
+
+
+ Koong
+ AC
+
+
+ Chang
+ DT
+
+
+ Kwok
+ S
+
+
+ Ma
+ L
+
+
+ Arber
+ DA
+
+
+ Balise
+ RR
+
+
+ Tubbs
+ RR
+
+
+ Shadrach
+ B
+
+
+
+ BRAF-mutated, microsatellite-stable adenocarcinoma of the proximal colon: an aggressive adenocarcinoma with poor survival, mucinous differentiation, and adverse morphologic features
+ Am J Surg Pathol
+ 2012
+ 36
+ 744
+ 52
+ 10.1097/PAS.0b013e31824430d7
+ 22314188
+
+
+
+
+
+
+
+ Benedix
+ F
+
+
+ Kube
+ R
+
+
+ Meyer
+ F
+
+
+ Schmidt
+ U
+
+
+ Gastinger
+ I
+
+
+ Lippert
+ H
+
+ Colon/Rectum Carcinomas (Primary Tumor) Study Group
+
+ Comparison of 17,641 patients with right- and left-sided colon cancer: differences in epidemiology, perioperative course, histology, and survival
+ Dis Colon Rectum
+ 2010
+ 53
+ 57
+ 64
+ 10.1007/DCR.0b013e3181c703a4
+ 20010352
+
+
+
+
+
+
+
+ Bufill
+ JA
+
+
+ Colorectal cancer: evidence for distinct genetic categories based on proximal or distal tumor location
+ Ann Intern Med
+ 1990
+ 113
+ 779
+ 88
+ 10.7326/0003-4819-113-10-779
+ 2240880
+
+
+
+
+
+
+
+ Sugai
+ T
+
+
+ Habano
+ W
+
+
+ Jiao
+ YF
+
+
+ Tsukahara
+ M
+
+
+ Takeda
+ Y
+
+
+ Otsuka
+ K
+
+
+ Nakamura
+ S
+
+
+ Analysis of molecular alterations in left- and right-sided colorectal carcinomas reveals distinct pathways of carcinogenesis: proposal for new molecular profile of colorectal carcinomas
+ J Mol Diagn
+ 2006
+ 8
+ 193
+ 201
+ 10.2353/jmoldx.2006.050052
+ 16645205
+
+
+
+
+
+
+
+ Soong
+ R
+
+
+ Powell
+ B
+
+
+ Elsaleh
+ H
+
+
+ Gnanasampanthan
+ G
+
+
+ Smith
+ DR
+
+
+ Goh
+ HS
+
+
+ Joseph
+ D
+
+
+ Iacopetta
+ B
+
+
+ Prognostic significance of TP53 gene mutation in 995 cases of colorectal carcinoma. Influence of tumour site, stage, adjuvant chemotherapy and type of mutation
+ Eur J Cancer
+ 2000
+ 36
+ 2053
+ 60
+ 10.1016/S0959-8049(00)00285-9
+ 11044641
+
+
+
+
+
+
+
+ Iacopetta
+ B
+
+
+ Are there two sides to colorectal cancer?
+ Int J Cancer
+ 2002
+ 101
+ 403
+ 8
+ 10.1002/ijc.10635
+ 12216066
+
+
+
+
+
+
+
+ Brink
+ M
+
+
+ de Goeij
+ AF
+
+
+ Weijenberg
+ MP
+
+
+ Roemen
+ GM
+
+
+ Lentjes
+ MH
+
+
+ Pachen
+ MM
+
+
+ Smits
+ KM
+
+
+ de Bruïne
+ AP
+
+
+ Goldbohm
+ RA
+
+
+ van den Brandt
+ PA
+
+
+ K-ras oncogene mutations in sporadic colorectal cancer in The Netherlands Cohort Study
+ Carcinogenesis
+ 2003
+ 24
+ 703
+ 10
+ 10.1093/carcin/bgg009
+ 12727799
+
+
+
+
+
+
+
+ Reimann
+ C
+
+
+ Arola
+ M
+
+
+ Bierings
+ M
+
+
+ Karow
+ A
+
+
+ van den Heuvel-Eibrink
+ MM
+
+
+ Hasle
+ H
+
+
+ Niemeyer
+ CM
+
+
+ Kratz
+ CP
+
+
+ A novel somatic K-Ras mutation in juvenile myelomonocytic leukemia
+ Leukemia
+ 2006
+ 20
+ 1637
+ 8
+ 10.1038/sj.leu.2404303
+ 16826224
+
+
+
+
+
+
+
+ Tartaglia
+ M
+
+
+ Martinelli
+ S
+
+
+ Cazzaniga
+ G
+
+
+ Cordeddu
+ V
+
+
+ Iavarone
+ I
+
+
+ Spinelli
+ M
+
+
+ Palmi
+ C
+
+
+ Carta
+ C
+
+
+ Pession
+ A
+
+
+ Aricò
+ M
+
+
+
+ Genetic evidence for lineage-related and differentiation stage-related contribution of somatic PTPN11 mutations to leukemogenesis in childhood acute leukemia
+ Blood
+ 2004
+ 104
+ 307
+ 13
+ 10.1182/blood-2003-11-3876
+ 14982869
+
+
+
+
+
+
+
+ Servomaa
+ K
+
+
+ Kiuru
+ A
+
+
+ Kosma
+ VM
+
+
+ Hirvikoski
+ P
+
+
+ Rytömaa
+ T
+
+
+ p53 and K-ras gene mutations in carcinoma of the rectum among Finnish women
+ Mol Pathol
+ 2000
+ 53
+ 24
+ 30
+ 10.1136/mp.53.1.24
+ 10884918
+
+
+
+
+
+
+
+ Schmid
+ K
+
+
+ Oehl
+ N
+
+
+ Wrba
+ F
+
+
+ Pirker
+ R
+
+
+ Pirker
+ C
+
+
+ Filipits
+ M
+
+
+ EGFR/KRAS/BRAF mutations in primary lung adenocarcinomas and corresponding locoregional lymph node metastases
+ Clin Cancer Res
+ 2009
+ 15
+ 4554
+ 60
+ 10.1158/1078-0432.CCR-09-0089
+ 19584155
+
+
+
+
+
+
+
+ Wójcik
+ P
+
+
+ Kulig
+ J
+
+
+ Okoń
+ K
+
+
+ Zazula
+ M
+
+
+ Moździoch
+ I
+
+
+ Niepsuj
+ A
+
+
+ Stachura
+ J
+
+
+ KRAS mutation profile in colorectal carcinoma and novel mutation--internal tandem duplication in KRAS
+ Pol J Pathol
+ 2008
+ 59
+ 93
+ 6
+ 18669174
+
+
+
+
+
+
+
+ Khosravi-Far
+ R
+
+
+ White
+ MA
+
+
+ Westwick
+ JK
+
+
+ Solski
+ PA
+
+
+ Chrzanowska-Wodnicka
+ M
+
+
+ Van Aelst
+ L
+
+
+ Wigler
+ MH
+
+
+ Der
+ CJ
+
+
+ Oncogenic Ras activation of Raf/mitogen-activated protein kinase-independent pathways is sufficient to cause tumorigenic transformation
+ Mol Cell Biol
+ 1996
+ 16
+ 3923
+ 33
+ 8668210
+
+
+
+
+
+
+
+ Tong
+ JH
+
+
+ Ng
+ DC
+
+
+ Chau
+ SL
+
+
+ So
+ KK
+
+
+ Leung
+ PP
+
+
+ Lee
+ TL
+
+
+ Lung
+ RW
+
+
+ Chan
+ MW
+
+
+ Chan
+ AW
+
+
+ Lo
+ KW
+
+
+
+ Putative tumour-suppressor gene DAB2 is frequently down regulated by promoter hypermethylation in nasopharyngeal carcinoma
+ BMC Cancer
+ 2010
+ 10
+ 253
+ 10.1186/1471-2407-10-253
+ 20525238
+
+
+
+
+
+
+
+ Lung
+ RW
+
+
+ Tong
+ JH
+
+
+ Sung
+ YM
+
+
+ Leung
+ PS
+
+
+ Ng
+ DC
+
+
+ Chau
+ SL
+
+
+ Chan
+ AW
+
+
+ Ng
+ EK
+
+
+ Lo
+ KW
+
+
+ To
+ KF
+
+
+ Modulation of LMP2A expression by a newly identified Epstein-Barr virus-encoded microRNA miR-BART22
+ Neoplasia
+ 2009
+ 11
+ 1174
+ 84
+ 19881953
+
+
+
+
+
+
+
+ Kang
+ W
+
+
+ Tong
+ JH
+
+
+ Chan
+ AW
+
+
+ Lee
+ TL
+
+
+ Lung
+ RW
+
+
+ Leung
+ PP
+
+
+ So
+ KK
+
+
+ Wu
+ K
+
+
+ Fan
+ D
+
+
+ Yu
+ J
+
+
+
+ Yes-associated protein 1 exhibits oncogenic property in gastric cancer and its nuclear accumulation associates with poor prognosis
+ Clin Cancer Res
+ 2011
+ 17
+ 2130
+ 9
+ 10.1158/1078-0432.CCR-10-2467
+ 21346147
+
+
+
+
+
+
+
+ Imamura
+ Y
+
+
+ Morikawa
+ T
+
+
+ Liao
+ X
+
+
+ Lochhead
+ P
+
+
+ Kuchiba
+ A
+
+
+ Yamauchi
+ M
+
+
+ Qian
+ ZR
+
+
+ Nishihara
+ R
+
+
+ Meyerhardt
+ JA
+
+
+ Haigis
+ KM
+
+
+
+ Specific mutations in KRAS codons 12 and 13, and patient prognosis in 1075 BRAF wild-type colorectal cancers
+ Clin Cancer Res
+ 2012
+ 18
+ 4753
+ 63
+ 10.1158/1078-0432.CCR-11-3210
+ 22753589
+
+
+
+
+
+
+
+ De Roock
+ W
+
+
+ Claes
+ B
+
+
+ Bernasconi
+ D
+
+
+ De Schutter
+ J
+
+
+ Biesmans
+ B
+
+
+ Fountzilas
+ G
+
+
+ Kalogeras
+ KT
+
+
+ Kotoula
+ V
+
+
+ Papamichael
+ D
+
+
+ Laurent-Puig
+ P
+
+
+
+ Effects of KRAS, BRAF, NRAS, and PIK3CA mutations on the efficacy of cetuximab plus chemotherapy in chemotherapy-refractory metastatic colorectal cancer: a retrospective consortium analysis
+ Lancet Oncol
+ 2010
+ 11
+ 753
+ 62
+ 10.1016/S1470-2045(10)70130-3
+ 20619739
+
+
+
+
+
+
+
+ Chang
+ YS
+
+
+ Yeh
+ KT
+
+
+ Chang
+ TJ
+
+
+ Chai
+ C
+
+
+ Lu
+ HC
+
+
+ Hsu
+ NC
+
+
+ Chang
+ JG
+
+
+ Fast simultaneous detection of K-RAS mutations in colorectal cancer
+ BMC Cancer
+ 2009
+ 9
+ 179
+ 10.1186/1471-2407-9-179
+ 19515263
+
+
+
+
+
+
+
+ Karapetis
+ CS
+
+
+ Khambata-Ford
+ S
+
+
+ Jonker
+ DJ
+
+
+ O’Callaghan
+ CJ
+
+
+ Tu
+ D
+
+
+ Tebbutt
+ NC
+
+
+ Simes
+ RJ
+
+
+ Chalchal
+ H
+
+
+ Shapiro
+ JD
+
+
+ Robitaille
+ S
+
+
+
+ K-ras mutations and benefit from cetuximab in advanced colorectal cancer
+ N Engl J Med
+ 2008
+ 359
+ 1757
+ 65
+ 10.1056/NEJMoa0804385
+ 18946061
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/PMC4816447.xml b/tests/data/PMC4816447.xml
new file mode 100644
index 0000000..8a62d22
--- /dev/null
+++ b/tests/data/PMC4816447.xml
@@ -0,0 +1,1217 @@
+
+
+
+
+
+
+ PLoS One
+ PLoS ONE
+ plos
+ plosone
+
+ PLoS ONE
+
+ 1932-6203
+
+ Public Library of Science
+ San Francisco, CA USA
+
+
+
+ 27032107
+ 4816447
+ 10.1371/journal.pone.0147599
+ PONE-D-15-00077
+
+
+ Research Article
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancer Treatment
+
+
+
+
+ Biology and Life Sciences
+
+ Genetics
+
+ Gene Identification and Analysis
+
+ Mutation Detection
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancer Treatment
+
+ Chemotherapy
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Clinical Medicine
+
+ Clinical Oncology
+
+ Chemotherapy
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Clinical Oncology
+
+ Chemotherapy
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Pharmaceutics
+
+ Drug Therapy
+
+ Chemotherapy
+
+
+
+
+
+ Biology and Life Sciences
+
+ Genetics
+
+ Mutation
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancers and Neoplasms
+
+ Lung and Intrathoracic Tumors
+
+
+
+
+
+ Research and Analysis Methods
+
+ Database and Informatics Methods
+
+ Biological Databases
+
+ Mutation Databases
+
+
+
+
+
+ Biology and Life Sciences
+
+ Genetics
+
+ Mutation
+
+ Mutation Databases
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancers and Neoplasms
+
+ Lung and Intrathoracic Tumors
+
+ Non-Small Cell Lung Cancer
+
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancers and Neoplasms
+
+ Carcinomas
+
+ Adenocarcinomas
+
+ Adenocarcinoma of the Lung
+
+
+
+
+
+
+
+ Medicine and Health Sciences
+
+ Oncology
+
+ Cancers and Neoplasms
+
+ Lung and Intrathoracic Tumors
+
+ Adenocarcinoma of the Lung
+
+
+
+
+
+
+
+ Prospective Evaluation of First-Line Erlotinib in Advanced Non-Small Cell Lung Cancer (NSCLC) Carrying an Activating EGFR Mutation: A Multicenter Academic Phase II Study in Caucasian Patients (FIELT)
+ First-Line Erlotinib in EGFR Mutant Lung Cancer
+
+
+
+
+ De Grève
+ Jacques
+
+
+ 1
+
+ *
+
+
+
+ Van Meerbeeck
+ Jan
+
+
+ 2
+
+
+ ¤
+
+
+
+
+ Vansteenkiste
+ Johan F.
+
+
+ 3
+
+
+
+
+ Decoster
+ Lore
+
+
+ 1
+
+
+
+
+ Meert
+ Anne-Pascale
+
+
+ 4
+
+
+
+
+ Vuylsteke
+ Peter
+
+
+ 5
+
+
+
+
+ Focan
+ Christian
+
+
+ 6
+
+
+
+
+ Canon
+ Jean-Luc
+
+
+ 7
+
+
+
+
+ Humblet
+ Yves
+
+
+ 8
+
+
+
+
+ Berchem
+ Guy
+
+
+ 9
+
+
+
+
+ Colinet
+ Benoit
+
+
+ 7
+
+
+
+
+ Galdermans
+ Danny
+
+
+ 10
+
+
+
+
+ Bosquée
+ Lionel
+
+
+ 11
+
+
+
+
+ Vermeij
+ Joanna
+
+
+ 12
+
+
+
+
+ Dewaele
+ Alex
+
+
+ 1
+
+
+
+
+ Geers
+ Caroline
+
+
+ 1
+
+
+
+
+ Schallier
+ Denis
+
+
+ 1
+
+
+
+
+ Teugels
+ Erik
+
+
+ 1
+
+
+
+
+Department of Medical Oncology, Oncologisch Centrum, Universitair Ziekenhuis Brussel, Brussels, Belgium
+
+Department of Thoracic Oncology, University Hospital, Gent, Belgium
+
+Department of Pneumology, University Hospital KU Leuven, Leuven, Belgium
+
+Department of Pneumology, Institut Jules Bordet, Brussels, Belgium
+
+Department of Medical Oncology, Clinique et Maternité Sainte-Elisabeth, Namur, Belgium
+
+Department of Medical Oncology, CHC Clinique Saint Joseph, Liège, Belgium
+
+Department of Medical Oncology, Grand Hôpital de Charleroi, Charleroi, Belgium
+
+Department of Medical Oncology, Centre du Cancer de l'Université Catholique de Louvain, Brussels, Belgium
+
+Department of Medical Oncology, Centre Hospitalier du Luxembourg, Luxembourg, Luxembourg
+
+Department of Medical Oncology, ZNA Middelheim Hospital, Antwerp, Belgium
+
+Department of Respiratory Medicine, CHU Sart-Tilman, Liège, Belgium
+
+Department of Medical Oncology, ZNA Jan Palfijn, Merksem, Belgium
+
+
+
+ Rosell
+ Rafael
+
+ Editor
+
+
+
+
+ Catalan Institute of Oncology, SPAIN
+
+
+
+
Competing Interests: The authors have declared that no competing interests exist.
+
+
+
Conceived and designed the experiments: JDG ET. Performed the experiments: JDG JVM JFV LD APM PV CF JC YH GB BC DG LB JV AD CG DS ET. Analyzed the data: JDG LD AD ET. Contributed reagents/materials/analysis tools: JDG JVM JFV LD APM PV CF JC YH GB BC DG LB JV AD CG DS ET. Wrote the paper: JDG JVM JFV LD DS ET.
+
+
+
+
Current address: Department of Thoracic Oncology, Antwerp University Hospital, Edegem, Belgium
Epidermal Growth Factor Receptor (EGFR) tyrosine kinase inhibition is the preferred first-line treatment of advanced adenocarcinoma of the lung that harbors EGFR activating tyrosine kinase domain mutations. Most data available pertain to Asian populations in which such mutations are more prevalent. We report on the long-term results of first-line treatment with erlotinib in Caucasian patients with advanced adenocarcinoma of the lung that have a somatic EGFR mutation in their tumor.
+
+
+ Methods
+
Multicenter academic prospective phase II study with erlotinib in patients with an activating EGFR tyrosine kinase (TK) domain somatic mutation (any exon encoding the kinase domain) in the tumor and no prior treatment for their advanced disease.
+
+
+ Results
+
Phenotypic preselection of 229 patients led to a high EGFR mutation detection rate of 24% of which 46 patients were included in the phase II study. With a progression free survival (PFS) of 81% at three months the study met its primary endpoint for presumed superiority over chemotherapy. With an overall median PFS of 11 months and a median overall survival (OS) of 23 months, the results compare favorably with results obtained in randomized studies using TKI in first line in EGFR mutation positive adenocarcinoma of the lung.
+
+
+ Conclusion
+
The present study reinforces the use of EGFR tyrosine kinase inhibition (TKI) as a first line treatment of choice for advanced adenocarcinoma of the lung carrying an activating EGFR mutation. The mutation rate in preselected Caucasian patients is higher than previously reported. Issues relevant for clinical practice are discussed.
+
+
+ Trial Registration
+
ClinicalTrials.gov NCT00339586
+
+
+
+ This work was supported by the following sources of funding: Stichting Tegen Kanker STK (JDG); Wetenschappelijk Fonds Willy Gepts UZ Brussel (JDG); Cancer plan 19 Grant 29-039 (JDG); and Roche Belgium (JDG). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.
+
+
+
+
+
+
+
+
+ Data Availability
+ All relevant data are within the paper and its Supporting Information files.
+
+
+
+
+ Data Availability
+
All relevant data are within the paper and its Supporting Information files.
+
+
+
+
+ Introduction
+
Patients with advanced non-small cell lung cancer (NSCLC) are incurable with a low probability for long-term survival. With platinum-based doublet chemotherapy a response rate of around 25% and a median OS of about 10–12 months can be obtained in metastatic disease [1] corresponding to a PFS of 60% or less at 3 months [2].
+
A novel approach to the treatment of advanced NSCLC was introduced with the use of agents blocking the tyrosine kinase part of the Epidermal Growth Factor Receptor (EGFR). Some patients had dramatic responses to these EGFR tyrosine kinase inhibitors (TKI’s) [3, 4]. Ten years ago it became clear that mutations in the exons coding for the intracellular EGFR kinase domain, in particular in exons 19 and 21, highly increase the sensitivity to EGFR TKI’s [5, 6]. These mutations have been observed in 10% or less of all lung cancers tested, in 30% of adenocarcinoma of the lung if the smoking history was maximally 15 years and up to 50% in never-smokers [7], although these figures depend highly on the ethnicity of the population tested, being much higher in East-Asian populations than in Caucasians.
+
Most (90%) sensitizing mutations are found in exon 19 and 21. Mutations in exon 20 are generally not associated with increased sensitivity towards reversible TKI’s [8]. The overall response rate (ORR) to TKI in EGFR mutant lung cancers varies between 60 and 90% [9].
+
Gefitinib in an Asian population [10, 11], and erlotinib, in both a Caucasian [12] and an Asian [13] population, were validated as superior to chemotherapy in terms of PFS in patients whose tumors harbor sensitizing driver mutations in the EGFR gene and are therefore recommended as the preferred first-line therapies for these patients.
+
FIELT (First line Inhibitor of EGFR in Lung cancer Treatment) is a prospective academic study investigating the efficacy and tolerability of first-line treatment with erlotinib in newly diagnosed advanced adenocarcinoma of the lung carrying EGFR kinase domain mutations, as well as the feasibility of inserting genomic testing in a multicenter clinical setting (S1 Text). The study aimed to estimate whether first-line erlotinib could reach an efficacy threshold higher than chemotherapy.
+
At the time of initiation of FIELT in 2006, advanced lung cancer was treated indiscriminately with platinum-based chemotherapy and no data were available on the prospective first-line use of any EGFR TKI in phenotypically or genotypically selected NSCLC, while only retrospective data were available for gefitinib [14].
+
+
+ Materials and Methods
+
The study was an academic study registered at clinicaltrials.gov as NCT00339586 (S1 Text).
+
+ Patient eligibility
+
Key eligibility criteria were locally advanced or metastatic (Stage IIIB or Stage IV) adenocarcinoma of the lung. Radiotherapy and adjuvant or neo-adjuvant chemotherapy completed more than six months before inclusion were allowed. Patients should not have received previous chemotherapy for metastatic disease and had to have a smoking history of less than 15 years and have stopped smoking more than one year before diagnosis.
+
Measurable disease was not mandatory. An ECOG performance status of 0–3 was required. Previously diagnosed and treated central nervous system metastases or spinal cord compression with evidence of stable disease for at least two months was permitted. Specific exclusion criteria were: pre-existing symptomatic interstitial lung disease, not related to the current malignancy, and gastrointestinal disease or concomitant food or drug intake which could impair absorption and metabolism of erlotinib. Significant ophthalmological abnormalities, especially severe dry eye syndrome, keratoconjunctivitis sicca, Sjögren syndrome, severe exposure keratitis or any other disorder likely to increase the risk of corneal epithelial lesions were also exclusion criteria.
+
Standard phase II selection criteria were applicable for organ function. Separate signed informed consents were required for mutation testing and subsequent inclusion in the erlotinib treatment phase.
+
+
+ Study design and treatment
+
The study was a multicenter academic single arm phase II study in 17 university and non-university centers in Belgium and Luxemburg (NCT00339586).
+
The study was approved by the institutional medical ethics review board of each participating center. The study was approved by the ethics committee of the Academisch Ziekenhuis-Vrije Universiteit Brussel which was the leading ethics committee that according to Belgian law approved the study in a single opinion form. Academisch Ziekenhuis-Vrije Universiteit Brussel is the former name of the current UZ Brussel of the Vrije Universiteit Brussel (S2 Text). Participants provided a written informed consent.
+
+
+ Mutation analysis
+
Patients had a central tumor EGFR mutation testing in the Laboratory of Medical and Molecular Oncology (LMMO) of the Oncologisch Centrum, UZ Brussel. Mutation analysis was performed on DNA extracted from three consecutive 10μm thick sections of formalin fixed and paraffin embedded material. Tissue sections were verified for the presence of a sufficient proportion of malignant cells and manually macro-dissected when necessary. The collected DNA was used to perform a hemi-nested PCR followed by a denaturing gradient gel electrophoresis (DGGE). The PCR/DGGE method requires only very small amounts of template DNA and is able to detect any mutation found in exon 18–21, but could not reliably detect mutations in samples in which the tumor DNA represented less than 25% of the total DNA. Mutations were confirmed by Sanger sequencing, but to minimize waiting time for physicians, patients could be entered in the treatment phase of the trial, based on the DGGE results. No results had to be revoked based on the subsequent Sanger sequence. Results were delivered to the participating clinicians within two weeks except for cases in which the material was insufficient to obtain a reliable result in a first round. If a second analysis was needed for such inconclusive results, the responsible physician was informed of the delay.
+
+
+ Treatment phase
+
Upon ascertainment of an EGFR mutation in the tumor sample, consenting patients started erlotinib treatment 150 mg daily plus best supportive care within one week. Any mutation found in exon 18–21 made the patient eligible for inclusion in the treatment phase of the study. Treatment was until disease progression (RECIST definition, both for patients with or without measurable lesions) or prohibitive toxicity. Response assessments (CT imaging and physical) and ancillary parameters were scheduled at 6, 12, 18 and 24 weeks after treatment initiation, thereafter every 12 weeks.
+
Toxicity was scored every four weeks during treatment according to the common toxicity criteria adverse events version 3.0 [15].
+
The primary endpoint of the study was to establish a significant clinical benefit by achieving at least a 70% PFS rate at three months on erlotinib. Secondary objectives and endpoints were ORR according to RECIST criteria, response duration under erlotinib treatment, the effect on Quality of Life (QOL), ECOG performance score, weight, the PFS and OS. The QOL results will be reported elsewhere. The analysis of the primary efficacy endpoint was based on all subjects who received at least one dose of study treatment. Data analysis cut-off was done in May 2013. If scheduled follow-up evaluations were missing, patients were considered progressive, unless a later examination confirmed persistent remission.
+
The premises for the sample size determination were as follows. With platinum based chemotherapy, a PFS of 60% or less is obtained at 3 months follow-up in advanced NSCLC [1, 16]. Taking into account the known general better tolerability of erlotinib it was proposed that if a PFS of more than 70% could be obtained at three months, then this treatment deserved further evaluation in the first-line treatment of advanced NSCLC. If the study would result in a PFS less than 50% at three months, single agent erlotinib should not be further evaluated in this setting as outcomes inferior in terms of efficacy to the current standard were not acceptable.
+
+
+ Statistical considerations
+
Based on these premises, a one-step Fleming design required at least 33 patients to be included in the study (α = 10%; β = 10%). With the provision of a margin for eligibility/evaluation issues, a prospective number to be included was set at 40 patients.
+
PFS and OS were estimated from the date of registration until respectively the documentation of progression and the date of death, irrespective of the cause of death. Patients who had not progressed or died at the time of the analysis were censored at the date of last contact. PFS and OS were calculated according to the Kaplan-Meier method with SPSS statistical software (version 20.0; SPSS Inc., Chicago, IL). Results are presented with 95% confidence intervals (CIs).
+
+
+
+ Results
+
+ Mutation analysis
+
From January 2006 through March 2010, tumor tissue from 229 phenotypically selected patients was analyzed for the presence of EGFR kinase domain mutations. Tissue area ranged from 1 mm² to 600 mm², with 40% of the sections smaller than 5 mm² (mostly needle biopsies).
+
Baseline material was insufficient for analysis in 24/229 samples (10%) due to the lack of adequate material (almost no tissue material available, no tumor cells in tissue, no amplifiable DNA). An EGFR kinase domain mutation was found in 56 out of 205 evaluable patients (27% or 24% of the original phenotypically selected population) (Table 1). Five mutations were previously unreported (two in exon 19, three in exon 20). The samples in which the tumor DNA was less than 25% of the total DNA represented 30% of all samples. Although we stated to the referring clinical investigators that a negative result should be considered as unreliable in these cases, we did find mutations in 23% of these cases, a similar proportion as in the overall population. A subsequent analysis with a more sensitive PNA (Peptide Nucleic Acid) PCR method [17] showed four more mutations (7% of all mutations). The overall sensitivity of the mutation detection method used in this study was thus 93% in evaluable samples or 90% in all samples, also considering the pathology limitations (absence of analyzable material). Other mutations found were: KRAS in 16%, BRAF in 2%, HER2 in 2%, HER3 in 0.4%, all mutually exclusive. The FIELT study flow is detailed in Fig 1.
+
+ 10.1371/journal.pone.0147599.g001
+
+
+ FIELT study flow.
+
+
+
+
+ 10.1371/journal.pone.0147599.t001
+
+
+ Mutations found.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Exon
+
Mutations
+
Times found (in COSMIC)
+
DXS/COBAS*
+
+
+
+
Nucleotide/s Change
+
Amino acid/s Change
+
+
+
+
+
+
+
+ 18
+
+
c.2156G>C
+
p.Gly719Ala
+
1 (31x)
+
yes/yes
+
+
+
+ 19
+
+
c.2235_2249del
+
p.Glu746_Ala750del
+
22 (785x)
+
yes/yes
+
+
+
+
c.2236_2250del
+
p.Glu746_Ala750del
+
3 (360x)
+
yes/yes
+
+
+
+
c.2237_2257delinsTCT
+
p.Glu746_Pro753delinsValSer
+
1 (2x)
+
no/yes
+
+
+
+
c.2237_2253delinsTTCCT
+
p.Glu746_Thr751delinsValPro
+
1 (2x)
+
no/no
+
+
+
+
c.2239_2248delinsC
+
p.Leu747_Ala750delinsPro
+
1 (77x)
+
yes/yes
+
+
+
+
c.2240_2254del
+
p.Leu747_Thr751del
+
1 (51x)
+
yes/yes
+
+
+
+
c.2240_2257del
+
p.Leu747_Pro753delins Ser
+
2 (121x)
+
yes/yes
+
+
+
+
c.2248_2276delinsCCAAC
+
p.Ala750_Ile759delinsProThr
+
1 (0x)
+
no/no
+
+
+
+
c.2249_2277delinsGAAGT
+
p.Ala750_Ile759del insGlySer
+
1 (0x)
+
no/no
+
+
+
+
c.2253_2276del
+
p.Ser752_Ile759del
+
1 (5x)
+
no/no
+
+
+
+ 20
+
+
c.2303_2311dup9
+
p.Ser768_Asp770dup
+
1 (1x)?
+
no/no
+
+
+
+
c.2319_2320insAACCAC
+
p.Pro772_His773insHisAsn
+
1 (0x)
+
no/no
+
+
+
+
c.2311delAinsGTCC
+
p.Asn771del insValHis
+
1 (0x)
+
no/no
+
+
+
+
c.2311_2312insCCA
+
p.Asp770_Asn771insThr
+
1 (0x)
+
no/no
+
+
+
+
c.2310_2311insGGT
+
p.Asp770_Asn771insGly
+
1 (5x)
+
yes/yes
+
+
+
+ 21
+
+
c.2573T>G
+
p.Leu858Arg
+
15 (1607x)
+
yes/yes
+
+
+
+
+
+
+
*indicates whether mutation is listed as detectable by Dxs Therascreen or COBAS. ? = mutation that supposedly has been misnamed in the COSMIC database
+
+
+
+
Ten patients (18%) in whom an EGFR mutation was found did not enter the phase II part of the study because of various reasons: three became ineligible between an inconclusive first mutation screen and identification of the EGFR mutation in a second sample, three patients died before inclusion, one deteriorated and became ineligible and in three patients the reasons are unknown.
+
+
+ Patient characteristics
+
The characteristics of the 46 patients included in the phase II study are provided in Table 2. Eighty five percent of patients were female. The median age was 72 yrs. (35–83 yrs.) and 37% of the patients were aged 75 or older. Two patients were included although not strictly fulfilling the selection criteria: one patient had a 25 pack-year of cigarette smoking history, but had since stopped smoking for 30 years and received therefore a waiver and one patient was stage IB (T2N0M0) recurrent disease after prior curative intent radiotherapy and was not any longer eligible for local curative intent treatment. Thirty eight were never smokers, seven were past smokers and one was a current smoker.
+
+ 10.1371/journal.pone.0147599.t002
+
+
+ Characteristics of the 46 patients included in the phase II study and treated with erlotinib.
+
+
+
+
+
+
+
+
+
+
+
+ Age
+
+
72 median (35–83 yrs.)
+
+
+
+ Performance status
+
+
PS0 = 11
+
+
+
+
PS1 = 28
+
+
+
+
PS2 = 6
+
+
+
+
PS3 = 1
+
+
+
+ Stage
+
+
Stage IB = 1
+
+
+
+
Stage IIIB = 4
+
+
+
+
Stage IV = 41
+
+
+
+ Sex
+
+
7 male; 39 female
+
+
+
+ EGFR Mutation
+
+
Exon 19 = 27
+
+
+
+
Exon 21 = 15
+
+
+
+
Exon 20 = 3
+
+
+
+
Exon 18 = 1
+
+
+
+ Cigarette smoking history (pack years)
+
+
Median 0 yrs. (0–25 yrs.)
+
+
+
+
+
+
+
+ Efficacy
+
The efficacy results are reported on intent to treat basis including the two ineligible patients. A separate analysis with these two patients removed did not alter the results.
+
The PFS rate (PFSR) at three months was 81% and at six months 72%. The median PFS was 11 months (95% CI = 9.7–12.3 months) (Fig 2).
+
+ 10.1371/journal.pone.0147599.g002
+
+
+ Survival outcome.
+
Kaplan-Meier curves for overall and progression free survival of the FIELT cohort.
+
+
+
+
Twenty six patients achieved a partial remission (PR) (57%), one a complete remission (CR) (2%); ten a stable disease (SD) (22%) and nine had progressive disease (PD) as best response (20%). Although this was not the primary endpoint, responses were confirmed by a second measurement at least four weeks later. The clinical benefit rate (CR+PR+SD) was thus 81%. The distribution of responses was similar in exon 19 and 21 mutations.
+
Median duration of response was 9.7 months (2.7–29 + months) (+ means patient alive beyond); 9.7 months for cases with objective remissions (2.7–29+ months) and 10.3 months for those with stable disease (4.1–23.5 months).
+
With a median follow-up from treatment initiation of 45 months (36–84 months), median OS was 23 months (95% CI = 21.3–28.6+ months) (Fig 2). For patients achieving an objective response (PR or CR), median OS was 23.9 months (3.9–52.5 months), for patients with SD as best response 14.5 months (3.2–39.6 months) and for progressing patients 3.9 months (0.9–9.2 months).
+
There was a numeric difference in OS between PS 0/1 and 2/3 patients: median 23 months (95%CI = 20–26 months) vs 13 months (95%CI = 7–21 months). The difference is statistically not significant according to log rank test p = 0.687, probably due to the small numbers.
+
Median overall survival was 25 months (95%CI = 20–29 months) for the younger patients and 15 months (95%CI = 0–30 months) OS for the elderly patients with a p-value of 0.222. Despite the numeric difference there is extensive overlap in outcomes.
+
A similar non-significant ratio was observed with a cut-off of 70 years (used in geriatrics) with a median OS of 26 months (95%CI = 23–28 months) vs 20 months (95%CI = 10–29 months).
+
At the time of analysis six patients were alive with median treatment duration on erlotinib of 18 months (11–32 months) of which three had been switched to gefitinib for tolerance reasons.
+
Median treatment duration was 12.8 months (0.6–33 months) and average treatment duration was 13.3 months. When we also consider the patients that were eligible, but never received erlotinib the median treatment duration is 10.4 months (0–33 months).
+
Eight patients (17%) (Table 3) failed on erlotinib within 12 weeks of treatment initiation (median PFS 2.8 months), and they were therefore considered as being de novo resistant to erlotinib. These patients had a much shorter OS of 5.6 months (1–26 months) compared to the overall 24 months median OS of the other patients (p < 0.002, two-sided). One patient however had a survival of 26 months despite an erlotinib treatment that lasted only four months, reflecting the impact of subsequent chemotherapy in this patient population. Factors that could explain a lack of response are an exon 20 mutation for two patients [18] and a heavy smoking history for one patient. Other past smokers benefited from erlotinib treatment albeit less than the overall study population: one 15 pack year: one 8 month PFS; one 5 pack year: one 18 month PFS; one 1 pack year, immediate progression.
+
+ 10.1371/journal.pone.0147599.t003
+
+
+ Characteristics of patients that failed early on erlotinib.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Initials
+
Sex
+
Age
+
EGFR mutation
+
Smoking status
+
Survival (months)
+
+
+
+
+
180 CV
+
F
+
73
+
EX 19 (p.Glu746_Ala750del)
+
Never
+
0.9
+
+
+
173 LAL
+
F
+
72
+
EX 20 (p.Asn771del insValHis)*
+
Never
+
3.2
+
+
+
229 FM
+
F
+
64
+
EX 19 (p.Glu746_Ala750del)
+
Never
+
3.7
+
+
+
241 DEL
+
M
+
78
+
EX 21 p.Leu858Arg
+
25 pack year
+
3.9
+
+
+
224 FPM
+
F
+
42
+
EX 19 p.Leu747_Pro753delins Ser
+
Never
+
7.4
+
+
+
208 VEC
+
F
+
48
+
EX 18 p.Gly719Ala
+
One year
+
9.2
+
+
+
42 TF
+
F
+
38
+
EX 20 p.Ser768_Asp770dup**
+
Never
+
14
+
+
+
8 MI
+
F
+
35
+
EX 21 p.Leu858Arg
+
Never
+
26.2
+
+
+
+
+
+
+
* Novel mutation
+
+
+
** reported once in COSMIC database; all other mutations are recurrent.
+
+
+
PY: pack years; F: female; M: male; ex: exon
+
+
+
+
Some never smokers with an exon 19 deletion mutation were also de novo resistant (Table 3).
+
One patient with an exon 18 mutation that is generally sensitive to EGFR TKI [19], failed immediately, received one second line of therapy and survived for 9 months.
+
+
+ Post-study treatment
+
All patients had stopped erlotinib treatment at the time of analysis. Second- and further lines of treatment were as reported by the participating centers. Twenty-nine (64%) of the 45 patients for whom the information was available received one or more second or further line treatments. For sixteen patients (36%) no second-line treatment was reported. For one patient the information was not available. Seventeen of the 29 (59%; 38% of total) received only one second-line therapy, nine (31%) also received a third-line and three (10%) also received a fourth-line. Of the 29 patients who did receive further therapy, 17 received a targeted agent (7 gefitinib, 9 afatinib, one continued erlotinib beyond progression).
+
Seventeen patients received chemotherapy, of which ten a platinum-based doublet. Seven patients received only (sequential) single agent chemotherapies. Three of these patients were more than 75 years old. Four patients received radiotherapy to the brain (three) and the chest (one).
+
+
+ Safety
+
Treatment with erlotinib was generally well tolerated with manageable toxicity as reported by the participating investigators (Table 4). One patient died without disease progression and one stopped because of skin toxicity while still responding after 18.9 months of treatment and subsequently received gefitinib with a continued sustained response and good tolerance.
+
+ 10.1371/journal.pone.0147599.t004
+
+
+ Common toxicities.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Grade 1
+
Grade 2
+
Grade 3
+
+
+
+
+
Skin rash
+
14
+
17
+
10
+
+
+
Diarrhea
+
16
+
14
+
4
+
+
+
Ocular toxicity
+
10
+
11
+
0
+
+
+
+
+
+
+
+
+ Discussion
+
At the time of initiation of the current phase II study no prospective data were available about the efficacy of EGFR TKI in EGFR mutant lung cancers. This has changed dramatically since, as several randomized studies have shown that both gefitinib and erlotinib as reversible EGFR inhibitors and afatinib as a covalent pan-HER inhibitor outperform chemotherapy as a first-line therapy in these patients with regard to PFS and QOL [10–13]. Most of the studies have been performed in Asian or predominantly Asian populations. Only a few studies have been conducted in homogeneous Caucasian populations, and the current study is the only one to supplement the data generated with erlotinib in the EURTAC study [12].
+
The two most representative landmark studies are the iPASS study with gefitinib in an Asian population [11] and the EURTAC study with erlotinib in a European Caucasian population [12]. More recently, similar results were obtained in the LUX-Lung 3 study with the irreversible pan-HER inhibitor afatinib [20]. Afatinib resulted in a major improvement in PFS versus the optimal standard chemotherapy (pemetrexed and cisplatin) for this type of lung cancer. No survival benefit has generally been observed in these studies, attributed to the important crossover to TKI as a second-line treatment in the chemotherapy arms, except for afatinib in patients with a del19 EGFR mutation in the tumor [21].
+
The PFSR at three months of 81% in the current study exceeds the primary endpoint of the study that hypothesized a PFSR of > 70%.
+
The inclusion of 10% of mutations in exon 20 that are less sensitive to EGFR TKI [22] makes our population less favorable than in the two randomized studies with erlotinib that included only patients with sensitizing mutations. Despite this, the PFS of 11 months in our study is intermediate between the PFS of 13 months in the Optimal study [23] and the PFS of 9.7 months in the EURTAC study [12]. In the more recent Lux-Lung 3 study with afatinib that also included other mutations than exon 19/21, the PFS was 11.1 months, similar to our outcome [20].
+
The median OS was 23 months in the current study, 22 months in the Optimal and 19.3 months in the EURTAC study. The current phase II study thus further clarifies the exact impact of erlotinib in Caucasian patients with an EGFR mutation-positive adenocarcinoma, indicating that erlotinib has a similar therapeutic impact in Caucasian as in Asian patients.
+
Treatment beyond progression was limited as median treatment duration was approximately two months longer than the PFS, similar to the OPTIMAL study. This occurred despite the protocol specification that patients could be treated until progression and indicates clinical benefit beyond progression.
+
Of interest, older patients benefited as much as younger patients. Most of these patients might otherwise not have benefited from optimal doublet chemotherapy and consequently the availability of EGFR targeted treatments has a large impact on this population introducing an effective treatment for a previously unmet medical need. This is in line with other data obtained with gefitinib [24].
+
The mutation detection rate was high (24%) in this phenotypically preselected population. In a more diluted general Caucasian lung cancer population the mutation rate is 10% or less. In the EURTAC study in a population similar to ours, the mutation rate (only exons 19 and 21) was 16.6%. Today, in routine practice upfront reflex EGFR mutation testing should be performed in all patients with adenocarcinoma of the lung and in squamous cell lung cancer with a non-smoking history.
+
We used a generic method for mutation testing that covered all exons encoding the kinase domain of EGFR. This method had a high sensitivity of 93% similar to the sensitivity of commercial kits such as Cobas® and Dxs Therascreen®. In the current cohort five novel mutations were identified, not present in the Cosmic data-base (http://cancer.sanger.ac.uk/cancergenome/projects/cosmic/). Several unique mutations would possibly not have been detected by the DxS Therascreen (9/56) and Cobas (8/56) commercial diagnostic mutation detection kits (Table 1). This includes sensitizing mutations which would have been denied an EGFR TKI therapy when using these commercial kits.
+
The small proportion of false negatives (7%) is not overlapping between both approaches: in the commercial kits some not listed sensitizing mutations can be missed but these methods can detect highly diluted mutations whereas our method was limited with regard to the dilution of the mutant DNA, which we proved subsequently by using a more sensitive technique. For the same reason, our method was unable to identify minority mutations such as the T790M, which at that time seemed of minor importance for the first-line therapy as it had no impact on the therapeutic strategy. This has changed with the documentation of a negative prognostic impact [25] and with the development of novel third generation EGFR inhibitors that can inhibit the T790M EGFR and their exploration in first-line setting [26].
+
The few false negatives with either method are unacceptable when one considers the therapeutic implications. Therefore, we and others have switched to an amplicon-based deep sequencing method (Roche 454 technology, Roche Diagnostics Belgium) that allows maximal detection of mutations and also allows simultaneous examination of other druggable genes without having to resort to sequential diagnostics. It is thereby important that all EGFR kinase domain exons are included in the analysis.
+
In the 10% of patients in whom biopsies are quantitatively or qualitatively insufficient, repeat biopsies in metastatic sites (immediately or later in the disease course) should be done as soon as feasible and acceptable to the patient. It is important that this specific recommendation is included in the report to the clinician whenever this might be relevant. An alternative would be to develop the collection and molecular analysis of circulating tumor cells which, for lung cancer, is not yet optimally developed. Analyzing plasma DNA (cfDNA) has been validated as a more efficient and straightforward method for the diagnosis of EGFR mutations in patients in whom a tissue diagnosis is not possible [27].
+
The incomplete translation of mutation detection to treatment in a prospective setting is surprising as 18% of eligible patients did not enter the treatment phase with erlotinib. A similar drop-out rate (16%) has also been observed in the phase II study by Rosell et al. [28]. In the EURTAC study 10% of patients did not receive erlotinib per protocol after randomization [12]. In the Chinese Optimal study, 99% of those allocated to erlotinib received at least one dose of study drug [13], a remarkable compliance that might have a cultural basis.
+
Seventeen percent of the patients failed early and were considered as being de novo resistant to erlotinib. The median OS for these patients was much shorter, but not necessarily in each patient. Indeed, one patient, treated in second-line with carboplatin-gemcitabine, survived for 26 months, illustrating the complementarity of therapeutics for these patients. There are several mechanisms of de novo resistance to EGFR TKI [29], but an explanation cannot be found in all patients.
+
We were unable to identify characteristics that could predict TKI resistance in most of these patients. Three patients with an exon 20 mutation did not do well, with two failing early and one achieving SD but with an OS of only 9 months. But there were also three non-smokers with an exon 19 mutation that failed early. In one series a KRAS mutation was found to coexist with EGFR mutation in 2/40 cases [29], but this was not the case in our patients. One of these patients was a heavy and current smoker who failed immediately. We hypothesize that in smokers the EGFR mutation occurs in a context in which a large number of cigarette induced mutations also drive the malignant phenotype and dilute the pathogenic and therapeutic relevance of the mutant EGFR. There is also evidence that in current smokers nicotine diminishes the effectiveness of EGFR-TKI [30].
+
Further genomic studies could in the future bring to light mechanisms of de novo and acquired resistance and prompt the investigation of upfront combined therapeutic strategies.
+
The proportion of patients receiving a second-line treatment was low (only 33% chemotherapy and 28% TKI). Only twelve patients received two or more lines of further treatment, one of which received five lines of further therapy and is still alive at 38+ months. For eighteen patients (39%) the local investigators reported no second-line systemic treatment. A similar observation was made in the Optimal study in which 31% of the patients in the erlotinib arm did not receive a second-line treatment [13].
+
These data are surprising in view of the relatively high efficacy of chemotherapy in such patients. It suggests that in practice erlotinib might be continued beyond progression until the disease rapidly deteriorates, the patient’s condition might become less eligible or the patient is less willing to be further treated with chemotherapy. The general consensus is that EGFR-TKI should be given upfront considering their tolerability compared to chemotherapy leading to a more prolonged preservation of QOL [31]. If the opportunity is missed in the first-line, then these patients should receive a TKI in any subsequent line, as early as possible. In patients in whom baseline genotyping was non-conclusive, attempts should be made to repeat biopsies and genotyping upon progression.
+
+
+ Conclusion
+
In this prospective phase II study in phenotypically selected patients with a somatic EGFR mutation in their tumor erlotinib was well tolerated and highly effective in the majority of the patients. The study further clarifies the impact of first-line erlotinib in EGFR mutant lung cancer in Caucasian patients. Elderly patients seem to benefit from EGFR-TKI as well as younger patients, thus filling an important medical need.
+
In order to minimize the false negativity rate, the most sensitive method for mutation detection should be used (deep sequencing of all kinase domain exons) in order not to deny patients a TKI treatment. Mechanisms underlying primary resistance to EGFR TKI need further exploration.
+
+
+ Supporting Information
+
+
+
+ FIELT protocol version 2.0.
+
(DOC)
+
+
+
+
Click here for additional data file.
+
+
+
+
+
+
+ Ethical committee 05–122 Advice.
+
(PDF)
+
+
+
+
Click here for additional data file.
+
+
+
+
+
+
+ TREND Statement checklist.
+
(PDF)
+
+
+
+
Click here for additional data file.
+
+
+
+
+
+
+
+
We thank all patients, their families and personnel involved in the care of these patients as well as Carine Keppens, MD for her support in the initiation of this study.
+
The study was an academic study registered at clinicaltrials.gov as NCT00339586.
+
+
+ References
+
+
+ BunnPAJr. Treatment of advanced non-small-cell lung cancer with two-drug combinations. Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2002;20(17):3565–7. Epub 2002/08/31. .12202651
+
+
+
+ ScagliottiGV, ParikhP, von PawelJ, BiesmaB, VansteenkisteJ, ManegoldC, et al
+Phase III study comparing cisplatin plus gemcitabine with cisplatin plus pemetrexed in chemotherapy-naive patients with advanced-stage non-small-cell lung cancer. Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2008;26(21):3543–51. Epub 2008/05/29. 10.1200/JCO.2007.15.0375 .18506025
+
+
+
+ ShepherdFA, Rodrigues PereiraJ, CiuleanuT, TanEH, HirshV, ThongprasertS, et al
+Erlotinib in previously treated non-small-cell lung cancer. The New England journal of medicine. 2005;353(2):123–32. Epub 2005/07/15. 10.1056/NEJMoa050753 .16014882
+
+
+
+ ThatcherN, ChangA, ParikhP, Rodrigues PereiraJ, CiuleanuT, von PawelJ, et al
+Gefitinib plus best supportive care in previously treated patients with refractory advanced non-small-cell lung cancer: results from a randomised, placebo-controlled, multicentre study (Iressa Survival Evaluation in Lung Cancer). Lancet. 2005;366(9496):1527–37. Epub 2005/11/01. 10.1016/S0140-6736(05)67625-8 .16257339
+
+
+
+ PaezJG, JannePA, LeeJC, TracyS, GreulichH, GabrielS, et al
+EGFR mutations in lung cancer: correlation with clinical response to gefitinib therapy. Science. 2004;304(5676):1497–500. Epub 2004/05/01. 10.1126/science.1099314 .15118125
+
+
+
+ LynchTJ, BellDW, SordellaR, GurubhagavatulaS, OkimotoRA, BranniganBW, et al
+Activating mutations in the epidermal growth factor receptor underlying responsiveness of non-small-cell lung cancer to gefitinib. The New England journal of medicine. 2004;350(21):2129–39. Epub 2004/05/01. 10.1056/NEJMoa040938 .15118073
+
+
+
+ PhamD, KrisMG, RielyGJ, SarkariaIS, McDonoughT, ChuaiS, et al
+Use of cigarette-smoking history to estimate the likelihood of mutations in epidermal growth factor receptor gene exons 19 and 21 in lung adenocarcinomas. Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2006;24(11):1700–4. Epub 2006/03/01. 10.1200/JCO.2005.04.3224 .16505411
+
+
+
+ YasudaH, ParkE, YunCH, SngNJ, Lucena-AraujoAR, YeoWL, et al
+Structural, biochemical, and clinical characterization of epidermal growth factor receptor (EGFR) exon 20 insertion mutations in lung cancer. Science translational medicine. 2013;5(216):216ra177 Epub 2013/12/20. 10.1126/scitranslmed.3007205
+24353160
+
+
+
+ MoritaS, OkamotoI, KobayashiK, YamazakiK, AsahinaH, InoueA, et al
+Combined survival analysis of prospective clinical trials of gefitinib for non-small cell lung cancer with EGFR mutations. Clinical cancer research: an official journal of the American Association for Cancer Research. 2009;15(13):4493–8. Epub 2009/06/18. 10.1158/1078-0432.CCR-09-0391 .19531624
+
+
+
+ MokTS, WuYL, ThongprasertS, YangCH, ChuDT, SaijoN, et al
+Gefitinib or carboplatin-paclitaxel in pulmonary adenocarcinoma. The New England journal of medicine. 2009;361(10):947–57. Epub 2009/08/21. 10.1056/NEJMoa0810699 .19692680
+
+
+
+ FukuokaM, WuYL, ThongprasertS, SunpaweravongP, LeongSS, SriuranpongV, et al
+Biomarker analyses and final overall survival results from a phase III, randomized, open-label, first-line study of gefitinib versus carboplatin/paclitaxel in clinically selected patients with advanced non-small-cell lung cancer in Asia (IPASS). Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2011;29(21):2866–74. Epub 2011/06/15. 10.1200/JCO.2010.33.4235 .21670455
+
+
+
+ RosellR, CarcerenyE, GervaisR, VergnenegreA, MassutiB, FelipE, et al
+Erlotinib versus standard chemotherapy as first-line treatment for European patients with advanced EGFR mutation-positive non-small-cell lung cancer (EURTAC): a multicentre, open-label, randomised phase 3 trial. The lancet oncology. 2012;13(3):239–46. Epub 2012/01/31. 10.1016/S1470-2045(11)70393-X .22285168
+
+
+
+ ZhouC, WuYL, ChenG, FengJ, LiuXQ, WangC, et al
+Erlotinib versus chemotherapy as first-line treatment for patients with advanced EGFR mutation-positive non-small-cell lung cancer (OPTIMAL, CTONG-0802): a multicentre, open-label, randomised, phase 3 study. The lancet oncology. 2011;12(8):735–42. Epub 2011/07/26. 10.1016/S1470-2045(11)70184-X .21783417
+
+
+
+ JannePA, JohnsonBE. Effect of epidermal growth factor receptor tyrosine kinase domain mutations on the outcome of patients with non-small cell lung cancer treated with epidermal growth factor receptor tyrosine kinase inhibitors. Clinical cancer research: an official journal of the American Association for Cancer Research. 2006;12(14 Pt 2):4416s–20s. Epub 2006/07/22. 10.1158/1078-0432.CCR-06-0555 .16857820
+
+
+
+ TrottiA, ColevasAD, SetserA, RuschV, JaquesD, BudachV, et al
+CTCAE v3.0: development of a comprehensive grading system for the adverse effects of cancer treatment. Seminars in radiation oncology. 2003;13(3):176–81. Epub 2003/08/07. 10.1016/S1053-4296(03)00031-6 .12903007
+
+
+
+ SmitEF, van MeerbeeckJP, LianesP, DebruyneC, LegrandC, SchramelF, et al
+Three-arm randomized study of two cisplatin-based regimens and paclitaxel plus gemcitabine in advanced non-small-cell lung cancer: a phase III trial of the European Organization for Research and Treatment of Cancer Lung Cancer Group—EORTC 08975. Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2003;21(21):3909–17. Epub 2003/10/29. 10.1200/JCO.2003.03.195 .14581415
+
+
+
+ ShahiRB, De BrakeleerS, De GreveJ, GeersC, In't VeldP, TeugelsE. Detection of EGFR-TK Domain-activating Mutations in NSCLC With Generic PCR-based Methods. Diagnostic molecular pathology: the American journal of surgical pathology, part B. 2014 Epub 2014/02/04. 10.1097/PDM.0000000000000035 .
+
+
+
+ YasudaH, KobayashiS, CostaDB. EGFR exon 20 insertion mutations in non-small-cell lung cancer: preclinical data and clinical implications. The lancet oncology. 2012;13(1):e23–31. Epub 2011/07/19. 10.1016/S1470-2045(11)70129-2 .21764376
+
+
+
+ SharmaSV, BellDW, SettlemanJ, HaberDA. Epidermal growth factor receptor mutations in lung cancer. Nature reviews Cancer. 2007;7(3):169–81. Epub 2007/02/24. 10.1038/nrc2088 .17318210
+
+
+
+ SequistLV, YangJC, YamamotoN, O'ByrneK, HirshV, MokT, et al
+Phase III study of afatinib or cisplatin plus pemetrexed in patients with metastatic lung adenocarcinoma with EGFR mutations. Journal of clinical oncology: official journal of the American Society of Clinical Oncology. 2013;31(27):3327–34. Epub 2013/07/03. 10.1200/JCO.2012.44.2806 .23816960
+
+
+
+ YangJC, WuYL, SchulerM, SebastianM, PopatS, YamamotoN, et al
+Afatinib versus cisplatin-based chemotherapy for EGFR mutation-positive lung adenocarcinoma (LUX-Lung 3 and LUX-Lung 6): analysis of overall survival data from two randomised, phase 3 trials. The lancet oncology. 2015;16(2):141–51. Epub 2015/01/16. 10.1016/S1470-2045(14)71173-8 .25589191
+
+
+
+ WuJY, WuSG, YangCH, GowCH, ChangYL, YuCJ, et al
+Cancer with epidermal growth factor receptor exon 20 mutations is associated with poor gefitinib treatment response. Clin Cancer Res. 2008;14(15):4877–82. 10.1158/1078-0432.CCR-07-5123 .18676761
+
+
+
+ ZhouC, WuYL, ChenG, FengJ, LiuXQ, WangC, et al
+Final overall survival results from a randomised, phase III study of erlotinib versus chemotherapy as first-line treatment of EGFR mutation-positive advanced non-small-cell lung cancer (OPTIMAL, CTONG-0802). Annals of oncology: official journal of the European Society for Medical Oncology / ESMO. 2015 Epub 2015/07/05. 10.1093/annonc/mdv276 .26141208
+
+
+
+ TateishiK, IchiyamaT, HiraiK, AgatsumaT, KoyamaS, HachiyaT, et al
+Clinical outcomes in elderly patients administered gefitinib as first-line treatment in epidermal growth factor receptor-mutated non-small-cell lung cancer: retrospective analysis in a Nagano Lung Cancer Research Group study. Medical oncology. 2013;30(1):450 Epub 2013/01/15. 10.1007/s12032-012-0450-2 .23315220
+
+
+
+ DingD, YuY, LiZ, NiuX, LuS. The predictive role of pretreatment epidermal growth factor receptor T790M mutation on the progression-free survival of tyrosine-kinase inhibitor-treated non-small cell lung cancer patients: a meta-analysis. OncoTargets and therapy. 2014;7:387–93. Epub 2014/03/14. 10.2147/OTT.S58870
+24623981
+
+
+
+ CortotAB, JannePA. Molecular mechanisms of resistance in epidermal growth factor receptor-mutant lung adenocarcinomas. European respiratory review: an official journal of the European Respiratory Society. 2014;23(133):356–66. Epub 2014/09/02. 10.1183/09059180.00004614 .25176972
+
+
+
+ KarachaliouN, Mayo-de Las CasasC, QueraltC, de AguirreI, MelloniB, CardenalF, et al
+Association of EGFR L858R Mutation in Circulating Free DNA With Survival in the EURTAC Trial. JAMA oncology. 2015;1(2):149–57. Epub 2015/07/17. 10.1001/jamaoncol.2014.257 .26181014
+
+
+
+ RosellR, MoranT, QueraltC, PortaR, CardenalF, CampsC, et al
+Screening for epidermal growth factor receptor mutations in lung cancer. The New England journal of medicine. 2009;361(10):958–67. Epub 2009/08/21. 10.1056/NEJMoa0904554 .19692684
+
+
+
+ TakedaM, OkamotoI, FujitaY, AraoT, ItoH, FukuokaM, et al
+De novo resistance to epidermal growth factor receptor-tyrosine kinase inhibitors in EGFR mutation-positive patients with non-small cell lung cancer. Journal of thoracic oncology: official publication of the International Association for the Study of Lung Cancer. 2010;5(3):399–400. Epub 2010/02/27. 10.1097/JTO.0b013e3181cee47e .20186026
+
+
+
+ FilostoS, BeckerCR, GoldkornT. Cigarette smoke induces aberrant EGF receptor activation that mediates lung cancer development and resistance to tyrosine kinase inhibitors. Molecular cancer therapeutics. 2012;11(4):795–804. Epub 2012/02/04. 10.1158/1535-7163.MCT-11-0698 .22302097
+
+
+
+ OizumiS, KobayashiK, InoueA, MaemondoM, SugawaraS, YoshizawaH, et al
+Quality of life with gefitinib in patients with EGFR-mutated non-small cell lung cancer: quality of life analysis of North East Japan Study Group 002 Trial. The oncologist. 2012;17(6):863–70. Epub 2012/05/15. 10.1634/theoncologist.2011-0426
+22581822
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/PMC4919728.xml b/tests/data/PMC4919728.xml
new file mode 100644
index 0000000..92db3b0
--- /dev/null
+++ b/tests/data/PMC4919728.xml
@@ -0,0 +1,1691 @@
+
+
+
+
+
+
+ EBioMedicine
+ EBioMedicine
+
+ EBioMedicine
+
+ 2352-3964
+
+ Elsevier
+
+
+
+ 27428428
+ 4919728
+ S2352-3964(16)30181-5
+ 10.1016/j.ebiom.2016.04.038
+
+
+ Research Paper
+
+
+
+ Genomic Profiling of Adult and Pediatric B-cell Acute Lymphoblastic Leukemia
+
+
+
+
+ Liu
+ Yuan-Fang
+
+ a
+ 1
+
+
+
+ Wang
+ Bai-Yan
+
+ a
+ m
+ 1
+
+
+
+ Zhang
+ Wei-Na
+
+ a
+ 1
+
+
+
+ Huang
+ Jin-Yan
+
+ a
+ ⁎
+ 1
+
+
+
+ Li
+ Ben-Shang
+
+ c
+ 1
+
+
+
+ Zhang
+ Ming
+
+ a
+
+
+
+ Jiang
+ Lu
+
+ a
+
+
+
+ Li
+ Jian-Feng
+
+ a
+
+
+
+ Wang
+ Ming-Jie
+
+ a
+
+
+
+ Dai
+ Yu-Jun
+
+ a
+
+
+
+ Zhang
+ Zi-Guan
+
+ b
+
+
+
+ Wang
+ Qiang
+
+ a
+
+
+
+ Kong
+ Jie
+
+ a
+
+
+
+ Chen
+ Bing
+
+ a
+
+
+
+ Zhu
+ Yong-Mei
+
+ a
+
+
+
+ Weng
+ Xiang-Qin
+
+ a
+
+
+
+ Shen
+ Zhi-Xiang
+
+ a
+
+
+
+ Li
+ Jun-Min
+
+ a
+
+
+
+ Wang
+ Jin
+
+ a
+
+
+
+ Yan
+ Xiao-Jing
+
+ i
+
+
+
+ Li
+ Yan
+
+ i
+
+
+
+ Liang
+ Ying-Min
+
+ k
+
+
+
+ Liu
+ Li
+
+ k
+
+
+
+ Chen
+ Xie-Qun
+
+ j
+
+
+
+ Zhang
+ Wang-Gang
+
+ m
+
+
+
+ Yan
+ Jin-Song
+
+ l
+
+
+
+ Hu
+ Jian-Da
+
+ n
+
+
+
+ Shen
+ Shu-Hong
+
+ c
+
+
+
+ Chen
+ Jing
+
+ c
+
+
+
+ Gu
+ Long-Jun
+
+ c
+
+
+
+ Pei
+ Deqing
+
+ h
+
+
+
+ Li
+ Yongjin
+
+ f
+
+
+
+ Wu
+ Gang
+
+ f
+
+
+
+ Zhou
+ Xin
+
+ f
+
+
+
+ Ren
+ Rui-Bao
+
+ a
+
+
+
+ Cheng
+ Cheng
+
+ h
+
+
+
+ Yang
+ Jun J.
+
+ g
+
+
+
+ Wang
+ Kan-Kan
+
+ a
+
+
+
+ Wang
+ Sheng-Yue
+
+ d
+
+
+
+ Zhang
+ Jinghui
+
+ f
+
+
+
+ Mi
+ Jian-Qing
+
+ a
+ ⁎
+
+
+
+ Pui
+ Ching-Hon
+
+ e
+ ⁎
+
+
+
+ Tang
+ Jing-Yan
+
+ c
+ ⁎
+
+
+
+ Chen
+ Zhu
+
+ a
+ b
+ ⁎
+
+
+
+ Chen
+ Sai-Juan
+
+ sjchen@stn.sh.cn
+ a
+ b
+ ⁎
+
+
+ State Key Laboratory of Medical Genomics, Shanghai Institute of Hematology, Rui Jin Hospital Affiliated to Shanghai Jiao Tong University School of Medicine, 197 Rui Jin Road II, Shanghai 200025, China
+ Key Laboratory of Ministry of Education for Systems Biomedicine, Shanghai Center for Systems Biomedicine, Shanghai Jiao Tong University, 800 Dongchuan Road, Shanghai 200240, China
+ Key Laboratory of Pediatric Hematology & Oncology, Ministry of Health, Department of Hematology and Oncology, Shanghai Children's Medical Center, Shanghai Jiao Tong University School of Medicine, Shanghai 200127, China
+ Chinese National Human Genome Center at Shanghai, Shanghai 201203, China
+ Department of Oncology, St. Jude Children's Research Hospital, Memphis, TN, USA
+ Computational Biology and Bioinformatics, St. Jude Children's Research Hospital, Memphis, TN, USA
+ Pharmaceutical Sciences, St. Jude Children's Research Hospital, Memphis, TN, USA
+ Biostatistics, St. Jude Children's Research Hospital, Memphis, TN, USA
+ Department of Hematology, First Hospital of China Medical University, Shenyang 110001, China
+ Department of Hematology, Xi Jing Hospital affiliated to the Fourth Military Medical University, Xi'an, Shaan Xi 710032, China
+ Department of Hematology, Tang Du Hospital affiliated to the Fourth Military Medical University, Xi'an, Shaan Xi 710038, China
+ Dalian Key Laboratory of Hematology, Department of Hematology, Second Hospital of Dalian Medical University, Dalian, Liaoning 116027, China
+ Department of Hematology, The Second Affiliated Hospital of Xi'an Jiaotong University School of Medicine, Xi'an, Shaan Xi 710004, China
+ Fujian Institute of Hematology, Fujian Medical University Union Hospital, Fuzhou 350000, China
+
+ Corresponding authors. sjchen@stn.sh.cn
+
+
+
Y.-F.L., B.-Y.W., W.-N.Z., J.-Y.H. and B.-S.L. contributed equally to this work.
Genomic landscapes of 92 adult and 111 pediatric patients with B-cell acute lymphoblastic leukemia (B-ALL) were investigated using next-generation sequencing and copy number alteration analysis. Recurrent gene mutations and fusions were tested in an additional 87 adult and 93 pediatric patients. Among the 29 newly identified in-frame gene fusions, those involving MEF2D and ZNF384 were clinically relevant and were demonstrated to perturb B-cell differentiation, with EP300-ZNF384 inducing leukemia in mice. Eight gene expression subgroups associated with characteristic genetic abnormalities were identified, including leukemia with MEF2D and ZNF384 fusions in two distinct clusters. In subgroup G4, which was characterized by ERG deletion, DUX4-IGH fusion was detected in most cases. This comprehensive dataset allowed us to compare the features of molecular pathogenesis between adult and pediatric B-ALL and to identify signatures possibly related to the inferior outcome of adults compared with that of children. We found that, besides the known discrepancies in frequencies of prognostic markers, adult patients had more cooperative mutations and greater enrichment for alterations of epigenetic modifiers and genes linked to B-cell development, suggesting differences in the target cells of transformation between adult and pediatric patients, which may explain in part the disparity in their responses to treatment.
+
+
+ Highlights
+
+
+
+
+
The genomic landscapes of adult and pediatric B-ALL were defined by next-generation sequencing of patient samples.
+
+
+
+
MEF2D and ZNF384 fusions could perturb B-cell differentiation or induce leukemia in mice and exhibited clinical relevance.
+
+
+
+
Adult patients showed greater enrichment for alterations of genes linked to epigenetic modification and B-cell development.
+
+
+
+
This study comprehensively addressed the genomic signatures of adult versus pediatric B-ALL. The identification of distinct MEF2D and ZNF384 fusions expands the existing knowledge about molecular subtypes of B-ALL in both age groups. RNA-seq data allowed most of the B-ALL cases to be clustered into 8 subgroups related to genetic abnormalities. Notably, adult patients have more cooperative sequence variation mutations than pediatric patients, especially in genes involved in epigenetic regulation and B-cell development. These findings may improve our understanding of leukemogenesis in B-ALL, leading to a more precise genetic classification and the further development of targeted therapy in this disease.
Acute lymphoblastic leukemia (ALL) results from the clonal proliferation of lymphoid stem or progenitor cells, with more than 80% being originated from B-cell progenitors (B-ALL) (Pui, 2010). Recurrent cytogenetic and molecular abnormalities have been identified to play key roles in ALL pathogenesis, frequently by targeting vital molecular components of hematopoietic differentiation, cell cycling, tumor suppression and stem cell self-renewal (Roberts et al., 2012). The identification of these abnormalities not only reveals underlying molecular pathology, but also provides important therapeutic targets, as exemplified by the improved outcome achieved with ABL tyrosine kinase inhibitors in patients with Philadelphia chromosome-positive or Philadelphia chromosome-like ALL patients with ABL class fusion transcripts (Schultz et al., 2014, Chalandon et al., 2015, Roberts et al., 2014). Recent technologic advances have enabled detailed characterization of the genomic landscape of childhood ALL, including DNA sequence abnormalities such as single nucleotide variations (SNV), small insertions or deletions (indels) and copy number variations (CNV) (Mullighan et al., 2007); gene expression anomalies (Roberts et al., 2014, Den Boer et al., 2009); gene fusions due to cryptic chromosomal rearrangements (Roberts et al., 2014, Gocho et al., 2015); and aberrant epigenetic modifications including abnormal DNA methylation, histone modifications and mutations of epigenetic modifier genes (Gabriel et al., 2015). The precise contribution of these genetic or epigenetic abnormalities to leukemogenesis, the development of drug resistance and leukemic clone evolution remains to be defined (Ma et al., 2015).
+
Remarkable progress has been made in the treatment of ALL. Currently, the five-year overall survival rates exceed 85% in pediatric patients in developed countries (Pui et al., 2015), but remain less than 45% in adults (Jabbour et al., 2015, Bassan and Hoelzer, 2011). The poor outcome in adults has been attributed in part to a high frequency of unfavorable genetic subtypes of ALL, pre-existing co-morbidities, and poor tolerance of intensive treatment.
+
In our previous report on large B-ALL cohorts in China, the outcome of both adult and pediatric patients seemed less favorable than that of the best centers in Western countries (Mi et al., 2012). The higher frequency of unfavorable prognostic factors such as BCR-ABL1 and the lower frequency of favorable factors such as ETV6-RUNX1 and hyperdiploidy in Chinese pediatric ALL could contribute to this difference (Chen et al., 2012). However, within the same genetic subtypes of ALL, the leukemic cells of adult patients are more resistant to treatment than those of pediatric patients. For example, cures can be achieved with intensive chemotherapy and an ABL tyrosine kinase inhibitor in up to 70% of children with Philadelphia chromosome-positive ALL with BCR-ABL1 fusion (Chalandon et al., 2015), but in less than 50% of adults even with the addition of transplantation (Chalandon et al., 2015, Jabbour et al., 2015). In a recent study of MLL-rearranged B-ALL, older children had more somatic mutations and had a higher frequency of mutated epigenetic regulators than did infants (Andersson et al., 2015), suggesting important differences in the development and prognosis of leukemia between infants and older children. Thus, we undertook a detailed analysis of B-ALL leukemic cell genomics in adults and children to identify genetic abnormalities in a systematic way and to discover alterations that might explain the inferior prognosis of adult B-ALL and to identify potential therapeutic targets for this high-risk cancer.
+
+
+
+ Methods
+
+
+ Patients and Samples
+
All of the B-ALL patients enrolled in this study were diagnosed and/or treated in the Shanghai Institute of Hematology (SIH)-based hospital network or Multicenter Hematology-Oncology Protocols Evaluation System (M-HOPES) in China. A total of 383 patients including 179 adults (> 18 years) and 204 children (≤ 18 years) with newly diagnosed B-cell ALL and tissue samples available for genomic analyses were enrolled in this study.
+
The 92 adults and 111 children with sufficient bone marrow and matched remission samples or saliva samples for one or more of the following analyses formed the discovery cohort: whole-exome sequencing (201 patients), whole-genome sequencing (9), whole-transcriptome sequencing (172) and copy number alteration analysis (202) (Fig. 1). The features for these patients are reported in Table 1 (Table S1 for individual patients) and are in concordance with the data we previously reported (Mi et al., 2012, Chen et al., 2012).
+
Targeted deep sequencing for gene mutations and reverse transcriptase (RT)-PCR for recurrent fusion genes were performed in an additional 87 adults and 93 children, designated the recurrent cohort. Adult patients in the discovery cohort were mostly enrolled in an SIH protocol [Chinese Clinical Trial Registry, number ChiCTR-RNC-14004969 (for sample collection) and ChiCTR-ONRC-14004968 (for treatment)] which was basically a modified VDLCP regimen. Pediatric patients in the discovery cohort were mostly enrolled in the Shanghai Children's Medical Center ALL-2005 protocol (Chinese Clinical Trial Registry, number ChiCTR-ONC-14005003). Other patients in the recurrent cohort were treated as described in detail previously (Mi et al., 2012). Fifteen patients 17 to 18 years old were treated with the adult clinical trial. The study was approved by the institutional review board of each participating center. All patients, parents or guardians provided informed consent for sample collection and research in accord with the Declaration of Helsinki.
+
Bone marrow (BM) aspiration was conducted at diagnosis and mononuclear cells were enriched by density gradient centrifugation with Ficoll solution. Genomic DNA and total RNA were extracted using AllPrep DNA/RNA/Protein Mini Kit (Qiagen) or TRIzol reagent (Invitrogen). The germline control DNA was obtained from matching peripheral blood during complete remission after standard chemotherapy and prepared by QuickGene DNA whole blood Kit L (FUJIFILM, Life Science), or from saliva samples collected using Oragene kits according to the manufacturer's instructions (DNA Genotek Inc.). Morphological, immunophenotypic and cytogenetic analyses were performed at the time of diagnosis. The transcripts of BCR-ABL1, ETV6-RUNX1, TCF3-PBX1, MLL-MLLT1 and MLL-AFF1 fusion genes, as well as the transcriptional expression of CRLF2 and IKZF1 were detected as previously described (Chen et al., 2012).
+
+
+
+ Genomic Analysis and Functional Experiments
+
The details of next-generation sequencing, gene expression and single-nucleotide-polymorphism microarray profiling and functional experiments are provided in Supplementary methods.
+
+
+
+ Statistical Analysis
+
Comparisons of categorical variables were determined by Pearson's Chi-square test or Fisher's exact test. Survival was measured from the date of diagnosis of B-ALL to the date of death from any cause or to the date of last contact. The database frozen on December 3, 2015 was used for analysis. The Kaplan-Meier method was used to calculate estimates of survival probabilities, which were compared by the log-rank test. The Welch's t-test was used for the luciferase reporter assay, chromatin immunoprecipitation (ChIP) assay, flow cytometry assay and comparisons of mutation numbers in adults and children. Two-sided P values less than 0.05 were considered statistically significant. Analyses were performed with the use of SPSS 22.0 software and R3.2.2.
+
+
+
+ Acknowledgements and Role of the Funding Source
+
We are indebted to colleagues from SIH and Department of Hematology, Rui Jin Hospital. We are grateful to all the patients who participated in the study. We also thank Drs. Wei-Wu He and Ke-Hu Yuan (OriGene Technologies) for kindly providing the plasmids of wild-type full-length ZNF384, MEF2D, HNRNPUL1, BCL9, EP300 and HDAC9 cDNAs constructed into the CMV6 vector. This work was supported by Chinese National Key Basic Research Project 973 grant (2013CB966800), National High Tech Program for Biotechnology grant 863 (2012AA02A505), Ministry of Health grant (201202003), Mega-projects of Scientific Research for the 12th Five-Year Plan Grant (2013ZX09303302), National Natural Science Foundation of China grants (81123005, 81200373, 81570122), the Samuel Waxman Cancer Research Foundation Co-Principal Investigator Program, The Program for Professor of Special Appointment (Eastern Scholar) at Shanghai Institutions of Higher Learning (QD2015005) and the Program of Shanghai Subject Chief Scientist (16XD1402000). These funding sources played a key supportive role for data and sample collection, experiments of genomic profiling and molecular analysis of patient samples, in vitro and in vivo studies of gene functions, bioinformatics and data analysis, as well as data interpretation. This work was also supported by Grants No. CA21765, U01GM92666, and P50 GM115279 from the National Institutes of Health, Cancer Center support grant P30 CA021765 from the US National Cancer Institute, and the American Lebanese Syrian Associated Charities, in data analyses and interpretation.
+
+
+
+
+ Results
+
+
+ Overview of Sequence Mutations, Copy Number Alterations and Gene Fusions
+
We addressed the frequencies of single nucleotide transitions and transversions of the coding regions and entire genomic regions in the whole-exome sequencing and whole-genome sequencing datasets, respectively. It was noted that in both sequencing analyses, the most common mutation types were transitions including C/G to T/A and A/T to G/C (Fig. 2a). In total, 2336 non-silent mutations within 1779 genes were confirmed in the primary leukemia samples of 203 cases (Table S2). A tendency of the mutation load increasing with the age was observed (P < 0.001, Fig. 2b). These mutations consisted predominantly of missense ones (n = 1954; 83.6%), followed by nonsense mutations (n = 132; 5.7%), frameshift due to small indels (n = 128; 5.5%), in‐frame indels (n = 66; 2.8%) and splice‐site variants (n = 56; 2.4%) (Fig. 2c).
+
Across 203 sequenced samples, the number of non-silent coding mutations (Table S2) in each individual varied significantly (range, 0–88), with a median of 11 mutations (range, 3–88) per adult case and 9 mutations (range, 0–45) per pediatric case (P = 0.004, Fig. 3a). In addition to the well-established genes involved in B-ALL pathogenesis such as PAX5 and RAS family members, to identify possible “driver” mutations related to leukemogenesis, we mainly focused on the recurrent ones. We also used PolyPhen2, SIFT and PROVEAN tools to predict the possible effect of amino acid changes on the normal functions of the proteins (Adzhubei et al., 2010, Choi and Chan, 2015). Indeed, 76.4% (1481/1938) of the missense mutations available for either of these tools were predicted to be deleterious (Table S2). Three hundred and twenty-five genes were recurrently mutated, 105 (32.3%) of which were not described before in B-ALL (Table S3). The most frequently mutated genes are listed in Fig. 3b, after exclusion of very large size genes with lower selection pressure (Greenman et al., 2007). Notably, mutations affecting ZFHX4, a member of the DNA-damage-repair pathway that may interact with CHD4 to modulate TP53 (Chudnovsky et al., 2014), were not previously reported in leukemia.
+
Single nucleotide polymorphism array profiling identified genes with somatic copy number gains or losses that were recurrent across 91 adult patients and 111 pediatric patients analyzed (Table S4). Copy losses of CDKN2A/2B (9p21), PAX5 (9p13) and ETV6 (12p13) were prevalent in both adults and children, while copy gains of RUNX1 (21q22.3) were more enriched in children and deletions of IKZF1 (7p12.2) were more common in adults (Fig. 4).
+
Besides commonly observed fusions (BCR-ABL1, E2A-PBX1, ETV6-RUNX1, MLL-rearranged), RNA-seq of 78 adults and 94 children identified 41 fusions which were not reported previously, including 29 in-frame fusions and 12 out-of-frame fusions (Fig. 3c, Table S5 and Table S6). Two classes of in-frame fusions – one with MEF2D as the N-terminus partner and the other with ZNF384 as the C-terminus partner – were noteworthy for their frequent occurrence in our cohort and the disruption of partner genes involved in transcriptional regulation. Another class of fusion transcript with DUX4 and IGH rearrangements was also identified.
+
+
+
+ Characterization of MEF2D and ZNF384 Related Gene Fusions
+
From the combined data of the discovery and recurrent cohorts, MEF2D fusions were found in 12 of 177 (6.8%) adult and 7 of 199 (3.5%) pediatric patients, with ZNF384 fusions identified in 13 of 177 (7.3%) adult and 8 of 199 (4.0%) pediatric patients. Pediatric patients with MEF2D fusions tended to be older (median 12.1 years old, P = 0.08). The leukemic cells with MEF2D fusions mostly had a pre-B immunophenotype (Table S7). Among patients with available data, the 7 children with MEF2D fusions had a significantly worse five-year survival than did the remaining 186 without this feature (33.3% vs. 71.2%, P = 0.01); a similar tendency was observed in adult B-ALL (15.6% [n = 12] vs. 31.3% [n = 157], P = 0.08) (Table S8).
+
Leukemic cells from patients with ZNF384 fusions were more likely to be CD10-negative than were those from other patients in both the adult (15.8% vs. 5.3%, P = 0.04) and pediatric (18.8% vs. 2.9%, P = 0.02) cohorts; and to co-express myeloid-associated antigen CD13 and/or CD33 (13.1% vs. 0, P = 0.001 in adults; 12.1% vs. 1.7%, P = 0.02 in children) (Table S7). There were no significant survival differences between patients with or without ZNF384 fusions in either the adult (38.6% [n = 12] vs. 29.6% [n = 157]) or pediatric cohort (75.0% [n = 8] vs. 69.4% [n = 185]) (Table S8). Our observation on the clinical impact of the above two fusions (Fig. 5) was in concordance with a very recent report (Yasuda et al., 2016).
+
We identified 3 new fusion partners of MEF2D, including BCL9 (8 cases), HNRNPUL1 (8) and SS18 (2), and a single case with the known fusion partner DAZAP1 (Prima et al., 2005), some of which could express different fusion transcripts with distinct exon usage (Table S7). We also identified five partners for ZNF384 fusions: EP300 (14 cases), EWSR1 (2), TCF3 (2) and TAF15 (2) and CREBBP (1) (Table S7). CREBBP, a histone acetyltransferase, represented a fusion partner of ZNF384 newly identified in this study (Fig. 6a), while its homolog, EP300, was described recently (Gocho et al., 2015). The protein domain compositions of MEF2D and ZNF384 gene fusions are depicted in Fig. 6a and Fig. S1, while the nuclear localizations of MEF2D-HNRNPUL1, MEF2D-BCL9 and EP300-ZNF384 proteins were confirmed (Fig. S2).
+
When co-cultured with SCF, interleukin 7 and Flt-3 ligand (Flt3L) to evaluate the effects of MEF2D fusions on hematopoietic development (Hirose et al., 2002), MEF2D-HNRNPUL1 and MEF2D-BCL9 significantly inhibited the differentiation of mouse Lin− c-KitLow cells into CD19+ B-cells in vitro (Fig. S3a). In parallel, in a retrovirus-mediated BM transplantation model, ectopic expression of MEF2D fusions resulted in a striking increase in the proportion of early immature B-cells (B220+, CD43+) and decrease in B220+, CD19+ subset in BM (Opferman et al., 2003) (Fig. 6b) with a concomitant decrease of B220+ cells in peripheral blood (Fig. 6c). These results suggested an impairment of B-cell development. Compared to wild-type MEF2D, fusion proteins showed markedly higher transactivating activities for HDAC9, a known transcriptional target of MEF2D (Haberland et al., 2007) (Fig. 6d). Both chimeric proteins directly interacted with the HDAC9 promoter region as evidenced by ChIP, and this interaction was much stronger for fusion proteins compared to wild-type MEF2D (Fig. 6e). RAG1, an executor of V(D)J recombination, was regulated by HDAC9, which was verified in JM-1, a pre-B leukemia cell line (Fig. 6f). In fact, B-ALL blasts harboring MEF2D fusions showed extremely high levels of HDAC9 transcripts and downregulation of RAG1 (Fig. S3b). Similarly, EP300-ZNF384 completely blocked the differentiation of mouse Lin− c-KitLow cells into CD19+ B-cells in vitro (Fig. S4a), and mice transplanted with EP300-ZNF384-expressing BM cells were largely void of B-cells in peripheral blood (Fig. 6g). Notably, EP300-ZNF384 mice developed acute leukemia with a median survival of 100 days (Fig. 6h), with characteristic leukocytosis, anemia, splenomegaly (Fig. S4b, Fig. S4c) and an accumulation of blast cells in BM (Fig. 6i). The leukemic blasts were monoblastic in appearance with a significant inhibition of naphthol AS-D acetate esterase activities by sodium fluoride (Fig. 6j). 
Flow cytometry analysis further confirmed a striking expansion of c-Kit+ blasts with a severe reduction of lymphoid compartments (Fig. 6k). According to a recent redefinition of the hematopoietic hierarchy, hematopoiesis does not follow a rigid model of myeloid-lymphoid segregation and the myelomonocytic lineage is tied to lymphoid fate (Notta et al., 2016). Secondary transplantation of the EP300-ZNF384 mouse leukemic blasts led to even more pronounced disease progression (Fig. 6h).
+
+
+
+ Gene Expression Profiles and Associations with Genetic Abnormalities
+
Unsupervised clustering of gene expression derived from RNA-seq of 78 adult and 94 pediatric B-ALL patients (Table S9) identified 8 distinct subgroups G1-G8 (Fig. 7) showing strong associations with oncogenic gene fusion, karyotype abnormality or intra-genic deletion. Leukemias with MEF2D or ZNF384 gene fusions were clustered in G1 and G5, respectively, providing strong evidence that these gene fusions are oncogenic drivers of two distinct leukemia subtypes. The associated expression signatures of these two subgroups are described below.
+
The G1 subgroup with MEF2D fusions was the closest to G2 subgroup with TCF3-PBX1, as both showed upregulation of genes encoding pre-B-cell receptor signaling (pre-BCR) molecules (IGHM, IGLL1 and ZAP70) (Geng et al., 2015) and transcription factors promoting B-cell differentiation (IRF4, PAX5, EBF1, BCL6 and IKZF3), together with the downregulation of CD34 and STAT5 pathway related genes (ITGA6, CCND2 and SOCS2). However, G1 was separated from G2 by upregulated expression in several outlier genes, such as HDAC9 (Haberland et al., 2008), one of the downstream target genes of MEF2D, and PTPRZ1 that shape the B-cell repertoire (Cohen et al., 2012). The G5 subgroup driven by ZNF384 fusions exhibited high expression of transcription factors such as GATA3 (Heavey et al., 2003), CEBPA and CEBPB (Xie et al., 2004), all of which play key roles in the reprogramming of B-cells into myeloid cells (Fig. S5a). This is consistent with our observation of CD13/CD33 expression on primary B-ALL blasts with ZNF384 fusions (Table S7). Strikingly, G5 had few mutations or deletions of genes involved in B-cell development (e.g. IKZF1, PAX5, RUNX1, ETV6) or the cell cycle (e.g. CDKN2A/2B). The JAK-STAT signaling pathway was upregulated in leukemic cells with ZNF384 fusions, in contrast to its downregulation in leukemic cells with MEF2D fusions, which may rely on pre-BCR signaling for survival (Fig. S5b).
+
In other subgroups including G3, G4, G6, G7 and G8, the dominant genetic subtypes were ETV6-RUNX1, ERG deletion, hyperdiploidy, MLL-rearranged, BCR-ABL1 or BCR-ABL1-like, respectively. The characteristic expression profiles are summarized in Supplementary Results. Of note, all cases of the G4 subgroup (dominated by ERG deletion) were found to exhibit over-expression of the DUX4 gene mostly due to the DUX4-IGH fusion as described by a very recent work (Yasuda et al., 2016), suggesting a possible cooperative relationship between the two events.
+
+
+
+ Comparison of Mutation Frequencies Between Adult and Pediatric Cases
+
To explore the possible genetic differences between adults and children, we first compared the total number of non-silent mutations in the discovery cohort and found that it was significantly higher in adults than in children (Fig. 3a), a finding that was also consistent across gene expression cluster subgroups, especially G4, G7 and G8 (Fig. S6). The combined analysis of the findings of SNV or indels from the discovery and the recurrent cohorts (Table S10) revealed an interesting trend of higher frequencies of IKZF1 (P = 0.03), MLL2 (P = 0.03), JAK3 (P = 0.047) mutations in adults, and PTPN11 (P = 0.03) mutations in children (Table S11). Hyperdiploidy was found in 6 of our 14 pediatric patients with PTPN11 mutations but in only 36 of the other 204 without such mutations (P = 0.03). Of our 4 adult patients with PTPN11 mutations, 3 had pseudodiploidy and one hypodiploidy. Moreover, epigenetic modifiers involved in histone or DNA methylation and demethylation (MLL2, SETD2, ASXL1, EZH2, TET2 and KDM5C) were mutated more often in adults than in children (22.9% vs. 12.7%, P = 0.009, Table S11).
+
+
+
+ Comparisons of Clinical Relevance and Cooperating Mutations in Different Gene Expression Subgroups
+
We then compared the clinical relevance of the gene expression subgroups in the combined group. Patients in the recurrent cohort were assigned to each of the eight gene expression subgroups according to the dominant genetic subtypes. Overall, adult group had a worse five-year survival estimate than childhood one (Table S12, Table S13). With regard to gene expression cluster subgroups, G3 (ETV6-RUNX1) and G6 (hyperdiploid) were seen mostly in children and with quite favorable prognosis. In each of the remaining subgroups, adult cases tended to have inferior outcome to pediatric counterparts. We then examined, in subgroups G1, G2, G4, G5, G7 and G8, the proportions of adult versus pediatric patients with gene alterations combining the data of SNV or indels and CNVs in each of the six significantly mutated pathways – B-cell development, cell cycle, epigenetic modifiers, RAS signaling, JAK signaling and other signaling. The genes involved in the pathway analysis were listed in Table S14. We found that pediatric patients (n = 10) in G5 (ZNF384 fusion) lacked alterations of B-cell development genes as compared to adult patients (42.9%, n = 14, P = 0.02). Adult patients in G8 (BCR-ABL1 or BCR-ABL1-like) had significantly higher rates of somatic alterations in B-cell development genes (78.6%, n = 56) than did pediatric patients (53.6%, n = 28, P = 0.02), but lower frequencies of alterations affecting signaling genes other than those in the RAS or JAK pathway (5.4% vs. 21.4%, P = 0.05). For patients in G7 (MLL-rearranged), adults had more mutations in epigenetic modifiers compared to pediatric cases (42.9% of 28 vs. 11.8% of 17, P = 0.046). In this regard, adults had more frequent alterations in epigenetic modifiers in all subgroups except G1 (Fig. 8a, Fig. S7). When all patients were combined, genes involved in B-cell development and epigenetics were mutated more frequently in adults than in children (P = 0.01 and 0.03, respectively, Fig. 8b).
+
+
+
+
+ Discussion
+
In this comprehensive study on the genetic landscape of B-ALL, we found 105 genes with recurrent sequence mutations and identified 41 gene fusions including 29 in-frame ones which had not been reported previously. Particularly, RNA-seq analysis allowed us to identify 8 gene expression subgroups which demonstrated strong correlation with major gene fusions or genomic copy number abnormalities. These subgroups also showed a certain relationship to the immunophenotypes.
+
Of note, MEF2D and ZNF384 fusions were highly recurrent affecting 6.7% and 7.3% of adults, 3.4% and 3.9% of pediatric patients, respectively. Leukemia cases with these two fusions comprised distinct gene expression subgroups (G1 and G5), suggesting that the fusions are oncogenic drivers. Indeed, functional studies confirmed that MEF2D-BCL9, MEF2D-HNRNPUL1 and EP300-ZNF384 fusion genes profoundly disrupted B-cell development in vivo and in vitro. Interestingly, MEF2D fusions up-regulated HDAC9, a class II histone deacetylase, which in turn could cooperate with the fusion transcription factors in the repression of genes essential for B-lineage differentiation (RAG1). Moreover, EP300-ZNF384 fusion alone rapidly gave rise to overt acute leukemia. In our series, EP300-ZNF384 fusion conferred an intermediate prognosis and the gene set enrichment analysis showed significant upregulation of JAK-STAT pathway, suggestive of a potential benefit from treatment with inhibitors of this pathway. By contrast, MEF2D fusions appeared to be associated with a poor prognosis in both adults and children. It may be worthwhile to test histone deacetylase inhibitors in patients with this genotype. Moreover, we discovered a close association between ERG deletion and DUX4 overexpression in G4 subgroup, and future therapies may benefit from a deeper understanding of this cooperative mechanism of the two genetic defects.
+
While analyzing recurrent sequence abnormalities in adult and pediatric B-ALL groups, we found that adult patients had more gene mutations, especially IKZF1, MLL2, and JAK3, but fewer alterations of PTPN11 than did pediatric patients. A number of sequence mutations could cooperate with gene fusions or aberrant expression patterns in disease mechanisms and could exert effect on distinct clinical outcomes between different age groups. For example, the strong association of IKZF1 alteration with Philadelphia chromosome-positive and Philadelphia chromosome-like B-ALL and PTPN11 mutation with hyperdiploid B-ALL suggests that the enrichment for IKZF1 in adults, and PTPN11 in children, is associated with biases in leukemia subtype distribution between the two age groups. On the other hand, our findings of more mutations within each gene expression cluster subgroup, and the enrichment of genetic alterations affecting B-cell development and epigenetic modifier genes in adult B-ALL as compared to pediatric patients in several subgroups, suggest increased complexity of pathway involvements and different target cells for malignant transformation with increasing age, which may partly, albeit not fully, explain the dismal prognosis in adults with B-ALL. Recent studies of clonal evolution of pediatric (Ma et al., 2015, Mullighan et al., 2011) and adult (Xiao et al., 2016) ALL found enrichment of mutations in epigenetic regulators from diagnosis to relapse. It is thus possible that profound chromatin changes owing to more frequent epigenetic modifier abnormalities in adult B-ALL cells confer greater drug-resistance.
+
We would stress that even though the total number of patients in our study is relatively large, the number in each subgroup varies, compromising our ability to generate statistically significant results in some analyses. Second, patients are arbitrarily divided into adult and pediatric age groups using 18 years as the threshold, even though young children, adolescents, young adults and older adults differ in their responses to treatment. Further comparisons with larger sample sizes would allow better discrimination among a broader spectrum of age groups. Third, patients 17 to 18 years mostly receive adult treatment in China. New trials based on pediatric-type protocols would be expected to further improve outcome in these patients in the near future. In spite of the above limitations, our genomic profiling of adult and pediatric B-ALL provides useful clues to precise molecular classifications of this heterogeneous disease group and to identify new molecular vulnerabilities that could be exploited in the design of more effective targeted therapies.
+
+
+ Contributions
+
S.-J.C. and Z.C. were the principal investigators of the study. S.-J.C., Z.C., J.-Y.T., C.-H.P., J.-Q.M. and J.-Y.H. coordinated and oversaw the study. Y.-F.L., B.-Y.W. and W.-N.Z. collected samples, performed data analyses and most of the experiments; J.-Y.H. led and performed the bioinformatics and data analyses; J.-F.L., M.-J.W., Z.-G.Z., and J.K. participated in bioinformatics analyses; K.-K.W. provided help with bioinformatics analyses; M.Z., Q.W., Y.-M.Z., and L.J. participated in sample processing, PCR and targeted deep sequencing. B.C. performed cytogenetic analyses; X.-Q.W. helped in flow cytometry analyses; B.-S.L., J.-Y.T., J.C., L.-J.G., J.-M.L., J.W., J.-Q.M., Z.-X.S., B.C., X.-J.Y., Y.L., X.-Q.C., Y.-M.L., L.L., W.-G.Z., J.-S.Y. and J.-D.H. participated in sample collection and/or treatment of patients. S.-Y.W. and L.J. helped to perform next generation sequencing. M.Z. and Y.-J.D. helped in functional experiments. C.-H.P., J.Z., J.-J.Y., Y.L., G.W., X.Z., D.P. and C.C. of St. Jude Children's Research Hospital helped in data analyses and interpretation. Z.C., S.-J.C., C.-H.P., J.-Q.M., J.-Y.H., Y.-F.L., B.-Y.W. and W.-N.Z. wrote the manuscript.
+
+
+ Declaration of Interests
+
The authors declare no conflicts of interest.
+
+
+
+
+ References
+
+
+
+
+ Adzhubei
+ I.A.
+
+
+ Schmidt
+ S.
+
+
+ Peshkin
+ L.
+
+
+ A method and server for predicting damaging missense mutations
+ Nat. Methods
+ 7
+ 2010
+ 248
+ 249
+ 20354512
+
+
+
+
+
+
+ Andersson
+ A.K.
+
+
+ Ma
+ J.
+
+
+ Wang
+ J.
+
+
+ The landscape of somatic mutations in infant MLL-rearranged acute lymphoblastic leukemias
+ Nat. Genet.
+ 47
+ 2015
+ 330
+ 337
+ 25730765
+
+
+
+
+
+
+ Bassan
+ R.
+
+
+ Hoelzer
+ D.
+
+
+ Modern therapy of acute lymphoblastic leukemia
+ J. Clin. Oncol.
+ 29
+ 2011
+ 532
+ 543
+ 21220592
+
+
+
+
+
+
+ Chalandon
+ Y.
+
+
+ Thomas
+ X.
+
+
+ Hayette
+ S.
+
+
+ Randomized study of reduced-intensity chemotherapy combined with imatinib in adults with Ph-positive acute lymphoblastic leukemia
+ Blood
+ 125
+ 2015
+ 3711
+ 3719
+ 25878120
+
+
+
+
+
+
+ Chen
+ B.
+
+
+ Wang
+ Y.-Y.
+
+
+ Shen
+ Y.
+
+
+ Newly diagnosed acute lymphoblastic leukemia in China (I): abnormal genetic patterns in 1346 childhood and adult cases and their comparison with the reports from Western countries
+ Leukemia
+ 26
+ 2012
+ 1608
+ 1616
+ 22382891
+
+
+
+
+
+
+ Choi
+ Y.
+
+
+ Chan
+ A.P.
+
+
+ PROVEAN web server: a tool to predict the functional effect of amino acid substitutions and indels
+ Bioinformatics
+ 31
+ 2015
+ 2745
+ 2747
+ 25851949
+
+
+
+
+
+
+ Chudnovsky
+ Y.
+
+
+ Kim
+ D.
+
+
+ Zheng
+ S.
+
+
+ ZFHX4 interacts with the NuRD core member CHD4 and regulates the glioblastoma tumor-initiating cell state
+ Cell Rep.
+ 6
+ 2014
+ 313
+ 324
+ 24440720
+
+
+
+
+
+
+ Cohen
+ S.
+
+
+ Shoshana
+ O.-y.
+
+
+ Zelman-Toister
+ E.
+
+
+ The cytokine midkine and its receptor RPTPζ regulate B cell survival in a pathway induced by CD74
+ J. Immunol.
+ 188
+ 2012
+ 259
+ 269
+ 22140262
+
+
+
+
+
+
+ Den Boer
+ M.L.
+
+
+ van Slegtenhorst
+ M.
+
+
+ De Menezes
+ R.X.
+
+
+ A subtype of childhood acute lymphoblastic leukaemia with poor treatment outcome: a genome-wide classification study
+ Lancet Oncol.
+ 10
+ 2009
+ 125
+ 134
+ 19138562
+
+
+
+
+
+
+ Gabriel
+ A.S.
+
+
+ Lafta
+ F.M.
+
+
+ Schwalbe
+ E.C.
+
+
+ Epigenetic landscape correlates with genetic subtype but does not predict outcome in childhood acute lymphoblastic leukemia
+ Epigenetics
+ 10
+ 2015
+ 717
+ 726
+ 26237075
+
+
+
+
+
+
+ Geng
+ H.
+
+
+ Hurtz
+ C.
+
+
+ Lenz
+ K.B.
+
+
+ Self-enforcing feedback activation between BCL6 and pre-B cell receptor signaling defines a distinct subtype of acute lymphoblastic leukemia
+ Cancer Cell
+ 27
+ 2015
+ 409
+ 425
+ 25759025
+
+
+
+
+
+
+ Gocho
+ Y.
+
+
+ Kiyokawa
+ N.
+
+
+ Ichikawa
+ H.
+
+
+ A novel recurrent EP300-ZNF384 gene fusion in B-cell precursor acute lymphoblastic leukemia
+ Leukemia
+ 29
+ 2015
+ 2445
+ 2448
+ 25943178
+
+
+
+
+
+
+ Greenman
+ C.
+
+
+ Stephens
+ P.
+
+
+ Smith
+ R.
+
+
+ Patterns of somatic mutation in human cancer genomes
+ Nature
+ 446
+ 2007
+ 153
+ 158
+ 17344846
+
+
+
+
+
+
+ Haberland
+ M.
+
+
+ Arnold
+ M.A.
+
+
+ McAnally
+ J.
+
+
+ Phan
+ D.
+
+
+ Kim
+ Y.
+
+
+ Olson
+ E.N.
+
+
+ Regulation of HDAC9 gene expression by MEF2 establishes a negative-feedback loop in the transcriptional circuitry of muscle differentiation
+ Mol. Cell. Biol.
+ 27
+ 2007
+ 518
+ 525
+ 17101791
+
+
+
+
+
+
+ Haberland
+ M.
+
+
+ Montgomery
+ R.L.
+
+
+ Olson
+ E.N.
+
+
+ The many roles of histone deacetylases in development and physiology: implications for disease and therapy
+ Nat. Rev. Genet.
+ 10
+ 2008
+ 32
+ 42
+ 19065135
+
+
+
+
+
+
+ Heavey
+ B.
+
+
+ Charalambous
+ C.
+
+
+ Cobaleda
+ C.
+
+
+ Busslinger
+ M.
+
+
+ Myeloid lineage switch of Pax5 mutant but not wild-type B cell progenitors by C/EBPalpha and GATA factors
+ EMBO J.
+ 22
+ 2003
+ 3887
+ 3897
+ 12881423
+
+
+
+
+
+
+ Hirose
+ J.
+
+
+ Kouro
+ T.
+
+
+ Igarashi
+ H.
+
+
+ Yokota
+ T.
+
+
+ Sakaguchi
+ N.
+
+
+ Kincade
+ P.W.
+
+
+ A developing picture of lymphopoiesis in bone marrow
+ Immunol. Rev.
+ 189
+ 2002
+ 28
+ 40
+ 12445263
+
+
+
+
+
+
+ Jabbour
+ E.
+
+
+ O'Brien
+ S.
+
+
+ Konopleva
+ M.
+
+
+ Kantarjian
+ H.
+
+
+ New insights into the pathophysiology and therapy of adult acute lymphoblastic leukemia
+ Cancer
+ 121
+ 2015
+ 2517
+ 2528
+ 25891003
+
+
+
+
+
+
+ Ma
+ X.
+
+
+ Edmonson
+ M.
+
+
+ Yergeau
+ D.
+
+
+ Rise and fall of subclones from diagnosis to relapse in pediatric B-acute lymphoblastic leukaemia
+ Nat. Commun.
+ 6
+ 2015
+ 6604
+ 25790293
+
+
+
+
+
+
+ Mi
+ J.Q.
+
+
+ Wang
+ X.
+
+
+ Yao
+ Y.
+
+
+ Newly diagnosed acute lymphoblastic leukemia in China (II): prognosis related to genetic abnormalities in a series of 1091 cases
+ Leukemia
+ 26
+ 2012
+ 1507
+ 1516
+ 22297722
+
+
+
+
+
+
+ Mullighan
+ C.G.
+
+
+ Goorha
+ S.
+
+
+ Radtke
+ I.
+
+
+ Genome-wide analysis of genetic alterations in acute lymphoblastic leukaemia
+ Nature
+ 446
+ 2007
+ 758
+ 764
+ 17344859
+
+
+
+
+
+
+ Mullighan
+ C.G.
+
+
+ Zhang
+ J.
+
+
+ Kasper
+ L.H.
+
+
+ CREBBP mutations in relapsed acute lymphoblastic leukaemia
+ Nature
+ 471
+ 2011
+ 235
+ 239
+ 21390130
+
+
+
+
+
+
+ Notta
+ F.
+
+
+ Zandi
+ S.
+
+
+ Takayama
+ N.
+
+
+ Distinct routes of lineage development reshape the human blood hierarchy across ontogeny
+ Science
+ 351
+ 2016
+ aab2116
+ 26541609
+
+
+
+
+
+
+ Opferman
+ J.T.
+
+
+ Letai
+ A.
+
+
+ Beard
+ C.
+
+
+ Sorcinelli
+ M.D.
+
+
+ Ong
+ C.C.
+
+
+ Korsmeyer
+ S.J.
+
+
+ Development and maintenance of B and T lymphocytes requires antiapoptotic MCL-1
+ Nature
+ 426
+ 2003
+ 671
+ 676
+ 14668867
+
+
+
+
+
+
+ Prima
+ V.
+
+
+ Gore
+ L.
+
+
+ Caires
+ A.
+
+
+ Cloning and functional characterization of MEF2D/DAZAP1 and DAZAP1/MEF2D fusion proteins created by a variant t(1;19)(q23;p13.3) in acute lymphoblastic leukemia
+ Leukemia
+ 19
+ 2005
+ 806
+ 813
+ 15744350
+
+
+
+
+
+
+ Pui
+ C.-H.
+
+
+ PART XI: NEOPLASTIC LYMPHOID DISEASES, 93. Acute lymphoblastic leukemia
+
+
+ Kenneth Kaushansky
+ M.L.
+
+
+ Beutler
+ E.
+
+
+ Kipps
+ T.
+
+
+ Prchal
+ J.
+
+
+ Seligsohn
+ U.
+
+
+ Williams Hematology. 2460
+ eighth ed.
+ 2010
+ McGraw-Hill Education
+ USA
+ (2010-07-09)
+
+
+
+
+
+
+ Pui
+ C.-H.
+
+
+ Yang
+ J.J.
+
+
+ Hunger
+ S.P.
+
+
+ Childhood acute lymphoblastic leukemia: progress through collaboration
+ J. Clin. Oncol.
+ 33
+ 2015
+ 2938
+ 2948
+ 26304874
+
+
+
+
+
+
+ Roberts
+ K.G.
+
+
+ Morin
+ R.D.
+
+
+ Zhang
+ J.
+
+
+ Genetic alterations activating kinase and cytokine receptor signaling in high-risk acute lymphoblastic leukemia
+ Cancer Cell
+ 22
+ 2012
+ 153
+ 166
+ 22897847
+
+
+
+
+
+
+ Roberts
+ K.G.
+
+
+ Li
+ Y.
+
+
+ Payne-Turner
+ D.
+
+
+ Targetable kinase-activating lesions in Ph-like acute lymphoblastic leukemia
+ N. Engl. J. Med.
+ 371
+ 2014
+ 1005
+ 1015
+ 25207766
+
+
+
+
+
+
+ Schultz
+ K.R.
+
+
+ Carroll
+ A.
+
+
+ Heerema
+ N.A.
+
+
+ Long-term follow-up of imatinib in pediatric Philadelphia chromosome-positive acute lymphoblastic leukemia: Children's Oncology Group study AALL0031
+ Leukemia
+ 28
+ 2014
+ 1467
+ 1471
+ 24441288
+
+
+
+
+
+
+ Xiao
+ H.
+
+
+ Wang
+ L.M.
+
+
+ Luo
+ Y.
+
+
+ Mutations in epigenetic regulators are involved in acute lymphoblastic leukemia relapse following allogeneic hematopoietic stem cell transplantation
+ Oncotarget
+ 7
+ 2016
+ 2696
+ 2708
+ 26527318
+
+
+
+
+
+
+ Xie
+ H.
+
+
+ Ye
+ M.
+
+
+ Feng
+ R.
+
+
+ Graf
+ T.
+
+
+ Stepwise reprogramming of B cells into macrophages
+ Cell
+ 117
+ 2004
+ 663
+ 676
+ 15163413
+
+
+
+
+
+
+ Yasuda
+ T.
+
+
+ Tsuzuki
+ S.
+
+
+ Kawazu
+ M.
+
+
+ Recurrent DUX4 fusions in B cell acute lymphoblastic leukemia of adolescents and young adults
+ Nat. Genet.
+ 48
+ 2016
+ 569
+ 574
+ 27019113
+
+
+
+
+
+ Supplementary Data
+
+
+
+
Supplementary material.
+
+
+ Image 1
+
+
+
+
+
+
+
+
Supplementary data to this article can be found online at http://dx.doi.org/10.1016/j.ebiom.2016.04.038.
+
+
+
+
+
+
+
+
Flow chart of B-ALL patients in this study. A total of 383 patients including 179 adults (> 18 years) and 204 children (≤ 18 years) with newly diagnosed B-ALL were enrolled in this study. The 92 adults and 111 children with sufficient samples for next-generation sequencing formed the discovery cohort. An additional 87 adults and 93 children, designated the recurrent cohort, were screened for recurrent gene mutations and fusion genes.
+
+ Fig. 1
+
+
+
+
+
+
Patterns of somatic non-silent mutations identified by whole-exome sequencing (WES) and whole-genome sequencing (WGS) in B-ALL. (a) The percentages of distinct transitions and transversions of all non-silent SNVs in WES and WGS. (b) Correlation of mutation burdens and the age of B-ALL patients. A linear regression model was applied to calculate $R^2$ and the significance level, and a fitting curve was drawn to indicate the trend. (c) Proportions of non-silent mutation types according to their effects on protein coding.
+
+ Fig. 2
+
+
+
+
+
+
Comparison of non-silent mutations identified by whole-exome and whole-genome sequencing, and gene fusions identified by RNA-seq between adult and childhood samples. (a) Box plot of the numbers of non-silent mutations detected by whole-exome and whole-genome sequencing. (b) The distribution of the most frequently mutated genes. (c) All of the in-frame fusions identified by RNA-seq. The fusion events underlined represent novel fusion genes. The numbers in the bars are the exact numbers of cases with each fusion. For more commonly identified fusion genes, frequencies are indicated in parentheses after the numbers.
+
+ Fig. 3
+
+
+
+
+
+
Spectrum of acquired CNVs between adult (a) and childhood (b) samples. Copy number gains and losses are indicated in red and green, respectively.
+
+ Fig. 4
+
+
+
+
+
+
Overall survival of adult and pediatric B-ALL with the fusions involving MEF2D and ZNF384 genes. (a) Kaplan-Meier survival curves of adult B-ALL patients. (b) Kaplan-Meier survival curves of pediatric B-ALL patients.
+
+ Fig. 5
+
+
+
+
+
+
Schema of the wild-type and fusion proteins involving MEF2D and ZNF384 and results of functional studies. (a) Structural and functional domains of wild-type proteins and the most frequently identified fusion proteins. Arrows indicate breakpoints of the wild-type proteins. * labeled beside ZNF384 indicated two fusion points upstream of the coding region of ZNF384 (5 bp or 65 bp). (b) Representative flow cytometry results of the B-cell population of GFP+ bone marrow (BM) cells in vector control (Vector), MEF2D-HNRNPUL1 (MH) and MEF2D-BCL9 (MB) mice. The upper panel showed different B-cell subsets in total GFP+ cells (B220 vs. CD43), and bottom panel showed subsets in B220+ B-cell fraction (CD19 vs. CD43). (c) The percentages of B220, CD3 and Mac-1 in GFP+ peripheral blood (PB) cells in Vector, MEF2D, MH and MB mice. * and ** denote differences between MH and MB with Vector. (d) Responsiveness of the HDAC9 promoter to wild-type MEF2D and MEF2D fusions. 293T cells were cotransfected with a pGL4.15-Luc reporter containing the promoter region of HDAC9 and wild-type MEF2D or MEF2D fusions. Compared to wild-type MEF2D, MEF2D fusions displayed stronger transcriptional activity (MH vs. MEF2D, P < 0.001; MB vs. MEF2D, P = 0.03). (e) ChIP assays revealed MEF2D fusions had enhanced binding activity of HDAC9 promoter in 293T cells (MH vs. MEF2D, P = 0.01; MB vs. MEF2D, P = 0.02). ChIP DNA, immunoprecipitated with an anti-Myc tag antibody or goat IgG, was quantified with primers flanking HDAC9 promoter. (f) Cotransfection of wild-type MEF2D or MEF2D fusions plasmids and HDAC9 shRNA in JM1. Transfection of MEF2D fusions led to the upregulation of HDAC9 and the downregulation of RAG1, by contrast, reduced expression of HDAC9 mRNA level with transient transfection of HDAC9 shRNA caused remarkable rebound of RAG1 expression. (g) Flow cytometry analysis of different lineage markers of GFP+ PB in Vector, ZNF384, EP300-ZNF384 (EZ) four weeks after transplantation. 
(h) Kaplan-Meier survival curves of EZ mice (1st transplantation, n = 8; 2nd transplantation, n = 6). (i) Wright's staining of BM cytospin samples from control and EZ mice. (j) Naphthol AS-D acetate esterase (NAS-DAE) staining (up) and inhibition of NAS-DAE staining by sodium fluoride (NaF) (bottom) of the BM of EZ mice. (k) Flow cytometry analysis of the BM of EZ mice versus vector control. *P < 0.05; **P < 0.01; ***P < 0.001.
+
+ Fig. 6
+
+
+
+
+
+
Unsupervised hierarchical clustering identified specific subgroups of patients with shared gene expression patterns. Columns indicate ALL patients and rows are genes. The bottom panels show immunophenotype and genotype for each sample as well as significantly altered genes (Fisher's exact P < 0.05) within each of the eight unique gene expression subgroups. The immunophenotypes were determined according to the recommendation of European Group for the Immunological Characterization of Leukemias (EGIL).
+
+ Fig. 7
+
+
+
+
+
+
Comparisons between adult and pediatric B-ALL patients with regard to gene pathways. (a) Comparisons within each cluster subgroup excluding G3 and G6. A: number of adults; P: number of children. (b) Comparison of all the patients.
+
+ Fig. 8
+
+
+
+
+
+
Clinical characteristics and genetic types of patient cohorts.
+
+ Table 1
+
+
+
+
+
Discovery cohort
+
Recurrent cohort
+
+
+
+
Adult
+
Pediatric
+
Adult
+
Pediatric
+
+
+
+
+
Number
+
92
+
111
+
87
+
93
+
+
+
Age at diagnosis (year)
+
+
+
Median
+
31.4
+
5.1
+
36.7
+
6.8
+
+
+
Range
+
18.1–68.9
+
0.4–18.0
+
18.1–63.4
+
1.2–17.8
+
+
+
Gender, no. (%)
+
+
+
Male
+
48(52.2%)
+
63(56.8%)
+
43(49.4%)
+
63(67.7%)
+
+
+
Female
+
44(47.8%)
+
48(43.2%)
+
44(50.6%)
+
30(32.3%)
+
+
+
WBC count at diagnosis ($\times 10^{9}$/L)
+
+
+
Median
+
22.5
+
10.8
+
18.3
+
15.2
+
+
+
Range
+
0.4–438.6
+
0.9–508.8
+
1.1–420.0
+
1.1–767.7
+
+
+
Specific genetic abnormalities, no. (%)
+
+
+
+ BCR-ABL1
+
+
24(26.1%)
+
10(9.0%)
+
25(28.7%)
+
14(15.1%)
+
+
+
BCR-ABL1-likea
+
8(10.3%)
+
5(5.3%)
+
–
+
–
+
+
+
MLL rearranged
+
7(7.6%)
+
4(3.6%)
+
5(5.7%)
+
2(2.2%)
+
+
+
+ TCF3-PBX1
+
+
9(9.8%)
+
8(7.2%)
+
0(0.0%)
+
4(4.3%)
+
+
+
+ ETV6-RUNX1
+
+
0(0.0%)
+
22(19.8%)
+
1(1.1%)
+
13(14.0%)
+
+
+
ZNF384 fusionsb
+
9(10.0%)
+
6(5.7%)
+
4(4.6%)
+
2(2.2%)
+
+
+
MEF2D fusionsb
+
3(3.3%)
+
4(3.8%)
+
9(10.3%)
+
2(2.2%)
+
+
+
hyperdiploidy > 50
+
0(0.0%)
+
3(2.7%)
+
1(1.1%)
+
11(11.8%)
+
+
+
hypodiploidy
+
6(6.5%)
+
1(0.9%)
+
6(6.9%)
+
3(3.2%)
+
+
+
others
+
26(28.3%)
+
48(43.2%)
+
36(41.4%)
+
42(45.2%)
+
+
+
+
+
+
+
The BCR-ABL1-like signature was identified with gene expression data, which was available in 78 adults and 94 children subject to RNA-seq.
+
+
+
+
+
+
The data of MEF2D and ZNF384 fusions were available in 90 adults and 106 children in the discovery cohort, and 87 adults and 93 children in the recurrent cohort.
The anaplastic lymphoma kinase (ALK) protein drives tumorigenesis in subsets of several tumors through chromosomal rearrangements that express and activate its C-terminal kinase domain. In addition, germline predisposition alleles and acquired mutations are found in the full-length protein in the pediatric tumor neuroblastoma. ALK-specific tyrosine kinase inhibitors (TKIs) have become important new drugs for ALK-driven lung cancer, but acquired resistance via multiple mechanisms including kinase-domain mutations eventually develops, limiting median progression-free survival to less than a year. Here we assess the impact of several kinase-domain mutations that arose during TKI resistance selections of ALK+ anaplastic large-cell lymphoma (ALCL) cell lines. These include novel variants with respect to ALK-fusion cancers, R1192P and T1151M, and with respect to ALCL, F1174L and I1171S. We assess the effects of these mutations on the activity of six clinical inhibitors in independent systems engineered to depend on either the ALCL fusion kinase NPM-ALK or the lung-cancer fusion kinase EML4-ALK. Our results inform treatment strategies with a likelihood of bypassing mutations when detected in resistant patient samples and highlight differences between the effects of particular mutations on the two ALK fusions.
Anaplastic lymphoma kinase (ALK) [1] is an important therapeutic target in cancer, despite the function of the wild-type protein being poorly understood. While having key roles in early nervous system development [2], the levels of wild-type ALK subsequently drop off and remain at low levels for the remainder of life. Compounding ALK's enigmatic nature, the mammalian ligand(s) responsible for its activation are still up for debate [3–6]. Germline and acquired mutations in wild-type ALK, however, are found in the childhood cancer neuroblastoma [7–10]. Furthermore, chromosomal rearrangements fusing ALK's C-terminal kinase domain to a number of constitutively expressed genes result in malignant transformation through activation of multiple oncogenic signaling pathways [11, 12].
+
In NPM-ALK, identified in 1994 [13], ALK is fused to the constitutively expressed nucleophosmin gene [14]. Approximately 70% of anaplastic large cell lymphomas (ALCL) are positive for this or similar fusions [15]. In fact, it was through the discovery of this fusion that ALK was originally cloned [13]. Subsequently, in 2007, ALK was found fused to echinoderm microtubule associated protein-like 4 (EML4) yielding the fusion kinase EML4-ALK seen in approximately 3–5% of non-small cell lung cancers (NSCLC) [16, 17]. Several other ALK fusions since have been identified, including the RANBP2 (RNA binding protein 2)-ALK fusion seen in inflammatory myofibroblastic tumor (IMT, reviewed in [14]).
+
Since the highly successful use of imatinib and other BCR-ABL tyrosine kinase inhibitors (TKIs) against chronic myeloid leukemia [18], there have been great efforts to find inhibitors that turn off other such kinases [19]. In 2011, a mere four years after the discovery of EML4-ALK, the U.S. FDA approved the dual ALK/MET TKI crizotinib for ALK+ NSCLC. While initial response to crizotinib may be strong [20–24], patients inevitably succumb due to acquired resistance through multiple mechanisms, including kinase-domain mutations, prompting development of newer generation inhibitors (Table 1).
+
+
+
+ Kinase domain mutations leading to acquired resistance
+
Abbreviations: ALCL = Anaplastic Large Cell Lymphoma; NSCLC = Non-Small Cell Lung Cancer; IMT = Inflammatory Myofibroblastic Tumor; R = Resistant; S = Sensitive.
+
+
+
Seen by ultra-deep sequencing at low frequency.
+
+
+
+
Here we assess the resistance/sensitivity profiles of mutations that arose in patient-derived ALCL cell lines continually exposed to either crizotinib or the 2nd generation ALK/IGF-1R inhibitor ceritinib (LDK378; FDA approved in 2014 for treating ALK+ NSCLC patients who failed crizotinib) [25]. Each mutation was profiled against six ALK TKIs – crizotinib, ceritinib, alectinib (which recently received FDA approval for treating ALK+ NSCLC patients who failed crizotinib [26]), AP26113 (brigatinib; a dual ALK/EGFR inhibitor in phase I/II trials that received FDA breakthrough therapy designation in 2014 [19]), ASP3026 (an ALK TKI in phase I trials [27, 28]) and AZD3463 (a dual ALK/EGFR inhibitor in preclinical development) [19, 29, 30].
+
+
+ RESULTS
+
The ALK+ ALCL cell lines SUP-M2 and SU-DHL-1, which are highly sensitive to ALK inhibition (Figure 1A), were selected in increasing concentrations of either crizotinib or ceritinib to investigate mechanisms of acquired resistance. We previously showed resistance initially was caused by increased NPM-ALK expression in all of these selections, and that this ALK up-regulation induced TKI-dependency as drug withdrawal resulted in the death of resistant cells [31]. Individual subclones, however, were able to grow again without ALK inhibitor following prolonged passaging, leading to normalization of NPM-ALK expression. These lines were named after their respective parent lines (SUP or DHL1), the inhibitor they were grown in (CR for crizotinib resistance, LR for ceritinib (LDK378) resistance), and the top nanomolar concentration in which they were able to proliferate during selection. Despite restoration of baseline NPM-ALK expression each line still exhibited varying degrees of persistent TKI resistance. Sequencing of the ALK TKD by Sanger and deep sequencing methods had suggested second-site mutations could be driving resistance, but we did not further characterize these initial findings. For this report, we maintained resistant clones in their top TKI concentration and then twice repeated Sanger sequencing of cDNA amplified from NPM-ALK mRNA. This detected a single second-site mutation in each resistant sub-clone (Figure 1B). Two of the mutations (I1171S from SUP-CR500-2 and F1174L from SUP-LR-2) were present as single peaks in the sequencing, indicating homogeneous populations in the sub-clones following drug selections. The other three mutations (R1192P from DHL1-CR500, T1151M from DHL-LR150, and G1269A from DHL1-CR500-2) appeared together with underlying wild-type peaks, indicating heterogeneous cell populations. 
While some of these mutations have been observed previously in the context of ALK-fusion cancers, we characterize two novel mutations that thus far have only been observed in neuroblastoma – T1151M and R1192P – and two mutations not previously characterized in ALK+ ALCL (Table 1). Each mutation was modeled on an X-ray structure of the ALK kinase domain (Figure 1C; discussed further below).
(A) IC50s of parental ALK+ ALCL cell lines (SUP-M2 and SU-DHL-1) as well as an ALK-negative ALCL line (MAC-2A). Mean ± SEM for quadruplicates. (B) Sanger sequencing identifying each resistance mutation in cell lines. (C) Location of the five mutations identified in this study with respect to the ALK kinase domain shown as ball and stick models with associated surfaces colored by atoms.
+
+
+
+
We first compared each subclone to its respective parent line for sensitivity to the TKI in which it had been selected (Figure 2A; Table 2). In all cases, subclones were significantly less sensitive, as determined by a highly significant increase in IC50, but additional factors could have arisen during selections to promote resistance. Furthermore, three of the five mutations were present in heterogeneous populations of cells also containing the wild-type NPM-ALK (as discussed above; Figure 1B). Therefore, to isolate the specific effect of each identified ALK-kinase mutation, we employed IL3-dependent FL5.12 murine pro-B cells as an independent system [32]. We generated each mutation through site-directed mutagenesis in NPM-ALK cloned into a GFP co-expressing MSCV-based vector (Supplementary Figure S1). Retroviral introduction of wild-type NPM-ALK or mutants, followed by IL3 withdrawal, transformed the FL5.12 cells from cytokine-dependence to oncogene-dependence (Figure 2B). Transformed lines withdrawn from IL3 proliferate only as 100% GFP+ clones, and dependence on NPM-ALK is further demonstrated by the failure of kinase-dead NPM-ALK or empty vector (not shown) to permit cytokine-independent growth. We then assessed sensitivity of lines transformed by each mutant to six TKIs in comparison to those transformed by wild-type NPM-ALK (Figure 2C; Supplementary Figure S2; Table 3). All five mutations exhibited significant cross-resistance to the three approved inhibitors, crizotinib, ceritinib, and alectinib. The two mutations derived from lines that had been selected in the second-generation inhibitor ceritinib, T1151M and F1174L, were pan-resistant to all five TKIs, though degree of resistance varied (discussed below). The other three mutations, from crizotinib-selected lines, remained sensitive to at least one other inhibitor. 
This independent system demonstrates resistance in ALK+ ALCL lines is driven at least in part by acquired ALK kinase-domain mutations, and confirms previous observations that mutations arising in response to one drug may affect multiple inhibitors of the same target.
+
+
+
+ Resistance profiles of ALK mutations against six ALK TKIs
+
(A) Cell viability assays (with denoted IC50s) for each resistant line compared to the parental line from which they were derived. (B) Cellular transformation of FL5.12 cells infected with an MSCV-based vector co-expressing GFP and wild-type or mutant NPM-ALK constructs upon cytokine withdrawal. The kinase-dead mutant was unable to survive in the absence of cytokine (*). (C) IC50's for each FL5.12 NPM-ALK construct against six ALK TKIs (see also Supplementary Figure S2). (D) Same as (B) but with mutations created in a retroviral vector containing EML4-ALK. (E) IC50's for each FL5.12 EML4-ALK construct against six ALK TKIs (see also Supplementary Figure S4). Mean ± SEM for quadruplicates (A, C and E) or triplicates (B and D). (A, C and E) Unpaired two-tailed t-test was performed using GraphPad Prism version 6 to compare the IC50s for each mutant to their respective parental (A) or wild-type (C, E) cells. *p < 0.05, **p < 0.001.
+
+
+
+
+
+
+ IC50s of each resistant line compared to the parent line from which they were derived
+
+
+
+
+
+
Cell Line
+
+
+
SUP-M2
+
SU-DHL-1
+
I1171SSUP-CR500-2
+
F1174LSUP-LR150-2
+
R1192PDHL1-CR500
+
T1151MDHL1-LR150
+
G1269ADHL1-CR500-2
+
+
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
+
+
+
+
TKI
+
Crizotinib
+
67.75
+
1
+
128.9
+
1
+
878.03
+
12.96
+
-
+
-
+
887.18
+
6.88
+
-
+
-
+
933.275
+
7.24
+
+
+
Ceritinib
+
15.57
+
1
+
67.94
+
1
+
-
+
-
+
57.69
+
3.7
+
-
+
-
+
249.98
+
3.68
+
-
+
-
+
+
+
+
+
+
+
+ FL5.12 NPM-ALK mutant IC50s
+
+
+
+
+
+
FL5.12 NPM-ALK
+
+
+
WT
+
I1171S
+
F1174L
+
R1192P
+
T1151M
+
G1269A
+
+
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
+
+
+
+
TKI
+
Crizotinib
+
171.85
+
1
+
345.03
+
2.01
+
347.63
+
2.02
+
421.5
+
2.45
+
346.65
+
2.02
+
871.75
+
5.07
+
+
+
Ceritinib
+
34.03
+
1
+
75.8
+
2.23
+
140.45
+
4.13
+
167.08
+
4.91
+
95.78
+
2.81
+
46.04
+
1.35
+
+
+
Alectinib
+
11.13
+
1
+
192.5
+
17.29
+
46.43
+
4.17
+
48.84
+
4.39
+
25.42
+
2.28
+
89.27
+
8.02
+
+
+
+
+
To further identify the potential clinical implications of each mutation, and to assess whether the effects of each mutation are fusion-specific, we employed an additional in vitro model. Each mutation was generated via site-directed mutagenesis in EML4-ALK, the most common ALK fusion detected in NSCLC, and cloned into the same GFP-expressing MSCV-based vector backbone described above (Supplementary Figure S3). Once again, using FL5.12 murine pro-B cells, retroviral introduction of each EML4-ALK mutant construct and subsequent IL3 withdrawal resulted in cytokine-independent, oncogene-dependent cellular transformation (Figure 2D). The resistance/sensitivity profiles of each EML4-ALK mutant versus wild-type EML4-ALK to the same panel of TKIs were assessed in the same manner as above (Figure 2E; Supplementary Figure S4; Table 4). For mutations previously observed in ALK TKI resistance models, our results were largely consistent with individual findings reported by others (Table 1). We observed some important differences, however, between the effects of particular mutations on NPM-ALK vs. EML4-ALK (compare Figure 2E and Figure 2C). In particular, R1192P and T1151M – never before detected in resistant EML4-ALK-driven cell lines or patient samples – had substantially greater effect on NPM-ALK in promoting resistance than on EML4-ALK. These findings among others highlight potentially clinically important differences mediated by ALK's fusion partner affecting selection by tumors for particular mutations (see discussion).
+
+
+
+ FL5.12 EML4-ALK mutant IC50s
+
+
+
+
+
+
FL5.12 EML4-ALK
+
+
+
WT
+
I1171S
+
F1174L
+
R1192P
+
T1151M
+
G1269A
+
+
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
IC50
+
Fold Change
+
+
+
+
+
TKI
+
Crizotinib
+
70.59
+
1
+
260.38
+
3.7
+
68.14
+
0.97
+
86.27
+
1.25
+
89.07
+
1.27
+
395.2
+
5.62
+
+
+
Ceritinib
+
9.6
+
1
+
64.69
+
6.79
+
16.86
+
1.76
+
16.96
+
1.79
+
24.24
+
2.53
+
14.57
+
1.52
+
+
+
Alectinib
+
3.46
+
1
+
224.28
+
64.98
+
3.35
+
0.98
+
2.55
+
0.70
+
3.00
+
0.87
+
40.09
+
11.6
+
+
+
AP26113
+
4.13
+
1
+
24.87
+
6.09
+
5.79
+
1.42
+
2.7
+
0.66
+
3.81
+
0.93
+
4.4
+
1.08
+
+
+
ASP3026
+
42.21
+
1
+
245.13
+
5.83
+
110.97
+
2.63
+
90.59
+
2.11
+
105.73
+
2.51
+
71.79
+
1.71
+
+
+
AZD3463
+
16.79
+
1
+
38.27
+
2.28
+
15.83
+
0.94
+
10.16
+
0.59
+
18.99
+
1.13
+
11.62
+
0.69
+
+
+
+
+
We next assessed through immunoblotting the signaling consequences in FL5.12 cells of NPM-ALK-R1192P and T1151M, the two mutations not previously described as fusion ALK-kinase resistance mutations (Figure 3). Results suggest a particularly strong resistance phenotype of R1192P to all the inhibitors except AP26113, in line with the IC50 data. The effect of T1151M, meanwhile, was weaker, with higher concentrations of all drugs except ASP3026 overcoming its effects on ALK phosphorylation. We note that pERK levels decreased in crizotinib-treated FL5.12 cells not transfected with NPM-ALK (growing in IL3), and pERK is correspondingly more crizotinib sensitive in all the transformed lines as well. This suggests an off-target effect such as this drug's known activity against MET [33]. Similar to our published findings, phosphorylation of ERK and AKT were overall variable in these results, providing inconsistent markers of drug potency [31]. STAT3 activation, however, (indicated by phosphorylation at Y705), which is known to be ALCL's core survival pathway was strongly consistent with drug potency and ALK phosphorylation status [34, 35]. We therefore confirm drug-resistant ALK activation promoting ALCL's core survival pathway mediated by NPM-ALK-R1192P and T1151M, consistent with viability data.
+
+
+
+ Activated ALK and downstream signaling is preserved in the novel ALK-fusion mutations, R1192P and T1151M
+
Immunoblotting for ALK and downstream signaling targets at the indicated concentrations of six ALK TKIs for FL5.12 cells (A), FL5.12 cells infected with an MSCV-based vector co-expressing GFP and wild-type NPM-ALK (B), NPM-ALK R1192P (C) or NPM-ALK T1151M (D).
+
+
+
+
+
+ DISCUSSION
+
Resistance to ALK-kinase inhibition in lung cancer is described in several published studies that employed cell lines, patient samples, or both. Our goal has been to understand resistance in ALK+ ALCL, which is less well studied. We previously reported resistance driven by NPM-ALK up-regulation, which also paradoxically drives dependence on continued inhibitor exposure, defining for the first time oncogene overdose by ALK over-activation [31]. Here, we examine five NPM-ALK kinase domain mutations that drive resistance in ALCL in an absence of over-expression. Two of these, R1192P and T1151M, are previously unreported as ALK TKI resistance mutations. We evaluated mutations in an independent system permitting isolation of mutation effect from other factors that may arise in ALK-dependent cells continually exposed to inhibitors. These experiments showed R1192P, T1151M, and the other mutations in NPM-ALK adversely affected the activity of multiple ALK TKIs but also pointed to inhibitors for which sensitivity is maintained or even enhanced. Interestingly, cross-validation using the lung cancer-derived EML4-ALK fusion showed both similarities and key differences in the effect of mutations on particular drugs. Our study therefore characterizes novel NPM-ALK TKI resistance mutations, providing guidance for appropriate choice of alternate therapies as more of these drugs move toward regulatory approval. It also highlights key differences in the effect of mutations on the ALK kinase domain's sensitivity to inhibition depending on its fusion partner. For example, the fact that R1192P and T1151M have not previously been detected in resistant EML4-ALK-driven systems is not surprising since their effect on drug activity was significantly less for that kinase than for NPM-ALK. The profile of ALK TKI resistance mutations likely to arise in a tumor, therefore, is inherently shaped by ALK's fusion partner, as should choices of alternative therapy.
+
+ I1171S
+
I1171S previously was seen in an in vitro accelerated mutagenesis screen in ALK+ NSCLC using increasing concentrations of crizotinib [36] and somatic mutations in this residue also have been reported in neuroblastoma [10, 37]. Subsequently, this mutation was identified in an ALK+ NSCLC patient as conferring alectinib resistance [38]. Consistent with its original identification in EML4-ALK-driven systems, we find I1171S is potently resistance-conferring to FL5.12 cells dependent on this fusion. All six TKIs tested had significantly increased IC50s when tested vs. EML4-ALK-I1171S compared to wild-type (Figure 2E). In NPM-ALK, I1171S promoted resistance to crizotinib, the drug used in selecting the line in which it was isolated, ceritinib, and again pronounced resistance to alectinib. Ultra-deep sequencing previously identified I1171S at very low frequencies as a candidate ASP3026-resistant mutation in ALK+ ALCL [39], which is also confirmed in our functional system. Interestingly, however, NPM-ALK-I1171S promoted no resistance, indeed increased sensitivity compared to wild type, to both AP26113 and AZD3463. The effect of a resistance mutation conferring sensitivity to other inhibitors is similar to a recent high-profile case report in which the L1198F mutation, providing resistance to the third-generation inhibitor lorlatinib, resulted in resensitization to crizotinib [40]. The novel finding in our study is that this effect was seen for NPM-ALK but not EML4-ALK. Our data do not identify the reason for the differential effects of I1171S on AP26113 and AZD3463 (structurally similar compounds that also inhibit EGFR) between NPM-ALK and EML4-ALK. Given the FL5.12 system used is identical in all ways other than ALK's fusion partner, the effect is likely on-target, mediated by subtle ATP binding-pocket differences between the two fusion kinases.
+
Several other amino acid substitutions occur at I1171 residue, all promoting resistance to both crizotinib and alectinib regardless of whether their characterization was in the context of NSCLC or ALCL (Table 1) [39, 41–46]. There are some differences in TKI resistance between our findings for I1171S and other amino acid substitutions at this residue. For instance, in lymphoma studies, I1171N and I1171T are reported to be AP26113-resistant [39, 41], and while both appear resistant to ceritinib in NPM-ALK [39, 41], they show sensitivity in EML4-ALK [43, 44, 47, 48]. Further variability exists for both I1171N and I1171T, with the former being ASP3026-sensitive [39, 42] and the latter resistant [39] in NPM-ALK.
+
I1171 is part of the hydrophobic regulatory spine (R-spine), along with residues C1182, F1271 (DFG motif), H1247 (HRD motif) and D1311 (F-helix), which connects the two major ALK TKD lobes. Mutations in I1171 are believed to lock ALK in its active conformation, accelerating its activation through autophosphorylation [37, 43, 49–52]. Additionally, mutations at this residue decrease stability of inhibitor binding at the DFG motif, and, in the case of at least I1171N, modify the structure of the kinase-inhibitor complex [41]. Our data suggest NPM-ALK+ ALCL patients who develop resistance due to I1171S could benefit from treatment using either AP26113 or AZD3463, as both overcome this mutation in our independent system. Other inhibitors may be necessary in the context of EML4-ALK.
+
+
+ F1174L
+
The phenylalanine residue at position 1174, located at the end of the αC helix amongst a highly hydrophobic cluster of residues, maintains autoinhibitory interactions when ALK is in its inactive conformation [37, 49, 51] and is a mutational hotspot within the ALK kinase domain [53]. We found F1174L in a cell line grown in increasing concentrations of ceritinib. It is a frequent somatic mutation in neuroblastoma and increases ALK's affinity for ATP, despite not directly contacting the ATP pocket. Accordingly, resistance is not due to steric hindrance of drug binding, but rather promotion of ALK's active conformation indicated by accelerated autophosphorylation [10, 37, 54]. F1174L therefore acts as both a TKI-resistance and an activating mutation [7–10, 37, 54]. In agreement with our observations for NPM-ALK (Table 1; Figure 2C; Supplementary Figure S2), F1174L results in resistance to crizotinib in both ALK+ NSCLC and ALK+ IMT [55–57], although we did not observe a significant change in resistance in our EML4-ALK in vitro assay (p = 0.4846; Table 4; Figure 2E; Supplementary Figure S4). In contrast to previous reports in ALK+ NSCLC [56–58] along with our EML4-ALK in vitro assay, we observe resistance to alectinib, which is more in agreement with this mutation favoring ATP binding over inhibitor binding, and suggests potential differences in the conformational changes induced by a leucine substitution at this residue based on ALK's fusion partner. Studies involving other amino acid substitutions at position 1174 support this. Whereas F1174C and F1174V are resistant to crizotinib and ceritinib in ALK+ NSCLC cell line models and patient samples [36, 47, 59], F1174V and F1174I are sensitive to both TKIs in ALK+ ALCL [39, 60].
+
Furthermore, despite F1174C and F1174V showing resistance to alectinib in ALK+ ALCL [46, 60], this drug is able to overcome an isoleucine substitution in the same model [39]. Additionally, while we observe a small fold-increase in the IC50 for AP26113 (1.4-fold) for this mutant compared to wild-type NPM-ALK, the increase is not statistically significant (p = 0.0513), and AP26113 has been reported to show sensitivity to both F1174V and F1174I [39, 60]. Our data for both fusions, in agreement with other studies in NPM-ALK, show ASP3026 is unable to overcome any amino acid substitutions at this location [39, 60]. Furthermore, while AP26113 may show slight efficacy in overcoming ALK+ ALCL resistance acquired by this mutation (p = 0.0513), it is predicted to be ineffective in ALK+ NSCLC, with the opposite being true for AZD3463 with respect to each disease. This further highlights the care required when considering treatment options to overcome resistance in patients, as both the specific substitution and the particular ALK-fusion may be key factors.
+
+
+ R1192P
+
The R1192P mutation, residing in the N-lobe, is one of the most frequent germline mutations in neuroblastoma [10]. In a similar fashion to both I1171 and F1174 mutations, R1192P results in accelerated autophosphorylation of ALK's TKD [37]. In fact, this mutation is considered an exception, as most residues in the N-lobe have smaller impacts. We report here for the first time the identification of this mutation in any ALK-fusion cancer, in a cell line selected in crizotinib. Perhaps unsurprisingly, due to its ability to strongly turn on the kinase activity of ALK, all but one TKI proved to be ineffective at overcoming resistance to this mutation in NPM-ALK (Table 1; Figures 2C, 2D and 3; Supplementary Figure S2). The only exception, AP26113, may be an attractive therapeutic in the event of resistance by this mutation. The effect of this mutation was dramatically less vs. all drugs when it was introduced instead to EML4-ALK, likely explaining, as discussed above, why it has never been reported in resistant ALK+ lung cancer. Detailed structural determinations are outside the scope of this report, but it is interesting to note that all three mutations reported to activate ALK kinase rather than block drug binding to the ATP pocket – F1174L, R1192P, and T1151M (below) – have greater effect on drug activity when found in NPM-ALK than in EML4-ALK. Follow-up structural chemistry studies to this report may confirm and identify the basis for this effect.
+
+
+ T1151M
+
T1151M was identified recently in neuroblastoma patients [37] and we present here the first report of this mutation in the ALK-fusion context, arising in a cell line grown in ceritinib. An amino acid insertion, 1151Tins is well-characterized as causing cross-resistance to several ALK TKIs in ALK+ NSCLC cell lines and patients [25, 47, 61], although one study reports that alectinib is effective against it [58]. Additionally, 1151Tins was reported in neuroblastoma [62]. Finally, the amino acid substitution T1151K was also found in an accelerated mutagenesis screen in ALK+ NSCLC assessing crizotinib resistance [36].
+
Mutations at residue 1151, located at the N-terminal lobe of the ALK catalytic domain, alter affinity of the mutated kinase for ATP, and diminish inhibitor binding through conformational changes, despite lying some distance from the ATP pocket [47, 61]. Furthermore, mutations in this domain lead to activation of the ALK TKD, albeit more modestly than I1171, F1174, or R1192 [37]. It is therefore perhaps unsurprising that 1151Tins, T1151K, or, in our case, T1151M in NPM-ALK, cause cross-resistance to several ALK TKIs due to the mutant kinase domain strongly favoring ATP binding, as confirmed by fold-changes in IC50 and western blotting (Table 1; Figure 2C and 2D; Supplementary Figure S2). Somewhat surprising is the lack of significant change in response to AP26113 (p = 0.2090) and sensitivity to alectinib (p = 0.0169; in agreement with Kodama et al. [58] as mentioned above) when compared to wild-type EML4-ALK, further highlighting the importance of the particular amino acid substitution, as well as the potential differences in protein folding dependent upon the fusion partner in question.
+
+
+ G1269A
+
In agreement with our observations, this mutation resists crizotinib in both NPM-ALK (as expected as it was isolated from a cell line grown in crizotinib) and EML4-ALK, but is sensitive to AP26113 in both fusions [47, 63, 64]. A serine substitution at the same residue behaves similarly in EML4-ALK [36, 55]. G1269 lies in the ATP-binding pocket, making direct contact with crizotinib. Using computer modeling (Figure 1C), we observed that the dichlorofluorophenyl ring of crizotinib binds near G1269 (distance 3.49 Å between the Cα of G1269 and the fluorine atom). G1269A causes a steric clash between the dichlorofluorophenyl ring and the alanine's methyl group, apparently sufficient to disrupt the drug's activities [47, 63, 65]. Much like T1151 mutations, G1269 mutations cause modest constitutive ALK tyrosine kinase activity [37].
+
While we observed resistance to ceritinib in both fusions (Table 1; Figure 2C; Supplementary Figure S2), others have shown that ceritinib effectively overcomes G1269A in both ALK+ NSCLC [25, 47] and ALK+ ALCL [64] due to it stabilizing ALK's conformational dynamics and exhibiting increased potency for this mutant over WT ALK [48]. Once again, the degree of resistance in our study was only mild (1.35-fold for NPM-ALK and 1.52-fold for EML4-ALK), albeit still statistically significant (p = 0.0018 and < 0.0001, respectively), which may explain this difference. Another discrepancy is with ASP3026, predicted to overcome resistance in NPM-ALK by our data but unable to do so in a separate study in both ALK+ NSCLC and ALK+ ALCL [64]. However, the sensitivity reported in our study is just barely significant (p = 0.0495), perhaps explaining this discrepancy. Additionally, while we and Fontana and colleagues [64] report that alectinib is unable to overcome G1269A in the context of either disease, two other studies showed a response to alectinib in vitro, in xenograft models, and in a patient harboring this mutation [58, 66]. Borderline results in vitro may therefore be less reliable predictors of responses in vivo. Indeed, others have shown some TKIs with IC50 shifts in vitro may still cause tumor regression in vivo [47]. Further trial and error, including studies in resistant patients, will be necessary to clarify the situation for some mutations. Our data suggest AZD3463 may be able to overcome G1269A in both fusion-cancers. Both ASP3026 and AZD3463 interact differently compared to crizotinib with the active site near G1269. The X-ray crystal structure of ASP3026 with the ALK kinase domain (PDB code: 2XB7) shows that the isopropylsulfone group interacts with lysine 1150 through hydrogen bonding. 
This bond draws ASP3026's binding away from G1269 (distance = 5.85 Å between the G1269 Cα and the methylene carbon), and it is expected that ASP3026 would bind the G1269A mutant in a similar fashion.
+
The ALK kinase has several mutational hotspots, such as F1174, F1245 and R1275 [53]. The identification of two mutations not previously reported in any malignancy driven by an ALK-fusion (T1151M and R1192P) is therefore highly informative. Despite a predilection for certain mutations conferring resistance, “novel” mutations at sites other than mutational hotspots can still occur, perhaps albeit at lower frequencies, as the tumor evolves in a desperate attempt to evade death. Our findings with mutations favoring ATP-binding and dampening inhibitor-binding and/or constitutively activating ALK's TKD, suggest newer competitive inhibitors are desperately needed to overcome acquired resistance. One such inhibitor, the first 3rd generation ALK (and ROS1) inhibitor PF-06463922 (lorlatinib), exhibits increased potency against F1174L, 1151Tins, I1171T and G1269A in preclinical EML4-ALK models [67, 68], with impressive anti-tumour activity observed in xenograft neuroblastoma models harboring F1174L [53]. Another new inhibitor, a structural analog of alectinib, JH-VIII-157-02, also shows great promise against a series of ALK resistance mutations, including G1269A, F1174L and 1151Tins [69]. Further development and screening of newer generation ALK inhibitors is highly important in overcoming resistance mutations in patients suffering from ALK-related malignancies.
+
+
+
+ MATERIALS AND METHODS
+
+ Cell lines, reagents and inhibitors
+
RPMI 1640 and penicillin/streptomycin (P/S) supplemented with 10% fetal bovine serum (FBS) for SU-DHL-1 and MAC 2A, or with 20% FBS for SUP-M2. Phoenix cells grown in DMEM plus 10% FBS and P/S. FL5.12 cells cultured in RPMI 1640 with 10% FBS, P/S, ± 10% WeHi-3B supernatant and murine IL3 (400 pM, eBioscience). All lines purchased from DSMZ apart from FL5.12's (gift from Wendel lab). Crizotinib and alectinib were purchased from Selleck Chemicals; ceritinib (LDK378) was kindly provided by Novartis.
+
+
+ Cell viability assays
+
3000 cells per well were seeded in serial dilutions of the indicated ALK TKIs. Viability was assessed after 72 hours (Cell Titer Glo®; Promega) by measuring luminescence on a BioTek Synergy HT plate reader. GraphPad Prism version 6 was used to calculate IC50s with non-linear curve-fit regression.
+
+
+ Computer modeling
+
Images in Figure 1C were created using the X-ray structure of ALK kinase domain using the Maestro software (Schrodinger Inc.)
+
+
+ Identification of kinase-domain mutations
+
RNA extraction (RNeasy® Mini Kit; QIAgen) followed by cDNA synthesis (Taqman® Reverse Transcriptase kit; Roche) was carried out two independent times for all resistant cell lines as well as their respective parental lines. The NPM-ALK fusion was PCR amplified (Primers - Forward: GTCCGCCTTCTCTCCTACCT, Reverse: TTGGCACAAAACAAAACGTG) on a BioRad T100 Thermal Cycler. The kinase domain was sequenced by Sanger sequencing. The sequences obtained for the resistant lines were aligned to their respective parental lines using the ClustalW online sequence alignment tool to identify base pair changes. The identified mutations were the same for both independent sets of sequencing.
+
+
+ Protein extraction, quantification and immunoblotting
+
As described previously [31], loading 30 μg per lane, with all primary and secondary antibodies from Cell Signal Technology, developed using autoradiograph film (GeneMate).
+
+
+ Site directed mutagenesis
+
Plasmid purification (PerfectPrep Spin Mini Kit; 5Prime) was carried out for MSCV-based -NPM-ALK and -EML4-ALK vectors mutated using site directed mutagenesis (QuikChange II XL; Agilent Technologies). Sanger sequencing was then used to confirm the presence of mutations in the ALK TKD.
+
+
+ Transfections, infections and cellular transformation
+
As described previously [31] using an MSCV-based vector co-expressing GFP plus either wild-type NPM-ALK, wild-type EML4-ALK, each NPM-ALK mutation, the same mutations in EML4-ALK, or an NPM-ALK kinase dead mutation (K210R) as a negative control. Cellular transformation was assessed by flow cytometry (Guava EasyCyte).
The MIG-NPM-ALK and MIG-NPM-ALK-Kd plasmids were generously provided by Dr. Emanuela Colombo (University of Milano, Milan, Italy). The pCDH1-MCS1-EML4-ALKv3-EF1-Puro plasmid was a generous gift from Dr. Robert Doebele (University of Colorado, USA). Custom DNA Constructs (Ohio, USA) cloned the EML4-ALK ORF into the MSCV-backbone.
+
+
+
+
+ CONFLICTS OF INTEREST
+
+
The authors declare no conflicts of interest.
+
+
+
+ GRANT SUPPORT
+
+
This work was supported by the NIH/NCGI grant 1R01CA190696-01 awarded to JHS.
+
+
+
+ Authors' contributions
+
+
Conceived and designed the experiments: ADA and JHS; Performed the experiments: ADA, LL, SSR, VG, MJG, PP, EOT and MW; Analyzed the data: ADA, LL, SSR, VG, MJG, PP, EOT, MW and JHS; Wrote the paper: ADA and JHS.
Epidermal growth factor receptor exon 20 insertion (EGFRex20ins) mutations
+ represent approximately 4–12% of EGFR mutations and are generally refractory to
+ the 1st and 2nd generation EGFR tyrosine kinase inhibitors (TKIs). Development of
+ effective therapies for patients with EGFRex20ins mutant non-small-cell lung carcinoma
+ (NSCLC) represents a great unmet need. Preclinical models have shown that osimertinib
+ is active in NSCLC harboring EGFRex20ins, while the antitumor activity of osimertinib
+ remains to be evaluated in patients with EGFRex20ins mutations.
+
+
+ Methods
+
Tumor genotyping was performed in 2316 Chinese NSCLC cases with targeted
+ next generation sequencing (NGS) covering the whole exons of EGFR gene. The frequency
+ and genetic characteristics of EGFRexon20ins mutations were analyzed. Furthermore, six
+ patients with specific EGFRexon20ins mutations and receiving osimertinib 80 mg
+ once daily were retrospectively included to assess the antitumor activity and safety
+ of monotherapy osimertinib.
+
+
+ Results
+
EGFRex20ins mutations were identified in 4.8% (53/1095) of EGFR mutant
+ NSCLC and 2.3% (53/2316) of all NSCLC cases. The most frequently identified
+ EGFRexon20ins is A767_V769dup (17/53, 32.1%). We found that the genetic characteristics
+ of EGFRex20ins mutations in Chinese patients with NSCLC were comparable to those
+ reported in Caucasian patients. Four patients with osimertinib therapy achieved
+ partial response and the rest stable disease. Median progression free survival (PFS)
+ was 6.2 months (95% confidence interval 5.0–12.9 months; range
+ 4.9–14.6 months). The most common adverse events (AEs) were diarrhea
+ (2/6), pruritus (2/6), stomatitis (1/6) and nausea (1/6). No grade 3 or higher AEs were
+ documented.
+
+
+ Conclusions
+
This study revealed that the genetic characteristics of EGFRex20ins
+ mutations in Chinese patients with NSCLC were comparable to those reported in
+ Caucasian patients. Furthermore, our study is the first to demonstrate promising antitumor
+ activity of osimertinib in certain EGFRex20ins mutant advanced NSCLC patients,
+ indicating that osimertinib treatment for EGFRex20ins positive patients deserves
+ further study.
+
+
+ Electronic supplementary material
+
The online version of this article (10.1186/s12885-019-5820-0) contains supplementary
+ material, which is available to authorized users.
During the past decades, the identification of specific genomic aberrations
+ and their corresponding targeted therapies have significantly improved the outcome and
+ quality of life for patients with non-small-cell lung cancer (NSCLC). Epidermal growth
+ factor receptor (EGFR) mutation is the first identified targetable driver mutation that
+ was reported in about 17 and 50% of lung adenocarcinoma in Caucasians and Asians,
+ respectively [1–3]. The most common cluster of mutations in EGFR gene include inframe
+ deletions around the LeuArgGluAla motif (residues 746–750) of exon 19, and the
+ Leu858Arg (L858R) point mutation in exon 21, each accounting for about 45% of all EGFR
+ mutations. These mutations are termed classic EGFR mutations and are more common in tumors
+ in women, Asians, never smokers, and those with adenocarcinoma [4–6]. The frequency and
+ distribution of EGFR mutations in patients with different ethnic backgrounds also differ [7, 8].
+
Patients with classic EGFR mutations generally have profound radiographic and
+ clinical response to monotherapy EGFR tyrosine kinase inhibitors (TKIs) [9–12].
+ However, some unclassical EGFR mutations are associated with poor responses with
+ reversible EGFR TKIs [13, 14]. Among these are most EGFR exon 20 insertion (EGFRex20ins)
+ mutations reported so far. Exon 20 of EGFR encompasses nucleotides that translate into
+ amino acids at positions 762 to 823. It contains a C-helix (residues 762–766) and the
+ loop following C-helix (residues 767–774), where the insertions could induce
+ ligand-independent EGFR pathway activation and give rise to tumorigenesis [15]. The true frequency of EGFRex20ins mutations
+ within the EGFR mutant lung cancer is inconsistent, contributing to roughly 4–12%
+ of all EGFR mutations identified [16–18]. In most reports, EGFRex20ins mutations are more
+ common in tumors among never-smokers [16, 18]. However, the genetic and clinical characteristics
+ of NSCLCs harboring EGFRex20ins mutations in Asian populations remain unknown due to the
+ lack of large comprehensive genomic studies.
+
Preclinical and clinical studies have shown that most EGFRex20ins (except for
+ few subtypes such as EGFR A763_Y764insFQEA) mutant tumors confer resistance to the 1st and
+ 2nd generation EGFR TKIs because the insertions produce steric hindrance and activate EGFR
+ without saliently decreasing affinity for ATP or enhancing affinity for EGFR TKIs [15, 19–23]. Several clinical studies specifically involving
+ tumors with EGFRex20ins mutations are ongoing, with some of them showing preliminary
+ promising activity [24, 25]. However, there are still no established molecular targeted drugs
+ for NSCLC patients with EGFRex20ins mutations. Development of more effective therapeutics
+ for these specific patients represents a great unmet need.
+
Osimertinib is an oral, potent, irreversible EGFR TKI selective for
+ sensitizing EGFR and EGFR T790M resistance mutations. Preclinical studies have
+ reported that osimertinib was active in EGFRex20ins mutant cell lines and tumor xenografts
+ with a wide therapeutic window [26–29]. However, whether the preclinical activity of
+ osimertinib could translate into clinical effect remains unclear.
+
Herein, we explored the characteristics of EGFRex20ins, as well as the
+ patterns of co-mutations (mutually exclusive or inclusive) in Chinese NSCLC patients. In
+ addition, we assessed the safety and antitumor activity of osimertinib in six advanced
+ NSCLC patients with various EGFRex20ins mutations.
+
+
+ Methods
+
+ Patients
+
The study included a cohort of patients who were referred to OrigiMed
+ (Shanghai, China) for targeted next generation sequencing (NGS) test in China between
+ August 2016 and July 2018. Patient samples and clinical information including gender,
+ age and histologic subtypes were retrieved at the time of referral. Six patients with
+ EGFRex20ins mutant stage IV NSCLC who were treated with osimertinib 80 mg once
+ daily were included to evaluate the antitumor activity of osimertinib. Data were
+ retrospectively collected from digital medical records. The study was approved by the
+ Institutional Review Board of SYSUCC and written informed consent was obtained for each
+ patient prior to sample collection. Written consents were obtained from parents if
+ patients were under 16 years old.
+
+
+ EGFR ex20ins and co-mutations analysis
+
DNA from Formalin-fixed, paraffin-embedded tumor tissue and matched blood
+ samples was extracted. Comprehensive genomic profiling was performed by NGS with a 37 or
+ 450 cancer related gene panel covering the whole exons of EGFR gene at a mean coverage
+ depth of >800X (1547 cases with 37 panel, and 769 cases with 450 panel). The genomic
+ alterations including single base substitution, insertions/deletions, copy number
+ variations, as well as gene rearrangement and fusions were assessed. As for six patients
+ treated with osimertinib, genetic status was also determined through NGS prior to
+ osimertinib.
+
+
+ Response evaluation
+
All six patients received oral osimertinib 80 mg once daily.
+ Radiological follow-up was performed at the first months then once every 2 months with
+ computed tomography (CT) of the thorax and upper abdomen. Regular cerebral magnetic
+ resonance imaging (MRI) with CT was carried out once any patient was confirmed brain
+ metastasis. Response was assessed according to Response Criteria in Solid Tumors
+ (RECIST) 1.1 [30]. Progression-free survival
+ (PFS) was defined as the interval from the date of initiation of osimertinib therapy to
+ the date of disease progression or death from any cause, whichever occurred first.
+
+
+ Statistical analyses
+
The statistical analyses were performed using the SPSS 20.0 (Chicago, IL,
+ USA). The difference in the frequency of each group was analyzed by the Chi-square test
+ or Fisher’s exact test. The median age between groups was compared using
+ nonparametric test. A two-sided p value < 0.05 was considered
+ statistically significant.
+
+
+
+ Results
+
+ Frequency and genetic characteristics of EGFRex20ins mutations
+
Among the 2316 unselective NSCLC tumors, EGFR mutations were identified in
+ 1095 cases (47.3%). EGFRex20ins mutations were detected in 53 cases, contributing 2.3%
+ of all NSCLC cases and 4.8% of EGFR-mutant tumors. Compared with Foundation Medicine
+ (FM) data representing the largest EGFRex20ins cohort, we found that although EGFR
+ mutations were much more common in our Chinese NSCLC patients than that of Western
+ population (47.3% vs 15.5% in FM), EGFRex20ins mutation represented a much smaller group
+ in EGFR mutant NSCLC (4.8% vs 11.7% in FM, p < 0.001)
+ [17]. Of note, our result was comparable with FM
+ cohort in frequency of EGFRex20ins in total NSCLC (2.3% vs. 1.8% in FM, p = 0.12,
+ Fig. 1). The smaller proportion of
+ EGFRex20ins in Chinese populations is due to the larger scale of EGFR mutations than
+ western groups.
+
+
+
Frequency of EGFRex20ins mutations. a. Comparison of EGFRex20ins
+ frequency in total NSCLC patients (OrigiMed 2.3% vs. FM 1.8%, p = 0.12)
+ and adenocarcinoma patients (OrigiMed 2.7% vs. FM 2.3%, p
+ = 0.32). b. Comparison of EGFRex20ins frequency in
+ EGFR-mutant NSCLC patients (OrigiMed 4.8% vs. FM 11.7%, p < 0.001)
+ and EGFR-mutant adenocarcinoma patients (OrigiMed 4.8% vs. FM 13.9%, p < 0.001).
+ EGFRex20ins, epidermal growth factor receptor exon 20 insertions; NSCLC, non-small cell
+ lung cancer; FM: Foundation Medicine. * p < 0.001
+
+
+
+
The demographic and clinical characteristics of these patients are
+ summarized in Tables 1 and 2. Of the patients with EGFR mutations, EGFRex20ins ranked the
+ fourth most common type, following EGFR exon 19 deletions (436/1095, 39.8%), L858R
+ (410/1095, 37.4%) and T790 M mutations (58/1095, 5.3%) (Fig. 2a). The majority of EGFRex20ins mutations were
+ identified in lung adenocarcinoma (92.5%, 49/53). EGFRex20ins were also detected in two
+ adenosquamous cases and two NSCLC not otherwise specified (NOS). Median age of patients
+ with exon 20 insertion is 57 (31–85) years.
+
+
+
Histologic and clinical characteristics of non-small cell lung cancer patients
+ tested in this study
+
+
+
+
+
+
Adenocarcinoma
+
Squamous
+
Others a
+
Total
+
+
+
+
+
Total cases
+
1820
+
290
+
206
+
2316
+
+
+
Median age
+
60(26–92)
+
62(30–88)
+
60(12–83)
+
61(12–92)
+
+
+
Sex, M/F
+
917/903
+
247/43
+
144/62
+
1308/1008
+
+
+
EGFR mutant cases
+
1021
+
11
+
63
+
1095
+
+
+
Frequency in total
+
56.1%
+
3.8%
+
30.6%
+
47.3%
+
+
+
Median age
+
60(26–86)
+
64(38–89)
+
57(27–74)
+
60(26–89)
+
+
+
Sex, M/F
+
397/624
+
5/6
+
28/35
+
430/665
+
+
+
EGFRex20ins cases
+
49
+
0
+
4
+
53
+
+
+
Frequency in EGFR mutant
+
4.8%
+
0
+
6.3%
+
4.8%
+
+
+
Frequency in total
+
2.7%
+
0
+
1.9%
+
2.3%
+
+
+
Median age
+
57(31–85)
+
–
+
56.5(49–70)
+
57(31–85)
+
+
+
Sex, M/F
+
26/23
+
–
+
2/2
+
28/25
+
+
+
+
+
a Other pathological type in NSCLC, including adenosquamous lung
+ cancer, NSCLC not otherwise specified, large cell lung cancer, neuroendocrine carcinoma
+ and sarcomatoid carcinoma
Clinical comparison of EGFR ex20ins NSCLC with EGFR
+
+
+
+
+
+
EGFR 20ins
+
EGFR WT
+
EGFR 19del
+
EGFR L858R
+
EGFR T790 M
+
+
+
+
+
Median age
+
57
+
61
+
59.5
+
62
+
59
+
+
+
p value vs. EGFR ex20ins
+
+
0.399
+
0.897
+
0.206
+
0.792
+
+
+
Sex, M/F(%M)
+
28/25(53%)
+
878/343(72%)
+
177/259(41%)
+
130/280(32%)
+
21/37(36%)
+
+
+
p value vs. EGFR ex20ins
+
+
0.003
+
0.088
+
0.002
+
0.078
+
+
+
+
+
WT and EGFR mutant (19del/L858R/ T790 M) NSCLC
+
WTwild type
+
+
+
+
+
Distribution of EGFR mutations and EGFR exon 20 mutation types and EGFRex20ins in
+ this study. a. Distribution of EGFR mutations. b. Distribution
+ of EGFR exon 20 mutation types and EGFRex20ins mutations
+
+
+
+
In total, 20 different variants of exon EGFRex20ins were identified in 53
+ NSCLC patients. The most frequent variant is A767_V769dup (32.1%, 17/53), followed by
+ P772_H773dup (4/53, 7.5%), S768_D770dup (4/53, 7.5%), N771_H773dup (4/53, 7.5%),
+ A763_Y764insFQEA (3/53, 5.7%). Unique EGFRex20ins mutations detected by NGS were
+ summarized in Fig. 2b.
+
EGFRex20ins tended to be exclusive with NSCLC driver genes such as
+ EGFR mutation ERBB2, ALK, BRAF and RET mutations. The most common co-mutations
+ were TP53 (49.1%). Co-mutation pattern compared with EGFR ex19dels, L858R, T790 M
+ and other EGFR uncommon mutations were summarized in Additional file 1: Figure S1 and Additional file 2: Figure S2.
+
+
+ Antitumor activity of monotherapy Osimertinib for patients with EGFRex20ins
+ mutations
+
From August 28th, 2017 to April 30th, 2018, six patients with stage IV lung
+ adenocarcinoma bearing EGFRex20ins started osimertinib treatment. Median follow-up time
+ was 6.2 months. Previous treatment, detailed mutation characteristics and the
+ outcome of osimertinib are shown in Table 3.
+ All the patients had stage IV lung adenocarcinoma and predominantly females (5/6).
+ Before osimertinib treatment, four patients were observed metastasis in lungs and
+ pleura. Patient 5 was diagnosed with brain metastasis and patient 6 with bone
+ metastasis. The median age is 64 years old. Two patients received osimertinib as
+ first line therapy and two patients had previous treatment with other EGFR TKIs. Per
+ RECISIT 1.1, four (67.7%) patients achieved partial response (PR) and the remaining two
+ patients (33.3%) obtained stable disease (SD). Median progression-free survival (PFS)
+ was 6.2 months (95% confidence interval 5.0–12.9 months; range
+ 4.9–14.6 months). Treatment-related adverse events (AEs) included diarrhea
+ (2/6), pruritus (2/6), stomatitis (1/6) and nausea (1/6). No grade 3 or more AEs were
+ documented. At data cut-off (December 1st, 2018), Two patients had sustaining disease
+ control and remained on osimertinib treatment, while the other four patients had
+ progressive disease (PD) ultimately.
+
+
+
Mutation characteristics and outcome of osimertinib treatment
Patient 1 had EGFR A767_V769dup mutation and received first-line therapy
+ with osimertinib. The patient achieved PR and the PFS was 6.0 months. PR was also
+ observed in patient 2 (EGFR S768_D770dup), with a PFS of 14.6+ months. The patient was
+ still on treatment at data cutoff. Patient 3 was identified with a novel EGFRex20ins
+ mutation (EGFR N771_P772insL), which had not been reported before. The patient had SD as
+ best response under first line osimertinib treatment and transferred to other treatment
+ after 4.9 months due to enlarged pleural nodules. Patient 4 harbored the same
+ EGFRex20ins mutation as that of patient 2 and attained SD with a PFS of 11.2+ months.
+ Both patient 2 and patient 4 remained on osimertinib treatment. Patient 5 was confirmed
+ with multiple cerebral metastasis and experienced dizzy and vomiting when diagnosed as
+ stage IV adenocarcinoma. The patient was treated with first line chemotherapy with the
+ best response of PD. Thereafter, the patient started osimertinib and exhibited salient
+ clinical improvement and a reduction of nearly half the tumor burden. The patient had PD
+ finally due to new onset bone metastasis, with a PFS of 6.4 months. Patient 6 was
+ initially diagnosed as lung adenocarcinoma harboring EGFR A763_Y764insFQEA. The patients
+ started gefitinib treatment with a best response of PR and PFS of 9.0 months. At
+ disease progression, the patients had rebiopsy and his tumor was found to had EGFR
+ A763_Y764insFQEA and EGFR T790 M. Thereafter, the patient was treated with
+ osimertinib and attained PR. The patient experienced PD eventually due to brain
+ metastasis and achieved a PFS of 5.1 months. CT scans performed prior to
+ (baseline) and after osimertinib treatment are demonstrated in Fig. 3. Tumor shrinkage for each patient is shown in
+ Fig. 4.
+
+
+
CT scans of the thorax performed before (baseline) and after osimertinib
+ treatment (PR or SD). CT, computed tomography; PR, partial response; SD, stable
+ disease
+
+
+
+
+
+
Maximum change in tumor size according to Response Criteria in Solid Tumors
+ (RECIST) 1.1. Orange grid indicates partial response and gray grid stable disease
+
+
+
+
+
+
+ Discussion
+
Based on a large-scale study including 2316 NSCLC patient, our study
+ demonstrated the EGFRex20ins distribution in EGFR mutant Chinese patients (4.8%), the most
+ common EGFRex20ins mutation (A767_V769dup) and co-mutation (TP53), as well as clinical
+ characteristics of EGFRex20ins in Chinese NSCLC patients. As for six EGFRex20ins positive
+ patients with osimertinib treatment, four (67.7%) patients achieved PR and two SD, with
+ disease control rate 100%.
+
To our knowledge, our study represents the largest NGS based study on Chinese
+ EGFRex20ins mutant NSCLC patients. We found that frequency of Chinese EGFRex20ins in total
+ NSCLC (2.3%) was comparable with that of western groups in FM cohort [17]. Although the proportion of EGFR mutant patients
+ was much larger in our Chinese cohort (47.3% vs 15.5%), which consists with previous
+ studies [1, 2],
+ exon 20 insertions accounted for a smaller proportion compared to FM cohort in EGFR mutant
+ NSCLC (4.8% vs. 11.7%, p < 0.001) [17]. This might mainly due to different EGFR mutation proportion
+ between Asians and Americans, rather than sequencing technology issue as discussed in
+ a previous study [17]. In the contrast,
+ proportion of exon 20 insertion in EGFR- mutant patients in our study is similar to those
+ reported in other Asian cohorts, ranging from 3.6–4% [31, 32]. These results
+ revealed a concordance in the prevalence of EGFRex20ins between Chinese and Western
+ populations in NSCLC.
+
The genetic Chinese characteristics of EGFRex20ins is similar to that of
+ western populations, including the majority of unique mutations and the most prevalent
+ co-mutation [17]. EGFRex20ins detected in our
+ study also tended to be exclusive with other NSCLC oncogenic drivers including ERBB2,
+ BRAF, ALK, KRAS and RET mutations.
+
Most EGFRex20ins mutations (with the exception of a few subtypes such as
+ A763_Y764insFQEA) are associated with poor responses with the 1st and 2nd generation EGFR
+ TKIs [19–
+ 23]. Previous clinical studies including combination therapy of afatinib plus
+ cetuximab or monotherapy of Poziotinib have demonstrated good therapeutic efficacy in some
+ EGFRex20ins mutations positive NSCLC patients [24, 25]. However, the high proportion of severe AEs
+ including skin toxicity and diarrhea of these therapies might limit their universal
+ clinical applying in future. Several preclinical studies have proved that osimertinib was
+ active in specific lung cancer cell lines with EGFRex20ins mutations [26–29],
+ while the clinical activity of the 3rd generation EGFR TKIs in EGFRex20ins tumors remains
+ unknown. Our study showed promising antitumor activity of osimertinib in NSCLC patients
+ harboring certain EGFRex20ins mutations, with four patients attaining PR and two patients
+ SD. Median PFS with osimertinib was 6.2 months, which was numerically higher than
+ that with the 1st generation TKIs and afatinib [19
+ , 22]. We also for the first time reported a novel
+ EGFRex20ins mutation, EGFR N771_P772insL, in lung adenocarcinoma.
+
Despite the documented activity of osimertinib in our six patients, in vitro
+ study still demonstrated limited osimertinib effect in several EGFRex20ins mutant cell
+ lines [26–
+ 29]. Whether other EGFRex20ins mutant tumors could response to osimertinib warrants
+ further study. These studies indicate EGFRex20ins is a heterogeneous group of EGFR
+ mutation and deserves more researches to fully determine osimertinib sensitivity in
+ different EGFRex20ins tumors.
+
A recent case report has showed that an advanced NSCLC patient with
+ EGFRex20ins mutation, S768_D770dup, responded to osimertinib 160 mg daily [33]. The mutation detected was the same as that of
+ patient 2 and patient 4, which might suggest favorable lower-dose osimertinib efficacy in
+ tumors bearing EGFR S768_D770dup mutation. Another case report also suggested that EGFR
+ H773L/V774 M, an EGFR exon 20 mutation, could be suppressed by osimertinib [34], further supporting the osimertinib effect in
+ specific EGFR exon 20 mutations.
+
In addition, considering that EGFR T790 M was identified in patient 6
+ harboring EGFR A763_Y764insFQEA after acquired resistance to gefitinib, we found that
+ EGFR T790 M served as a potential resistant mechanism in EGFR
+ A763_Y764insFQEA positive NSCLC patients and occurrence of both mutations could be
+ targeted by osimertinib. This is the first case showing that T790 M mediated the
+ acquired resistance to gefitinib for patient with EGFR A763_Y764insFQEA and osimertinib
+ treatment was effective for patient with both EGFR A763_Y764insFQEA and EGFR
+ T790 M.
+
Still, there are several limitations in the study. Firstly, panels of NGS
+ performed on patients are not uniform, contributing to relatively incomprehensive genetics
+ statistics in patients with small panels. Secondly, although all six patients in our study
+ acquired disease control, the sample size is too small to establish the therapeutic
+ efficacy of osimertinib in patients with EGFRex20ins.
+
+
+ Conclusions
+
In summary, our study revealed no significant difference in the prevalence and
+ genetic characteristics of EGFRex20ins between Chinese and Western populations in NSCLC.
+ Moreover, promising antitumor activity of osimertinib was observed in specific EGFRex20ins
+ positive NSCLC patients, more studies are urgently needed to fully determine osimertinib
+ effect in NSCLC patients with different EGFRex20ins mutations.
+
+
+ Additional files
+
+
+
+
+
+
+
Figure S1. EGFR ex20ins and co-mutation pattern. (JPG 186 kb)
+
+
+
+
+
+
+
+
Figure S2. Comparison of co-mutations in EGFR ex20ins (n = 53),
+ ex19del (n = 436), L858R (n = 410),
+ T790 M (primary and secondary mutation, n = 58)
+ and other EGFR sensitive mutations (n = 90).
+ Others: Other EGFR sensitive mutations, including G719X, L861Q, S768I and
+ compound mutations. (JPG 1798 kb)
Non-small-cell lung cancer not otherwise specified
+
+
+
+ NSCLC
+
+
Non-small-cell lung cancer
+
+
+
+ PD
+
+
Progressive disease
+
+
+
+ PFS
+
+
Progression-free survival
+
+
+
+ PR
+
+
Partial response
+
+
+
+ RECIST
+
+
Response criteria in solid tumors
+
+
+
+ SD
+
+
Stable disease
+
+
+
+ TKIs
+
+
Tyrosine kinase inhibitors
+
+
+
+
+
+
+
+ Publisher’s Note
+
+
Springer Nature remains neutral with regard to jurisdictional claims in published maps
+ and institutional affiliations.
+
+
+
Wenfeng Fang, Yihua Huang, Shaodong Hong, Zhonghan Zhang and Minghui Wang contributed
+ equally to this work.
+
+
+
+ Acknowledgements
+
The authors would like to thank Dr. Xiaoqian Chen from OrigiMed for her excellent work in
+ statistical support.
+
+
+ Authors’ contributions
+
WF, YH, SH, ZZ, MW conceived of the study; LZ and WF designed the study; YH, SH, ZZ, MW
+ analyzed data; JG, WW, HG, KW collected data; WF, YH, SH, ZZ wrote the manuscript. LZ, WF
+ and SH corrected and approved the final version of the manuscript. All authors read and
+ approved the final manuscript.
+
+
+ Funding
+
This work was financially supported by National Key R&D Program of China
+ (2016YFC0905500, 2016YFC0905503), Chinese National Natural Science Foundation (81772476,
+ 81602005, 81872499, and 81702283), Science and Technology Program of Guangdong
+ (2017B020227001). All the grand supporters have no role in the design of the study and
+ collection, analysis, and interpretation of data and in writing the manuscript.
+
+
+ Availability of data and materials
+
The datasets used and/or analyzed during the current study are available from the
+ corresponding author on reasonable request.
+
+
+ Ethics approval and consent to participate
+
The study was approved by the Institutional Review Board of SYSUCC and written
+ informed consent was obtained for each patient prior to sample collection. Patients were
+ informed that the resected specimens were stored by the hospital and potentially used for
+ scientific research, and that their privacy would be maintained. Written consents were
+ obtained from parents if patients were under 16 years old.
+
+
+ Consent for publication
+
Not applicable.
+
+
+ Competing interests
+
The authors declare that they have no competing interests.
Suspected metastatic site lesions that are poorly differentiated present a diagnostic challenge when morphologic and immunohistochemical profiling cannot establish the primary tumor site. Here we present a patient diagnosed with both a malignant neoplasm in the lung and a right upper extremity (RUE) neoplasm of unclear histogenetic origin. Immunohistochemical staining performed on the latter specimen was inconclusive in determining the site of origin. Although the lung biopsy sample was insufficient for molecular testing, hybrid capture-based comprehensive genomic profiling (FoundationOne) identified an EML4-ALK rearrangement in the RUE lesion. Crizotinib treatment resulted in a major response in both the RUE and the lung lesions. This report illustrates the utility of comprehensive genomic profiling employed at the initial presentation of an unknown primary malignant neoplasm, which resulted in the front-line use of targeted therapy and a significant and sustained antitumor response.
A 53-year-old never-smoker Caucasian female presented with a 3-week history of worsening fatigue and severe exertional dyspnea. Physical examination identified a 3-cm nontender, hard, subcutaneous, proximal right upper extremity (RUE) mass with erythematous discoloration of the overlying skin. A 1-cm nontender, subcutaneous nodule was also identified in the left parietal area of the scalp. Positron emission tomography (PET) and computed tomography (CT) imaging revealed multiple metabolically active masses in the right lung, with the largest mass in the upper lobe measuring 5.8 × 5.0 cm [standardized uptake value (SUV): 23.4]; a left lung mass measuring 2.3 × 2.2 cm (SUV: 12.8) was also identified. Right hilar and mediastinal lymphadenopathy and subcarinal lymphadenopathy were also noted, with the subcarinal nodal mass measuring 4.4 × 4.3 cm (SUV: 21.7). Extrathoracic, metabolically active lesions were also noted, including a mass located in the celiac nodal basin and a mass in the anterior right upper arm that correlated with the mass identified during physical examination. Magnetic resonance imaging of the brain showed a 3-cm transcranial lesion in the left frontal bone which was 1.8 cm thick.
+
+
+ Methods
+
The patient underwent an endobronchial ultrasound-guided fine needle aspirate of a hilar node, which revealed a poorly differentiated malignant neoplasm that could not be further characterized definitively by ancillary studies due to insufficient sample volume. As a consequence, multiple core biopsies of the RUE mass were obtained and submitted for both pathology and hybrid capture-based comprehensive genomic profiling (FoundationOne). These NGS assay sequences of the entire coding sequence of 236 cancer-related genes and select intronic regions from 19 genes commonly rearranged in cancer.
+
+
+ Results
+
Morphologic examination of the biopsy from the RUE mass revealed a malignant neoplasm of unknown histogenesis (fig. 1a). Immunohistochemical staining was positive for vimentin but negative for epithelial and lung markers including TTF1, Napsin A, AE1/AE3, CK5, CK6, CK7, CAM5.2, and mCEA. The initial clinical impression that the RUE mass was a metastatic tumor originating from the lung could not be confirmed. Comprehensive genomic profiling revealed an EML4-ALK rearrangement (fig. 1b). The other genomic alterations included amplification of the MCL1 gene and homozygous deletion of the tumor suppressor genes CDKN2A and CDKN2B. Based on the presence of the EML4-ALK rearrangement, a known oncogenic driver in a subset of pulmonary adenocarcinomas [1], the patient was started on crizotinib 250 mg orally twice daily.
+
A follow-up 1 week later suggested a marked clinical improvement, with a significant size reduction of both the right-arm and scalp lesions, which became barely palpable, as well as a symptomatic improvement. A CT scan performed 1 month after treatment initiation demonstrated a significant size reduction of the lesions in the right lung (fig. 1c, d). There was also a marked improvement in lymphadenopathy, including a significant size reduction of the subcarinal lymph node. A restaging PET scan was obtained after 2 months of therapy and showed near-complete resolution of all hypermetabolic masses (fig. 1e, f). At the last examination, 5 months after the start of crizotinib treatment, the patient was completely asymptomatic with an entirely negative examination.
+
+
+ Discussion
+
Our patient was diagnosed with a nonsmall cell malignant neoplasm situated in the lung, but the initial diagnostic sample obtained from the lung was insufficient for further testing. The biopsy of the RUE tumor to perform molecular profiling prevented an additional transthoracic or bronchoscopic procedure [2] and possibly other invasive studies to identify an anatomic site of origin. Although the pathologic findings on this RUE mass biopsy were not consistent with a metastatic lesion originating from the lung, the comprehensive NGS assay (FoundationOne) identified an EML4-ALK rearrangement, which is most frequently observed in nonsmall cell lung cancer (NSCLC) [1]. However, EML4-ALK rearrangements are not exclusive to NSCLC and have also been observed in colorectal carcinoma [3, 4], papillary thyroid carcinoma [5, 6], renal cell carcinoma [7] and breast carcinoma [4] as well as in inflammatory myofibroblastic tumors (IMT), which are generally considered to be a type of soft tissue sarcoma [8, 9]. Although a definitive morphologic diagnosis was not possible for our patient, the decision to treat her with crizotinib was made given the aggressive disease and the favorable responses of patients with ALK-rearranged NSCLC to crizotinib treatment [10]. This decision also avoided the risks and delays associated with obtaining and testing a further lung biopsy.
+
The patient responded to crizotinib with rapid and significant volume decreases of the masses from both sites. This concordance in response suggests that these lesions harbor a genomic alteration conferring sensitivity to crizotinib, i.e. EML4-ALK rearrangement, and are part of the same neoplastic process. However, due to insufficient sample volume from the initial diagnostic biopsy, the presence of the EML4-ALK rearrangement in the lung lesions could not be confirmed.
+
+ EML4-ALK rearrangements are most frequently encountered in NSCLC and occur in approximately 7% of cases [1]. ALK-rearrangements, including EML4-ALK [ 10] have been reported in 50% of IMT cases [8, 11]. Therefore, the immunophenotype coupled with the identification of an EML4-ALK genomic alteration could be consistent with a diagnosis of either poorly differentiated NSCLC, which does not stain positively for TTF1, Napsin A, or CK5/6 in 25% of cases [12], or IMT.
+
This case is an example of how genomic profiling can provide additional insight beyond an indeterminate histologic diagnosis and lead to immediate implications for treatment. It shows the promise of comprehensive genomic profiling in patients who present with advanced cancers of unknown primary site where the identification of genomic alterations may simultaneously provide additional diagnostic information and allow for targeted therapy as the first line of treatment.
+
+
+ Disclosure Statement
+
J.H.C., S.M.A., L.M.G., R.L.E., P.J.S., N.A.P., J.S.R. and V.A.M. are employees of and have equity interest in Foundation Medicine, Inc. J.D., K.R. and A.C. have no conflicts of interest to declare. R.M. is a consultant to Foundation Medicine, Inc.
some long description of the table contents in the table footer
+
+
+
+
+ """
+ chunks = extract_text_chunks([etree.fromstring(table_xml.strip())])
+
+ table_header = [c.text for c in chunks if c.xml_path.endswith("thead")]
+
+ assert len(table_header) == 1
+ assert len(table_header[0].split(TABLE_DELIMITER)) == cols
+
+ table_body = [c.text for c in chunks if c.xml_path.endswith("tbody")]
+
+ if cols == 1 and rows == 1 and all([cleanup_text(v) == "" for v in values_used]):
+ # will omit the table body if it is entirely empty
+ assert not table_body
+ else:
+ assert len(table_body) == 1
+ assert len(table_body[0].split(TABLE_DELIMITER)) == cols * rows
+
+
+# TODO: tests for this table https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7461630/table/T2/?report=objectonly
+# need to care about indenting
+
+
+@pytest.mark.parametrize(
+    "filename,expected_chunks",
+    [
+        [
+            "table_rowspans.xml",
+            ["SUP-CR500-2\tI1171S\tI1171T\tNPM-ALK ALCL\tCrizotinib-R"],
+        ],
+        ["table_inner_colspan.xml", ["months\t30.5\t30.5\t16.8"]],
+        ["table.xml", ["TCGA-BR-4370-01\tStomach (TCGA)\tR2193C"]],
+    ],
+)
+def test_parses_table_body(filename, expected_chunks):
+    """Check that expected cell sequences occur in the first extracted table body.
+
+    Each entry of ``expected_chunks`` is a tab-joined run of cell values that must
+    appear verbatim in the text of the first chunk whose ``xml_path`` ends with
+    ``tbody``.
+    """
+    xml_input = data_file_path(filename)
+    with open(xml_input, "r") as fh:
+        xml_data = fh.read()
+    chunks = extract_text_chunks([etree.fromstring(xml_data)])
+    table_body = [c.text for c in chunks if c.xml_path.endswith("tbody")]
+
+    # report a missing table body as an assertion failure rather than letting
+    # the index lookup below raise IndexError
+    assert table_body, "no tbody chunk was extracted"
+
+    for chunk in expected_chunks:
+        assert chunk in table_body[0]
+
+
+@pytest.mark.parametrize(
+    "filename,expected_header,table_index",
+    [
+        [
+            "table_floating.xml",
+            [
+                "Patient sample",
+                "Exon",
+                "DNA",
+                "Protein",
+                "Domain",
+                "Germline/ Somatic\n",
+            ],
+            0,
+        ],
+        [
+            "PMC5029658.xml",
+            [
+                "",
+                "",
+                "Cell Line SUP-M2 IC50",
+                "Cell Line SUP-M2 Fold Change",
+                "Cell Line SU-DHL-1 IC50",
+                "Cell Line SU-DHL-1 Fold Change",
+                "Cell Line I1171S SUP-CR500-2 IC50",
+                "Cell Line I1171S SUP-CR500-2 Fold Change",
+                "Cell Line F1174L SUP-LR150-2 IC50",
+                "Cell Line F1174L SUP-LR150-2 Fold Change",
+                "Cell Line R1192P DHL1-CR500 IC50",
+                "Cell Line R1192P DHL1-CR500 Fold Change",
+                "Cell Line T1151M DHL1-LR150 IC50",
+                "Cell Line T1151M DHL1-LR150 Fold Change",
+                "Cell Line G1269A DHL1-CR500-2 IC50",
+                "Cell Line G1269A DHL1-CR500-2 Fold Change\n",
+            ],
+            1,
+        ],
+    ],
+)
+def test_parses_table_header(filename, expected_header, table_index):
+    """Compare the cells of one extracted table header with the expected list.
+
+    The header is the ``table_index``-th chunk whose ``xml_path`` ends with
+    ``thead``; its text is split on ``TABLE_DELIMITER`` to recover the cells.
+    """
+    with open(data_file_path(filename), "r") as source:
+        root = etree.fromstring(source.read())
+
+    header_texts = []
+    for chunk in extract_text_chunks([root]):
+        if chunk.xml_path.endswith("thead"):
+            header_texts.append(chunk.text)
+
+    # the requested header must exist before it is indexed
+    assert len(header_texts) > table_index
+
+    header_cells = header_texts[table_index].split(TABLE_DELIMITER)
+    assert header_cells == expected_header
+
+
+@pytest.mark.parametrize(
+    "filename,table_index,expected_columns,expected_rows",
+    [
+        ("table_floating.xml", 0, 6, 16),
+        ("table_format_chars.xml", 0, 1, 2),
+        ("table_rowspans.xml", 0, 6, 29),
+        ("table_malformed_span.xml", 0, 14, 3),
+        ("table_inner_colspan.xml", 0, 4, 8),
+        ("table.xml", 0, 3, 79),
+        ("PMC6580637.xml", 0, 5, 12),
+        ("PMC6580637.xml", 1, 6, 4),
+        ("PMC6580637.xml", 2, 8, 6),
+    ],
+)
+def test_parses_table_body_size(filename, table_index, expected_columns, expected_rows):
+    """Check that an extracted table body holds ``columns * rows`` cells.
+
+    The body is the ``table_index``-th chunk whose ``xml_path`` ends with
+    ``tbody``; cells are counted by splitting its text on ``TABLE_DELIMITER``.
+    """
+    with open(data_file_path(filename), "r") as source:
+        root = etree.fromstring(source.read())
+
+    body_texts = [
+        chunk.text
+        for chunk in extract_text_chunks([root])
+        if chunk.xml_path.endswith("tbody")
+    ]
+    # the requested body must exist before it is indexed
+    assert len(body_texts) > table_index
+
+    cell_count = len(body_texts[table_index].split(TABLE_DELIMITER))
+    assert cell_count == expected_columns * expected_rows
+
+
+@pytest.mark.parametrize(
+    "filename,table_index,row_index,row_content",
+    [
+        (
+            "PMC5029658.xml",
+            0,
+            5,
+            ["SUP-CR500-2", "I1171S", "I1171N", "EML4-ALK NSCLC", "Alectinib-R", ""],
+        ),  # rowspans split
+        pytest.param(
+            "PMC4816447.xml",
+            0,
+            2,
+            ["19", "c.2236_2250del", "p.Glu746_Ala750del", "3 (360x)", "yes/yes"],
+            marks=pytest.mark.skip(reason="TODO"),
+        ),
+        (
+            "PMC4919728.xml",
+            0,
+            1,
+            ["Age at diagnosis (year): Median", "31.4", "5.1", "36.7", "6.8"],
+        ),
+    ],
+)
+def test_parses_table_body_row_content(filename, table_index, row_index, row_content):
+    """Check the cells of a single row of an extracted table body.
+
+    The body is the ``table_index``-th chunk whose ``xml_path`` ends with
+    ``tbody``. Its text is split on ``TABLE_DELIMITER`` into a flat cell list, and
+    row ``row_index`` is the slice of ``len(row_content)`` consecutive cells
+    starting at ``row_index * len(row_content)``.
+    """
+    xml_input = data_file_path(filename)
+    with open(xml_input, "r") as fh:
+        xml_data = fh.read()
+    chunks = extract_text_chunks([etree.fromstring(xml_data)])
+
+    table_body = [c.text for c in chunks if c.xml_path.endswith("tbody")]
+    assert len(table_body) > table_index
+
+    columns = len(row_content)
+    cells = table_body[table_index].split(TABLE_DELIMITER)
+
+    # count cells, not characters of the body text, to confirm that the
+    # requested row is fully present before slicing it out
+    assert len(cells) >= (row_index + 1) * columns
+
+    row = cells[row_index * columns : (row_index + 1) * columns]
+    assert row == row_content
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6c054cb..fabe1df 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,22 +1,18 @@
import textwrap
import xml.etree.cElementTree as etree
-from typing import List, Optional
+from typing import Optional
from unittest.mock import MagicMock
-from xml.sax.saxutils import escape
import pytest
+from hypothesis import given, infer
+
from bioconverters.utils import (
- TABLE_DELIMITER,
cleanup_text,
extract_text_chunks,
merge_adjacent_xref_siblings,
remove_brackets_without_words,
strip_annotation_markers,
)
-from hypothesis import given, infer
-from hypothesis import strategies as st
-
-from .util import data_file_path
@pytest.mark.parametrize(
@@ -62,6 +58,184 @@ def test_extract_text_chunks_sibling_xrefs():
assert locations == [113, 325]
+PARSING_CASES = [  # [input_text, output_text] pairs that parametrize test_extract_text_chunks
+ ['incubator containing 5% CO2', 'incubator containing 5% CO2'],
+ [
+ 'Activating mutations in ALK provide a therapeutic target in neuroblastoma',
+ 'Activating mutations in ALK provide a therapeutic target in neuroblastoma',
+ ],
+ ['104', '10^4'],
+ ['especially in CBL-W802* cells', 'especially in CBL-W802* cells'],
+ [
+ 'influenced by the presence of allelic variants—GSTP1 Ile105Val (rs1695) and GSTP1 Ala114Val (rs1138272), with homozygote',
+ 'influenced by the presence of allelic variants--GSTP1 Ile105Val (rs1695) and GSTP1 Ala114Val (rs1138272), with homozygote',
+ ],
+ [
+ '''breast cancer, clear cell renal carcinoma, and colon cancer6
+ 7
+ 8
+ 9
+ 10 have successfully identified''',
+ 'breast cancer, clear cell renal carcinoma, and colon cancer have successfully identified',
+ ],
+ ['Labela', 'Label a'],
+ [
+ 'Introduction of the NTRK3 G623R mutation',
+ 'Introduction of the NTRK3 G623R mutation',
+ ],
+ ['Patientsample', 'Patient sample'],
+ [
+ ''', and in the transgenic
+GATA-1,
+low mouse''',
+ ', and in the transgenic GATA-1, low mouse',
+ ],
+ [
+ 'we selected an allele (designated cic4) that removes',
+ 'we selected an allele (designated cic^4) that removes',
+ ],
+ [
+ 'whereas a CIC derivative lacking the HMG-box is mainly cytoplasmic [9], implying',
+ 'whereas a CIC derivative lacking the HMG-box is mainly cytoplasmic, implying',
+ ],
+ [
+ 'inactivated by somatic mutations [22–30], but',
+ 'inactivated by somatic mutations, but',
+ ],
+ [
+ 'regulation of the Wnt-β-catenin pathway',
+ 'regulation of the Wnt-beta-catenin pathway',
+ ],
+ [
+ 'previously reported cell lines (CAL27, CAL33, Detroit 562, UM-SCC-47, SCC-25, SCC-9, UM-SCC-11B and UM-SCC-17B) [6], while',
+ 'previously reported cell lines (CAL27, CAL33, Detroit 562, UM-SCC-47, SCC-25, SCC-9, UM-SCC-11B and UM-SCC-17B), while',
+ ],
+ [
+ 'clinic-pathologic parameters, χ2 and Fisher exact tests',
+ 'clinic-pathologic parameters, chi2 and Fisher exact tests',
+ ],
+ [
+ 'due to RB1 inhibition [38], the specific',
+ 'due to RB1 inhibition, the specific',
+ ],
+ [
+ 'the specific HPV+ gene expression',
+ 'the specific HPV+ gene expression',
+ ],
+ [
+ 'known to be resistant to 1st and 2nd generation EGFR-TKIS, osimertinib',
+ 'known to be resistant to 1st and 2nd generation EGFR-TKIS, osimertinib',
+ ],
+ [
+ 'at 37°C in a humidified 5% CO2 incubator',
+ 'at 37 deg C in a humidified 5% CO2 incubator',
+ ],
+ [
+ 'seeded at concentrations below 1 × 106/ml, selected',
+ 'seeded at concentrations below 1 x 10^6/ml, selected',
+ ],
+ [
+ 'PCR cycling parameters were: one cycle of 95 °C for 15 min; 35 cycles of 95 °C for 20 s, 60 °C for 30 s, and 72 °C for 1 min; followed by one cycle of 72 °C for 3 min.',
+ 'PCR cycling parameters were: one cycle of 95 deg C for 15 min; 35 cycles of 95 deg C for 20 s, 60 deg C for 30 s, and 72 deg C for 1 min; followed by one cycle of 72 deg C for 3 min.',
+ ],
+ [
+ '9 patients with a BRAF-mutant tumour',
+ '9 patients with a BRAF-mutant tumour',
+ ],
+ [
+ 'patients with BRAFWT tumours',
+ 'patients with BRAF-WT tumours',
+ ],
+ ['MSIhi tumours', 'MSI-hi tumours'],
+ ['P53mutation', 'P53 mutation'],
+ [
+ 'upper limit of normal, creatinine clearance ⩾30 ml min−1,',
+ 'upper limit of normal, creatinine clearance ⩾30 ml min^-1,',
+ ],
+ ['P = 1.0 × 10−6', 'P = 1.0 x 10^-6'],
+ [
+ 'domains [13]: the N-terminal domain',
+ 'domains: the N-terminal domain',
+ ],
+ [
+ 'motif (residues 234 to 247 [56]) immediately',
+ 'motif (residues 234 to 247) immediately',
+ ],
+ [
+ 'the oncometabolite R(–)-2-hydroxyglutarate at the',
+ 'the oncometabolite R(-)-2-hydroxyglutarate at the',
+ ],
+ ['[3H]-Thymidine', '[3H]-Thymidine'],
+ [
+ '
Class IA PI3K dimers are composed of a p110 catalytic subunit and a p85 regulatory subunit, each with three isoforms encoded by three genes17. Mutations in five of these genes have been observed in many human cancers31–34. Our data show that mutations in the p85β (PIK3R2) regulatory and p110α (PIK3CA) catalytic subunits are a common cause of megalencephaly syndromes, albeit with a clear genotype-phenotype correlation as PIK3R2 and PIK3CA mutations are associated with MPPH (P = 3.3 × 10−6) and MCAP (P = 1.0 × 10−6), respectively (Supplementary Table 9, Online Methods). Both PIK3R1 and PIK3R2 have oncogenic potential, and mutations including the glycine-to-arginine substitution of PIK3R2 found in MPPH (p.Gly373Arg) and substitution of the homologous amino acid residue in PIK3R1 (p.Gly376Arg) have been found in cancer32. Available functional studies showed that several of these mutations disrupt the inactive conformation of the PI3K dimer and maintain the catalytic subunit in a high activity state32,35. Our observations in lymphoblastoid cells derived from patient LR00-016a1 show that the p.Gly373Arg mutation results in increased PI3K activity and elevated PI3K-mTOR signaling, further supporting this mechanism.
',
+ 'Class IA PI3K dimers are composed of a p110 catalytic subunit and a p85 regulatory subunit, each with three isoforms encoded by three genes. Mutations in five of these genes have been observed in many human cancers. Our data show that mutations in the p85beta (PIK3R2) regulatory and p110alpha (PIK3CA) catalytic subunits are a common cause of megalencephaly syndromes, albeit with a clear genotype-phenotype correlation as PIK3R2 and PIK3CA mutations are associated with MPPH (P = 3.3 x 10^-6) and MCAP (P = 1.0 x 10^-6), respectively (Supplementary Table 9,Online Methods). Both PIK3R1 and PIK3R2 have oncogenic potential, and mutations including the glycine-to-arginine substitution of PIK3R2 found in MPPH (p.Gly373Arg) and substitution of the homologous amino acid residue in PIK3R1 (p.Gly376Arg) have been found in cancer. Available functional studies showed that several of these mutations disrupt the inactive conformation of the PI3K dimer and maintain the catalytic subunit in a high activity state. Our observations in lymphoblastoid cells derived from patient LR00-016a1 show that the p.Gly373Arg mutation results in increased PI3K activity and elevated PI3K-mTOR signaling, further supporting this mechanism.',
+ ],
+ [
+ '
The AR, like other members of the steroid hormone receptor family, is a ligand-activated transcription factor which has distinct structural and functional domains [13]: the N-terminal domain (NTD) important for transactivation; the DNA binding domain (DBD) and the C-terminal ligand binding domain (LBD). Upon ligand binding, the AR undergoes conformational transformation facilitating intra- and intermolecular interactions [14]. The transactivational capability of the AR is modulated by several signaling systems [15] through a range of post-translational modifications [13], [16]. Although the AR exerts most of its actions by functioning as a transcription factor binding to specific response elements, non-genomic effects can also contribute to the regulatory outcome. Activation of the phosphatidylinositol 3-kinase (PI3K)/Akt signaling pathway not only regulates AR activity through phosphorylation of the receptor, but also has a major role in the process leading to invasion and metastasis of PCa cells through downstream phosphorylation of affiliated substrates leading to protection from apoptosis and increased cell survival. The AR can stimulate PI3K/Akt signaling by interacting directly with the p85α regulatory subunit of PI3K in response to synthetic and natural androgens [17] through its NTD [18], and by binding and stimulating Akt1 within lipid rafts [19]. Many different processes are involved in the acquisition of hormone resistance [20] and they follow several diverse routes. Activation of sufficient levels of AR in a castration environment can occur through missense mutations within the AR [21], or splice variants, which result in: enhanced binding of androgens; creation of a constitutively active receptor [22]–[25]; promiscuous binding of other ligands [26]–[30] or altered recruitment of co-activators and co-repressors to the NTD and LBD. The levels of AR can be raised through increased expression, altered protein turnover and gene amplification [31]–[33]. 
In addition, aberrant intratumoral androgen synthesis can lead to activation of AR [34].
',
+ 'The AR, like other members of the steroid hormone receptor family, is a ligand-activated transcription factor which has distinct structural and functional domains: the N-terminal domain (NTD) important for transactivation; the DNA binding domain (DBD) and the C-terminal ligand binding domain (LBD). Upon ligand binding, the AR undergoes conformational transformation facilitating intra- and intermolecular interactions. The transactivational capability of the AR is modulated by several signaling systems through a range of post-translational modifications. Although the AR exerts most of its actions by functioning as a transcription factor binding to specific response elements, non-genomic effects can also contribute to the regulatory outcome. Activation of the phosphatidylinositol 3-kinase (PI3K)/Akt signaling pathway not only regulates AR activity through phosphorylation of the receptor, but also has a major role in the process leading to invasion and metastasis of PCa cells through downstream phosphorylation of affiliated substrates leading to protection from apoptosis and increased cell survival. The AR can stimulate PI3K/Akt signaling by interacting directly with the p85alpha regulatory subunit of PI3K in response to synthetic and natural androgens through its NTD, and by binding and stimulating Akt1 within lipid rafts. Many different processes are involved in the acquisition of hormone resistance and they follow several diverse routes. Activation of sufficient levels of AR in a castration environment can occur through missense mutations within the AR, or splice variants, which result in: enhanced binding of androgens; creation of a constitutively active receptor; promiscuous binding of other ligands or altered recruitment of co-activators and co-repressors to the NTD and LBD. The levels of AR can be raised through increased expression, altered protein turnover and gene amplification. 
In addition, aberrant intratumoral androgen synthesis can lead to activation of AR.',
+ ],
+ [
+ '
The predominant type of mutation i.e. loss of function, was well represented in the NTD. Mutations L57Q, E198G, D221H, A234T, S296R; S334P, P340L, P504L and D528G all displayed loss of function with E198G showing the greatest reduction (50% at 1 nM) and P340L also being present in AIS. The loss of transactivational ability was generally seen in both basal activity and across a wide range of DHT concentrations. A possible explanation for the loss of function of mutation A234T is that it is located at the start of the highly conserved motif (residues 234 to 247 [56]) immediately carboxyl-terminal of TAU-1 which forms the interaction site for the Hsp70-interacting protein E3 ligase CHIP [57].
',
+ 'The predominant type of mutation i.e. loss of function, was well represented in the NTD. Mutations L57Q, E198G, D221H, A234T, S296R; S334P, P340L, P504L and D528G all displayed loss of function with E198G showing the greatest reduction (50% at 1 nM) and P340L also being present in AIS. The loss of transactivational ability was generally seen in both basal activity and across a wide range of DHT concentrations. A possible explanation for the loss of function of mutation A234T is that it is located at the start of the highly conserved motif (residues 234 to 247) immediately carboxyl-terminal of TAU-1 which forms the interaction site for the Hsp70-interacting protein E3 ligase CHIP.',
+ ],
+ [
+ 'The 2-year invasive disease-free survival rate was 93·9%',
+ 'The 2-year invasive disease-free survival rate was 93.9%',
+ ],
+ [
+ 'Title of a thing
paragraph content
',
+ 'Title of a thing\nparagraph content',
+ ],
+ [
+ 'Compared with KRAS wild type and empty vector controls, KRAS10G11 and 11GA12 significantly enhanced in vivo tumor growth',
+ 'Compared with KRAS wild type and empty vector controls, KRAS 10G11 and 11GA12 significantly enhanced in vivo tumor growth',
+ ],
+ [
+ 'To investigate the impact of KRAS mutation variants on the activity of regorafenib in SW48 colorectal cancer cells.Activity of regorafenib',
+ 'AIM: To investigate the impact of KRAS mutation variants on the activity of regorafenib in SW48 colorectal cancer cells. MATERIALS & METHODS: Activity of regorafenib',
+ ],
+]
+
+
+@pytest.mark.parametrize('input_text,output_text', PARSING_CASES)
+@pytest.mark.parametrize('annotations', [True, False])
+def test_extract_text_chunks(input_text, output_text, annotations):
+    """Extract text from ``input_text`` and compare it with ``output_text``.
+
+    With ``annotations`` set, an annotations map is passed to the extraction and
+    ``strip_annotation_markers`` is applied to the joined text before comparing.
+    """
+    xml_input = f'{input_text}'
+    root_nodes = [etree.fromstring(xml_input)]
+
+    if annotations:
+        # Not called ``map``: that would shadow the builtin.
+        annotations_map = {}
+        chunks = extract_text_chunks(root_nodes, annotations_map=annotations_map)
+        result, _ = strip_annotation_markers(''.join(c.text for c in chunks), annotations_map)
+    else:
+        chunks = extract_text_chunks(root_nodes)
+        result = ''.join(c.text for c in chunks)
+
+    # Debug output; pytest's default capture only reports it for failing tests.
+    print([c.text for c in chunks])
+    print('extracted', ''.join(chunk.text for chunk in chunks))
+    print(chunks)
+    print(len(result), len(output_text))
+
+    if result != output_text:
+        # First differing index; defaults to the shorter length so a mismatch where
+        # one string is a prefix of the other is still located.
+        diff_start = next(
+            (i for i, (c1, c2) in enumerate(zip(result, output_text)) if c1 != c2),
+            min(len(result), len(output_text)),
+        )
+        lo, hi = max(diff_start - 10, 0), diff_start + 10
+        for label, text in (('expected', output_text), ('actual', result)):
+            print(label, [repr(text[lo:diff_start]), repr(text[diff_start:hi])])
+    assert result == output_text
+
+
def test_extract_figure_label():
xml_input = '10.1371/journal.pone.0026760.g003
Anchorage-independent growth of ERBB2 mutants.
'
root_nodes = [etree.fromstring(xml_input)]
@@ -70,7 +244,7 @@ def test_extract_figure_label():
assert not annotations_map
xml_paths = [c.xml_path for c in chunks]
assert 'article/fig/label' in xml_paths
- assert 'Figure 3' in [c.text for c in chunks if c.xml_path == 'article/fig/label']
+ assert 'Figure 3\n' in [c.text for c in chunks if c.xml_path == 'article/fig/label']
@pytest.mark.parametrize(
@@ -106,6 +280,12 @@ def test_extract_figure_label():
'This is a sentence with an in-text citation.',
[42],
),
+ (
+ '(residues 234 to 247 ANN_a2b8dd34-f190-41c7-98f6-259aa8d402e8) immediately',
+ {'ANN_a2b8dd34-f190-41c7-98f6-259aa8d402e8': '1'},
+ '(residues 234 to 247) immediately',
+ [19],
+ ),
],
ids=[
'single citation',
@@ -113,11 +293,12 @@ def test_extract_figure_label():
'middle sentence citation',
'round-brackets',
'square-brackets',
+ 'end of brackets',
],
)
def test_strip_annotation_markers(text, annotations_map, expected_text, expected_locations):
text_result, annotations_result = strip_annotation_markers(
- text, annotations_map, marker_pattern=r'ANN_\d+'
+ text, annotations_map, marker_pattern=r'ANN_[-\w]+'
)
assert text_result == expected_text
locations = []
@@ -127,122 +308,6 @@ def test_strip_annotation_markers(text, annotations_map, expected_text, expected
assert locations == expected_locations
-def test_extract_title_with_italics():
- xml = 'Activating mutations in ALK provide a therapeutic target in neuroblastoma'
- chunks = extract_text_chunks([etree.fromstring(xml)])
- assert len(chunks) == 1
- assert (
- 'Activating mutations in ALK provide a therapeutic target in neuroblastoma'
- == chunks[0].text
- )
-
-
-@given(
- values=st.lists(
- st.text(alphabet=st.characters(blacklist_categories=['Cc', 'Cs'])), min_size=1, max_size=50
- ),
- rows=st.integers(min_value=1, max_value=3),
- cols=st.integers(min_value=1, max_value=3),
-)
-def test_extract_delimited_table(values: List[str or int or float or None], rows: int, cols: int):
- values = [escape(v) for v in values]
- rows_xml = []
- values_used = set()
-
- for row_index in range(rows):
- tr = []
- for col_index in range(cols):
- value = values[(row_index * col_index + col_index) % len(values)]
- tr.append(
- f'\n