Skip to content

Commit

Permalink
Merge pull request #503 from theosanderson/fix-nexus
Browse files Browse the repository at this point in the history
fix nexus comments implementation
  • Loading branch information
theosanderson authored Jul 24, 2023
2 parents 9b130de + 44f8d97 commit bdbe5e6
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 55 deletions.
26 changes: 2 additions & 24 deletions taxonium_component/src/utils/nexusToNewick.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,15 @@ function nexusToNewick(nexusString) {
// get the Newick string from the tree block
const newickString = treeBlock[0].match(/\((.*?)\).+;/gims)[0];

let nodeProperties = {};

// extract properties, which are indicated by [&key=value] or [&key={value1,value2,...}]
newickString.replace(
/\[&?(.*?)\]/gims,
(match, contents, offset, inputString) => {
let nodeId = inputString.slice(0, offset).match(/[^,\(\):]+$/g)[0];
// use a regular expression to split on commas not inside curly brackets
let properties = contents.split(/,(?![^{]*})/g);
let propertyDict = {};
for (let prop of properties) {
let [key, value] = prop.split("=");
propertyDict["meta_" + key] = value;
}
nodeProperties[nodeId] = propertyDict;
}
);

// remove comments, which are indicated by [...]

const newick = newickString.replace(/\[(.*?)\]/gims, "");

// translate the taxon labels in the Newick string
const translatedNewickString = newick.replace(
const translatedNewickString = newickString.replace(
/([^:\,\(\)]+)/gims,
(match) => {
return translations[match] || match;
}
);

return { newick: translatedNewickString, nodeProperties };
return { newick: translatedNewickString };
}

export default nexusToNewick;
85 changes: 54 additions & 31 deletions taxonium_component/src/utils/processNewick.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ import reduceMaxOrMin from "./reduceMaxOrMin";
import nexusToNewick from "../utils/nexusToNewick.js";
const emptyList = [];

/**
 * Strip every square-bracketed span from a string.
 *
 * A span runs from a "[" to the first "]" that follows it; a "[" with no
 * closing "]" is left untouched.
 *
 * @param {string} str - Text that may contain "[...]" spans.
 * @returns {string} The text with every matched "[...]" span removed.
 */
function removeSquareBracketedComments(str) {
  const bracketedSpan = /\[[^\]]*\]/g;
  const stripped = str.replace(bracketedSpan, "");
  return stripped;
}

async function do_fetch(url, sendStatusMessage, whatIsBeingDownloaded) {
if (!sendStatusMessage) {
sendStatusMessage = () => {};
Expand Down Expand Up @@ -82,13 +78,29 @@ function fetch_or_extract(file_obj, sendStatusMessage, whatIsBeingDownloaded) {
}
}

/**
 * Parse a Newick/NEXUS node annotation string into "meta_"-prefixed
 * properties on an existing object.
 *
 * The input is a comma-separated list of key=value pairs, optionally starting
 * with "&", e.g. "&name=blabla,mutations={T694A:1.0,C29870A:1.0}".
 * A value wrapped in {} may itself contain commas and is kept verbatim,
 * braces included.
 *
 * @param {string} newickKVString - Annotation text to parse.
 * @param {Object} obj_to_set - Object mutated in place; each pair is stored
 *   as obj_to_set["meta_" + key] = value (value is always a string).
 * @returns {void}
 */
function parseNewickKeyValue(newickKVString, obj_to_set) {
  // Key: an optional leading "&" followed by a run of characters that cannot
  // be a separator (whitespace, ",", "=", "&", brackets or braces). Using a
  // separator-exclusion class rather than \w+ keeps keys such as
  // "height_95%_HPD" intact instead of truncating them to "_HPD".
  // Value: either a {...} group (commas allowed inside) or everything up to
  // the next comma.
  const pairPattern = /(&?[^\s,=&\[\]{}]+)=({[^}]*}|[^,]*)/g;

  for (const [, rawKey, value] of String(newickKVString).matchAll(
    pairPattern
  )) {
    // Drop the "&" marker if it is present at the start of the key.
    const key = rawKey.startsWith("&") ? rawKey.slice(1) : rawKey;
    obj_to_set["meta_" + key] = value;
  }
}

async function cleanup(tree) {
tree.node.forEach((node, i) => {
node.node_id = i;
});

tree.node = tree.node.map((node, i) => {
return {
const to_return = {
name: node.name.replace(/'/g, ""),
parent_id: node.parent ? node.parent.node_id : node.node_id,
x_dist: node.x,
Expand All @@ -98,6 +110,12 @@ async function cleanup(tree) {
is_tip: node.child.length === 0,
node_id: node.node_id,
};
// if node.meta is not empty, parse it.
// We need to parse things of the form "&name=blabla,mutations={T694A:1.0,C29870A:1.0},Ns={1-3,4-17,18-20,21-26,686-693,22029-22033,28248-28253,28271-28271}"
if (node.meta) {
parseNewickKeyValue(node.meta, to_return);
}
return to_return;
});

const scale_y = 2000;
Expand All @@ -117,30 +135,31 @@ async function cleanup(tree) {

export async function processNewick(data, sendStatusMessage) {
let the_data;
let extra_metadata;

the_data = await fetch_or_extract(data, sendStatusMessage, "tree");

console.log("data.filetype", data.filetype);
if (data.filetype == "nexus") {
const result = nexusToNewick(the_data);
the_data = result.newick;
extra_metadata = result.nodeProperties;
}

sendStatusMessage({
message: "Parsing Newick file",
});

// remove all square-bracketed comments from the string
the_data = removeSquareBracketedComments(the_data);
// if starts with a "[", then trim to after the first "]"
if (the_data[0] === "[") {
the_data = the_data.slice(the_data.indexOf("]") + 1);
}

// remove newlines from the string

the_data = the_data.replaceAll("\n", "");
the_data = the_data.replaceAll("\r", "");

const tree = kn_parse(the_data);
console.log("tree", tree);

function assignNumTips(node) {
if (node.child.length === 0) {
Expand Down Expand Up @@ -189,6 +208,7 @@ export async function processNewick(data, sendStatusMessage) {
});

cleanup(tree);
console.log("tree", tree);

const overallMaxX = reduceMaxOrMin(tree.node, (x) => x.x_dist, "max");
const overallMinX = reduceMaxOrMin(tree.node, (x) => x.x_dist, "min");
Expand All @@ -208,7 +228,6 @@ export async function processNewick(data, sendStatusMessage) {
rootMutations: [],
rootId: 0,
overwrite_config: { num_tips: total_tips, from_newick: true },
extra_metadata,
};

return output;
Expand Down Expand Up @@ -299,37 +318,41 @@ export async function processNewickAndMetadata(data, sendStatusMessage) {
headers.slice(1).map((x) => ["meta_" + x, ""])
);

if (tree.extra_metadata) {
// loop over the extra metadata dict to find all the (sub)keys
const all_extra_keys = new Set();
Object.values(tree.extra_metadata).forEach((node_extra) => {
Object.keys(node_extra).forEach((key) => {
all_extra_keys.add(key);
});
});
// add any missing keys to blanks
all_extra_keys.forEach((key) => {
if (!blanks[key]) {
blanks[key] = "";
}
const all_keys = new Set();
tree.nodes.forEach((node) => {
// get all the keys that start with "meta_"
const meta_keys = Object.keys(node).filter((x) => x.startsWith("meta_"));
// add them to the set
meta_keys.forEach((key) => {
all_keys.add(key);
});
}
});
console.log("all_keys", all_keys);
// update the blanks object to include all the keys
all_keys.forEach((key) => {
if (!blanks[key]) {
blanks[key] = "";
}
});
console.log("blanks", blanks);

const blanksList = Object.entries(blanks);

sendStatusMessage({
message: "Assigning metadata to nodes",
});
tree.nodes.forEach((node) => {
const this_metadata = metadata.get(node.name);
Object.assign(node, blanks);
// add blanks for any properties not currently set
blanksList.forEach(([key, value]) => {
if (!node[key]) {
node[key] = value;
}
});

if (this_metadata) {
Object.assign(node, this_metadata);
}
if (tree.extra_metadata) {
const node_extra = tree.extra_metadata[node.name];
if (node_extra) {
Object.assign(node, node_extra);
}
}
});

return tree;
Expand Down

1 comment on commit bdbe5e6

@vercel
Copy link

@vercel vercel bot commented on bdbe5e6 Jul 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.