Skip to content

Commit

Permalink
Fix data in fields where HTML characters were double escaped
Browse files Browse the repository at this point in the history
  • Loading branch information
ridz1208 committed Mar 13, 2020
1 parent 109a515 commit 639bfdc
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 4 deletions.
8 changes: 4 additions & 4 deletions php/libraries/LorisForm.class.inc
Original file line number Diff line number Diff line change
Expand Up @@ -693,10 +693,10 @@ class LorisForm
}
}
}
// Always sanitize user-controlled input
if (!is_array($newValue)) {
$newValue = htmlspecialchars($newValue);
}
// // Always sanitize user-controlled input
// if (!is_array($newValue)) {
// $newValue = htmlspecialchars($newValue);
// }

return $newValue;
}
Expand Down
165 changes: 165 additions & 0 deletions tools/single_use/fix_double_escape.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php
/**
 * This tool scans the data of every registered instrument for field values in
 * which HTML special characters were escaped more than once (for example
 * `&amp;lt;` stored where `<` was entered) and restores the original
 * characters `<`, `>`, `"` and `&`.
 *
 * When run without arguments, the tool only reports the values that would be
 * modified. Run it with the `confirm` argument to save the corrected values,
 * or with `help` / `-h` to display the usage information.
 *
 * Note: every message printed by the tool is also written to a timestamped
 * log file in the tools/logs directory.
 *
 * @package behavioural
 */

require_once __DIR__."/../generic_includes.php";

// LOGGING
// Each run appends to its own timestamped file in the tools/logs directory.
$dir = __DIR__ . "/../logs";
if (!is_dir($dir)) {
    mkdir($dir);
}
// date() is used instead of strftime(), which is deprecated as of PHP 8.1;
// the timestamp format is unchanged.
$date    = date("Y-m-d_H:i");
$logPath = "$dir/fix_double_escaped_fields_$date.log";
$logfp   = fopen($logPath, 'a');

if (!$logfp) {
    printError(
        "No logs can be generated, path:$logPath ".
        "does not exist or can not be written to.\n"
    );
}

// COMMAND LINE ARGUMENTS
$helpRequested = (isset($argv[1]) && $argv[1] === 'help')
    || in_array('-h', $argv, true);
if ($helpRequested) {
    showHelp();
}
// Without the `confirm` argument the tool only reports what it would change.
$confirm = isset($argv[1]) && $argv[1] === 'confirm';

// Each expression below uniquely matches the over-escaped form of one of the
// targeted characters. preg_replace() applies the patterns in array order,
// each one working on the result of the previous one.
$doubleEscapePatterns = array(
    // < : match any substring starting with `&`
    // followed by 1 or more `amp;` and ending with `lt;`
    '/&(amp;)+lt;/',
    // > : match any substring starting with `&`
    // followed by 1 or more `amp;` and ending with `gt;`
    '/&(amp;)+gt;/',
    // " : match any substring starting with `&`
    // followed by 1 or more `amp;` and ending with `quot;`
    '/&(amp;)+quot;/',
    // & : match any substring starting with `&`
    // followed by 2 or more `amp;` (because 1 is normal in the database
    // since it is the escaped form of `&`) and
    // NOT ending with `lt;` or `gt;` or `quot;` or `amp;`
    // (the last one is to ensure we don't match subsequences from the
    // cases above).
    '/&(amp;){2,}(?!(lt;|gt;|quot;|amp;))/',
);
// Replacement characters, in the same order as the patterns above.
$originalCharacters = array('<', '>', '"', '&');

$instrumentNames = $DB->pselectCol("SELECT Test_name FROM test_names", array());
$errorsDetected  = false;

foreach ($instrumentNames as $instrumentName) {
    printOut("Checking $instrumentName");
    try {
        // Instantiated only to verify that the instrument can be loaded;
        // the resulting object itself is not needed.
        \NDB_BVL_Instrument::factory($instrumentName);
    } catch (Exception $e) {
        printError(
            "There was an error instantiating instrument $instrumentName.\n".
            "This instrument will be skipped."
        );
        continue;
    }

    // All the CommentIDs registered for this instrument
    $instrumentCIDs = $DB->pselectCol(
        "SELECT CommentID FROM flag WHERE Test_name=:tn",
        array("tn" => $instrumentName)
    );
    foreach ($instrumentCIDs as $cid) {
        // A record that can not be loaded must not abort the whole run,
        // otherwise every record after it would never be checked.
        try {
            $instrumentInstance = \NDB_BVL_Instrument::factory(
                $instrumentName,
                $cid
            );
            $instrumentData     = \NDB_BVL_Instrument::loadInstanceData(
                $instrumentInstance
            );
        } catch (Exception $e) {
            printError(
                "There was an error loading CommentID $cid of instrument ".
                "$instrumentName (" . $e->getMessage() . "). ".
                "This record will be skipped."
            );
            continue;
        }

        $set = array();
        foreach ($instrumentData as $field => $value) {
            // Only non-empty strings can hold escaped characters. Skipping
            // everything else avoids handing null to preg_replace() and
            // avoids flagging a non-string value as modified merely because
            // preg_replace() returned its string conversion.
            if (!is_string($value) || $value === '') {
                continue;
            }

            $newValue = preg_replace(
                $doubleEscapePatterns,
                $originalCharacters,
                $value
            );
            // preg_replace() returns null on failure; an identical result
            // means the value contained nothing to fix.
            if (!is_string($newValue) || $newValue === $value) {
                continue;
            }

            printOut(
                "CommentID: $cid - Value at $field will be modified. ".
                "\n\tCurrent Value: $value".
                "\n\tWill be replaced by: $newValue\n"
            );

            $set[$field]    = $newValue;
            $errorsDetected = true;
        }
        if (!empty($set) && $confirm) {
            $instrumentInstance->_save($set);
        }
    }
}

if (!$confirm && $errorsDetected) {
    printOut("\nRun tool again with `confirm` argument to apply changes");
} else {
    printOut("End");
}
// The handle is false when the log file could not be opened.
if ($logfp) {
    fclose($logfp);
}

/**
 * Writes a timestamped message to the log file opened by this script.
 *
 * When the log file could not be opened (the global $logfp is falsy), the
 * message is printed instead and nothing is written to a file.
 *
 * @param string $message The text to record
 *
 * @return void
 */
function logMessage($message)
{
    global $logfp;
    if (!$logfp) {
        // The log file could not be opened: print the message and stop here.
        // Falling through would hand an invalid handle to fwrite(), which
        // raises a TypeError as of PHP 8.
        print_r($message);
        return;
    }
    // date() is used instead of strftime(), which is deprecated as of
    // PHP 8.1; the timestamp format is unchanged.
    $now_string = date("Y-m-d H:i:s");
    fwrite($logfp, "[$now_string] $message\n");
}

/**
 * Reports an error: the message is first handed to logMessage() and then
 * written to STDERR, followed by a space and a newline.
 *
 * @param string $message The error text
 *
 * @return void
 */
function printError($message)
{
    logMessage($message);

    $line = $message . " \n";
    fputs(STDERR, $line);
}

/**
 * Reports progress: the message is first handed to logMessage() and then
 * printed to STDOUT, followed by a newline.
 *
 * @param string $message The text to display
 *
 * @return void
 */
function printOut($message)
{
    logMessage($message);

    echo $message . "\n";
}

/**
 * Displays the usage information of the tool and terminates the script.
 *
 * @return void
 */
function showHelp()
{
    // One entry per output line; the final entry adds the trailing blank
    // lines once the entries are joined with newlines.
    $usage = array(
        "Usage:",
        "fix_double_escape.php [help | -h] -> displays this message",
        "fix_double_escape.php -> runs tool without making any changes",
        "fix_double_escape.php confirm -> runs tool and rectifies erroneous data",
        "\n\n",
    );

    echo "\n\n*** Fix Double Escaped Fields ***\n\n";
    echo implode("\n", $usage);

    exit();
}

0 comments on commit 639bfdc

Please sign in to comment.