Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find common code #873

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions tests/collect-objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import os
import sys
import subprocess
import difflib
from pprint import pprint

# Enable to additionally print a warning for every symbol that is defined in
# more than one object file; typically these are just multi-config
# (e.g., ref+avx2) duplicates.
WARN = False

# Expected unresolved symbols (C library, compiler runtime, OpenSSL, ...).
# The reporting loop compares each entry with str.startswith, so an entry
# such as "pthread" or "EVP_" covers every symbol beginning with that text.
# NOTE: "fput" is listed twice; the repetition is harmless.
KNOWN_UNKNOWNS = [
    "_GLOBAL_OFFSET_TABLE_",
    "__popcountdi2",
    "__stack_chk_fail",
    "__printf_chk",
    "__fprintf_chk",
    "fput",
    "fopen",
    "fclose",
    "fput",
    "fread",
    "fwrite",
    "read",
    "putchar",
    "puts",
    "stderr",
    "stdout",
    "malloc",
    "calloc",
    "realloc",
    "aligned_alloc",
    "free",
    "memset",
    "memcmp",
    "memmove",
    "memcpy",
    "sqrt",
    "time",
    "strstr",
    "strcmp",
    "strcasecmp",
    "strtol",
    "__strcat_chk",
    "__strcpy_chk",
    "__memcpy_chk",
    "__assert_fail",
    "__explicit_bzero_chk",
    "exit",
    "abort",
    "strftime",
    "localtime",
    "gettimeofday",
    "pthread",
    "EVP_",
    "BN_",
]

def get_symbol(line, idx):
    """Return the token of ``line`` that begins at position ``idx``.

    The token ends just before the next space located after ``idx``. When no
    such space exists, or when the character at ``idx`` is itself a space,
    everything from ``idx`` to the end of the line is returned.
    """
    end = line.find(" ", idx)
    # str.find gives -1 for "no space" and idx for "space right here"; both
    # cases fall through to returning the remainder of the line.
    return line[idx:end] if end > idx else line[idx:]

def get_symbols(filepath):
    """Run ``nm`` on ``filepath`` and split its symbols into known and unknown.

    Every output line of ``nm`` is decoded and stripped. A line containing
    ``"U "`` contributes the following token to the unknown list. A line
    containing ``"T "`` -- or, failing that, ``"R "`` -- contributes the
    following token to the known list.

    Args:
        filepath: Path of the object file handed to ``nm``.

    Returns:
        A ``(known, unknown)`` tuple of lists holding the symbol names in the
        order ``nm`` printed them.

    Raises:
        OSError: If the ``nm`` executable cannot be started.
    """
    known = []
    unknown = []
    # The context manager closes the stdout pipe and waits for nm to finish,
    # so scanning many object files leaks neither descriptors nor zombies.
    with subprocess.Popen(["nm", filepath], stdout=subprocess.PIPE) as proc:
        for raw in proc.stdout:
            entry = raw.decode().strip()

            ui = entry.find("U ")
            if ui >= 0:
                unknown.append(get_symbol(entry, ui + 2))

            # Prefer a "T " marker; only look for "R " when there is none.
            ki = entry.find("T ")
            if ki < 0:
                ki = entry.find("R ")
            if ki >= 0:
                known.append(get_symbol(entry, ki + 2))

    return (known, unknown)

# Walk the directory given on the command line (there is no default; without
# an argument the usage text is printed and the script exits).
if len(sys.argv) > 1:
    rootdir = sys.argv[1]
else:
    print("Usage: %s <objectcode folder to search>." % (sys.argv[0]))
    print(" Will examine all object files for known (T,R) and unknown (U) symbols")
    print(" Outputs unresolved symbols taking common C lib and OpenSSL symbols into account.")
    print(" Also suggests library candidates with more than 5 locations of use.")
    # sys.exit is always available, whereas the bare exit() helper is only
    # injected by the site module.
    sys.exit(-1)


known = {}    # dictionary of known symbols with file containing them
unknown = {}  # dictionary of unknown symbols with list of files containing them
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        # Only look for object files:
        if not file.endswith(".o"):
            continue
        fpath = os.path.join(subdir, file)
        # determine lists of all new known and unknown symbols for this file
        (nk, nu) = get_symbols(fpath)
        # now add new known symbols to global known symbol table
        for s in nk:
            if s == "main":
                # every test/example program defines main; not of interest
                continue
            if WARN and (s in known):
                print("%s already known:" % (s))
                # Show only the parts of the two paths that differ by
                # stripping their shared leading text.
                prefix = os.path.commonprefix([fpath, known[s]])
                if prefix:
                    print(" Diff: ")
                    print(" %s " % (fpath[len(prefix):]))
                    print(" %s " % (known[s][len(prefix):]))
            else:
                known[s] = fpath
        # now add all unknown symbols to global unknown symbol list,
        # remembering every file that references them
        for s in nu:
            unknown.setdefault(s, []).append(fpath)

# finally, output all unknown symbols (i.e., not in known symbol list)
print("known symbols: %d; unknown symbols: %d" % (len(known), len(unknown)))

# str.startswith accepts a tuple, so build the prefix tuple once up front.
known_prefixes = tuple(KNOWN_UNKNOWNS)
for us, users in unknown.items():
    if us.startswith(known_prefixes):
        # expected external symbol (C library, OpenSSL, ...): nothing to report
        continue
    # possible candidates for library functions are those appearing often
    # (and not already in the library)
    if len(users) > 5 and not us.startswith("OQS_"):
        print("Library candidate (unknown in %d locations): %s" % (len(users), us))
    if us not in known:
        print("Symbol not found: %s " % (us))
8 changes: 4 additions & 4 deletions tests/example_kem.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
#include <oqs/oqs.h>

/* Cleaning up memory etc */
void cleanup_stack(uint8_t *secret_key, size_t secret_key_len,
static void cleanup_stack(uint8_t *secret_key, size_t secret_key_len,
uint8_t *shared_secret_e, uint8_t *shared_secret_d,
size_t shared_secret_len);

void cleanup_heap(uint8_t *secret_key, uint8_t *shared_secret_e,
static void cleanup_heap(uint8_t *secret_key, uint8_t *shared_secret_e,
uint8_t *shared_secret_d, uint8_t *public_key,
uint8_t *ciphertext, OQS_KEM *kem);

Expand Down Expand Up @@ -161,15 +161,15 @@ int main(void) {
}
}

void cleanup_stack(uint8_t *secret_key, size_t secret_key_len,
static void cleanup_stack(uint8_t *secret_key, size_t secret_key_len,
uint8_t *shared_secret_e, uint8_t *shared_secret_d,
size_t shared_secret_len) {
OQS_MEM_cleanse(secret_key, secret_key_len);
OQS_MEM_cleanse(shared_secret_e, shared_secret_len);
OQS_MEM_cleanse(shared_secret_d, shared_secret_len);
}

void cleanup_heap(uint8_t *secret_key, uint8_t *shared_secret_e,
static void cleanup_heap(uint8_t *secret_key, uint8_t *shared_secret_e,
uint8_t *shared_secret_d, uint8_t *public_key,
uint8_t *ciphertext, OQS_KEM *kem) {
if (kem != NULL) {
Expand Down
8 changes: 4 additions & 4 deletions tests/example_sig.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#define MESSAGE_LEN 50

/* Cleaning up memory etc */
void cleanup_stack(uint8_t *secret_key, size_t secret_key_len);
static void cleanup_stack(uint8_t *secret_key, size_t secret_key_len);

void cleanup_heap(uint8_t *public_key, uint8_t *secret_key,
static void cleanup_heap(uint8_t *public_key, uint8_t *secret_key,
uint8_t *message, uint8_t *signature,
OQS_SIG *sig);

Expand Down Expand Up @@ -152,11 +152,11 @@ int main(void) {
}
}

void cleanup_stack(uint8_t *secret_key, size_t secret_key_len) {
static void cleanup_stack(uint8_t *secret_key, size_t secret_key_len) {
OQS_MEM_cleanse(secret_key, secret_key_len);
}

void cleanup_heap(uint8_t *public_key, uint8_t *secret_key,
static void cleanup_heap(uint8_t *public_key, uint8_t *secret_key,
uint8_t *message, uint8_t *signature,
OQS_SIG *sig) {
if (sig != NULL) {
Expand Down
28 changes: 28 additions & 0 deletions tests/find_common_code.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: MIT

# Purpose of this script is to find common code dependencies of all algorithms
# as per https://github.com/open-quantum-safe/liboqs/issues/849
#
# Must be run from the top-level source directory after liboqs has been built
# into ./build, because all paths below are relative.

if [ ! -d build/src/common ]; then
    echo "Precondition for script: liboqs must have been built. Exiting."
    exit 1
fi

# Chosen approach:
# step 1: Determine all symbols exposed by common code and store them in COMMON_CODE_FILE.
# A unique temporary file is used so that no file in the working directory is
# overwritten, and the trap removes it even when the script is interrupted.
COMMON_CODE_FILE=$(mktemp "${TMPDIR:-/tmp}/common_code.XXXXXX") || exit 1
trap 'rm -f "${COMMON_CODE_FILE}"' EXIT

find build/src/common -name "*.o" -exec nm {} \; | grep " T " | awk '{ print $3 }' > "${COMMON_CODE_FILE}"

# step 2 (conservative, possibly prune more than only OQS_API) also collect all #defines in common source code:
find src/common -name "*.h" -exec grep '#define' {} \; | awk '{print $2}' | sed 's/(.*$//' | grep -v OQS_API | grep -v ALIGN | grep -v UNUSED >> "${COMMON_CODE_FILE}"

# step 3: Determine which of these symbols are referenced by which QSC algorithm code

find src/kem -maxdepth 1 -mindepth 1 -type d -exec tests/grep_symbols.sh "${COMMON_CODE_FILE}" {} \;
find src/sig -maxdepth 1 -mindepth 1 -type d -exec tests/grep_symbols.sh "${COMMON_CODE_FILE}" {} \;

# clean up: handled by the EXIT trap installed above

24 changes: 24 additions & 0 deletions tests/grep_symbols.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: MIT

# Purpose of script: Output if a given symbol in symbols file
# (parameter 1) is found in files in a given directory (parameter 2).
# The symbols file holds one symbol per line; every symbol that occurs
# anywhere below the directory is echoed under a "Found in" header.

if [ $# -ne 2 ]; then
    echo "Usage: $0 <symbols file> <directory to check>"
    exit 1
fi

# Quote the directory so that a path containing spaces is tested as one word.
if [ ! -d "$2" ]; then
    echo "Usage: $0 <symbols file> <directory to check>"
    exit 1
fi

echo "Found in $2:"
# The "|| [ -n ... ]" part also processes a final line lacking a newline.
while IFS= read -r line || [ -n "$line" ]
do
    # An empty pattern would match every file; skip blank entries.
    if [ -z "$line" ]; then
        continue
    fi
    # -r: recurse, -q: stop at the first hit, -F: take the symbol literally
    # (no regex metacharacters), --: protect against a leading dash.
    if grep -rqF -- "$line" "$2"; then
        echo " $line"
    fi
done < "$1"
2 changes: 1 addition & 1 deletion tests/test_kem.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ struct thread_data {
OQS_STATUS rc;
};

void *test_wrapper(void *arg) {
static void *test_wrapper(void *arg) {
struct thread_data *td = arg;
td->rc = kem_test_correctness(td->alg_name);
return NULL;
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sig.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ struct thread_data {
OQS_STATUS rc;
};

void *test_wrapper(void *arg) {
static void *test_wrapper(void *arg) {
struct thread_data *td = arg;
td->rc = sig_test_correctness(td->alg_name);
return NULL;
Expand Down