LLVM verification #356
base: main
Changes from all commits
@@ -0,0 +1,328 @@
import os
import json
import re
import sys
from collections import defaultdict
import yaml


REPO_INSTRUCTIONS = {}
REPO_DIRECTORY = None


def safe_get(data, key, default=""):
    """Safely get a value from a dictionary, return default if not found or error."""
    try:
        if isinstance(data, dict):
            return data.get(key, default)
        return default
    except Exception:
        return default


def load_yaml_encoding(instr_name):
    """
    Given an instruction name (from JSON), find the corresponding YAML file and load its encoding data.
    We'll try to match the instr_name to a YAML file by using REPO_INSTRUCTIONS and transformations.
    """
    candidates = set()
    lower_name = instr_name.lower()
    candidates.add(lower_name)
    candidates.add(lower_name.replace('_', '.'))

    yaml_file_path = None
    yaml_category = None
    for cand in candidates:
        if cand in REPO_INSTRUCTIONS:
            yaml_category = REPO_INSTRUCTIONS[cand]
            yaml_file_path = os.path.join(REPO_DIRECTORY, yaml_category, cand + ".yaml")
            if os.path.isfile(yaml_file_path):
                break
            else:
                yaml_file_path = None

    if not yaml_file_path or not os.path.isfile(yaml_file_path):
        # YAML not found
        return None, None

    # Load the YAML file
    with open(yaml_file_path, 'r') as yf:
        ydata = yaml.safe_load(yf)

    encoding = safe_get(ydata, 'encoding', {})
    yaml_match = safe_get(encoding, 'match', None)
    yaml_vars = safe_get(encoding, 'variables', [])

    return yaml_match, yaml_vars


def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
    """
    Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
    Return a list of differences.
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    yaml_pattern_str = yaml_match.replace('-', '.')
    if len(yaml_pattern_str) != 32:
        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]

    def parse_location(loc_str):
        high, low = loc_str.split('-')
        return int(high), int(low)

    yaml_var_positions = {}
    for var in yaml_vars:
        high, low = parse_location(var["location"])
        yaml_var_positions[var["name"]] = (high, low)

    # Tokenize JSON encoding
    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)
    json_bits = []
    bit_index = 31
    for t in tokens:
        json_bits.append((bit_index, t))
        bit_index -= 1

    if bit_index != -1:
        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]

    differences = []

    # Check fixed bits
    for b in range(32):
        yaml_bit = yaml_pattern_str[31 - b]
        token = [tt for (pos, tt) in json_bits if pos == b]
        if not token:
            differences.append(f"Bit {b}: No corresponding JSON bit found.")
            continue
        json_bit_str = token[0]

        if yaml_bit in ['0', '1']:
            if json_bit_str not in ['0', '1']:
                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
            elif json_bit_str != yaml_bit:
                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
        else:
            # Variable bit in YAML
            if json_bit_str in ['0', '1']:
                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")

    # Check variable fields
    for var_name, (high, low) in yaml_var_positions.items():
        json_var_fields = []
        for bb in range(low, high+1):
            token = [tt for (pos, tt) in json_bits if pos == bb]
            if token:
                json_var_fields.append(token[0])
            else:
                json_var_fields.append('?')

        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
        if len(field_names) == 0:
            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
        elif len(field_names) > 1:
            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")

    return differences


def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Print formatted instruction details and compare YAML/JSON encodings."""
    try:
        # Print the instruction details without separating by category
        output_stream.write(f"\n{name} Instruction Details\n")
        output_stream.write("=" * 50 + "\n")

        # Basic Information
        output_stream.write("\nBasic Information:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Name: {name}\n")
        output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        # Location
        locs = safe_get(data, '!locs', [])
        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
        output_stream.write(f"Location: {loc}\n")

        # Operands
        output_stream.write("\nOperands:\n")
        output_stream.write("-" * 20 + "\n")
        try:
            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Inputs: {in_ops}\n")
        except Exception:
            output_stream.write("Inputs: N/A\n")

        try:
            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Outputs: {out_ops}\n")
        except Exception:
            output_stream.write("Outputs: N/A\n")

        # Instruction Properties
        output_stream.write("\nInstruction Properties:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
        output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
        output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
        output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")

Comment on lines +165 to +168

At some point we probably need a good discussion about these and how we verify them. Broadly:

Do you think those should be explained in the UDB as well?

I think this probably needs a longer discussion on how denormalised this information should be. i.e., the information is there if you statically analyse the pseudocode (which should absolutely be something we should be able to do with the pseudocode), but we probably don't want that to be the only way to find out this sort of thing, as the pseudocode operations might be a large amount of code. I don't think you should be exactly matching LLVM's internal representation, but I do think there is the opportunity to denormalise more information that might be generally useful for this kind of tool.

I agree that this needs a more appropriate discussion. I think @dhower-qc has been working on instruction representations lately, so he probably has also been thinking about this.

I don't have time for this discussion before mid-January. Let's try to get the things that are already in the YAML done first, i.e.:
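
For reference, a minimal sketch of what such a static analysis could look like. It assumes the UDB YAML carries the pseudocode under an "operation()" key and that memory accesses show up as read_memory/write_memory calls; both names are assumptions here, not the confirmed IDL API:

    import re
    import yaml

    # Hypothetical helper: derive mayLoad/mayStore-style flags by scanning the
    # instruction's pseudocode text. 'operation()', 'read_memory' and
    # 'write_memory' are assumed names, not the verified UDB/IDL spelling.
    def derive_memory_flags(yaml_path):
        with open(yaml_path) as f:
            ydata = yaml.safe_load(f) or {}
        pseudocode = ydata.get("operation()", "") or ""
        may_load = re.search(r'\bread_memory\b', pseudocode) is not None
        may_store = re.search(r'\bwrite_memory\b', pseudocode) is not None
        return may_load, may_store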

        # Scheduling Info
        sched = safe_get(data, 'SchedRW', [])
        if sched:
            output_stream.write("\nScheduling Information:\n")
            output_stream.write("-" * 20 + "\n")
            output_stream.write("Operations:\n")
            try:
                for op in sched:
                    if isinstance(op, dict):
                        output_stream.write(f" - {op.get('printable', 'N/A')}\n")
            except Exception:
                output_stream.write(" - Unable to parse scheduling information\n")

        # Encoding
        output_stream.write("\nEncoding Pattern:\n")
        output_stream.write("-" * 20 + "\n")
        encoding_bits = []
        try:
            inst = safe_get(data, 'Inst', [])
            for bit in inst:
                if isinstance(bit, dict):
                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
                else:
                    encoding_bits.append(str(bit))
            # Reverse the bit order before joining
            encoding_bits.reverse()

            encoding = "".join(encoding_bits)
            output_stream.write(f"Binary Format: {encoding}\n")
        except Exception:
            output_stream.write("Binary Format: Unable to parse encoding\n")
            encoding = ""

        # Now compare YAML vs JSON encodings
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None and encoding:
            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
            if differences:
                output_stream.write("\nDifferences in encoding:\n")
                for d in differences:
                    output_stream.write(f" - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
            else:
                output_stream.write("\nNo encoding differences found.\n")
        else:
            # If we have no YAML match or no encoding, we note that we can't compare
            if yaml_match is None:
                output_stream.write("\nNo YAML encoding match found for comparison.\n")
            if not encoding:
                output_stream.write("\nNo JSON encoding found for comparison.\n")

        output_stream.write("\n")
    except Exception as e:
        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
        output_stream.write("Continuing with next instruction...\n\n")


def get_repo_instructions(repo_directory):
    """
    Recursively find all YAML files in the repository and extract instruction names along with their category.
    """
    repo_instructions = {}
    for root, _, files in os.walk(repo_directory):
        rel_path = os.path.relpath(root, repo_directory)
        if rel_path == '.':
            category = "Other"
        else:
            parts = rel_path.split(os.sep)
            category = parts[0] if parts else "Other"

        for file in files:
            if file.endswith(".yaml"):
                instr_name = os.path.splitext(file)[0]
                # Store lowercase key for easy lookup
                repo_instructions[instr_name.lower()] = category
    return repo_instructions


def find_json_key(instr_name, json_data):
    """
    Attempt to find a matching key in json_data for instr_name, considering different
    naming conventions: replacing '.' with '_', and trying various case transformations.
    """

Comment on lines +246 to +249

I really suggest using the name from

ok, I'll change that, thanks for the suggestion!
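
One possible direction for that change, sketched under the assumption that each UDB instruction YAML carries a top-level name: key that can serve as the lookup key instead of the file name (the key name is an assumption):

    import os
    import yaml

    # Sketch: index instructions by the YAML's own 'name:' field rather than
    # deriving the key from the file name. The 'name' key is an assumption
    # about the UDB schema here.
    def get_repo_instructions_by_name(repo_directory):
        repo_instructions = {}
        for root, _, files in os.walk(repo_directory):
            for file in files:
                if not file.endswith(".yaml"):
                    continue
                path = os.path.join(root, file)
                with open(path) as f:
                    ydata = yaml.safe_load(f) or {}
                name = ydata.get("name")
                if name:
                    repo_instructions[str(name).lower()] = os.path.relpath(root, repo_directory)
        return repo_instructions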
    lower_name = instr_name.lower()
    lower_name_underscore = lower_name.replace('.', '_')
    variants = {
        lower_name,
        lower_name_underscore,
        instr_name.upper(),
        instr_name.replace('.', '_').upper(),
        instr_name.capitalize(),
        instr_name.replace('.', '_').capitalize()
    }

    for v in variants:
        if v in json_data:
            return v
    return None


def main():
    global REPO_INSTRUCTIONS, REPO_DIRECTORY

    if len(sys.argv) != 3:
        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
        sys.exit(1)

    json_file = sys.argv[1]
    REPO_DIRECTORY = sys.argv[2]

    # Get instructions and categories from the repository structure
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        sys.exit(1)

    try:
        # Read and parse JSON
        with open(json_file, 'r') as f:
            data = json.loads(f.read())
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        sys.exit(1)

    all_instructions = []

    # For each YAML instruction, try to find it in the JSON data
    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
        json_key = find_json_key(yaml_instr_name, data)
        if json_key is None:
            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
            continue

        instr_data = data.get(json_key)
        if not isinstance(instr_data, dict):
            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
            continue

        # Add this instruction to our list
        all_instructions.append((json_key, instr_data))

    # Sort all instructions by name
    all_instructions.sort(key=lambda x: x[0].lower())

    with open("output.txt", "w") as outfile:
        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        total = len(all_instructions)
        outfile.write(f"\nTotal Instructions Found: {total}\n")
        for name, _ in all_instructions:
            outfile.write(f" - {name}\n")

        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        # Print details for each instruction directly, no category splitting
        for name, instr_data in all_instructions:
            safe_print_instruction_details(name, instr_data, outfile)

    print("Output has been written to output.txt")


if __name__ == '__main__':
    main()
If an instruction doesn't have a Size, it cannot be encoded, so it's likely not of interest to checking the encoding.
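
A small sketch of how that filter might look before the encoding comparison, assuming (which is only an assumption about the TableGen JSON dump) that pseudo or unencoded records have a missing or zero Size field:

    # Sketch: skip records that cannot be encoded before attempting the
    # YAML/JSON encoding comparison. Treating a missing or zero 'Size' as
    # "not encodable" is an assumption about the TableGen JSON dump.
    def has_encoding(instr_data):
        size = instr_data.get("Size", 0)
        return isinstance(size, int) and size > 0

    # e.g. in main(), before appending to all_instructions:
    #     if not has_encoding(instr_data):
    #         continue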