Skip to content

Commit

Permalink
feat: Add support for char and string (#125)
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasgrewal16 authored Oct 28, 2024
1 parent 97fb87c commit 7b8f52d
Show file tree
Hide file tree
Showing 6 changed files with 337 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/astx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
LiteralUInt32,
LiteralUInt64,
LiteralUInt128,
LiteralUTF8Char,
LiteralUTF8String,
Number,
SignedInteger,
UInt8,
Expand All @@ -79,6 +81,8 @@
UInt64,
UInt128,
UnsignedInteger,
UTF8Char,
UTF8String,
)
from astx.flows import (
ForCountLoop,
Expand Down Expand Up @@ -217,6 +221,10 @@ def get_version() -> str:
"LiteralComplex",
"LiteralComplex32",
"LiteralComplex64",
"LiteralUTF8Char",
"LiteralUTF8String",
"UTF8Char",
"UTF8String",
]


Expand Down
2 changes: 2 additions & 0 deletions src/astx/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class ASTKind(Enum):
Time64DTKind = -619
Decimal128DTKind = -620
Decimal256DTKind = -621
UTF8CharDTKind = -622
UTF8StringDTKind = -623

# imports
ImportStmtKind = -700
Expand Down
104 changes: 104 additions & 0 deletions src/astx/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,107 @@ def __init__(
"""Initialize LiteralComplex64."""
super().__init__(Complex64(real, imag), loc)
self.type_ = Complex64


@public
@typechecked
class UTF8String(DataTypeOps):
    """Data type node that carries a UTF-8 encodable string value."""

    def __init__(
        self, value: str, loc: SourceLocation = NO_SOURCE_LOCATION
    ) -> None:
        """Validate ``value`` and store it together with ``loc``.

        Raises ``TypeError`` when ``value`` is not a ``str`` and lets the
        ``UnicodeEncodeError`` from ``str.encode`` propagate when the text
        cannot be encoded as UTF-8.
        """
        if not isinstance(value, str):
            raise TypeError("Expected a valid UTF-8 string.")
        # Only the side effect matters here: the encoded bytes are thrown
        # away, the call exists to raise on text that is not encodable.
        value.encode("utf-8")
        super().__init__()
        self.kind = ASTKind.UTF8StringDTKind
        self.loc = loc
        self.value = value

    def __str__(self) -> str:
        """Return the class name followed by the stored value."""
        text = self.value
        return f"UTF8String({text})"

    def get_struct(self, simplified: bool = False) -> ReprStruct:
        """Build the representation structure via ``_prepare_struct``."""
        return self._prepare_struct("UTF8String", self.value, simplified)


@public
@typechecked
class UTF8Char(DataTypeOps):
    """Data type node that carries a single UTF-8 encodable character."""

    def __init__(
        self, value: str, loc: SourceLocation = NO_SOURCE_LOCATION
    ) -> None:
        """Validate ``value`` and store it together with ``loc``.

        Raises ``ValueError`` when ``value`` is not exactly one character
        long and lets the ``UnicodeEncodeError`` from ``str.encode``
        propagate when the character cannot be encoded as UTF-8.
        """
        if len(value) != 1:
            raise ValueError("Expected a single UTF-8 character.")
        # Only the side effect matters here: the encoded bytes are thrown
        # away, the call exists to raise on text that is not encodable.
        value.encode("utf-8")
        super().__init__()
        self.kind = ASTKind.UTF8CharDTKind
        self.loc = loc
        self.value = value

    def __str__(self) -> str:
        """Return the class name followed by the stored character."""
        char = self.value
        return f"UTF8Char({char})"

    def get_struct(self, simplified: bool = False) -> ReprStruct:
        """Build the representation structure via ``_prepare_struct``."""
        return self._prepare_struct("UTF8Char", self.value, simplified)


@public
@typechecked
class LiteralUTF8String(Literal):
    """Literal node whose value is a UTF-8 encodable string."""

    def __init__(
        self, value: str, loc: SourceLocation = NO_SOURCE_LOCATION
    ) -> None:
        """Initialise the literal with ``value`` and tag it as UTF8String.

        Lets the ``UnicodeEncodeError`` from ``str.encode`` propagate when
        ``value`` cannot be encoded as UTF-8.
        """
        super().__init__(loc)
        # The encoded bytes are discarded; the call is a validity check.
        value.encode("utf-8")
        self.type_ = UTF8String
        self.loc = loc
        self.value = value

    def __str__(self) -> str:
        """Return the class name followed by the literal value."""
        text = self.value
        return f"LiteralUTF8String({text})"

    def get_struct(self, simplified: bool = False) -> ReprStruct:
        """Build the representation structure via ``_prepare_struct``."""
        label = f"LiteralUTF8String: {self.value}"
        return self._prepare_struct(label, self.value, simplified)


@public
@typechecked
class LiteralUTF8Char(Literal):
    """Literal node whose value is exactly one UTF-8 encodable character."""

    def __init__(
        self, value: str, loc: SourceLocation = NO_SOURCE_LOCATION
    ) -> None:
        """Validate ``value`` and initialise the character literal.

        Parameters
        ----------
        value
            The character held by the literal; must have length 1.
        loc
            Source location forwarded to the ``Literal`` initialiser.

        Raises
        ------
        ValueError
            If ``value`` is not exactly one character long.
        UnicodeEncodeError
            If ``value`` cannot be encoded as UTF-8.
        """
        # Enforce the same single-character rule as the UTF8Char data type;
        # without it a multi-character string would be accepted as a char.
        if len(value) != 1:
            raise ValueError("Expected a single UTF-8 character.")
        super().__init__(loc)
        # The encoded bytes are discarded; the call is a validity check.
        value.encode("utf-8")
        self.value = value
        self.type_ = UTF8Char
        self.loc = loc

    def __str__(self) -> str:
        """Return the class name followed by the literal character."""
        return f"LiteralUTF8Char({self.value})"

    def get_struct(self, simplified: bool = False) -> ReprStruct:
        """Build the representation structure via ``_prepare_struct``."""
        key = f"LiteralUTF8Char: {self.value}"
        value = self.value
        return self._prepare_struct(key, value, simplified)
20 changes: 20 additions & 0 deletions src/astx/transpilers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,23 @@ def visit(self, node: astx.LiteralComplex64) -> str:
real = node.value.real
imag = node.value.imag
return f"complex({real}, {imag})"

@dispatch  # type: ignore[no-redef]
def visit(self, node: astx.UTF8String) -> str:
    """Emit the ``repr`` of a UTF8String node's value."""
    text = node.value
    return repr(text)

@dispatch  # type: ignore[no-redef]
def visit(self, node: astx.UTF8Char) -> str:
    """Emit the ``repr`` of a UTF8Char node's value."""
    char = node.value
    return repr(char)

@dispatch  # type: ignore[no-redef]
def visit(self, node: astx.LiteralUTF8String) -> str:
    """Emit the ``repr`` of a LiteralUTF8String node's value."""
    text = node.value
    return repr(text)

@dispatch  # type: ignore[no-redef]
def visit(self, node: astx.LiteralUTF8Char) -> str:
    """Emit the ``repr`` of a LiteralUTF8Char node's value."""
    char = node.value
    return repr(char)
135 changes: 135 additions & 0 deletions tests/test_datatype_char_string.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Tests for UTF-8 character and string data types."""

from __future__ import annotations

from typing import Callable, Type

import astx
import pytest

from astx.operators import BinaryOp, UnaryOp
from astx.variables import Variable

# Shared left-hand operand for the operator tests below.
VAR_A = Variable("a")

# Classes exercised by the character tests: the literal node class and the
# plain data type class.
UTF8_CHAR_LITERAL_CLASSES = [astx.LiteralUTF8Char, astx.UTF8Char]

# Classes exercised by the string tests: the literal node class and the
# plain data type class.
UTF8_STRING_LITERAL_CLASSES = [astx.LiteralUTF8String, astx.UTF8String]


def test_variable() -> None:
    """Check that two variables can be combined in a ``+`` BinaryOp."""
    var_a = Variable("a")
    var_b = Variable("b")

    bin_op = BinaryOp(op_code="+", lhs=var_a, rhs=var_b)

    # Without an assertion the test would only catch constructor errors.
    assert bin_op.op_code == "+"


@pytest.mark.parametrize("literal_class", UTF8_CHAR_LITERAL_CLASSES)
def test_utf8_char_literal(literal_class: Type[astx.Literal]) -> None:
    """Check that two character nodes can be combined in a BinaryOp."""
    lit_a = literal_class("A")
    lit_b = literal_class("B")

    bin_op = BinaryOp(op_code="+", lhs=lit_a, rhs=lit_b)

    # Without an assertion the test would only catch constructor errors.
    assert bin_op.op_code == "+"


@pytest.mark.parametrize("literal_class", UTF8_STRING_LITERAL_CLASSES)
def test_utf8_string_literal(literal_class: Type[astx.Literal]) -> None:
    """Check that two string nodes can be combined in a BinaryOp."""
    lit_a = literal_class("Hello")
    lit_b = literal_class("World")

    bin_op = BinaryOp(op_code="+", lhs=lit_a, rhs=lit_b)

    # Without an assertion the test would only catch constructor errors.
    assert bin_op.op_code == "+"


@pytest.mark.parametrize(
    "fn_bin_op,op_code",
    [
        (lambda cls: VAR_A + cls("A"), "+"),
        (lambda cls: VAR_A == cls("A"), "=="),
        (lambda cls: VAR_A != cls("A"), "!="),
    ],
)
@pytest.mark.parametrize("literal_class", UTF8_CHAR_LITERAL_CLASSES)
def test_bin_ops_char(
    literal_class: Type[astx.Literal],
    fn_bin_op: Callable[[Type[astx.Literal]], BinaryOp],
    op_code: str,
) -> None:
    """Apply each binary operator between ``VAR_A`` and a character node.

    The resulting node must report the expected ``op_code`` and produce
    non-empty string, repr and struct representations.
    """
    node = fn_bin_op(literal_class)
    assert node.op_code == op_code
    assert str(node) != ""
    assert repr(node) != ""
    assert node.get_struct() != {}
    assert node.get_struct(simplified=True) != {}


@pytest.mark.parametrize(
    "fn_bin_op,op_code",
    [
        (lambda cls: VAR_A + cls("Hello"), "+"),
        (lambda cls: VAR_A == cls("Hello"), "=="),
        (lambda cls: VAR_A != cls("Hello"), "!="),
    ],
)
@pytest.mark.parametrize("literal_class", UTF8_STRING_LITERAL_CLASSES)
def test_bin_ops_string(
    literal_class: Type[astx.Literal],
    fn_bin_op: Callable[[Type[astx.Literal]], BinaryOp],
    op_code: str,
) -> None:
    """Apply each binary operator between ``VAR_A`` and a string node.

    The resulting node must report the expected ``op_code`` and produce
    non-empty string, repr and struct representations.
    """
    node = fn_bin_op(literal_class)
    assert node.op_code == op_code
    assert str(node) != ""
    assert repr(node) != ""
    assert node.get_struct() != {}
    assert node.get_struct(simplified=True) != {}


@pytest.mark.parametrize(
    "fn_unary_op,op_code",
    [
        (lambda cls: +cls("A"), "+"),
    ],
)
@pytest.mark.parametrize("literal_class", UTF8_CHAR_LITERAL_CLASSES)
def test_unary_ops_char(
    literal_class: Type[astx.Literal],
    fn_unary_op: Callable[[Type[astx.Literal]], UnaryOp],
    op_code: str,
) -> None:
    """Apply each unary operator to a character node.

    The resulting node must report the expected ``op_code`` and produce
    non-empty string, repr and struct representations.
    """
    node = fn_unary_op(literal_class)
    assert node.op_code == op_code
    assert str(node) != ""
    assert repr(node) != ""
    assert node.get_struct() != {}
    assert node.get_struct(simplified=True) != {}


@pytest.mark.parametrize(
    "fn_unary_op,op_code",
    [
        (lambda cls: +cls("Hello"), "+"),
    ],
)
@pytest.mark.parametrize("literal_class", UTF8_STRING_LITERAL_CLASSES)
def test_unary_ops_string(
    literal_class: Type[astx.Literal],
    fn_unary_op: Callable[[Type[astx.Literal]], UnaryOp],
    op_code: str,
) -> None:
    """Apply each unary operator to a string node.

    The resulting node must report the expected ``op_code`` and produce
    non-empty string, repr and struct representations.
    """
    node = fn_unary_op(literal_class)
    assert node.op_code == op_code
    assert str(node) != ""
    assert repr(node) != ""
    assert node.get_struct() != {}
    assert node.get_struct(simplified=True) != {}
68 changes: 68 additions & 0 deletions tests/transpilers/test_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,71 @@ def test_literal_complex64() -> None:
assert (
generated_code == expected_code
), f"Expected '{expected_code}', but got '{generated_code}'"


def test_transpiler_utf8_char() -> None:
    """Transpile an astx.UTF8Char node and compare with ``repr("c")``."""
    expected_code = repr("c")

    # Build the node and run it through the Python transpiler.
    node = astx.UTF8Char(value="c")
    transpiler = astx2py.ASTxPythonTranspiler()
    generated_code = transpiler.visit(node)

    assert (
        generated_code == expected_code
    ), f"Expected '{expected_code}', but got '{generated_code}'"


def test_transpiler_utf8_string() -> None:
    """Transpile an astx.UTF8String node and compare with ``repr("hello")``."""
    expected_code = repr("hello")

    # Build the node and run it through the Python transpiler.
    node = astx.UTF8String(value="hello")
    transpiler = astx2py.ASTxPythonTranspiler()
    generated_code = transpiler.visit(node)

    assert (
        generated_code == expected_code
    ), f"Expected '{expected_code}', but got '{generated_code}'"


def test_transpiler_literal_utf8_char() -> None:
    """Transpile an astx.LiteralUTF8Char node and compare with ``repr("a")``."""
    expected_code = repr("a")

    # Build the node and run it through the Python transpiler.
    node = astx.LiteralUTF8Char(value="a")
    transpiler = astx2py.ASTxPythonTranspiler()
    generated_code = transpiler.visit(node)

    assert (
        generated_code == expected_code
    ), f"Expected '{expected_code}', but got '{generated_code}'"


def test_transpiler_literal_utf8_string() -> None:
    """Transpile an astx.LiteralUTF8String node; expect ``repr("world")``."""
    expected_code = repr("world")

    # Build the node and run it through the Python transpiler.
    node = astx.LiteralUTF8String(value="world")
    transpiler = astx2py.ASTxPythonTranspiler()
    generated_code = transpiler.visit(node)

    assert (
        generated_code == expected_code
    ), f"Expected '{expected_code}', but got '{generated_code}'"

0 comments on commit 7b8f52d

Please sign in to comment.