2323import inspect
2424from pathlib import Path
2525from typing import Any , Dict , List , Optional
26+ import gzip
2627
2728from pydantic import BaseModel
2829from typing_extensions import Literal
30+ import msgpack
31+
32+
def msgpk(cls):
    """
    Decorator that adds MessagePack serialization methods to Pydantic models.

    The decorated class is modified in place and returned. It is expected to
    provide ``model_dump``, ``model_dump_json`` and ``model_validate``, which
    the added methods call.

    Adds methods:
    - to_msgpack_bytes() -> bytes: Serialize to gzip-compressed MessagePack
    - from_msgpack_bytes(data: bytes) -> cls: Deserialize from that format
    - to_msgpack_dict() -> dict: Convert to msgpack-compatible dict
    - from_msgpack_dict(data: dict) -> cls: Create instance from msgpack dict
    - get_msgpack_size() -> int: Length of to_msgpack_bytes() in bytes
    - get_compression_ratio() -> float: Compressed size / UTF-8 JSON size
    """

    def _prepare_for_serialization(obj: Any) -> Any:
        """Recursively convert ``obj`` to a serialization-friendly structure.

        ``Path`` becomes ``str``; ``set`` and ``frozenset`` become ``list``;
        dicts, lists and tuples are rebuilt with converted contents; objects
        exposing ``model_dump`` are dumped and converted. Anything else is
        returned unchanged.
        """
        if isinstance(obj, Path):
            return str(obj)
        elif isinstance(obj, dict):
            return {
                _prepare_key(k): _prepare_for_serialization(v)
                for k, v in obj.items()
            }
        elif isinstance(obj, list):
            return [_prepare_for_serialization(item) for item in obj]
        elif isinstance(obj, tuple):
            return tuple(_prepare_for_serialization(item) for item in obj)
        elif isinstance(obj, (set, frozenset)):
            # frozenset is not a subclass of set, so it must be named
            # explicitly; otherwise it would pass through unconverted.
            return [_prepare_for_serialization(item) for item in obj]
        elif hasattr(obj, "model_dump"):  # Pydantic model
            return _prepare_for_serialization(obj.model_dump())
        else:
            return obj

    def _prepare_key(key: Any) -> Any:
        """Convert a dict key, keeping the result hashable."""
        prepared = _prepare_for_serialization(key)
        # Only a frozenset key can turn into a list here (lists and sets are
        # never valid dict keys); a tuple keeps it usable as a key.
        if isinstance(prepared, list):
            return tuple(prepared)
        return prepared

    def to_msgpack_bytes(self) -> bytes:
        """Serialize the model to compact binary format using MessagePack + gzip."""
        data = _prepare_for_serialization(self.model_dump())
        msgpack_data = msgpack.packb(data, use_bin_type=True)
        # mtime=0 keeps the wall-clock timestamp out of the gzip header, so
        # the same payload always compresses to the same bytes.
        return gzip.compress(msgpack_data, mtime=0)

    @classmethod
    def from_msgpack_bytes(cls_obj, data: bytes):
        """Deserialize from MessagePack + gzip binary format."""
        decompressed_data = gzip.decompress(data)
        obj_dict = msgpack.unpackb(decompressed_data, raw=False)
        return cls_obj.model_validate(obj_dict)

    def to_msgpack_dict(self) -> dict:
        """Convert to msgpack-compatible dictionary format."""
        return _prepare_for_serialization(self.model_dump())

    @classmethod
    def from_msgpack_dict(cls_obj, data: dict):
        """Create instance from msgpack-compatible dictionary."""
        return cls_obj.model_validate(data)

    def get_msgpack_size(self) -> int:
        """Get the size of the gzip-compressed msgpack serialization in bytes."""
        return len(self.to_msgpack_bytes())

    def get_compression_ratio(self) -> float:
        """Get compressed msgpack size divided by UTF-8 JSON size.

        Returns 1.0 when the JSON encoding is empty.
        """
        json_size = len(self.model_dump_json().encode("utf-8"))
        msgpack_gzip_size = self.get_msgpack_size()
        return msgpack_gzip_size / json_size if json_size > 0 else 1.0

    # Add methods to the class
    cls.to_msgpack_bytes = to_msgpack_bytes
    cls.from_msgpack_bytes = from_msgpack_bytes
    cls.to_msgpack_dict = to_msgpack_dict
    cls.from_msgpack_dict = from_msgpack_dict
    cls.get_msgpack_size = get_msgpack_size
    cls.get_compression_ratio = get_compression_ratio

    return cls
29105
30106
31107def builder (cls ):
@@ -92,26 +168,9 @@ def build(self):
92168
93169
94170@builder
171+ @msgpk
95172class PyImport (BaseModel ):
96- """Represents a Python import statement.
97-
98- Attributes:
99- module (str): The name of the module being imported.
100- name (str): The name of the imported entity (e.g., function, class).
101- alias (Optional[str]): An optional alias for the imported entity.
102- start_line (int): The line number where the import statement starts.
103- end_line (int): The line number where the import statement ends.
104- start_column (int): The starting column of the import statement.
105- end_column (int): The ending column of the import statement.
106-
107- Example:
108- - import numpy as np will be represented as:
109- PyImport(module="numpy", name="np", alias="np", start_line=1, end_line=1, start_column=0, end_column=16)
110- - from math import sqrt will be represented as:
111- PyImport(module="math", name="sqrt", alias=None, start_line=2, end_line=2, start_column=0, end_column=20
112- - from os.path import join as path_join will be represented as:
113- PyImport(module="os.path", name="path_join", alias="join", start_line=3, end_line=3, start_column=0, end_column=30)
114- """
173+ """Represents a Python import statement."""
115174
116175 module : str
117176 name : str
@@ -123,18 +182,9 @@ class PyImport(BaseModel):
123182
124183
125184@builder
185+ @msgpk
126186class PyComment (BaseModel ):
127- """
128- Represents a Python comment.
129-
130- Attributes:
131- content (str): The actual comment string (without the leading '#').
132- start_line (int): The line number where the comment starts.
133- end_line (int): The line number where the comment ends (same as start_line for single-line comments).
134- start_column (int): The starting column of the comment.
135- end_column (int): The ending column of the comment.
136- is_docstring (bool): Whether this comment is actually a docstring (triple-quoted string).
137- """
187+ """Represents a Python comment."""
138188
139189 content : str
140190 start_line : int = - 1
@@ -145,20 +195,9 @@ class PyComment(BaseModel):
145195
146196
147197@builder
198+ @msgpk
148199class PySymbol (BaseModel ):
149- """
150- Represents a symbol used or declared in Python code.
151-
152- Attributes:
153- name (str): The name of the symbol (e.g., 'x', 'self.x', 'os.path').
154- scope (Literal['local', 'nonlocal', 'global', 'class', 'module']): The scope where the symbol is accessed.
155- kind (Literal['variable', 'parameter', 'attribute', 'function', 'class', 'module']): The kind of symbol.
156- type (Optional[str]): Inferred or annotated type, if available.
157- qualified_name (Optional[str]): Fully qualified name (e.g., 'self.x', 'os.path.join').
158- is_builtin (bool): Whether this is a Python builtin.
159- lineno (int): Line number where the symbol is accessed or declared.
160- col_offset (int): Column offset.
161- """
200+ """Represents a symbol used or declared in Python code."""
162201
163202 name : str
164203 scope : Literal ["local" , "nonlocal" , "global" , "class" , "module" ]
@@ -171,11 +210,9 @@ class PySymbol(BaseModel):
171210
172211
173212@builder
213+ @msgpk
174214class PyVariableDeclaration (BaseModel ):
175- """Represents a Python variable declaration.
176-
177- Attributes:
178- """
215+ """Represents a Python variable declaration."""
179216
180217 name : str
181218 type : Optional [str ]
@@ -189,18 +226,9 @@ class PyVariableDeclaration(BaseModel):
189226
190227
191228@builder
229+ @msgpk
192230class PyCallableParameter (BaseModel ):
193- """Represents a parameter of a Python callable (function/method).
194-
195- Attributes:
196- name (str): The name of the parameter.
197- type (str): The type of the parameter.
198- default_value (str): The default value of the parameter, if any.
199- start_line (int): The line number where the parameter is defined.
200- end_line (int): The line number where the parameter definition ends.
201- start_column (int): The column number where the parameter starts.
202- end_column (int): The column number where the parameter ends.
203- """
231+ """Represents a parameter of a Python callable (function/method)."""
204232
205233 name : str
206234 type : Optional [str ] = None
@@ -212,10 +240,9 @@ class PyCallableParameter(BaseModel):
212240
213241
214242@builder
243+ @msgpk
215244class PyCallsite (BaseModel ):
216- """
217- Represents a Python call site (function or method invocation) with contextual metadata.
218- """
245+ """Represents a Python call site (function or method invocation) with contextual metadata."""
219246
220247 method_name : str
221248 receiver_expr : Optional [str ] = None
@@ -231,26 +258,9 @@ class PyCallsite(BaseModel):
231258
232259
233260@builder
261+ @msgpk
234262class PyCallable (BaseModel ):
235- """Represents a Python callable (function/method).
236-
237- Attributes:
238- name (str): The name of the callable.
239- signature (str): The fully qualified name of the callable (e.g., module.function_name).
240- docstring (PyComment): The docstring of the callable.
241- decorators (List[str]): List of decorators applied to the callable.
242- parameters (List[PyCallableParameter]): List of parameters for the callable.
243- return_type (Optional[str]): The type of the return value, if specified.
244- code (str): The actual code of the callable.
245- start_line (int): The line number where the callable is defined.
246- end_line (int): The line number where the callable definition ends.
247- code_start_line (int): The line number where the code block starts.
248- accessed_symbols (List[str]): Symbols accessed within the callable.
249- call_sites (List[str]): Call sites of this callable.
250- is_entrypoint (bool): Whether this callable is an entry point.
251- local_variables (List[PyVariableDeclaration]): Local variables within the callable.
252- cyclomatic_complexity (int): Cyclomatic complexity of the callable.
253- """
263+ """Represents a Python callable (function/method)."""
254264
255265 name : str
256266 path : str
@@ -274,16 +284,9 @@ def __hash__(self) -> int:
274284
275285
276286@builder
287+ @msgpk
277288class PyClassAttribute (BaseModel ):
278- """Represents a Python class attribute.
279-
280- Attributes:
281- name (str): The name of the attribute.
282- type (str): The type of the attribute.
283- docstring (PyComment): The docstring of the attribute.
284- start_line (int): The line number where the attribute is defined.
285- end_line (int): The line number where the attribute definition ends.
286- """
289+ """Represents a Python class attribute."""
287290
288291 name : str
289292 type : Optional [str ] = None
@@ -293,20 +296,9 @@ class PyClassAttribute(BaseModel):
293296
294297
295298@builder
299+ @msgpk
296300class PyClass (BaseModel ):
297- """Represents a Python class.
298-
299- Attributes:
300- name (str): The name of the class.
301- signature (str): The fully qualified name of the class (e.g., module.class_name).
302- docstring (PyComment): The docstring of the class.
303- base_classes (List[str]): List of base class names.
304- methods (Dict[str, PyCallable]): Mapping of method names to their callable representations.
305- attributes (Dict[str, PyClassAttribute]): Mapping of attribute names to their variable declarations.
306- inner_classes (Dict[str, "PyClass"]): Mapping of inner class names to their class representations.
307- start_line (int): The line number where the class definition starts.
308- end_line (int): The line number where the class definition ends.
309- """
301+ """Represents a Python class."""
310302
311303 name : str
312304 signature : str # e.g., module.class_name
@@ -325,18 +317,9 @@ def __hash__(self):
325317
326318
327319@builder
320+ @msgpk
328321class PyModule (BaseModel ):
329- """Represents a Python module.
330-
331- Attributes:
332- file_path (str): The file path of the module.
333- module_name (str): The name of the module (e.g., module.submodule).
334- imports (List[PyImport]): List of import statements in the module.
335- comments (List[PyComment]): List of comments in the module.
336- classes (Dict[str, PyClass]): Mapping of class names to their class representations.
337- functions (Dict[str, PyCallable]): Mapping of function names to their callable representations.
338- variables (List[PyVariableDeclaration]): List of variable declarations in the module.
339- """
322+ """Represents a Python module."""
340323
341324 file_path : str
342325 module_name : str
@@ -348,13 +331,8 @@ class PyModule(BaseModel):
348331
349332
350333@builder
334+ @msgpk
351335class PyApplication (BaseModel ):
352- """Represents a Python application.
353-
354- Attributes:
355- name (str): The name of the application.
356- version (str): The version of the application.
357- description (str): A brief description of the application.
358- """
336+ """Represents a Python application."""
359337
360338 symbol_table : dict [Path , PyModule ]
0 commit comments