All-Hands-AI · ryanhoangt · Feb 14, 2025 · Feb 14, 2025 · Feb 14, 2025 · Feb 14, 2025
diff --git a/openhands_aci/editor/editor.py b/openhands_aci/editor/editor.py
@@ -1,11 +1,12 @@
-import mimetypes
 import os
 import re
 import shutil
 import tempfile
 from pathlib import Path
 from typing import Literal, get_args
 
+from binaryornot.check import is_binary
+
 from openhands_aci.linter import DefaultLinter
 from openhands_aci.utils.shell import run_shell_cmd
 
@@ -461,26 +462,11 @@ def validate_file(self, path: Path) -> None:
                 reason=f'File is too large ({file_size / 1024 / 1024:.1f}MB). Maximum allowed size is {int(max_size / 1024 / 1024)}MB.',
             )
 
-        # Check if file is binary
-        mime_type, _ = mimetypes.guess_type(str(path))
-        if mime_type is None:
-            # If mime_type is None, try to detect if it's binary by reading first chunk
-            try:
-                chunk = open(path, 'rb').read(1024)
-                if b'\0' in chunk:  # Common way to detect binary files
-                    raise FileValidationError(
-                        path=str(path),
-                        reason='File appears to be binary. Only text files can be edited.',
-                    )
-            except Exception as e:
-                raise FileValidationError(
-                    path=str(path), reason=f'Error checking file type: {str(e)}'
-                )
-        elif not mime_type.startswith('text/'):
-            # Known non-text mime type
+        # Check file type
+        if is_binary(str(path)):
             raise FileValidationError(
                 path=str(path),
-                reason=f'File type {mime_type} is not supported. Only text files can be edited.',
+                reason='File appears to be binary. Only text files can be edited.',
             )
 
     def read_file(

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openhands-aci"
-version = "0.2.1"
+version = "0.2.2"
 description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
 authors = ["OpenHands"]
 license = "MIT"
@@ -22,6 +22,7 @@ grep-ast = "0.3.3"
 diskcache = "^5.6.3"
 flake8 = "*"
 whatthepatch = "^1.0.6"
+binaryornot = "^0.4.4"
 
 
 [tool.poetry.group.dev.dependencies]

diff --git a/tests/integration/editor/test_file_validation.py b/tests/integration/editor/test_file_validation.py
@@ -1,34 +1,59 @@
 """Tests for file validation in file editor."""
 
+import os
+from pathlib import Path
+
 from openhands_aci.editor import file_editor
 
 from .conftest import parse_result
 
 
 def test_file_validation(temp_file):
     """Test file validation for various file types."""
+    # Ensure temp_file has .sql suffix
+    temp_file_sql = Path(temp_file).with_suffix('.sql')
+    os.rename(temp_file, temp_file_sql)
+
     # Test binary file
-    with open(temp_file, 'wb') as f:
+    with open(temp_file_sql, 'wb') as f:
         f.write(b'Some text\x00with binary\x00content')
 
     result = file_editor(
         command='view',
-        path=temp_file,
+        path=str(temp_file_sql),
         enable_linting=False,
     )
     result_json = parse_result(result)
     assert 'binary' in result_json['formatted_output_and_error'].lower()
 
     # Test large file
     large_size = 11 * 1024 * 1024  # 11MB
-    with open(temp_file, 'w') as f:
+    with open(temp_file_sql, 'w') as f:
         f.write('x' * large_size)
 
     result = file_editor(
         command='view',
-        path=temp_file,
+        path=str(temp_file_sql),
         enable_linting=False,
     )
     result_json = parse_result(result)
     assert 'too large' in result_json['formatted_output_and_error']
     assert '10MB' in result_json['formatted_output_and_error']
+
+    # Test SQL file
+    sql_content = """
+    SELECT *
+    FROM users
+    WHERE id = 1;
+    """
+    with open(temp_file_sql, 'w') as f:
+        f.write(sql_content)
+
+    result = file_editor(
+        command='view',
+        path=str(temp_file_sql),
+        enable_linting=False,
+    )
+    result_json = parse_result(result)
+    assert 'SELECT *' in result_json['formatted_output_and_error']
+    assert 'binary' not in result_json['formatted_output_and_error'].lower()
diff --git a/tests/integration/editor/test_peak_memory.py b/tests/integration/editor/test_peak_memory.py
@@ -15,7 +15,9 @@ def get_memory_info():
     """Get current and peak memory usage in bytes."""
     process = psutil.Process(os.getpid())
     rss = process.memory_info().rss
-    peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024  # Convert KB to bytes
+    peak_rss = (
+        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024
+    )  # Convert KB to bytes
     return {
         'rss': rss,
         'peak_rss': peak_rss,
@@ -28,17 +30,17 @@ def create_test_file(path: Path, size_mb: float = 5.0):
     line_size = 100  # bytes per line approximately
     num_lines = int((size_mb * 1024 * 1024) // line_size)
 
-    print(f"\nCreating test file with {num_lines} lines...")
+    print(f'\nCreating test file with {num_lines} lines...')
     with open(path, 'w') as f:
         for i in range(num_lines):
             f.write(f'Line {i}: ' + 'x' * (line_size - 10) + '\n')
 
     actual_size = os.path.getsize(path)
-    print(f"File created, size: {actual_size / 1024 / 1024:.2f} MB")
+    print(f'File created, size: {actual_size / 1024 / 1024:.2f} MB')
     return actual_size
 
 
-def set_memory_limit(file_size: int, multiplier: float = 1.5):
+def set_memory_limit(file_size: int, multiplier: float = 2.0):
     """Set memory limit to multiplier * file_size."""
     # Add base memory for pytest and other processes (100MB)
     base_memory = 100 * 1024 * 1024  # 100MB
@@ -50,11 +52,13 @@ def set_memory_limit(file_size: int, multiplier: float = 1.5):
         current_usage = psutil.Process().memory_info().rss
         if memory_limit > current_usage:
             resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
-            print(f"Memory limit set to {memory_limit / 1024 / 1024:.2f} MB")
+            print(f'Memory limit set to {memory_limit / 1024 / 1024:.2f} MB')
         else:
-            print(f"Warning: Current memory usage ({current_usage / 1024 / 1024:.2f} MB) higher than limit ({memory_limit / 1024 / 1024:.2f} MB)")
+            print(
+                f'Warning: Current memory usage ({current_usage / 1024 / 1024:.2f} MB) higher than limit ({memory_limit / 1024 / 1024:.2f} MB)'
+            )
     except Exception as e:
-        print(f"Warning: Could not set memory limit: {str(e)}")
+        print(f'Warning: Could not set memory limit: {str(e)}')
     return memory_limit
 
 
@@ -82,6 +86,7 @@ def test_str_replace_peak_memory():
 
         # Force Python to release file handles and clear buffers
         import gc
+
         gc.collect()
 
         # Get initial memory usage
@@ -93,7 +98,7 @@ def test_str_replace_peak_memory():
 
         # Perform str_replace operation
         try:
-            result = file_editor(
+            _ = file_editor(
                 command='str_replace',
                 path=path,
                 old_str='Line 5000',  # Replace a line in the middle
@@ -118,6 +123,7 @@ def test_insert_peak_memory():
 
         # Force Python to release file handles and clear buffers
         import gc
+
         gc.collect()
 
         # Get initial memory usage
@@ -129,7 +135,7 @@ def test_insert_peak_memory():
 
         # Perform insert operation
         try:
-            result = file_editor(
+            _ = file_editor(
                 command='insert',
                 path=path,
                 insert_line=5000,  # Insert in the middle
@@ -154,6 +160,7 @@ def test_view_peak_memory():
 
         # Force Python to release file handles and clear buffers
         import gc
+
         gc.collect()
 
         # Get initial memory usage
@@ -165,7 +172,7 @@ def test_view_peak_memory():
 
         # Test viewing specific lines
         try:
-            result = file_editor(
+            _ = file_editor(
                 command='view',
                 path=path,
                 view_range=[5000, 5100],  # View 100 lines from middle
@@ -189,6 +196,7 @@ def test_view_full_file_peak_memory():
 
         # Force Python to release file handles and clear buffers
         import gc
+
         gc.collect()
 
         # Get initial memory usage
@@ -200,7 +208,7 @@ def test_view_full_file_peak_memory():
 
         # Test viewing entire file
         try:
-            result = file_editor(
+            _ = file_editor(
                 command='view',
                 path=path,
                 enable_linting=False,
@@ -212,4 +220,4 @@ def test_view_full_file_peak_memory():
                 pytest.fail('Memory limit exceeded - peak memory usage too high')
             raise
 
-        check_memory_usage(initial['max'], file_size, 'view_full')
+        check_memory_usage(initial['max'], file_size, 'view_full')