Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Escape Control Characters #480

Merged
merged 4 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions b2sdk/_v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@
)
from b2sdk.session import B2Session
from b2sdk.utils.thread_pool import ThreadPoolMixin
from b2sdk.utils.escape import unprintable_to_hex, escape_control_chars, substitute_control_chars

# filter
from b2sdk.filter import FilterType, Filter
Expand Down
6 changes: 4 additions & 2 deletions b2sdk/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import time
from abc import ABCMeta, abstractmethod

from .utils.escape import escape_control_chars

try:
from tqdm import tqdm # displays a nice progress bar
except ImportError:
Expand Down Expand Up @@ -112,7 +114,7 @@ def __init__(self, *args, **kwargs):
def set_total_bytes(self, total_byte_count: int) -> None:
if self.tqdm is None:
self.tqdm = tqdm(
desc=self.description,
desc=escape_control_chars(self.description),
total=total_byte_count,
unit='B',
unit_scale=True,
Expand Down Expand Up @@ -159,7 +161,7 @@ def bytes_completed(self, byte_count: int) -> None:
elapsed = now - self.last_time
if 3 <= elapsed and self.total != 0:
if not self.any_printed:
print(self.description)
print(escape_control_chars(self.description))
print(' %d%%' % int(100.0 * byte_count / self.total))
self.last_time = now
self.any_printed = True
Expand Down
19 changes: 2 additions & 17 deletions b2sdk/raw_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
from __future__ import annotations

import base64
import re
from abc import ABCMeta, abstractmethod
from enum import Enum, unique
from logging import getLogger
from typing import Any

from .utils.escape import unprintable_to_hex
from .utils.typing import JSON

try:
Expand Down Expand Up @@ -873,21 +873,6 @@ def update_file_legal_hold(
except AccessDenied:
raise RetentionWriteError()

def unprintable_to_hex(self, string):
"""
Replace unprintable chars in string with a hex representation.

:param string: an arbitrary string, possibly with unprintable characters.
:return: the string, with unprintable characters changed to hex (e.g., "\x07")

"""
unprintables_pattern = re.compile(r'[\x00-\x1f]')

def hexify(match):
return fr'\x{ord(match.group()):02x}'

return unprintables_pattern.sub(hexify, string)

def check_b2_filename(self, filename):
"""
Raise an appropriate exception with details if the filename is unusable.
Expand All @@ -906,7 +891,7 @@ def check_b2_filename(self, filename):
lowest_unicode_value = ord(min(filename))
if lowest_unicode_value < 32:
message = "Filename \"{}\" contains code {} (hex {:02x}), less than 32.".format(
self.unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value
unprintable_to_hex(filename), lowest_unicode_value, lowest_unicode_value
)
raise UnusableFileName(message)
# No DEL for you.
Expand Down
9 changes: 5 additions & 4 deletions b2sdk/sync/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ..sync.report import ProgressReport, SyncReport
from ..transfer.outbound.outbound_source import OutboundTransferSource
from ..transfer.outbound.upload_source import UploadSourceLocalFile
from ..utils.escape import escape_control_chars
from .encryption_provider import AbstractSyncEncryptionSettingsProvider
from .report import SyncFileReporter

Expand Down Expand Up @@ -179,7 +180,7 @@ def do_report(self, bucket: Bucket, reporter: ProgressReport) -> None:
:param bucket: a Bucket object
:param reporter: a place to report errors
"""
reporter.print_completion('upload ' + self.relative_name)
reporter.print_completion(f'upload {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'b2_upload({self.local_full_path}, {self.b2_file_name}, {self.mod_time_millis})'
Expand Down Expand Up @@ -255,7 +256,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('hide ' + self.relative_name)
reporter.print_completion(f'hide {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'b2_hide({self.b2_file_name})'
Expand Down Expand Up @@ -478,7 +479,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('delete ' + self.relative_name + ' ' + self.note)
reporter.print_completion(f"delete {escape_control_chars(self.relative_name)} {self.note}")

def __str__(self) -> str:
return f'b2_delete({self.b2_file_name}, {self.file_id}, {self.note})'
Expand Down Expand Up @@ -519,7 +520,7 @@ def do_report(self, bucket: Bucket, reporter: SyncReport):
:param reporter: a place to report errors
"""
reporter.update_transfer(1, 0)
reporter.print_completion('delete ' + self.relative_name)
reporter.print_completion(f'delete {escape_control_chars(self.relative_name)}')

def __str__(self) -> str:
return f'local_delete({self.full_path})'
58 changes: 58 additions & 0 deletions b2sdk/utils/escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
######################################################################
#
# File: b2sdk/utils/escape.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

import re
import shlex
from typing import Optional, Tuple

# Characters treated as unprintable: the C0 control characters except
# \x09-\x0d (tab, line feed, vertical tab, form feed, carriage return),
# plus DEL (\x7f) and the C1 control characters (\x80-\x9f).
UNPRINTABLE_PATTERN = re.compile(r'[\x00-\x08\x0e-\x1f\x7f-\x9f]')


def _hexify(match):
    r"""Return the matched character spelled out as a ``\xNN`` escape."""
    return fr'\x{ord(match.group()):02x}'


def unprintable_to_hex(s: Optional[str]) -> Optional[str]:
    r"""
    Replace unprintable chars in string with a hex representation.

    :param s: an arbitrary string, possibly with unprintable characters, or None.
    :return: the string, with unprintable characters changed to hex (e.g., "\x07");
             None if ``s`` is None. An empty string is returned as an empty string.

    """
    # Only None is passed through; an empty string must stay a string so that
    # callers formatting or printing the result never end up with "None".
    if s is None:
        return None
    return UNPRINTABLE_PATTERN.sub(_hexify, s)


def escape_control_chars(s: Optional[str]) -> Optional[str]:
    r"""
    Replace unprintable chars in string with a hex representation AND shell quotes the string.

    :param s: an arbitrary string, possibly with unprintable characters, or None.
    :return: the string, with unprintable characters changed to hex (e.g., "\x07")
             and then passed through ``shlex.quote``; None if ``s`` is None.
             An empty string yields the quoted empty string ``''``.

    """
    if s is None:
        return None
    return shlex.quote(unprintable_to_hex(s))


def substitute_control_chars(s: Optional[str]) -> Tuple[Optional[str], bool]:
    """
    Replace unprintable chars in string with � unicode char

    :param s: an arbitrary string, possibly with unprintable characters, or None.
    :return: tuple of the string with � replacements made and boolean indicating if chars were replaced;
             ``(None, False)`` if ``s`` is None.

    """
    if s is None:
        return None, False
    # subn reports how many replacements it made, so a single pass answers both
    # "what is the new string" and "was anything replaced".
    substituted, replaced_count = UNPRINTABLE_PATTERN.subn('�', s)
    return substituted, replaced_count > 0
3 changes: 2 additions & 1 deletion b2sdk/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from b2sdk._v3 import parse_folder as parse_sync_folder
from b2sdk._v3 import AbstractPath as AbstractSyncPath
from b2sdk._v3 import LocalPath as LocalSyncPath
from b2sdk.utils.escape import unprintable_to_hex, escape_control_chars, substitute_control_chars

from .account_info import AbstractAccountInfo
from .api import B2Api
Expand Down Expand Up @@ -45,4 +46,4 @@
# large_file

from .large_file import LargeFileServices
from .large_file import UnfinishedLargeFile
from .large_file import UnfinishedLargeFile
3 changes: 3 additions & 0 deletions b2sdk/v2/raw_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def get_upload_file_headers(
**kwargs,
)

def unprintable_to_hex(self, s):
    """
    Method form of ``unprintable_to_hex`` kept on this class.

    :param s: the string to process; handed unchanged to ``v3.unprintable_to_hex``.
    :return: whatever ``v3.unprintable_to_hex`` returns for ``s``.
    """
    converted = v3.unprintable_to_hex(s)
    return converted

@_file_infos_rename
def upload_file(
self,
Expand Down
1 change: 1 addition & 0 deletions changelog.d/+escape_control_characters.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added control character escaping for bucket names and file names.
32 changes: 32 additions & 0 deletions test/unit/utils/test_escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
######################################################################
#
# File: test/unit/utils/test_escape.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################

from b2sdk.utils.escape import escape_control_chars, substitute_control_chars, unprintable_to_hex


def test_unprintable_to_hex():
    """
    Check the three escape helpers against one shared table of inputs.

    Every row holds the raw input followed by the expected result of
    ``unprintable_to_hex``, ``escape_control_chars`` and
    ``substitute_control_chars`` for that input, in that order.
    """
    table = (
        # plain text is left alone (but still gets shell-quoted)
        (' abc-z', ' abc-z', "' abc-z'", (' abc-z', False)),
        # DEL
        ('a\x7fb', 'a\\x7fb', "'a\\x7fb'", ('a�b', True)),
        # both ends of the escaped ranges
        ('a\x00b a\x9fb ', 'a\\x00b a\\x9fb ', "'a\\x00b a\\x9fb '", ('a�b a�b ', True)),
        # a newline is kept as-is
        ('a\x7fb\nc', 'a\\x7fb\nc', "'a\\x7fb\nc'", ('a�b\nc', True)),
        # \x9b sequences
        ('\x9bT\x9bEtest', '\\x9bT\\x9bEtest', "'\\x9bT\\x9bEtest'", ('�T�Etest', True)),
        # \x1b[ colour sequences
        (
            '\x1b[32mC\x1b[33mC\x1b[34mI',
            '\\x1b[32mC\\x1b[33mC\\x1b[34mI',
            "'\\x1b[32mC\\x1b[33mC\\x1b[34mI'",
            ('�[32mC�[33mC�[34mI', True),
        ),
    )
    for raw, want_hex, want_quoted, want_substituted in table:
        got_hex = unprintable_to_hex(raw)
        assert got_hex == want_hex
        got_quoted = escape_control_chars(raw)
        assert got_quoted == want_quoted
        got_substituted = substitute_control_chars(raw)
        assert got_substituted == want_substituted
Loading