Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement +[NSRegularExpression escapedPatternForString:] and -[NSString enumerateSubstringsInRange:options:usingBlock] #370

Merged
merged 4 commits into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,24 @@ DerivedData/
**/xcshareddata/WorkspaceSettings.xcsettings

# End of https://www.gitignore.io/api/xcode

.cache
compile_commands.json
**.kdev4

# Documentation
Documentation/Base*
Documentation/General
Documentation/manual
Documentation/ReleaseNotes
Documentation/ANNOUNCE
Documentation/*.pdf
Documentation/README
Documentation/version.texi
Documentation/*.aux
Documentation/*.toc
Documentation/INSTALL
Documentation/NEWS
**/dependencies
Source/Base.gsdoc
Source/BaseAdditions.gsdoc
3 changes: 3 additions & 0 deletions Headers/Foundation/NSRegularExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ GS_EXPORT_CLASS
inString: (NSString*)string
offset: (NSInteger)offset
template: (NSString*)templat;
#if OS_API_VERSION(MAC_OS_X_VERSION_10_7, GS_API_LATEST)
/**
 * Returns a copy of string in which each of the regular expression
 * metacharacters <code>* ? + [ ( ) { } ^ $ | \ .</code> is preceded by a
 * backslash. The result is intended to be usable as a pattern which
 * matches the original string literally.
 */
+ (NSString *)escapedPatternForString:(NSString *)string;
#endif
#if GS_HAS_DECLARED_PROPERTIES
@property (readonly) NSRegularExpressionOptions options;
@property (readonly) NSUInteger numberOfCaptureGroups;
Expand Down
48 changes: 48 additions & 0 deletions Headers/Foundation/NSString.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,48 @@ enum {
typedef NSUInteger NSStringEncodingConversionOptions;
#endif

#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
/** Options for enumerateSubstringsInRange:options:usingBlock:
You must include a substring type (one of the `NSStringEnumerationBy...`
values), and may bitwise or (`|`) it with any of the other options. */
enum {
/* Substring types: exactly one of these must be included.
They are stored in the low byte of the options, so each value must fit
into 8 bits. */
/** Enumerate by lines. Uses lineRangeForRange: */
NSStringEnumerationByLines = 0,
/** Enumerate by paragraph. Uses paragraphRangeForRange: */
NSStringEnumerationByParagraphs = 1,
/** Enumerate by composed character sequence.
Uses rangeOfComposedCharacterSequenceAtIndex: */
NSStringEnumerationByComposedCharacterSequences = 2,
/** Enumerate by word, as specified in Unicode TR 29.
Only supported if GNUstep is compiled with ICU.
Uses UBRK_WORD, with current locale and standard abbreviation lists if
NSStringEnumerationLocalized is passed, otherwise the locale is "en_US_POSIX". */
NSStringEnumerationByWords = 3,
/** Enumerate by sentence, as specified in Unicode TR 29.
Only supported if GNUstep is compiled with ICU.
Uses UBRK_SENTENCE, with current locale and standard abbreviation lists if
NSStringEnumerationLocalized is passed, otherwise the locale is "en_US_POSIX". */
NSStringEnumerationBySentences = 4,
#if OS_API_VERSION(MAC_OS_X_VERSION_11,GS_API_LATEST)
/** Undocumented public API on macOS. Not supported by GNUstep. */
NSStringEnumerationByCaretPositions = 5,
/** Undocumented public API on macOS. Not supported by GNUstep. */
NSStringEnumerationByDeletionClusters = 6,
#endif

/* Modifier flags: any of these may be combined with the substring type
via bitwise or. Each must be a single bit set at an offset >= 8 so that it
does not collide with the substring type in the low byte. */
/** Request enumeration in reverse, starting from the end of the range. */
NSStringEnumerationReverse = 1UL << 8,
/** The block does not need the substring itself; nil is passed in its
place and only the ranges are reported. */
NSStringEnumerationSubstringNotRequired = 1UL << 9,
/** Use the current locale (rather than "en_US_POSIX") when enumerating by
words or by sentences. */
NSStringEnumerationLocalized = 1UL << 10
};

/** Bitwise combination of one substring type and any modifier flags. */
typedef NSUInteger NSStringEnumerationOptions;

/** Block invoked for each enumerated substring. It receives the substring
(or nil, see NSStringEnumerationSubstringNotRequired), the range of the
substring, the enclosing range, and a pointer to a flag which the block may
set to YES in order to end the enumeration. */
DEFINE_BLOCK_TYPE(GSNSStringEnumerationBlock, void, NSString* substring, NSRange substringRange, NSRange enclosingRange, BOOL* stop);
#endif

/**
* <p>
* <code>NSString</code> objects represent an immutable string of Unicode 3.0
Expand Down Expand Up @@ -1050,6 +1092,12 @@ GS_EXPORT_CLASS
+ (Class) constantStringClass;
#endif /* GS_API_NONE */

#if OS_API_VERSION(MAC_OS_X_VERSION_10_6,GS_API_LATEST)
/**
 * Enumerates substrings of the receiver within range, invoking block once
 * for each of them.<br />
 * The low byte of opts selects the kind of substring (lines, paragraphs,
 * composed character sequences, words or sentences); the remaining bits
 * are modifier flags.<br />
 * The block is passed the substring (nil if
 * NSStringEnumerationSubstringNotRequired is set), the range of the
 * substring, the enclosing range, and a pointer to a BOOL which it may set
 * to YES to end the enumeration.<br />
 * Enumeration by words or by sentences is only available when GNUstep-base
 * is built with ICU; otherwise a warning is logged and nothing is
 * enumerated.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
options: (NSStringEnumerationOptions)opts
usingBlock: (GSNSStringEnumerationBlock)block;
#endif

@end

GS_EXPORT_CLASS
Expand Down
14 changes: 14 additions & 0 deletions Source/NSRegularExpression.m
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,20 @@ - (NSString*) replacementStringForResult: (NSTextCheckingResult*)result
}
#endif

/**
 * Returns string with a backslash inserted in front of every character
 * which has a special meaning in an ICU regular expression pattern, that
 * is any of: * ? + [ ( ) { } ^ $ | \ .
 * See https://unicode-org.github.io/icu/userguide/strings/regexp.html
 */
+ (NSString *)escapedPatternForString:(NSString *)string
{
  NSRegularExpression   *metacharacters;
  NSRange               wholeString;

  /* A single capture group wrapping the set of characters to escape. */
  metacharacters = [NSRegularExpression
    regularExpressionWithPattern: @"([*?+\\[(){}^$|\\\\.])"
                         options: 0
                           error: NULL];
  wholeString = NSMakeRange(0, [string length]);

  /* The template is an escaped (literal) backslash followed by whatever
   * metacharacter the capture group matched.
   */
  return [metacharacters stringByReplacingMatchesInString: string
                                                  options: 0
                                                    range: wholeString
                                             withTemplate: @"\\\\$1"];
}

- (NSRegularExpressionOptions) options
{
return options;
Expand Down
158 changes: 158 additions & 0 deletions Source/NSString.m
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@
# include <icu.h>
#endif

#include <stdlib.h>

#import "Foundation/NSObjCRuntime.h"
#import "GNUstepBase/GSBlocks.h"
#if GS_USE_ICU
#include <unicode/ubrk.h>
#include <unicode/utypes.h>
#endif


/* Create local inline versions of key functions for case-insensitive operations
*/
#import "Additions/unicode/caseconv.h"
Expand Down Expand Up @@ -6248,6 +6256,156 @@ - (BOOL) containsString: (NSString *)string
return [self rangeOfString: string].location != NSNotFound;
}

/* Calls block for one enumerated substring and returns YES if the block
 * asked for the enumeration to stop. The substring object is only created
 * when substringRequired is YES; otherwise nil is passed to the block.
 */
static inline BOOL
GSEnumerateSubstringInvoke(NSString *string, GSNSStringEnumerationBlock block,
  BOOL substringRequired, NSRange substringRange, NSRange enclosingRange)
{
  BOOL          stop = NO;
  NSString      *substring = nil;

  if (substringRequired)
    {
      substring = [string substringWithRange: substringRange];
    }
  CALL_BLOCK(block, substring, substringRange, enclosingRange, &stop);
  return stop;
}

/**
 * Enumerates the substrings of the receiver which lie within range and
 * invokes block for each of them.<br />
 * The low byte of opts selects the substring type; the higher bits are the
 * NSStringEnumerationReverse, NSStringEnumerationSubstringNotRequired and
 * NSStringEnumerationLocalized flags.<br />
 * An empty range enumerates nothing. With NSStringEnumerationReverse the
 * substrings are reported from the end of the range toward its start.<br />
 * The substring range passed to the block is always confined to range.
 * For lines and paragraphs the enclosing range is the full extent of the
 * line or paragraph including its terminator; for the other substring
 * types it is the same as the substring range.<br />
 * Enumeration by words or sentences needs ICU; without it a warning is
 * logged and the block is never called. When enumerating by words, the
 * segments between words (spaces, punctuation) are not reported.
 */
- (void) enumerateSubstringsInRange: (NSRange)range
                            options: (NSStringEnumerationOptions)opts
                         usingBlock: (GSNSStringEnumerationBlock)block
{
  /* The substring type is stored in the low byte of the options. */
  NSUInteger    substringType = opts & 0xFF;
  /* The flag bits are at offsets >= 8, so they must be compared with zero
   * rather than assigned to a BOOL directly: where BOOL is an 8-bit type
   * the direct conversion truncates them to NO.
   */
  BOOL          isReverse = (opts & NSStringEnumerationReverse) != 0;
  BOOL          substringRequired
    = (opts & NSStringEnumerationSubstringNotRequired) == 0;
  NSUInteger    rangeEnd = NSMaxRange(range);

  if (range.length == 0)
    {
      return;           // Nothing to enumerate.
    }

  if (substringType == NSStringEnumerationByLines
    || substringType == NSStringEnumerationByParagraphs)
    {
      BOOL              isLineSep
        = (substringType == NSStringEnumerationByLines);
      /* Forward: index of the first character not yet enumerated.
       * Reverse: index just past the last character not yet enumerated.
       */
      NSUInteger        location = isReverse ? rangeEnd : range.location;

      while (isReverse ? (location > range.location) : (location < rangeEnd))
        {
          NSUInteger    probe = location;
          NSUInteger    start;          // First character of the line.
          NSUInteger    end;            // First character past terminator.
          NSUInteger    contentsEnd;    // First character of terminator.
          NSUInteger    substringStart;
          NSUInteger    substringEnd;

          if (isReverse)
            {
              /* Look up the line holding the last unenumerated character.
               * If that character is the second half of a CR-LF pair, probe
               * the CR instead so the pair is treated as one terminator.
               */
              probe = location - 1;
              if (probe > range.location
                && [self characterAtIndex: probe] == '\n'
                && [self characterAtIndex: probe - 1] == '\r')
                {
                  probe--;
                }
            }

          [self _getStart: &start
                      end: &end
              contentsEnd: &contentsEnd
                 forRange: NSMakeRange(probe, 0)
                  lineSep: isLineSep];

          /* Confine the reported substring to the requested range. */
          substringStart = start > range.location ? start : range.location;
          substringEnd = contentsEnd < rangeEnd ? contentsEnd : rangeEnd;
          if (substringEnd < substringStart)
            {
              substringEnd = substringStart;
            }

          if (GSEnumerateSubstringInvoke(self, block, substringRequired,
            NSMakeRange(substringStart, substringEnd - substringStart),
            NSMakeRange(start, end - start)))
            {
              break;
            }

          /* Move on to the adjacent line; bail out rather than spin if
           * the line lookup ever fails to make progress.
           */
          if (isReverse)
            {
              if (start >= location)
                {
                  break;
                }
              location = start;
            }
          else
            {
              if (end <= location)
                {
                  break;
                }
              location = end;
            }
        }
    }
  else if (substringType == NSStringEnumerationByComposedCharacterSequences)
    {
      NSUInteger        location = isReverse ? rangeEnd : range.location;

      while (isReverse ? (location > range.location) : (location < rangeEnd))
        {
          NSUInteger    index = isReverse ? location - 1 : location;
          /* Every character belongs to a composed character sequence, so
           * the enclosing range equals the substring range. The sequence
           * is clipped in case the requested range cuts through it; the
           * clipped range still contains index, so each pass advances.
           */
          NSRange       sequence = NSIntersectionRange(
            [self rangeOfComposedCharacterSequenceAtIndex: index], range);

          if (GSEnumerateSubstringInvoke(self, block, substringRequired,
            sequence, sequence))
            {
              break;
            }
          location = isReverse ? sequence.location : NSMaxRange(sequence);
        }
    }
  else if (substringType == NSStringEnumerationByWords
    || substringType == NSStringEnumerationBySentences)
    {
#if GS_USE_ICU
      BOOL              byWords
        = (substringType == NSStringEnumerationByWords);
      BOOL              localized
        = (opts & NSStringEnumerationLocalized) != 0;
      const char        *locale = "en_US_POSIX";
      UErrorCode        errorCode = U_ZERO_ERROR;
      UBreakIterator    *breakIterator;
      UChar             *characters;
      int32_t           boundary;

      if (range.length > (NSUInteger)INT32_MAX)
        {
          NSWarnMLog(@"Range too long for an ICU break iterator");
          return;
        }

      if (localized)
        {
          NSString      *identifier
            = [[NSLocale currentLocale] localeIdentifier];

          if (identifier != nil)
            {
              /* ss=standard makes ICU use lists of common abbreviations
               * such as Mr., Mrs., etc. The first locale keyword is
               * introduced by '@', any further one by ';'.
               */
              NSString  *separator
                = ([identifier rangeOfString: @"@"].location == NSNotFound)
                ? @"@" : @";";

              identifier = [identifier stringByAppendingFormat:
                @"%@ss=standard", separator];
              locale = [identifier UTF8String];
            }
        }

      /* Heap allocation: the range may be far too large for the stack.
       * The iterator refers to this buffer, so it must stay alive until
       * the iterator has been closed.
       * NOTE(review): if the block raises an exception, the buffer and the
       * iterator are leaked.
       */
      characters = (UChar *)malloc(range.length * sizeof(UChar));
      if (characters == NULL)
        {
          NSWarnMLog(@"Unable to allocate buffer for ICU break iterator");
          return;
        }
      [self getCharacters: (unichar *)characters range: range];

      breakIterator = ubrk_open(byWords ? UBRK_WORD : UBRK_SENTENCE,
        locale, characters, (int32_t)range.length, &errorCode);
      if (U_FAILURE(errorCode))
        {
          NSWarnMLog(@"Error opening ICU break iterator: %s",
            u_errorName(errorCode));
          free(characters);
          return;
        }
      else if (errorCode < U_ZERO_ERROR)
        {
          NSWarnMLog(@"Warning opening ICU break iterator: %s",
            u_errorName(errorCode));
        }

      /* Boundaries are offsets into the buffer, i.e. relative to
       * range.location.
       */
      boundary = isReverse ? ubrk_last(breakIterator)
        : ubrk_first(breakIterator);
      while (YES)
        {
          int32_t       ruleStatus;
          int32_t       adjacent;
          int32_t       lower;
          int32_t       upper;
          NSRange       segment;

          /* The rule status of a boundary describes the text that ends
           * at that boundary. Going forward it must be read after moving
           * to the end of the segment; going backward the iterator is
           * already at the end of the segment before it moves.
           */
          if (isReverse)
            {
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              adjacent = ubrk_previous(breakIterator);
              if (adjacent == UBRK_DONE)
                {
                  break;
                }
              lower = adjacent;
              upper = boundary;
            }
          else
            {
              adjacent = ubrk_next(breakIterator);
              if (adjacent == UBRK_DONE)
                {
                  break;
                }
              ruleStatus = ubrk_getRuleStatus(breakIterator);
              lower = boundary;
              upper = adjacent;
            }
          boundary = adjacent;

          /* Status values below UBRK_WORD_NONE_LIMIT mark segments which
           * are not words (spaces and most punctuation); skip them.
           */
          if (byWords && ruleStatus < UBRK_WORD_NONE_LIMIT)
            {
              continue;
            }

          /* The enclosing range is the same as the substring range. */
          segment = NSMakeRange(range.location + (NSUInteger)lower,
            (NSUInteger)(upper - lower));
          if (GSEnumerateSubstringInvoke(self, block, substringRequired,
            segment, segment))
            {
              break;
            }
        }

      ubrk_close(breakIterator);
      free(characters);
#else
      NSWarnLog(@"NSStringEnumerationByWords and NSStringEnumerationBySentences are not supported when GNUstep-base is compiled without ICU.");
      return;
#endif
    }
  else if (substringType == NSStringEnumerationByCaretPositions)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByCaretPositions is not supported");
      return;
    }
  else if (substringType == NSStringEnumerationByDeletionClusters)
    {
      // FIXME - Not documented by Apple.
      NSWarnLog(@"NSStringEnumerationByDeletionClusters is not supported");
      return;
    }
}

@end

/**
Expand Down
98 changes: 98 additions & 0 deletions Tests/base/NSString/enumerateSubstringsInRange.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#import "ObjectTesting.h"
#import <Foundation/NSAutoreleasePool.h>
#import <Foundation/NSString.h>

/* Compilers without __has_extension cannot parse a use of it, even when it
 * is guarded by defined() in the same #if expression, so provide a fallback
 * which reports every extension as unavailable.
 */
#ifndef __has_extension
#define __has_extension(x) 0
#endif

#if __has_extension(blocks)
/* Checks that enumerateSubstringsInRange:options:usingBlock: reports the
 * expected substrings, in order, for each supported substring type.
 */
int main (int argc, const char * argv[])
{
  NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];

  START_SET("Enumerate substrings by lines");

  NSString* s1 = @"Line 1\nLine 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByLines
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    /* NSUInteger is not 'long' on every platform: cast to match the format. */
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if(currentIteration == 0) PASS([substring isEqual: @"Line 1"], "First line of \"Line 1\\nLine 2\" is \"Line 1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"Line 2"], "Second line of \"Line 1\\nLine 2\" is \"Line 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two lines in \"Line 1\\nLine 2\"");

  END_SET("Enumerate substrings by lines");

  START_SET("Enumerate substrings by paragraphs");

  NSString* s1 = @"Paragraph 1\nParagraph 2";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByParagraphs
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if(currentIteration == 0) PASS([substring isEqual: @"Paragraph 1"], "First paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"Paragraph 2"], "Second paragraph of \"Paragraph 1\\nParagraph 2\" is \"Paragraph 2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two paragraphs in \"Paragraph 1\\nParagraph 2\"");

  END_SET("Enumerate substrings by paragraphs");

  START_SET("Enumerate substrings by words");

  /* Word enumeration is only implemented when ICU is available. */
#if !(__APPLE__ || GS_USE_ICU)
  SKIP("NSStringEnumerationByWords is not supported without ICU");
#else
  NSString* s1 = @"Word1 word2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationByWords
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if(currentIteration == 0) PASS([substring isEqual: @"Word1"], "First word of \"Word1 word2.\" is \"Word1\"");
    if(currentIteration == 1) PASS([substring isEqual: @"word2"], "Second word of \"Word1 word2.\" is \"word2\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two words in \"Word1 word2.\"");
#endif

  END_SET("Enumerate substrings by words");

  START_SET("Enumerate substrings by sentences");

  /* Sentence enumeration is only implemented when ICU is available. */
#if !(__APPLE__ || GS_USE_ICU)
  SKIP("NSStringEnumerationBySentences is not supported without ICU");
#else
  NSString* s1 = @"Sentence 1. Sentence 2.";
  __block NSUInteger currentIteration = 0;
  [s1 enumerateSubstringsInRange: NSMakeRange(0, [s1 length])
                         options: NSStringEnumerationBySentences
                      usingBlock: ^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
    NSLog(@"Substring range: {.location=%lu, .length=%lu}", (unsigned long)substringRange.location, (unsigned long)substringRange.length);
    NSLog(@"Enclosing range: {.location=%lu, .length=%lu}", (unsigned long)enclosingRange.location, (unsigned long)enclosingRange.length);
    NSLog(@"Substring: %@", substring);
    if(currentIteration == 0) PASS([substring isEqual: @"Sentence 1. "], "First sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 1. \"");
    if(currentIteration == 1) PASS([substring isEqual: @"Sentence 2."], "Second sentence of \"Sentence 1. Sentence 2.\" is \"Sentence 2.\"");
    currentIteration++;
  }];
  PASS(currentIteration == 2, "There are only two sentences in \"Sentence 1. Sentence 2.\"");
#endif

  END_SET("Enumerate substrings by sentences");

  [pool drain];

  return 0;
}
#else
/* Blocks are not available: there is nothing that can be tested. */
int main (int argc, const char * argv[])
{
  return 0;
}
#endif
Loading