Signal-iOS/SignalServiceKit/Util/NSString+OWS.m
Harry bb083ca39c
Fold SignalCoreKit into SignalServiceKit
Co-authored-by: Adam Sharp <sharplet@signal.org>
2024-06-26 08:44:41 -07:00

301 lines
10 KiB
Objective-C

//
// Copyright 2024 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
//
#import "NSString+OWS.h"
#import <SignalServiceKit/SignalServiceKit-Swift.h>
#import <objc/runtime.h>
NS_ASSUME_NONNULL_BEGIN
#pragma mark -
// Keys for the associated objects that -sanitized attaches to a string instance.
// Each key is initialised with its own address, which makes it unique.
static void *kNSString_SSK_needsSanitization = &kNSString_SSK_needsSanitization;
static void *kNSString_SSK_sanitizedCounterpart = &kNSString_SSK_sanitizedCounterpart;

// Unicode bidirectional control characters, as UTF-16 code units.
// These are constants, so they are declared `const` to prevent accidental reassignment.

// Isolate initiators; each is closed by bidiPopDirectionalIsolate.
static const unichar bidiLeftToRightIsolate = 0x2066; // LRI
static const unichar bidiRightToLeftIsolate = 0x2067; // RLI
static const unichar bidiFirstStrongIsolate = 0x2068; // FSI

// Embedding / override initiators; each is closed by bidiPopDirectionalFormatting.
static const unichar bidiLeftToRightEmbedding = 0x202A; // LRE
static const unichar bidiRightToLeftEmbedding = 0x202B; // RLE
static const unichar bidiLeftToRightOverride = 0x202D; // LRO
static const unichar bidiRightToLeftOverride = 0x202E; // RLO

// Terminators.
static const unichar bidiPopDirectionalFormatting = 0x202C; // PDF
static const unichar bidiPopDirectionalIsolate = 0x2069; // PDI
@implementation NSString (OWS)
// Returns the set of characters treated as "non-printing": whitespace and
// newlines, control characters, the bidi control characters, and the
// left-to-right / right-to-left marks. The set is built once and reused.
+ (NSCharacterSet *)nonPrintingCharacterSet
{
    static dispatch_once_t onceToken;
    static NSCharacterSet *sharedSet = nil;
    dispatch_once(&onceToken, ^{
        NSMutableCharacterSet *builder = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];
        // Left-to-right and Right-to-left marks.
        [builder addCharactersInString:@"\u200E\u200f"];
        [builder formUnionWithCharacterSet:[self bidiControlCharacterSet]];
        [builder formUnionWithCharacterSet:[NSCharacterSet controlCharacterSet]];
        // Freeze the result so callers never see a mutable set.
        sharedSet = [builder copy];
    });
    return sharedSet;
}
// Returns the set containing exactly the nine bidi control characters declared
// at the top of this file (isolates, embeddings, overrides and their pops).
// The set is built once and reused.
+ (NSCharacterSet *)bidiControlCharacterSet
{
    static dispatch_once_t onceToken;
    static NSCharacterSet *sharedSet = nil;
    dispatch_once(&onceToken, ^{
        const unichar controls[] = {
            bidiLeftToRightIsolate,
            bidiRightToLeftIsolate,
            bidiFirstStrongIsolate,
            bidiLeftToRightEmbedding,
            bidiRightToLeftEmbedding,
            bidiLeftToRightOverride,
            bidiRightToLeftOverride,
            bidiPopDirectionalFormatting,
            bidiPopDirectionalIsolate,
        };
        NSString *members = [NSString stringWithCharacters:controls
                                                    length:sizeof(controls) / sizeof(controls[0])];
        sharedSet = [NSCharacterSet characterSetWithCharactersInString:members];
    });
    return sharedSet;
}
// Trims leading and trailing whitespace/newlines from the receiver.
// If nothing is left after trimming every non-printing character from both
// ends, the receiver is treated as empty and @"" is returned instead.
- (NSString *)ows_stripped
{
    NSString *printableCore = [self stringByTrimmingCharactersInSet:[NSString nonPrintingCharacterSet]];
    if (printableCore.length == 0) {
        // If string has no printing characters, consider it empty.
        return @"";
    }
    // Only whitespace is trimmed here; other non-printing characters at the
    // edges of a string that does have printable content are kept.
    return [self stringByTrimmingCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
}
// Returns the set of characters that must not appear in a displayed filename.
// The set is built once and reused.
+ (NSCharacterSet *)unsafeFilenameCharacterSet
{
    static dispatch_once_t onceToken;
    static NSCharacterSet *unsafeSet;
    dispatch_once(&onceToken, ^{
        // U+202D (left-to-right override) and U+202E (right-to-left override)
        // change the order in which the following text is rendered. They could
        // be used to construct misleading attachment filenames that appear to
        // have a different file extension, for example.
        NSString *overrideCharacters = @"\u202D\u202E";
        unsafeSet = [NSCharacterSet characterSetWithCharactersInString:overrideCharacters];
    });
    return unsafeSet;
}
// Replaces every character in +unsafeFilenameCharacterSet with U+FFFD
// (the Unicode "replacement" code point).
//
// Returns the receiver itself when it contains no unsafe characters; otherwise
// returns a new immutable string of the same length with each unsafe character
// substituted one-for-one.
- (NSString *)filterUnsafeFilenameCharacters
{
    NSCharacterSet *unsafeCharacterSet = [[self class] unsafeFilenameCharacterSet];
    if ([self rangeOfCharacterFromSet:unsafeCharacterSet].location == NSNotFound) {
        // Fast path: nothing to replace, so avoid allocating.
        return self;
    }
    // Splitting on the unsafe characters yields the safe runs between them
    // (adjacent or leading/trailing unsafe characters produce empty runs), so
    // joining with the replacement code point substitutes each one exactly once.
    // This is a single pass, where a scan loop would re-copy the remaining
    // tail on every hit.
    NSArray<NSString *> *safeRuns = [self componentsSeparatedByCharactersInSet:unsafeCharacterSet];
    return [safeRuns componentsJoinedByString:@"\uFFFD"];
}
// Prepares a fragment of a larger string for display: sanitizes it, then
// balances its bidi control characters.
- (NSString *)filterSubstringForDisplay
{
    // We don't want to strip a substring before filtering.
    NSString *sanitizedSubstring = [self sanitized];
    return [sanitizedSubstring ensureBalancedBidiControlCharacters];
}
// Prepares a complete string for display: strips it with -ows_stripped, then
// applies the same filtering as -filterSubstringForDisplay.
- (NSString *)filterStringForDisplay
{
    NSString *strippedString = [self ows_stripped];
    return [strippedString filterSubstringForDisplay];
}
// Prepares a filename for display: strips it, sanitizes it, and finally
// replaces the unsafe filename characters.
- (NSString *)filterFilename
{
    NSString *strippedName = [self ows_stripped];
    NSString *sanitizedName = [strippedName sanitized];
    return [sanitizedName filterUnsafeFilenameCharacters];
}
// Returns a copy of the receiver with every bidi control character removed,
// wherever it occurs in the string.
//
// Trimming with -stringByTrimmingCharactersInSet: would only strip the two
// ends and leave interior controls (for example an embedded right-to-left
// override) in place, so all occurrences are removed here instead.
- (NSString *)withoutBidiControlCharacters
{
    return [self removeAllCharactersIn:[NSString bidiControlCharacterSet]];
}
// Returns the receiver with bidi control characters added at its ends so that
// every initiator has a matching pop and every pop has a matching initiator.
//
// Isolates (LRI/RLI/FSI closed by PDI) and embeddings/overrides
// (LRE/RLE/LRO/RLO closed by PDF) are tracked independently.
//
// Matching is order-aware: a pop only pairs with an initiator that appears
// before it. Comparing totals alone would treat "PDI … FSI" as balanced even
// though the pop has nothing to close and the isolate is never terminated.
//
// - A pop with no open initiator gets an FSI (for PDI) or LRE (for PDF)
//   prepended.
// - An initiator still open at the end gets a PDF or PDI appended.
//
// Returns the receiver itself when nothing needs to be added.
- (NSString *)ensureBalancedBidiControlCharacters
{
    // Initiators seen so far that are still waiting for their pop.
    NSUInteger openIsolateCount = 0;
    NSUInteger openFormattingCount = 0;
    // Pops encountered while nothing of the matching kind was open.
    NSUInteger unmatchedIsolatePopCount = 0;
    NSUInteger unmatchedFormattingPopCount = 0;

    const NSUInteger length = self.length;
    for (NSUInteger index = 0; index < length; index++) {
        const unichar c = [self characterAtIndex:index];
        if (c == bidiLeftToRightIsolate || c == bidiRightToLeftIsolate || c == bidiFirstStrongIsolate) {
            openIsolateCount++;
        } else if (c == bidiPopDirectionalIsolate) {
            if (openIsolateCount > 0) {
                openIsolateCount--;
            } else {
                unmatchedIsolatePopCount++;
            }
        } else if (c == bidiLeftToRightEmbedding || c == bidiRightToLeftEmbedding || c == bidiLeftToRightOverride
            || c == bidiRightToLeftOverride) {
            openFormattingCount++;
        } else if (c == bidiPopDirectionalFormatting) {
            if (openFormattingCount > 0) {
                openFormattingCount--;
            } else {
                unmatchedFormattingPopCount++;
            }
        }
    }

    if (openIsolateCount == 0 && openFormattingCount == 0 && unmatchedIsolatePopCount == 0
        && unmatchedFormattingPopCount == 0) {
        // Either no bidi controls at all, or they are already balanced.
        return self;
    }

    NSMutableString *balancedString = [NSMutableString new];
    // Prepend FSI for every isolate pop that had nothing to close.
    for (NSUInteger i = 0; i < unmatchedIsolatePopCount; i++) {
        [balancedString appendFormat:@"%C", bidiFirstStrongIsolate];
    }
    // Prepend LRE for every formatting pop that had nothing to close.
    for (NSUInteger i = 0; i < unmatchedFormattingPopCount; i++) {
        [balancedString appendFormat:@"%C", bidiLeftToRightEmbedding];
    }
    [balancedString appendString:self];
    // Append PDF for every embedding/override left open.
    for (NSUInteger i = 0; i < openFormattingCount; i++) {
        [balancedString appendFormat:@"%C", bidiPopDirectionalFormatting];
    }
    // Append PDI for every isolate left open; isolates are closed last so they
    // wrap the formatting pops added above.
    for (NSUInteger i = 0; i < openIsolateCount; i++) {
        [balancedString appendFormat:@"%C", bidiPopDirectionalIsolate];
    }
    return [balancedString copy];
}
// Returns a new string consisting of `character` followed by the receiver.
- (NSString *)stringByPrependingCharacter:(unichar)character
{
    NSMutableString *combined = [NSMutableString stringWithCapacity:self.length + 1];
    [combined appendFormat:@"%C", character];
    [combined appendString:self];
    return [combined copy];
}
// Returns a new string consisting of the receiver followed by `character`.
- (NSString *)stringByAppendingCharacter:(unichar)character
{
    NSString *suffix = [NSString stringWithFormat:@"%C", character];
    return [self stringByAppendingString:suffix];
}
// Returns the receiver, balanced via -ensureBalancedBidiControlCharacters and
// wrapped in a first-strong isolate (FSI … PDI).
//
// A receiver longer than one character that already starts with FSI and ends
// with PDI is returned unchanged.
// NOTE(review): that check looks only at the first and last characters; it
// does not verify that they pair with each other.
- (NSString *)bidirectionallyBalancedAndIsolated
{
    const NSUInteger length = self.length;
    const BOOL alreadyIsolated = length > 1
        && [self characterAtIndex:0] == bidiFirstStrongIsolate
        && [self characterAtIndex:length - 1] == bidiPopDirectionalIsolate;
    if (alreadyIsolated) {
        // We're already isolated, nothing to do here.
        return self;
    }

    NSMutableString *isolated = [NSMutableString new];
    [isolated appendFormat:@"%C", bidiFirstStrongIsolate];
    [isolated appendString:[self ensureBalancedBidiControlCharacters]];
    [isolated appendFormat:@"%C", bidiPopDirectionalIsolate];
    return [isolated copy];
}
// Returns the sanitized form of the receiver as produced by StringSanitizer,
// or the receiver itself when StringSanitizer reports that no sanitization is
// needed.
//
// The outcome is cached on the receiver with two associated objects: an
// NSNumber flag (does this string need sanitizing?) and, when it does, the
// sanitized counterpart.
// NOTE(review): the cache is attached to the instance and never invalidated;
// if the receiver is a mutable string that is edited afterwards the cached
// answer would be stale — confirm callers only pass immutable strings.
- (NSString *)sanitized
{
    NSNumber *cachedNeedsSanitization = objc_getAssociatedObject(self, kNSString_SSK_needsSanitization);
    if (cachedNeedsSanitization != nil) {
        if (!cachedNeedsSanitization.boolValue) {
            return self;
        }
        NSString *cachedCounterpart = objc_getAssociatedObject(self, kNSString_SSK_sanitizedCounterpart);
        if (cachedCounterpart != nil) {
            return cachedCounterpart;
        }
        // The flag says "needs sanitization" but no counterpart is stored.
        // Fall through and recompute: returning `self` here would hand back
        // the unsanitized string.
    }

    StringSanitizer *sanitizer = [[StringSanitizer alloc] initWithString:self];
    if (!sanitizer.needsSanitization) {
        objc_setAssociatedObject(self, kNSString_SSK_needsSanitization, @NO, OBJC_ASSOCIATION_COPY);
        return self;
    }

    NSString *sanitized = sanitizer.sanitized;
    // Store the counterpart BEFORE the flag. A concurrent caller that sees the
    // flag set to YES is then guaranteed to also find the counterpart; storing
    // the flag first leaves a window in which the flag is YES but the
    // counterpart is still missing.
    objc_setAssociatedObject(self, kNSString_SSK_sanitizedCounterpart, sanitized, OBJC_ASSOCIATION_COPY);
    objc_setAssociatedObject(self, kNSString_SSK_needsSanitization, @YES, OBJC_ASSOCIATION_COPY);
    return sanitized;
}
// Returns a shared regular expression that matches a run of one or more
// characters in the range U+0000–U+007F. Compiled once and reused.
//
// Note: the `\x00` and `\x7F` escapes in the pattern literal are resolved by
// the compiler, so the pattern string handed to NSRegularExpression contains
// the raw NUL and DEL characters as the bounds of the character class.
+ (NSRegularExpression *)anyASCIIRegex
{
    static NSRegularExpression *sharedRegex;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSError *compileError;
        sharedRegex = [NSRegularExpression regularExpressionWithPattern:@"[\x00-\x7F]+"
                                                                options:0
                                                                  error:&compileError];
        const BOOL compiledCleanly = (sharedRegex != nil && compileError == nil);
        if (!compiledCleanly) {
            // It's not clear how to proceed safely, and this regex should never
            // fail to compile, so report it through OWSFail.
            OWSFail(@"could not compile regex: %@", compileError);
        }
    });
    return sharedRegex;
}
// Returns a shared regular expression anchored with `^` and `$` around zero or
// more characters in the range U+0000–U+007F. Compiled once and reused.
//
// Note: the `\x00` and `\x7F` escapes in the pattern literal are resolved by
// the compiler, so the pattern string contains the raw NUL and DEL characters.
// NOTE(review): `$` can also match just before a line terminator at the very
// end of the input, so a match does not by itself prove that the whole string
// was consumed; compare the matched range with the full range when that
// matters.
+ (NSRegularExpression *)onlyASCIIRegex
{
    static NSRegularExpression *sharedRegex;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        NSError *compileError;
        sharedRegex = [NSRegularExpression regularExpressionWithPattern:@"^[\x00-\x7F]*$"
                                                                options:0
                                                                  error:&compileError];
        const BOOL compiledCleanly = (sharedRegex != nil && compileError == nil);
        if (!compiledCleanly) {
            // It's not clear how to proceed safely, and this regex should never
            // fail to compile, so report it through OWSFail.
            OWSFail(@"could not compile regex: %@", compileError);
        }
    });
    return sharedRegex;
}
// Returns YES when every character of the receiver is in the range
// U+0000–U+007F. The empty string counts as ASCII-only.
- (BOOL)isOnlyASCII
{
    const NSRange wholeString = NSMakeRange(0, self.length);
    const NSRange match = [[[self class] onlyASCIIRegex] rangeOfFirstMatchInString:self
                                                                           options:0
                                                                             range:wholeString];
    // Require the match to span the entire string rather than merely exist.
    // The pattern ends in `$`, which also matches just before a line
    // terminator at the end of the input; several line terminators
    // (U+0085, U+2028, U+2029) are not ASCII, so "abc\u2028" would otherwise
    // be reported as ASCII-only.
    return NSEqualRanges(match, wholeString);
}
// Returns YES when the receiver contains at least one character in the range
// U+0000–U+007F. The empty string returns NO.
- (BOOL)hasAnyASCII
{
    const NSRange wholeString = NSMakeRange(0, self.length);
    const NSRange firstASCIIRun = [[[self class] anyASCIIRegex] rangeOfFirstMatchInString:self
                                                                                  options:0
                                                                                    range:wholeString];
    return firstASCIIRun.location != NSNotFound;
}
// Returns a copy of the receiver with every character that belongs to
// `characterSet` removed, wherever it occurs.
- (NSString *)removeAllCharactersIn:(NSCharacterSet *)characterSet
{
    OWSAssertDebug(characterSet);

    // Split on the unwanted characters, then glue the surviving pieces back
    // together with nothing in between.
    NSArray<NSString *> *survivingPieces = [self componentsSeparatedByCharactersInSet:characterSet];
    return [survivingPieces componentsJoinedByString:@""];
}
// Returns a copy of the receiver containing only the characters that belong
// to NSCharacterSet.decimalDigitCharacterSet; everything else is removed.
- (NSString *)digitsOnly
{
    NSCharacterSet *nonDigits = [[NSCharacterSet decimalDigitCharacterSet] invertedSet];
    return [self removeAllCharactersIn:nonDigits];
}
@end
NS_ASSUME_NONNULL_END