Created
March 4, 2024 09:33
-
-
Save phillipcaudell/3ee1b696a42337b57612a5814f940ddd to your computer and use it in GitHub Desktop.
An extension that adds the ability to initialise a String.Encoding from an IANA charset name.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
extension String.Encoding {

    /// Creates a new instance using the specified IANA charset name.
    ///
    /// Matching is case-insensitive and ignores `-` and `_`, so `"UTF-8"`,
    /// `"utf_8"` and `"utf8"` all resolve to `.utf8`.
    ///
    /// - Parameter name: An IANA charset name such as `"iso-8859-1"`.
    /// - Returns: `nil` when `name` is not one of the charsets listed below.
    public init?(charsetName name: String) {
        // Collapse all spelling variants (us-ascii, us_ascii, US-ASCII) to one key.
        let match = name
            .lowercased()
            .replacing("-", with: "")
            .replacing("_", with: "")

        switch match {
        case "usascii":
            self = .ascii
        case "xnextstep":
            self = .nextstep
        case "eucjp":
            self = .japaneseEUC
        case "utf8":
            self = .utf8
        case "iso88591":
            self = .isoLatin1
        case "xmacsymbol":
            self = .symbol
        case "cp932":
            self = .shiftJIS
        case "iso88592":
            self = .isoLatin2
        case "utf16":
            self = .utf16
        case "windows1251":
            self = .windowsCP1251
        case "windows1252":
            self = .windowsCP1252
        case "windows1253":
            self = .windowsCP1253
        case "windows1254":
            self = .windowsCP1254
        case "windows1250":
            self = .windowsCP1250
        case "iso2022jp":
            self = .iso2022JP
        case "macintosh":
            self = .macOSRoman
        // The endianness-specific Unicode encodings have raw values far above
        // the low range covered by the cases above, so they are listed explicitly.
        case "utf16be":
            self = .utf16BigEndian
        case "utf16le":
            self = .utf16LittleEndian
        case "utf32":
            self = .utf32
        case "utf32be":
            self = .utf32BigEndian
        case "utf32le":
            self = .utf32LittleEndian
        default:
            return nil
        }
    }

    /// Returns the IANA charset name.
    ///
    /// The value is `nil` for encodings that are not listed in the switch
    /// below (for example `.nonLossyASCII`).
    public var charsetName: String? {
        switch self {
        case .ascii:
            return "us-ascii"
        case .nextstep:
            return "x-nextstep"
        case .japaneseEUC:
            return "euc-jp"
        case .utf8:
            return "utf-8"
        case .isoLatin1:
            return "iso-8859-1"
        case .symbol:
            return "x-mac-symbol"
        case .shiftJIS:
            return "cp932"
        case .isoLatin2:
            return "iso-8859-2"
        case .utf16:
            return "utf-16"
        case .windowsCP1251:
            return "windows-1251"
        case .windowsCP1252:
            return "windows-1252"
        case .windowsCP1253:
            return "windows-1253"
        case .windowsCP1254:
            return "windows-1254"
        case .windowsCP1250:
            return "windows-1250"
        case .iso2022JP:
            return "iso-2022-jp"
        case .macOSRoman:
            return "macintosh"
        case .utf16BigEndian:
            return "utf-16be"
        case .utf16LittleEndian:
            return "utf-16le"
        case .utf32:
            return "utf-32"
        case .utf32BigEndian:
            return "utf-32be"
        case .utf32LittleEndian:
            return "utf-32le"
        default:
            return nil
        }
    }
}
/// Optional `Codable` support for `String.Encoding`.
///
/// An encoding is serialised as a single string holding its IANA charset
/// name (see `charsetName`), e.g. `"utf-8"`.
extension String.Encoding: Codable {

    /// Writes the IANA charset name of this encoding into a single-value container.
    ///
    /// - Parameter encoder: The encoder to write to.
    /// - Throws: `EncodingError.invalidValue` when this encoding has no
    ///   charset name, or any error thrown by the container.
    public func encode(to encoder: Encoder) throws {
        // Fail here rather than emit `null`: `init(from:)` decodes a
        // non-optional `String`, so a `null` payload could never be read back.
        guard let name = charsetName else {
            throw EncodingError.invalidValue(
                self,
                EncodingError.Context(
                    codingPath: encoder.codingPath,
                    debugDescription: "String.Encoding with rawValue \(rawValue) has no IANA charset name."
                )
            )
        }
        var container = encoder.singleValueContainer()
        try container.encode(name)
    }

    /// Reads an IANA charset name from a single-value container.
    ///
    /// - Parameter decoder: The decoder to read from.
    /// - Throws: Any error thrown while decoding the string value.
    public init(from decoder: Decoder) throws {
        let container = try decoder.singleValueContainer()
        let name = try container.decode(String.self)
        // Best effort: an unrecognised charset name falls back to ASCII
        // instead of failing the whole decode.
        self = String.Encoding(charsetName: name) ?? .ascii
    }
}
// The CoreFoundation charset conversion functions used below are only
// reachable through `import Foundation` on Apple platforms, so gate on Darwin.
// `canImport(Foundation)` is true everywhere this file compiles.
#if canImport(Darwin)
extension String.Encoding {

    /// Prints out switch statements that can be transplanted into our charset functions.
    ///
    /// CFStringConvertEncodingToIANACharSetName and related APIs are unavailable on platforms where
    /// we may wish to run SwiftEmail. For this reason we need our own equivalents.
    /// This function prints the `case` statements for `init?(charsetName:)` followed by
    /// those for `charsetName`.
    static func printIANASwitchStatements() {
        var decodeStatements = [String]()
        var encodeStatements = [String]()

        // As String.Encoding isn't CaseIterable, probe the low raw values...
        var candidates = (1...1000).map { String.Encoding(rawValue: UInt($0)) }
        // ...and add the endianness-specific Unicode encodings by name: their
        // raw values start at 0x8c000100, far outside the probed range.
        candidates += [.utf16BigEndian, .utf16LittleEndian, .utf32, .utf32BigEndian, .utf32LittleEndian]

        for encoding in candidates {
            // Skip raw values whose description is empty.
            guard !encoding.description.isEmpty else {
                continue
            }
            let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding.rawValue)
            guard let name = CFStringConvertEncodingToIANACharSetName(cfEncoding) as? String else {
                continue
            }

            // Need to handle all variants (us-ascii, us_ascii, usascii), so build
            // the label exactly the way the initialiser normalises its input:
            // lowercased, with dashes and underscores stripped.
            let strippedName = name
                .lowercased()
                .replacing("-", with: "")
                .replacing("_", with: "")

            // Initialiser statement
            let decode = """
            case "\(strippedName)":
            self = .init(rawValue: \(encoding.rawValue))
            """
            decodeStatements.append(decode)

            // charsetName statement
            let encode = """
            case \(encoding.rawValue):
            return "\(name)"
            """
            encodeStatements.append(encode)
        }

        print("Initialiser statements:")
        print(decodeStatements.joined(separator: "\n"))
        print("Charset name statements:")
        print(encodeStatements.joined(separator: "\n"))
    }
}
#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment