Last active
February 5, 2018 12:21
-
-
Save davidvpe/0136fc8eb09c53214ce3258fedd628f1 to your computer and use it in GitHub Desktop.
Dutch language processor that splits words into syllables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  main.swift
//  DutchGrammar
//
//  Created by Velarde Robles, David on 05/02/2018.
//  Copyright © 2018 Velarde Robles, David. All rights reserved.
//
import Foundation | |
/// Splits a word into syllables using a handful of Dutch spelling heuristics.
///
/// The word is scanned left to right. Each syllable runs from its first letter up to a boundary
/// chosen at the first consonant that follows a vowel, using these rules in order:
///
/// 1. If one of the hard-to-pronounce clusters (`rts`, `mbt`, `lfts`, `rwt`) occurs in the rest of
///    the word, the syllable ends just before the last letter of that cluster (`koort|sig`).
/// 2. `ch` is treated as a single consonant, and a `j` directly after an `i` belongs to the vowel.
/// 3. If the consonant is followed by a vowel, the syllable ends before the consonant (`la|chen`).
/// 4. If the next two letters are identical, the syllable ends between them (`start|ten`).
/// 5. Otherwise the syllable ends right after the consonant (`paar|den`).
///
/// Letters for which no boundary can be found (for example a trailing consonant after a cluster,
/// as in `koorts`) are attached to the previous syllable instead of being dropped, so the returned
/// pieces always concatenate back to `word`.
///
/// - Parameter word: The word to split. Only lowercase `a`, `e`, `i`, `o` and `u` count as vowels;
///   every other character (including uppercase and accented letters) is treated as a consonant.
/// - Returns: The syllables in order of appearance. An empty word yields an empty array.
func process(word: String) -> [String] {
    let clusterSpellings = [
        "rts",
        "mbt",
        "lfts",
        "rwt"
    ]
    let hard2Pronounce = clusterSpellings.map { Array($0) }
    let vowels: Set<Character> = ["a", "e", "i", "o", "u"]

    // Work on an array of Characters: integer positions are then real character offsets
    // (no code-unit offsets, which differ for non-ASCII text) and each lookup is O(1).
    let letters = Array(word)
    let count = letters.count

    func isVowel(_ letter: Character) -> Bool {
        return vowels.contains(letter)
    }

    /// Whether the digraph "ij" starts at `position`.
    func isIJ(at position: Int) -> Bool {
        return position + 1 < count && letters[position] == "i" && letters[position + 1] == "j"
    }

    /// Position of the first occurrence of `cluster` at or after `lowerBound`, if any.
    func position(of cluster: [Character], from lowerBound: Int) -> Int? {
        let lastStart = count - cluster.count
        guard lowerBound <= lastStart else { return nil }
        for candidate in lowerBound...lastStart
            where letters[candidate..<(candidate + cluster.count)].elementsEqual(cluster) {
            return candidate
        }
        return nil
    }

    /// Exclusive end position of the syllable that begins at `start`, or `nil` when the
    /// remaining letters do not form a syllable on their own.
    func syllableEnd(from start: Int) -> Int? {
        var foundVowel = isVowel(letters[start])
        var firstConsonant: Int?

        for j in (start + 1)..<count {
            let letter = letters[j]

            if isVowel(letter) {
                foundVowel = true
                // A vowel (or the digraph "ij") closing the word also closes the syllable.
                let endsWord = j == count - 1
                let ijEndsWord = j == count - 2 && isIJ(at: j)
                if endsWord || ijEndsWord {
                    return count
                }
                continue
            }

            // The "j" of "ij" is part of the vowel sound, not a consonant boundary.
            if letter == "j" && letters[j - 1] == "i" {
                continue
            }
            // Leading consonants before the first vowel stay in the syllable.
            guard foundVowel else { continue }

            if firstConsonant == nil {
                firstConsonant = j
                // NOTE(review): the cluster is matched anywhere in the remainder of the word,
                // not only right at this consonant — confirm whether it should be anchored here.
                for cluster in hard2Pronounce {
                    if let clusterStart = position(of: cluster, from: j) {
                        // Keep the cluster together except for its final letter.
                        return clusterStart + cluster.count - 1
                    }
                }
            }

            guard j < count - 1 else { return count }
            let next = letters[j + 1]

            if letter == "c" && next == "h" {
                // "ch" acts as one consonant; decide at the "h".
                continue
            }
            if isVowel(next) {
                // Consonant(s) followed by a vowel open the next syllable.
                return firstConsonant ?? j
            }
            if j + 2 < count && letters[j + 2] == next {
                // Doubled letter after this consonant: split between the two.
                return j + 2
            }
            return j + 1
        }
        return nil
    }

    var syllables = [String]()
    var start = 0
    while start < count {
        guard let end = syllableEnd(from: start) else {
            // No boundary found: keep the leftover letters rather than losing them.
            let tail = letters[start...]
            if syllables.isEmpty || tail.contains(where: isVowel) {
                syllables.append(String(tail))
            } else {
                syllables[syllables.count - 1] += String(tail)
            }
            break
        }
        syllables.append(String(letters[start..<end]))
        start = end
    }
    return syllables
}
// Sample words used to exercise the syllable splitter.
let words = [
    "ijsbergsla", // This one is causing trouble
    "broodoven",
    "startten",
    "lachen",
    "moeten",
    "paarden",
    "erwten",
    "koortsig",
    "ambtenaar",
    "Delftse"
]

// Print every sample word followed by the syllables computed for it,
// in the form `word -> ["syl", "la", "bles"]`.
for word in words {
    print(word, terminator: " -> ")
    print(process(word: word))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment