profh · November 9, 2021 21:09
diff --git a/nlp_parts_of_speech.swift b/nlp_parts_of_speech.swift
 // Playground using NSLinguisticTagger to analyze and tag a block of text
 // https://developer.apple.com/documentation/foundation/nslinguistictagger/identifying_parts_of_speech

 import Foundation

 // text from some article on Democratic candidate preferences (Nov 2018)
 let text = "The Democratic frontrunner, according to Politico's poll: Joe Biden, former Vice President and Senator from Delaware, who managed to grab just over a quarter (26%) of the Democrats' vote for who they'd most like to see facing off against Trump in two years for control of the White House. The runner-up is Vermont Senator Bernie Sanders, who ran a close primary campaign against Hillary Clinton in 2016, managing to get about a fifth of the votes (19%). The third-place candidate is Rep. Beto O’Rourke from Texas, who built national name-recognition through his losing Senate bid last week, with 8 percent. Following O’Rourke are three senators, all thought to be likely candidates: Sens. Elizabeth Warren (Mass.) at 5 percent, Kamala Harris (Calif.) at 4 percent and Cory Booker (N.J.) at 3 percent."

 // Isolate the first sentence: everything up to and including the first period.
 // When the text contains no period, endOfSentence is text.endIndex and the
 // whole text is treated as the sentence instead of trapping on a nil unwrap.
 let endOfSentence = text.firstIndex(of: ".") ?? text.endIndex
 let sentence: Substring = endOfSentence < text.endIndex ? text[...endOfSentence] : text[...]


 // set up a tagger with NSLinguisticTagger, asking only for the lexical-class scheme
 let tagger = NSLinguisticTagger(tagSchemes: [.lexicalClass], options: 0)

 // setting the tagger text to our text
 tagger.string = text

 // set the range of the text to be analyzed: just the first sentence.
 // NSRange is measured in UTF-16 code units, so the length is derived from the
 // sentence itself rather than from a hand-counted constant that silently goes
 // stale whenever the text changes.
 let range = NSMakeRange(0, sentence.utf16.count)
 // let range = NSMakeRange(0, text.utf16.count)  // the entire block


 // set tagger options (pretty typical choices)
 let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace, .joinNames]

 // the lexical classes collected below, and one results array per class
 // other tag options: https://developer.apple.com/documentation/foundation/nslinguistictag
 let tags: [NSLinguisticTag] = [.noun, .verb, .adjective]

 var nouns: [String] = []
 var verbs: [String] = []
 var adjs: [String] = []


 // The meat of the operation: enumerate the tags and add to results
 tagger.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange, _ in
   // tokens the tagger could not classify arrive with a nil tag; skip them
   guard let tag = tag else { return }

   // tokenRange is an NSRange (UTF-16 offsets), so slice through NSString
   let token = (text as NSString).substring(with: tokenRange)

   // depending on the tag, add to the appropriate results array
   switch tag {
   case .noun: nouns.append(token)
   case .verb: verbs.append(token)
   case .adjective: adjs.append(token)
   default: break  // every other lexical class is ignored
   }
 }

 // Print each results array under its own heading, one token per line,
 // with a dashed rule between consecutive sections.
 print("NOUNS:")
 nouns.forEach { print($0) }

 print("--------")
 print("VERBS:")
 verbs.forEach { print($0) }

 print("--------")
 print("ADJECTIVES:")
 adjs.forEach { print($0) }

 //** Adding language detection...  **//
 print("--------")
 // verify that the dominant language is English.
 // dominantLanguage is Optional, so fall back to "und" (the BCP-47 code for
 // "undetermined") instead of force-unwrapping and trapping on nil.
 let language = tagger.dominantLanguage
 print("The language is \(language ?? "und")")

 // turn this into func; use repeatedly
 /// Prints the dominant language the shared `tagger` detects for `text`.
 ///
 /// Side effect: replaces the string held by `tagger`, so any later use of
 /// `tagger` sees `text` rather than the string it held before the call.
 ///
 /// - Parameter text: The text whose dominant language should be reported.
 func determineLanguage(for text: String) {
   tagger.string = text
   // dominantLanguage is Optional; report "und" (BCP-47 "undetermined") rather
   // than crashing on a force-unwrap when no language comes back.
   let code = tagger.dominantLanguage ?? "und"
   print("The language is \(code)")
 }

 // Sample phrases: four natural languages, then inputs that are not a real
 // natural language at all.
 let frenchQuote = "La science n'a pas de patrie."  // Science has no homeland -- Pasteur
 let germanQuote = "Das ist nicht mein Bier."  // That's not my beer
 let italianQuote = "L’amore è cieco"  // Love is blind
 let spanishQuote = "El amor todo lo puede."  // Love will find a way
 let klingonQuote = "Heghlu'meH QaQ jajvam"  // It is a good day to die
 let loremQuote = "Lorem ipsum dolor sit amet"
 let gibberish = "asdf plmjus qawsedrf"

 // Natural-language samples first, in declaration order.
 for quote in [frenchQuote, germanQuote, italianQuote, spanishQuote] {
   determineLanguage(for: quote)
 }

 // Edge cases; each note records what the author saw the tagger report.
 determineLanguage(for: klingonQuote)  // author's note: thinks it's Croatian, not Klingon
 determineLanguage(for: loremQuote)  // author's note: thinks it's Romanian
 determineLanguage(for: gibberish)  // author's note: that's English too...
 determineLanguage(for: "x")  // author's note: this is undefined
	// Playground using NSLinguisticTagger to analyze and tag a block of text
	// https://developer.apple.com/documentation/foundation/nslinguistictagger/identifying_parts_of_speech

	import Foundation

	// text from some article on Democratic candidate preferences (Nov 2018)
	let text = "The Democratic frontrunner, according to Politico's poll: Joe Biden, former Vice President and Senator from Delaware, who managed to grab just over a quarter (26%) of the Democrats' vote for who they'd most like to see facing off against Trump in two years for control of the White House. The runner-up is Vermont Senator Bernie Sanders, who ran a close primary campaign against Hillary Clinton in 2016, managing to get about a fifth of the votes (19%). The third-place candidate is Rep. Beto O’Rourke from Texas, who built national name-recognition through his losing Senate bid last week, with 8 percent. Following O’Rourke are three senators, all thought to be likely candidates: Sens. Elizabeth Warren (Mass.) at 5 percent, Kamala Harris (Calif.) at 4 percent and Cory Booker (N.J.) at 3 percent."

	// Isolate the first sentence: everything up to and including the first period.
	// When the text contains no period, endOfSentence is text.endIndex and the
	// whole text is treated as the sentence instead of trapping on a nil unwrap.
	let endOfSentence = text.firstIndex(of: ".") ?? text.endIndex
	let sentence: Substring = endOfSentence < text.endIndex ? text[...endOfSentence] : text[...]


	// set up a tagger with NSLinguisticTagger, asking only for the lexical-class scheme
	let tagger = NSLinguisticTagger(tagSchemes: [.lexicalClass], options: 0)

	// setting the tagger text to our text
	tagger.string = text

	// set the range of the text to be analyzed: just the first sentence.
	// NSRange is measured in UTF-16 code units, so the length is derived from the
	// sentence itself rather than from a hand-counted constant that silently goes
	// stale whenever the text changes.
	let range = NSMakeRange(0, sentence.utf16.count)
	// let range = NSMakeRange(0, text.utf16.count) // the entire block


	// set tagger options (pretty typical choices)
	let options: NSLinguisticTagger.Options = [.omitPunctuation, .omitWhitespace, .joinNames]

	// the lexical classes collected below, and one results array per class
	// other tag options: https://developer.apple.com/documentation/foundation/nslinguistictag
	let tags: [NSLinguisticTag] = [.noun, .verb, .adjective]

	var nouns: [String] = []
	var verbs: [String] = []
	var adjs: [String] = []


	// The meat of the operation: enumerate the tags and add to results
	tagger.enumerateTags(in: range, unit: .word, scheme: .lexicalClass, options: options) { tag, tokenRange, _ in
		// tokens the tagger could not classify arrive with a nil tag; skip them
		guard let tag = tag else { return }

		// tokenRange is an NSRange (UTF-16 offsets), so slice through NSString
		let token = (text as NSString).substring(with: tokenRange)

		// depending on the tag, add to the appropriate results array
		switch tag {
		case .noun: nouns.append(token)
		case .verb: verbs.append(token)
		case .adjective: adjs.append(token)
		default: break // every other lexical class is ignored
		}
	}

	// Print each results array under its own heading, one token per line,
	// with a dashed rule between consecutive sections.
	print("NOUNS:")
	nouns.forEach { print($0) }

	print("--------")
	print("VERBS:")
	verbs.forEach { print($0) }

	print("--------")
	print("ADJECTIVES:")
	adjs.forEach { print($0) }

	// Adding language detection... //
	print("--------")
	// verify that the dominant language is English.
	// dominantLanguage is Optional, so fall back to "und" (the BCP-47 code for
	// "undetermined") instead of force-unwrapping and trapping on nil.
	let language = tagger.dominantLanguage
	print("The language is \(language ?? "und")")

	// turn this into func; use repeatedly
	/// Prints the dominant language the shared `tagger` detects for `text`.
	///
	/// Side effect: replaces the string held by `tagger`, so any later use of
	/// `tagger` sees `text` rather than the string it held before the call.
	///
	/// - Parameter text: The text whose dominant language should be reported.
	func determineLanguage(for text: String) {
		tagger.string = text
		// dominantLanguage is Optional; report "und" (BCP-47 "undetermined") rather
		// than crashing on a force-unwrap when no language comes back.
		let code = tagger.dominantLanguage ?? "und"
		print("The language is \(code)")
	}

	// Sample phrases: four natural languages, then inputs that are not a real
	// natural language at all.
	let frenchQuote = "La science n'a pas de patrie."  // Science has no homeland -- Pasteur
	let germanQuote = "Das ist nicht mein Bier."  // That's not my beer
	let italianQuote = "L’amore è cieco"  // Love is blind
	let spanishQuote = "El amor todo lo puede."  // Love will find a way
	let klingonQuote = "Heghlu'meH QaQ jajvam"  // It is a good day to die
	let loremQuote = "Lorem ipsum dolor sit amet"
	let gibberish = "asdf plmjus qawsedrf"

	// Natural-language samples first, in declaration order.
	for quote in [frenchQuote, germanQuote, italianQuote, spanishQuote] {
		determineLanguage(for: quote)
	}

	// Edge cases; each note records what the author saw the tagger report.
	determineLanguage(for: klingonQuote)  // author's note: thinks it's Croatian, not Klingon
	determineLanguage(for: loremQuote)  // author's note: thinks it's Romanian
	determineLanguage(for: gibberish)  // author's note: that's English too...
	determineLanguage(for: "x")  // author's note: this is undefined