Created
May 25, 2011 01:46
-
-
Save eiffelqiu/990166 to your computer and use it in GitHub Desktop.
Stripping HTML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Strips markup tags from a string by replacing every complete "<...>" span
 * with a single space. Text outside tags is copied through unchanged.
 *
 * This is a purely lexical scan: any "<" opens a span that runs to the next
 * ">", so it does not understand comments, CDATA, entities, or a literal "<"
 * used as a less-than sign followed later by ">".
 *
 * @param html The markup to flatten. A nil or empty string is returned as-is.
 * @param trim If YES, leading and trailing whitespace and newlines are removed
 *             from the result.
 * @return The flattened text. Runs of adjacent tags yield runs of spaces;
 *         collapse them afterwards if needed.
 */
- (NSString *)flattenHTML:(NSString *)html trimWhiteSpace:(BOOL)trim {
    if ([html length] == 0) {
        return html;
    }

    NSScanner *scanner = [NSScanner scannerWithString:html];
    // Disable whitespace skipping so text between tags is captured verbatim.
    [scanner setCharactersToBeSkipped:nil];

    NSMutableString *flattened = [NSMutableString stringWithCapacity:[html length]];

    while (![scanner isAtEnd]) {
        // Copy everything up to the next tag opener.
        NSString *plainText = nil;
        if ([scanner scanUpToString:@"<" intoString:&plainText]) {
            [flattened appendString:plainText];
        }
        if ([scanner isAtEnd]) {
            break;
        }

        // The scanner now sits on "<"; consume through the matching ">".
        NSString *tag = nil;
        [scanner scanUpToString:@">" intoString:&tag];
        if ([scanner scanString:@">" intoString:NULL]) {
            // Complete tag: replace it with a single space.
            [flattened appendString:@" "];
        } else if (tag != nil) {
            // Unterminated "<..." at the end of input: keep it untouched.
            [flattened appendString:tag];
        }
    }

    if (trim) {
        return [flattened stringByTrimmingCharactersInSet:
                   [NSCharacterSet whitespaceAndNewlineCharacterSet]];
    }
    // Hand back an immutable string rather than the internal buffer.
    return [NSString stringWithString:flattened];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment