Created
August 14, 2018 13:42
-
-
Save justasm/175c4070a50e09acebf7f9b487673fcd to your computer and use it in GitHub Desktop.
Soft-hyphen (U+00AD) support for API <21
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2006 The Android Open Source Project | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
import android.os.Build | |
import android.text.Layout | |
import android.text.StaticLayout | |
import android.text.TextPaint | |
import android.widget.TextView | |
import androidx.core.view.doOnLayout | |
/**
 * Soft-hyphen (U+00AD) support for API <21.
 *
 * The text is measured with a [StaticLayout] built from the target's own layout
 * parameters. A soft hyphen that ends up at a line break is replaced by a visible
 * hyphen; every other soft hyphen is removed.
 *
 * Limitation: the converted text is assembled as a plain [String], so spans on the
 * original [CharSequence] are not carried over whenever a conversion happens.
 */
object LegacyLineBreak {

    private const val SOFT_HYPHEN = '\u00AD'

    private val softHyphen by lazy { "\u00AD".toRegex() }

    /**
     * Sets [text] on [target] and, once [target] has been laid out, replaces it with
     * a copy in which soft hyphens have been resolved for the laid-out width.
     *
     * Text without any soft hyphen is set once and left untouched.
     */
    fun setText(target: TextView, text: CharSequence) {
        target.text = text
        // Nothing to convert: skip the measuring work and the second text assignment.
        if (SOFT_HYPHEN !in text) return
        target.doOnLayout {
            target.layout?.let { layout ->
                target.text = convertSoftHyphens(
                        input = text,
                        paint = layout.paint,
                        width = layout.width,
                        alignment = layout.alignment,
                        spacingMultiplier = layout.spacingMultiplier,
                        spacingAdd = layout.spacingAdd,
                        includePadding = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.JELLY_BEAN) {
                            target.includeFontPadding
                        } else {
                            false
                        })
            }
        }
    }

    /** Returns true when the character at [position] is a soft hyphen. */
    private fun isSoftHyphen(input: CharSequence, position: Int) =
            input[position] == SOFT_HYPHEN

    /** Returns a copy of the receiver with every soft hyphen removed. */
    private fun CharSequence.stripSoftHyphens() = replace(softHyphen, "")

    /**
     * Replaces soft hyphens at line breaks with traditional hyphens and strips out remaining soft hyphens.
     *
     * Hard line breaks ('\n') split [input] into paragraphs; each paragraph is wrapped
     * and converted on its own, so soft hyphens are honoured in every paragraph and not
     * only in the text after the last newline.
     *
     * @param input text that may contain soft hyphens
     * @param paint paint used to measure the text
     * @param width available line width, as passed to [StaticLayout]
     * @param hyphen visible character substituted for a soft hyphen at a line break
     * @return the converted text
     */
    private fun convertSoftHyphens(input: CharSequence,
                                   paint: TextPaint,
                                   width: Int,
                                   alignment: Layout.Alignment = Layout.Alignment.ALIGN_NORMAL,
                                   spacingMultiplier: Float = 1.0f,
                                   spacingAdd: Float = 0.0f,
                                   includePadding: Boolean = true,
                                   hyphen: Char = '-'): CharSequence {
        fun CharSequence.lineCount() =
                StaticLayout(this, paint, width,
                        alignment, spacingMultiplier, spacingAdd, includePadding).lineCount

        // Converts a single paragraph, i.e. text that contains no '\n'.
        fun convertParagraph(paragraph: CharSequence): CharSequence {
            val stripped = paragraph.stripSoftHyphens()
            // Without soft hyphens, or when everything fits on one line, stripping is all there is to do.
            if (SOFT_HYPHEN !in paragraph || stripped.lineCount() <= 1) return stripped

            // search backwards for a character we can line break on
            // such that the head fits on a single line, and recursively convert the tail
            for (index in paragraph.lastIndex downTo 0) {
                val atSoftHyphen = isSoftHyphen(paragraph, index)
                if (!atSoftHyphen && !isLineBreak(paragraph, index)) continue

                val head = if (atSoftHyphen) {
                    // The soft hyphen is the last character of this slice, so it is the one replaced.
                    paragraph.subSequence(0, index + 1).replaceLast(softHyphen, "" + hyphen)
                } else {
                    paragraph.subSequence(0, index + 1)
                }.stripSoftHyphens()

                if (head.lineCount() <= 1) {
                    val tail = paragraph.subSequence(index + 1, paragraph.length)
                    return head + convertParagraph(tail)
                }
            }
            // No break candidate yields a head that fits on one line.
            return stripped
        }

        val result = StringBuilder(input.length)
        var paragraphStart = 0
        for (index in input.indices) {
            if (isNewLine(input, index)) {
                result.append(convertParagraph(input.subSequence(paragraphStart, index)))
                result.append(input[index])
                paragraphStart = index + 1
            }
        }
        result.append(convertParagraph(input.subSequence(paragraphStart, input.length)))
        return result.toString()
    }
}
/**
 * Returns the receiver with the last match of [regex] replaced by [replacement],
 * or the receiver itself when [regex] does not match anywhere.
 */
private fun CharSequence.replaceLast(regex: Regex, replacement: CharSequence): CharSequence {
    val lastMatch = regex.findAll(this).lastOrNull() ?: return this
    return replaceRange(lastMatch.range, replacement)
}
/**
 * Replaces the last match of this regex in [input] with [replacement].
 * Returns [input] unchanged when there is no match.
 */
private fun Regex.replaceLast(input: CharSequence, replacement: CharSequence): CharSequence {
    val lastMatch = findAll(input).lastOrNull() ?: return input
    return input.replaceRange(lastMatch.range, replacement)
}
// Characters consulted when looking for line-break opportunities.
private const val CHAR_NEW_LINE: Char = '\n'
private const val CHAR_TAB: Char = '\t'
private const val CHAR_SPACE: Char = ' '
private const val CHAR_SLASH: Char = '/'
private const val CHAR_HYPHEN: Char = '-'
// U+200B ZERO WIDTH SPACE
private const val CHAR_ZWSP: Char = '\u200B'
// Characters below this code point are never passed to the ideographic check.
private const val CHAR_FIRST_CJK: Char = '\u2E80'
/** Returns true when the character of [input] at [position] is a line feed. */
private fun isNewLine(input: CharSequence, position: Int): Boolean =
        CHAR_NEW_LINE == input[position]
/**
 * Returns true when a line may be broken after the character at [position].
 *
 * To exactly match platform line break behaviour, this has been borrowed from AOSP API <21.
 * https://android.googlesource.com/platform/frameworks/base/+/kitkat-release/core/java/android/text/StaticLayout.java#359
 */
private fun isLineBreak(input: CharSequence, position: Int): Boolean {
    val current = input[position]
    // null when the current character is the last one
    val next: Char? = input.getOrNull(position + 1)

    // From the Unicode Line Breaking Algorithm (at least approximately)
    return when {
        current == CHAR_SPACE || current == CHAR_TAB || current == CHAR_ZWSP -> true
        // / is class SY and - is class HY, except when followed by a digit
        current == CHAR_SLASH || current == CHAR_HYPHEN -> next == null || !Character.isDigit(next)
        // Ideographs are class ID: breakpoints when adjacent, except for NS
        // (non-starters), which can be broken after but not before
        else -> current >= CHAR_FIRST_CJK && isIdeographic(current, true) &&
                next != null && isIdeographic(next, false)
    }
}
/**
 * Tells whether [c] belongs to the characters that the Unicode Line Breaking
 * Algorithm (http://www.unicode.org/unicode/reports/tr14/) classifies as
 * Ideographic (class ID), meaning a line may be broken between two of them.
 *
 * @param includeNonStarters also return true for category NS
 *                           (non-starters), which can be broken
 *                           after but not before.
 */
private fun isIdeographic(c: Char, includeNonStarters: Boolean): Boolean {
    // Hiragana (except small characters, unless non-starters are included)
    if (c in '\u3040'..'\u309F') return includeNonStarters || !isHiraganaNonStarter(c)
    // Katakana (except small characters, unless non-starters are included)
    if (c in '\u30A0'..'\u30FF') return includeNonStarters || !isKatakanaNonStarter(c)

    return c in '\u2E80'..'\u2FFF' || // CJK, KANGXI RADICALS, DESCRIPTION SYMBOLS
            c == '\u3000' || // IDEOGRAPHIC SPACE
            c in '\u3400'..'\u4DB5' || // CJK UNIFIED IDEOGRAPHS EXTENSION A
            c in '\u4E00'..'\u9FBB' || // CJK UNIFIED IDEOGRAPHS
            c in '\uF900'..'\uFAD9' || // CJK COMPATIBILITY IDEOGRAPHS
            c in '\uA000'..'\uA48F' || // YI SYLLABLES
            c in '\uA490'..'\uA4CF' || // YI RADICALS
            c in '\uFE62'..'\uFE66' || // SMALL PLUS SIGN to SMALL EQUALS SIGN
            c in '\uFF10'..'\uFF19' // WIDE DIGITS
}

/** Non-starter (class NS) characters inside the Hiragana block. */
private fun isHiraganaNonStarter(c: Char): Boolean = when (c) {
    '\u3041', // HIRAGANA LETTER SMALL A
    '\u3043', // HIRAGANA LETTER SMALL I
    '\u3045', // HIRAGANA LETTER SMALL U
    '\u3047', // HIRAGANA LETTER SMALL E
    '\u3049', // HIRAGANA LETTER SMALL O
    '\u3063', // HIRAGANA LETTER SMALL TU
    '\u3083', // HIRAGANA LETTER SMALL YA
    '\u3085', // HIRAGANA LETTER SMALL YU
    '\u3087', // HIRAGANA LETTER SMALL YO
    '\u308E', // HIRAGANA LETTER SMALL WA
    '\u3095', // HIRAGANA LETTER SMALL KA
    '\u3096', // HIRAGANA LETTER SMALL KE
    '\u309B', // KATAKANA-HIRAGANA VOICED SOUND MARK
    '\u309C', // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
    '\u309D', // HIRAGANA ITERATION MARK
    '\u309E' // HIRAGANA VOICED ITERATION MARK
    -> true
    else -> false
}

/** Non-starter (class NS) characters inside the Katakana block. */
private fun isKatakanaNonStarter(c: Char): Boolean = when (c) {
    '\u30A0', // KATAKANA-HIRAGANA DOUBLE HYPHEN
    '\u30A1', // KATAKANA LETTER SMALL A
    '\u30A3', // KATAKANA LETTER SMALL I
    '\u30A5', // KATAKANA LETTER SMALL U
    '\u30A7', // KATAKANA LETTER SMALL E
    '\u30A9', // KATAKANA LETTER SMALL O
    '\u30C3', // KATAKANA LETTER SMALL TU
    '\u30E3', // KATAKANA LETTER SMALL YA
    '\u30E5', // KATAKANA LETTER SMALL YU
    '\u30E7', // KATAKANA LETTER SMALL YO
    '\u30EE', // KATAKANA LETTER SMALL WA
    '\u30F5', // KATAKANA LETTER SMALL KA
    '\u30F6', // KATAKANA LETTER SMALL KE
    '\u30FB', // KATAKANA MIDDLE DOT
    '\u30FC', // KATAKANA-HIRAGANA PROLONGED SOUND MARK
    '\u30FD', // KATAKANA ITERATION MARK
    '\u30FE' // KATAKANA VOICED ITERATION MARK
    -> true
    else -> false
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment