Created
March 9, 2020 17:34
-
-
Save allanbatista/c16e4ccf2e9d4398a6b2ac6f305f6ef3 to your computer and use it in GitHub Desktop.
Normalização de caracteres especiais direto no bigquery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL | |
# https://en.it1352.com/article/e546eb6a693c4f8db4c58aa717684d8d.html | |
CREATE TEMP FUNCTION accent2latin(word STRING) AS | |
(( | |
WITH lookups AS ( | |
SELECT | |
'ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents, | |
'c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins | |
), | |
pairs AS ( | |
SELECT accent, latin FROM lookups, | |
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1, | |
UNNEST(SPLIT(latins)) AS latin WITH OFFSET AS p2 | |
WHERE p1 = p2 | |
) | |
SELECT STRING_AGG(IFNULL(latin, char), '') | |
FROM UNNEST(SPLIT(word, '')) char | |
LEFT JOIN pairs | |
ON char = accent | |
)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment