Last active
January 13, 2022 05:54
-
-
Save melvinkcx/988a1d9ef7bb3d3556ecb774ca655203 to your computer and use it in GitHub Desktop.
Converting Unicode Sentences To Snake Case In SQL and Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import unidecode | |
def to_snake_case(text: str) -> str:
    """
    Convert unicode text to snake case.

    The text is stripped of surrounding whitespace, transliterated to ASCII
    with ``unidecode``, lowercased, and every run of characters other than
    ``a-z``, ``0-9`` and the hyphen is collapsed into a single underscore.
    Leading and trailing underscores are removed from the result.

    Hyphens are deliberately preserved; backslashes are treated like any
    other separator and become underscores.

    >>> to_snake_case("My favourite dish was raclette.")
    'my_favourite_dish_was_raclette'
    >>> to_snake_case("My favourite dish now is Soupe à l'oignon.")
    'my_favourite_dish_now_is_soupe_a_l_oignon'
    >>> to_snake_case("A well-known fact")
    'a_well-known_fact'
    """
    text = unidecode.unidecode(text.strip()).lower()
    # The hyphen is placed last in the class so it is literal without any
    # escaping. The previous pattern r'[^a-z0-9\\-]+' escaped a backslash
    # instead, which accidentally kept backslashes in the output.
    text = re.sub(r'[^a-z0-9-]+', '_', text)
    return text.strip("_")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Tested on Postgres 13
-- The unaccent extension provides the unaccent() function used by
-- public.to_snake_case to strip diacritics.
CREATE EXTENSION IF NOT EXISTS unaccent;
-- Convert Unicode text to snake case.
--   v: the text to convert.
-- Returns the converted text; because the function is STRICT, a NULL
-- argument yields NULL without running the body.
-- Requires the unaccent extension (it calls unaccent()).
-- NOTE(review): unaccent() itself is declared STABLE, while this wrapper is
-- declared IMMUTABLE -- confirm this is intentional (e.g. for use in indexes).
CREATE OR REPLACE FUNCTION public.to_snake_case(
    v TEXT
) RETURNS TEXT
    LANGUAGE plpgsql
    STRICT IMMUTABLE AS
$function$
BEGIN
    -- 1. trim trailing and leading whitespaces from text
    -- 2. remove accents (diacritic signs) from a given text
    -- 3. lowercase unaccented text
    -- 4. replace every run of characters other than a-z, 0-9 and hyphen
    --    with a single underscore (the hyphen is last in the bracket
    --    expression so it is literal; the previous '\\-' kept backslashes)
    -- 5. trim leading and trailing underscores
    -- The 'i' flag is not needed because the input is already lowercased.
    RETURN trim(BOTH '_' FROM regexp_replace(lower(unaccent(trim(v))), '[^a-z0-9-]+', '_', 'g'));
END;
$function$;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment