Last active
September 12, 2018 19:32
-
-
Save nportinari/d4eee09f5212b2be986f9270a2ca7d14 to your computer and use it in GitHub Desktop.
Contains sem acentos (Pandas)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function to search for substrings in a DataFrame without worrying about accents.
import re | |
import unicodedata | |
def _strip_accents(text):
    """Return *text* with diacritics removed (non-ASCII characters are dropped)."""
    # Byte strings (Python 2 ``str`` or Python 3 ``bytes``) are decoded first
    # so that ``unicodedata.normalize`` always receives text.
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    # NFD splits each accented character into its base letter plus combining
    # marks; encoding to ASCII with 'ignore' then discards the marks.
    decomposed = unicodedata.normalize('NFD', text)
    return decomposed.encode('ascii', 'ignore').decode('utf-8')


def strip_ctn(x, y):
    """Accent- and case-insensitive ``contains`` check on a pandas Series.

    Both the Series values and the search pattern are stripped of accents
    before matching, so ``"joao"`` matches ``"João"`` and vice versa.

    Args:
        x: pandas Series (e.g. a DataFrame column) of strings to search in.
        y: The string to look for. As with ``Series.str.contains``, it is
            interpreted as a regular expression.

    Returns:
        A boolean Series, True where the accent-stripped value of ``x``
        contains the accent-stripped ``y``. Missing values yield False.
    """
    pattern = _strip_accents(y)
    # Apply the same NFD -> ASCII transformation element-wise to the Series.
    # The original code normalized the pattern twice and never touched the
    # Series, so accented cell values could not match.
    haystack = (
        x.str.normalize('NFD')
        .str.encode('ascii', 'ignore')
        .str.decode('utf-8')
    )
    return haystack.str.contains(pattern, na=False, case=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment