Last active
October 20, 2018 00:59
-
-
Save rafaelhenrique/1d064ab71ee7d3de5cd04be1f4582e50 to your computer and use it in GitHub Desktop.
Go improves Python?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def find_cnpj_using_in(content, company_name):
    """Look up a CNPJ with a plain substring test on each line.

    The first line of ``content`` (split on ``'\\n'``) that contains
    ``company_name`` is selected, and characters 2 through 15 of that line
    are converted to ``int``.

    Args:
        content: Full text to scan, one record per line.
        company_name: Substring that identifies the wanted line.

    Returns:
        The integer parsed from ``line[2:16]`` of the first matching line.

    Raises:
        StopIteration: If no line contains ``company_name``.
        ValueError: If the sliced field is not a valid integer literal.
    """
    matching_rows = (row for row in content.split('\n') if company_name in row)
    first_row = next(matching_rows)
    return int(first_row[2:16])
def find_cnpj_using_search(content, company_name):
    """Look up a CNPJ with a single ``re.search`` over the whole text.

    The pattern skips two leading digits, captures the next fourteen digits
    and then requires ``company_name`` to appear later on the same line
    (``.`` does not match a newline by default).

    Args:
        content: Full text to scan, one record per line.
        company_name: Literal company name identifying the wanted line.

    Returns:
        The captured 14-digit field, as a string, of the first match.

    Raises:
        AttributeError: If nothing matches (``re.search`` returns ``None``).
    """
    # Escape the name so characters such as '.', '(' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    expression = r'\d{{2}}(\d{{14}}).*{}.*'.format(re.escape(company_name))
    return re.search(expression, content).group(1)
def find_cnpj_using_findall(content, company_name):
    """Look up a CNPJ by collecting every regex match and taking the first.

    The pattern skips two leading digits, captures the next fourteen digits
    and then requires ``company_name`` to appear later on the same line
    (``.`` does not match a newline by default).

    Args:
        content: Full text to scan, one record per line.
        company_name: Literal company name identifying the wanted line.

    Returns:
        The captured 14-digit field, as a string, of the first match.

    Raises:
        IndexError: If nothing matches (``findall`` returns an empty list).
    """
    # Escape the name so characters such as '.', '(' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    expression = r'\d{{2}}(\d{{14}}).*{}.*'.format(re.escape(company_name))
    pattern = re.compile(expression)
    return pattern.findall(content)[0]
if __name__ == '__main__':
    # Real data about CNPJ - too slow, too large and not versioned
    #
    # with open('./data/F.K03200UF.D71214PR', 'r', encoding='iso8859') as fp:
    #     content = fp.read()

    with open('./data/MINIMAL', 'r', encoding='iso8859') as fp:
        content = fp.read()

    company_name = 'OLIST SERVICOS DIGITAIS LTDA'

    # Run every lookup strategy against the same input and print each result
    # labelled with the name of the function that produced it.
    for finder in (find_cnpj_using_in,
                   find_cnpj_using_search,
                   find_cnpj_using_findall):
        print("{} result: ".format(finder.__name__), finder(content, company_name))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"io/ioutil" | |
"regexp" | |
"strconv" | |
"strings" | |
) | |
import "C" | |
// FindCnpjByRegex searches content for the first line that has two digits,
// then fourteen digits, then the literal company name later on the same line,
// and returns those fourteen digits as an integer.
//
// It returns 0 when no line matches, when the pattern cannot be compiled, or
// when the captured digits cannot be converted to an int.
//
//export FindCnpjByRegex
func FindCnpjByRegex(content, company string) (cnpj int) {
	// QuoteMeta makes the company name match literally, so characters such as
	// '.', '(' or '+' are not treated as regex syntax. Compile is used instead
	// of MustCompile so a bad input yields 0 rather than a panic, which would
	// take down the process that called this exported function.
	pattern, err := regexp.Compile(`\d{2}(\d{14}).*` + regexp.QuoteMeta(company) + `.*`)
	if err != nil {
		return 0
	}

	result := pattern.FindStringSubmatch(content)
	if len(result) < 2 {
		return 0
	}

	value, err := strconv.Atoi(result[1])
	if err != nil {
		return 0
	}
	return value
}
// FindCnpjByContains scans content line by line (split on "\n") and, for the
// first line that contains company and is long enough to hold the field,
// returns bytes 2 through 15 of that line parsed as an integer.
//
// It returns 0 when no suitable line is found or when the field is not a
// valid integer.
//
//export FindCnpjByContains
func FindCnpjByContains(content, company string) (cnpj int) {
	// Byte offsets of the numeric field inside a line: line[fieldStart:fieldEnd].
	const (
		fieldStart = 2
		fieldEnd   = 16
	)

	for _, line := range strings.Split(content, "\n") {
		if !strings.Contains(line, company) {
			continue
		}
		// A matching line shorter than the field cannot be sliced; skip it
		// instead of panicking with an out-of-range slice.
		if len(line) < fieldEnd {
			continue
		}
		value, err := strconv.Atoi(line[fieldStart:fieldEnd])
		if err != nil {
			return 0
		}
		return value
	}
	return 0
}
func main() { | |
// Real data about CNPJ - too slow, too large and not versioned | |
// | |
// file, err := ioutil.ReadFile("./data/F.K03200UF.D71214PR") | |
file, err := ioutil.ReadFile("./data/MINIMAL") | |
if err != nil { | |
fmt.Printf("Error to open file. Error: %v\n", err.Error()) | |
} | |
content := string(file) | |
cnpj := FindCnpjByRegex(content, "OLIST SERVICOS DIGITAIS LTDA") | |
fmt.Printf("FindCnpjByRegex result: %d\n", cnpj) | |
cnpj = FindCnpjByContains(content, "OLIST SERVICOS DIGITAIS LTDA") | |
fmt.Printf("FindCnpjByContains result: %d\n", cnpj) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ctypes import Structure, c_char_p, c_longlong, cdll | |
class GoString(Structure):
    """ctypes structure used to pass a string argument to the Go library.

    It carries a ``char`` pointer and a 64-bit integer; callers in this file
    fill them with a ``bytes`` object and its ``len()``.
    """

    _fields_ = [
        ("p", c_char_p),    # pointer to the raw bytes
        ("n", c_longlong),  # number of bytes behind ``p``
    ]
# Load the shared library built from the Go source and declare the C
# signatures of its exported functions so ctypes converts arguments and
# return values correctly.
gofindcnpj = cdll.LoadLibrary("./gofindcnpj.so")

# Both exported functions take (content, company) as GoString values and
# return a 64-bit integer.
for _exported in (gofindcnpj.FindCnpjByContains, gofindcnpj.FindCnpjByRegex):
    _exported.argtypes = [GoString, GoString]
    _exported.restype = c_longlong
if __name__ == '__main__':
    # Read the sample data set as text.
    with open('./data/MINIMAL', 'r', encoding='iso8859') as fp:
        content = fp.read()

    company_name = 'OLIST SERVICOS DIGITAIS LTDA'

    # The Go side receives raw bytes plus an explicit length, so encode both
    # strings and keep the bytes objects referenced for the duration of the call.
    raw_content = content.encode('utf-8')
    raw_company = company_name.encode('utf-8')
    go_content = GoString(raw_content, len(raw_content))
    go_company = GoString(raw_company, len(raw_company))

    cnpj = gofindcnpj.FindCnpjByContains(go_content, go_company)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment