Created
December 18, 2017 17:00
-
-
Save felipessalvatore/f6edad81731c6559ba0aa537f94277b7 to your computer and use it in GitHub Desktop.
Using NLTK to count n-gram overlap between two sentences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.translate.bleu_score import modified_precision

# Source sentence (Portuguese). Kept for reference only; nothing below reads it.
pt = "em plano aberto, a cidade parece linda"
# English reference translation that the candidates are compared against.
ref = "in a wide shot, the city looks beautiful"
# Two candidate English translations to score.
c1 = "in the open, the city looks beautiful"
c2 = "in open plan, the city looks gorgeous"

# Tokenize by splitting on single spaces. Punctuation stays attached to the
# neighbouring word (e.g. "shot," is one token), so such tokens only match
# when the punctuation matches too.
# The single tokenized reference is wrapped in a list because it is passed
# as the first argument of modified_precision as a collection of references.
reference = [ref.split(" ")]
candidate1 = c1.split(" ")
candidate2 = c2.split(" ")


def _print_modified_precisions(references, hypothesis, max_n=4):
    """Print the modified n-gram precision of a candidate for n = 1..max_n.

    Args:
        references: list of tokenized reference sentences (list of token lists).
        hypothesis: tokenized candidate sentence (list of tokens).
        max_n: highest n-gram order to report; orders 1..max_n are printed.

    For every order, a header line "<n>-gram" is printed, followed by the
    value returned by ``modified_precision(references, hypothesis, n)``.
    """
    for n in range(1, max_n + 1):
        print("{}-gram".format(n))
        print(modified_precision(references, hypothesis, n))


# Report 1- to 4-gram precision for each candidate, separated by a blank line.
_print_modified_precisions(reference, candidate1)
print()
_print_modified_precisions(reference, candidate2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment