Last active
May 13, 2022 18:15
-
-
Save hunterhector/ad5408065a3b5fb45e905fcd758f2185 to your computer and use it in GitHub Desktop.
forte sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install first:
#   conda create --name forte python=3.9 pip
#   pip install forte
#   pip install forte.nltk
#   pip install forte.spacy
# If you want to use it in a notebook:
#   pip install jupyterlab
# Find our medium blog at: medium.com/casl-project/
# Code adapted from: https://medium.com/casl-project/building-a-question-answering-system-part-1-query-understanding-in-18-lines-916110f9f2b2
from forte import Pipeline
from forte.data.readers import TerminalReader
from forte.processors.misc import LowerCaserProcessor
from fortex.nltk.nltk_processors import (
    NLTKLemmatizer,
    NLTKPOSTagger,
    NLTKSentenceSegmenter,
    NLTKWordTokenizer,
)
from fortex.spacy import SpacyProcessor
from ft.onto.base_ontology import EntityMention, Sentence, Token

# Pipeline 1: read text typed at the terminal, then run the NLTK stack.
# Processor order matters: sentences must exist before tokens, tokens
# before POS tags, and POS tags before lemmas.
nlp = Pipeline().set_reader(
    TerminalReader()
).add(NLTKSentenceSegmenter()).add(NLTKWordTokenizer()).add(NLTKPOSTagger()).add(NLTKLemmatizer())
nlp.initialize()

# process() pulls one input from the TerminalReader and returns the
# annotated DataPack.
data_pack = nlp.process()
for sent in data_pack.get(Sentence):
    print("Results for every sentence:")
    # Restrict to tokens produced by the NLTK tokenizer component.
    for token in data_pack.get(Token, sent, components=["fortex.nltk.nltk_processors.NLTKWordTokenizer"]):
        print(f"    text: {token.text}, pos: {token.pos}, lemma: {token.lemma}")

# Persist the pipeline configuration so it can be re-created later.
nlp.save("temp.yml")

# Pipeline 2: rebuild from the saved config, then extend it with a
# lower-caser and the spaCy processor stack (including NER).
nlp2 = Pipeline()
nlp2.init_from_config_path("temp.yml")
nlp2.add(LowerCaserProcessor())
nlp2.add(
    SpacyProcessor(), {
        "processors": ["sentence", "tokenize", "pos", "lemma", "ner"]
    }
)
nlp2.initialize()
data_pack = nlp2.process()

# Sentences come from the (restored) NLTK segmenter; entity mentions and
# their tokens come from spaCy — the components= filter selects each.
for sent in data_pack.get(Sentence, components="fortex.nltk.nltk_processors.NLTKSentenceSegmenter"):
    print("Show sentence:")
    print(sent.text)
    for entity_mention in sent.get(EntityMention):
        print("  - Show entity mention:")
        print(f"  - Entity Mention in this sentence is {entity_mention.text}, {entity_mention.ner_type}")
        for token in entity_mention.get(Token, components="fortex.spacy.spacy_processors.SpacyProcessor"):
            print(f"    -Token in this Entity Mention: {token.text} with pos {token.pos}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment