Last active
June 4, 2021 12:35
-
-
Save priyanksonis/2f3a6b5ce76e35e4739f2c8b92c343a2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import torch | |
from transformers import T5ForConditionalGeneration,T5Tokenizer | |
import time | |
def set_seed(seed):
    """Seed the random number generators so repeated runs are reproducible.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator; when CUDA is available, every CUDA device is seeded too.

    Args:
        seed: Integer seed applied to each generator.
    """
    import random  # imported locally: the file's import header does not provide it

    random.seed(seed)
    # NumPy only accepts seeds in [0, 2**32); fold the value into that range so
    # any seed that torch accepts keeps working here as well.
    np.random.seed(seed % (2 ** 32))
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
# Fix the RNG state up front so the sampling done later is repeatable.
set_seed(42)

# Run on the GPU when torch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The tokenizer is the stock 't5-base' one; the weights are loaded from a
# separate checkpoint path and moved straight onto the selected device.
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('/path/to/model').to(device)
# Example inputs to paraphrase. Named `sentences` rather than `list` so the
# built-in type is not shadowed for the rest of the module.
sentences = [
    "What is you name ?",
    "Where do you live ?",
    "You should say hi and greet him.",
    "Say Sam to transfer if has 80 dollars cash",
    "Tell John to buy some vegetables",
]

# Upper bound passed to `generate` as `max_length`; set once instead of being
# re-assigned (and never read) on every iteration.
max_len = 256

paraphrases = []  # one decoded paraphrase per input sentence
timings = []      # one "time taken = ..." line per input sentence

for sentence in sentences:
    start = time.perf_counter()

    # Prompt format is kept exactly as before: task prefix, sentence, then an
    # explicit end-of-sequence marker.
    text = "paraphrase: " + sentence + " </s>"

    # A single sentence is encoded per call, so there is nothing to pad
    # against; the deprecated `pad_to_max_length` flag is not passed.
    encoding = tokenizer.encode_plus(text, return_tensors="pt")
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)

    # Sampling with top_k=120 / top_p=0.98, returning one sequence.
    # `early_stopping` is a beam-search option and is not passed here because
    # no beam search is requested.
    sampled_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_masks,
        do_sample=True,
        max_length=max_len,
        top_k=120,
        top_p=0.98,
        num_return_sequences=1,
    )

    paraphrases.append(
        tokenizer.decode(
            sampled_outputs[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
    )
    # The measured span covers prompt building, tokenisation, generation and decoding.
    timings.append("time taken = {}".format(time.perf_counter() - start))

# Print all paraphrases first, then all timings, in input order.
for paraphrase in paraphrases:
    print(paraphrase)
for timing in timings:
    print(timing)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import T5ForConditionalGeneration, T5Tokenizer, T5Config, AdamW | |
import torch | |
from onnxt5 import generate_onnx_representation, GenerativeT5 | |
from onnxt5.api import get_sess | |
import tempfile | |
import os | |
import time | |
def set_seed(seed):
    """Seed the random number generators so repeated runs are reproducible.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator; when CUDA is available, every CUDA device is seeded too.

    Args:
        seed: Integer seed applied to each generator.
    """
    # Imported locally so the function does not rely on the file's import header.
    import random

    import numpy as np

    random.seed(seed)
    # NumPy only accepts seeds in [0, 2**32); fold the value into that range so
    # any seed that torch accepts keeps working here as well.
    np.random.seed(seed % (2 ** 32))
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
# Fix the RNG state before any model work.
set_seed(42)

# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The weights come from a checkpoint path; the tokenizer is the stock 't5-base' one.
model = T5ForConditionalGeneration.from_pretrained('/path/to/model')
tokenizer = T5Tokenizer.from_pretrained('t5-base')

# Directory that receives the exported ONNX files.
path = "/data/priyank/onnx_exp/temp"
try:
    # makedirs also creates missing parent directories.
    os.makedirs(path)
except FileExistsError:
    # Only "already exists" is tolerated; permission or path errors now
    # surface instead of being reported as "folder exist".
    print("folder exist")

# Export to ONNX
generate_onnx_representation(output_prefix=f"{path}/t5-own-", model=model)

# Load the exported graphs through onnxt5's own helper. The previous code
# called `InferenceSession`, which this file never imports, so it raised a
# NameError.
# NOTE(review): `get_sess` is expected to open "<prefix>-encoder.onnx" and
# "<prefix>-decoder-with-lm-head.onnx", i.e. the same files the old code
# named explicitly - confirm against the installed onnxt5 version.
decoder_sess, encoder_sess = get_sess(f"{path}/t5-own-")

# The `cuda` flag follows the detected device rather than being hard-coded
# to True, so the script can also start on a CPU-only machine.
generative_t5 = GenerativeT5(
    encoder_sess,
    decoder_sess,
    tokenizer,
    onnx=True,
    cuda=device.type == "cuda",
)
# Example inputs fed to the ONNX-backed generator. Named `sentences` rather
# than `list` so the built-in type is not shadowed for the rest of the module.
sentences = [
    "What is you name ?",
    "Where do you live ?",
    "You should say hi and greet him.",
    "Say Sam to transfer if has 80 dollars cash",
    "Tell John to buy some vegetables",
]

generated = []  # first element of each generator result, in input order
timings = []    # one "time taken = ..." line per input sentence

for sentence in sentences:
    start = time.perf_counter()
    # 35 is passed as the second positional argument.
    # NOTE(review): presumably the generation length limit - confirm against
    # the GenerativeT5 signature. temperature=0. is forwarded unchanged.
    generated.append(generative_t5(sentence, 35, temperature=0.)[0])
    timings.append("time taken = {}".format(time.perf_counter() - start))

# Print all generated texts first, then all timings, in input order.
for text in generated:
    print(text)
for timing in timings:
    print(timing)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment