Skip to content

Instantly share code, notes, and snippets.

@shirayu
Last active June 30, 2023 10:08
Show Gist options
  • Save shirayu/6e09123b365ff904c0cf7c4b8d78c97e to your computer and use it in GitHub Desktop.
Save shirayu/6e09123b365ff904c0cf7c4b8d78c97e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from transformers import AutoTokenizer
def main() -> None:
    """Compare how three pretrained tokenizers handle one Japanese sentence.

    For each model, in the order listed, the tokenizer is loaded with
    ``AutoTokenizer.from_pretrained``, applied to the sample sentence, and
    one line is printed containing the model name, the encoding returned by
    the tokenizer, the text decoded back from its ``input_ids``, and a
    trailing ``"\\n"`` argument.
    """
    sample_text: str = "鯯を食べたい"

    # (model name, extra keyword arguments passed to ``from_pretrained``).
    # The first and third models are loaded with ``use_fast=False``; the
    # second is loaded with no extra arguments.
    targets = (
        ("rinna/japanese-gpt-neox-3.6b-instruction-ppo", {"use_fast": False}),
        ("cyberagent/open-calm-7b", {}),
        ("retrieva-jp/t5-xl", {"use_fast": False}),
    )

    for model_name, load_options in targets:
        tokenizer = AutoTokenizer.from_pretrained(model_name, **load_options)
        encoded = tokenizer(sample_text)
        # Round-trip the ids back to text so any lossy tokenization shows up
        # directly in the printed output.
        decoded = tokenizer.decode(encoded["input_ids"])
        print(model_name, encoded, decoded, "\n")


if __name__ == "__main__":
    main()
rinna/japanese-gpt-neox-3.6b-instruction-ppo {'input_ids': [240, 182, 182, 18998, 1292, 3], 'attention_mask': [1, 1, 1, 1, 1, 1]} 鯯を食べたい</s>
cyberagent/open-calm-7b {'input_ids': [8862, 109, 9714, 1026], 'attention_mask': [1, 1, 1, 1]} 鯯を食べたい
retrieva-jp/t5-xl {'input_ids': [8, 2, 14271, 605, 1], 'attention_mask': [1, 1, 1, 1, 1]} <unk> を食べたい</s>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment