import intel_extension_for_pytorch  # required for XPU support
import torch
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
# model_id = "facebook/opt-1.3b"
# model_id = "meta-llama/Llama-2-7b"
model_id = "meta-llama/Llama-2-7b-chat-hf"
prompt = "I love the Avengers,"
# Load the Hugging Face Transformers model through BigDL-LLM; uncomment the
# line below to load with INT4 weight quantization instead of FP16:
# model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(model_id)  # the tokenizer doesn't support .to()
model = model.to('xpu')  # move the model to the Intel XPU device
input_ids = tokenizer.encode(prompt, return_tensors="pt").to('xpu')
output_ids = model.generate(input_ids, max_new_tokens=64)  # generate() defaults to ~20 new tokens
output = tokenizer.batch_decode(output_ids.cpu(), skip_special_tokens=True)  # move ids back to CPU before decoding
print(output)
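
# Optional: a minimal latency sketch, not part of the original gist. It assumes
# the torch.xpu namespace is registered by the intel_extension_for_pytorch
# import above; torch.xpu.synchronize() waits for queued XPU work to finish so
# the timer measures the full (asynchronous) generation.
import time

torch.xpu.synchronize()
start = time.perf_counter()
output_ids = model.generate(input_ids, max_new_tokens=64)
torch.xpu.synchronize()
elapsed = time.perf_counter() - start
# generate() returns the prompt plus new tokens, so subtract the prompt length
new_tokens = output_ids.shape[1] - input_ids.shape[1]
print(f"{new_tokens} tokens in {elapsed:.2f}s ({new_tokens / elapsed:.1f} tok/s)")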
print("joto")