Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mrmaheshrajput/90d4bbc91a04d0768dc517c995b77281 to your computer and use it in GitHub Desktop.
Save mrmaheshrajput/90d4bbc91a04d0768dc517c995b77281 to your computer and use it in GitHub Desktop.
import io
import sagemaker
import boto3
import json
# Deployment settings for a Phi-3-mini endpoint served by the DJL LMI container.

# Change this to your role
iam_role = "arn:aws:iam::1111111111:role/service-role/AmazonSageMaker-ExecutionRole-00000000T000000"

sagemaker_session = sagemaker.session.Session()
# Read the region from the session created above. The previous code referenced
# an undefined name (`sess`), which raised NameError before anything was
# deployed; `boto_region_name` is the session's public region attribute.
region = sagemaker_session.boto_region_name

# SageMaker runtime client; not used further in this part of the script
# (presumably kept for invoking the endpoint afterwards -- confirm).
smr_client = boto3.client("sagemaker-runtime")

# Look up the URI of the djl-lmi 0.28.0 serving image for the session's region.
container_uri = sagemaker.image_uris.retrieve(framework="djl-lmi", version="0.28.0", region=region)

instance_type = "ml.g5.4xlarge"
# Endpoint name derived from a fixed base via the SDK's naming helper.
endpoint_name = sagemaker.utils.name_from_base("phi3-4k-lmi-endpoint")
# Environment variables handed to the serving container: which Hugging Face
# model to load plus the rolling-batch, tensor-parallel and dtype options.
lmi_env = {
    "HF_MODEL_ID": "microsoft/Phi-3-mini-4k-instruct",
    "OPTION_ROLLING_BATCH": "vllm",
    "TENSOR_PARALLEL_DEGREE": "max",
    "OPTION_MAX_ROLLING_BATCH_SIZE": "2",
    "OPTION_DTYPE": "fp16",
}

# Model definition: serving image, execution role and container environment.
model = sagemaker.Model(image_uri=container_uri, role=iam_role, env=lmi_env)

# Deploy the model to a single-instance endpoint under the generated name.
model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type=instance_type,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment