Code for "Creating an infinite icon library" blog post
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from fastapi import FastAPI
from modal import Image, App, Volume, gpu, Secret, enter, method, asgi_app
GIT_SHA = "abd922bd0c43a504e47eca2ed354c3634bd00834" # specify the commit to fetch
image = (
    Image.debian_slim(python_version="3.10")
    .pip_install(
        "accelerate==0.27.2",
        "datasets~=2.19.1",
        "ftfy~=6.1.1",
        "gradio~=3.50.2",
        "smart_open~=6.4.0",
        "transformers~=4.38.1",
        "torch~=2.2.0",
        "torchvision~=0.16",
        "triton~=2.2.0",
        "peft==0.7.0",
        "wandb==0.16.3",
    )
    .apt_install("git")
    # Perform a shallow fetch of just the target `diffusers` commit, checking out
    # the commit in the container's current working directory, /root.
    .run_commands(
        "cd /root && git init .",
        "cd /root && git remote add origin https://github.com/huggingface/diffusers",
        f"cd /root && git fetch --depth=1 origin {GIT_SHA} && git checkout {GIT_SHA}",
        "cd /root && pip install -e .",
    )
)
# ## Set up a `Volume` for the model output
#
# Modal Functions can't access your local filesystem, so we set up a `Volume` to persist the trained model weights once training is finished. (The training data itself is pulled from the Hugging Face Hub.)

web_app = FastAPI()

# 4000 training steps, on full heroicons with captions without HCON prefix
app = App(name="example-diffusers-app-05-15-2024-full-heroicons")

MODEL_DIR = Path("/model")
model_volume = Volume.from_name(
    "diffusers-model-volume-05-15-2024-full-heroicons", create_if_missing=True
)
VOLUME_CONFIG = {
    MODEL_DIR: model_volume,
}
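
# You can browse the Volume's contents from your terminal with the Modal CLI
# (exact syntax may vary by Modal version), which is handy for confirming that
# checkpoints are landing where you expect:
#
#   modal volume ls diffusers-model-volume-05-15-2024-full-heroicons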
DATASET_NAME = [
    "yirenlu/heroicons-without-hcon",
    # "yirenlu/heroicons-subset-100-images",
]
RESOLUTIONS = [128]
# ## Set up the training config
#
# Each Diffusers example script takes a different set of hyperparameters, so customize this config to match the flags of the script you are running. The values below are examples.
@dataclass
class TrainConfig:
    """Configuration for the finetuning training."""

    # identifier for the pretrained model on Hugging Face
    model_name: str = "runwayml/stable-diffusion-v1-5"
    # resume_from_checkpoint: str = "/model/yirenlu/heroicons_512/checkpoint-6000/"

    # Hugging Face Hub dataset (annotated so the dataclass treats it as a field)
    dataset_name: str = "yirenlu/heroicons"

    # Hyperparameters/constants from some of the Diffusers examples.
    # Modify these to match the hyperparameters of the script you are using.
    mixed_precision: str = "fp16"  # precision of floats during training; fp16 or lower is mixed with fp32 under the hood
    resolution: int = 128  # images will be resized to this resolution
    max_train_steps: int = 5000  # number of times to apply a gradient update during training
    checkpointing_steps: int = 1000  # number of steps between model checkpoints, for resuming training
    train_batch_size: int = 1  # how many images to process at once, limited by GPU VRAM
    gradient_accumulation_steps: int = 4  # how many batches to process before updating the model; stabilizes training with larger effective batch sizes
    learning_rate: float = 1e-05  # scaling factor on gradient updates; make this proportional to batch size * accumulation steps
    lr_scheduler: str = "constant"  # dynamic schedule for changes to the base learning_rate
    lr_warmup_steps: int = 0  # for non-constant lr schedules, how many steps to spend ramping the learning_rate up from a small initial value
    max_grad_norm: int = 1  # value above which gradients are clipped; stabilizes training
    caption_column: str = "text"  # name of the dataset column containing the image captions
    validation_prompt: str = "an icon of a dragon creature"
@dataclass
class AppConfig:
    """Configuration information for inference."""

    num_inference_steps: int = 50
    guidance_scale: float = 20
@app.function(
    image=image,
    gpu=gpu.A100(size="80GB"),  # finetuning is VRAM-hungry, so this should be an A100 or H100
    volumes=VOLUME_CONFIG,
    timeout=3600 * 5,  # five hours
    secrets=[Secret.from_name("huggingface-secret-ren")],
    _allow_background_volume_commits=True,
)
def train(hyperparameter_config):
    import huggingface_hub
    from accelerate import notebook_launcher
    from accelerate.utils import write_basic_config

    # change this line to import the training script you want to use
    from examples.text_to_image.train_text_to_image import main
    from transformers import CLIPTokenizer

    # set up TrainConfig
    config = TrainConfig()

    # set up the Hugging Face accelerate library for fast training
    write_basic_config(mixed_precision="fp16")

    # authenticate to Hugging Face so we can download the model weights
    hf_key = os.environ["HF_TOKEN"]
    huggingface_hub.login(hf_key)

    # check whether we can access the model repo
    try:
        CLIPTokenizer.from_pretrained(config.model_name, subfolder="tokenizer")
    except OSError as e:  # handle the error raised when the license has not been accepted
        license_error_msg = (
            "Unable to load tokenizer. Access to this model requires acceptance of the "
            f"license on Hugging Face here: https://huggingface.co/{config.model_name}."
        )
        raise Exception(license_error_msg) from e

    def launch_training():
        sys.argv = [
            "examples/text_to_image/train_text_to_image.py",  # potentially modify
            f"--pretrained_model_name_or_path={config.model_name}",
            f"--dataset_name={hyperparameter_config['dataset_name']}",
            "--use_ema",
            f"--output_dir={hyperparameter_config['output_dir']}",
            f"--resolution={hyperparameter_config['resolution']}",
            "--center_crop",
            "--random_flip",
            f"--gradient_accumulation_steps={config.gradient_accumulation_steps}",
            "--gradient_checkpointing",
            f"--train_batch_size={config.train_batch_size}",
            f"--learning_rate={config.learning_rate}",
            f"--lr_scheduler={config.lr_scheduler}",
            f"--max_train_steps={config.max_train_steps}",
            f"--lr_warmup_steps={config.lr_warmup_steps}",
            f"--checkpointing_steps={config.checkpointing_steps}",
            # f"--resume_from_checkpoint={hyperparameter_config['checkpoint_dir']}",
        ]
        main()

    # run training -- see the Hugging Face accelerate docs for details
    print("launching fine-tuning training script")
    notebook_launcher(launch_training, num_processes=1)

    # The trained model artefacts have been output to the Volume mounted at
    # MODEL_DIR; commit so they are persisted.
    model_volume.commit()
@app.local_entrypoint()
def run():
    hyperparameter_search = [
        {
            "dataset_name": dataset_name,
            "resolution": resolution,
            "output_dir": MODEL_DIR / f"{dataset_name}_{resolution}",
            "checkpoint_dir": MODEL_DIR / f"{dataset_name}_{resolution}/checkpoint-6000/",
        }
        for dataset_name in DATASET_NAME
        for resolution in RESOLUTIONS
    ]
    for x in train.map(hyperparameter_search):
        print(x)
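
# To launch training, run this file with the Modal CLI, e.g.
# `modal run heroicons_train.py` (substitute the filename you saved this
# script under). Each entry in `hyperparameter_search` becomes one
# containerized run of `train` via `.map`.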
@app.cls(
    image=image,
    gpu="A10G",  # inference requires less VRAM than training, so we can use a cheaper GPU
    volumes=VOLUME_CONFIG,  # mount the location where your model weights were saved
)
class Model:
    @enter()
    def load_model(self):
        import torch
        from diffusers import StableDiffusionPipeline, UNet2DConditionModel

        # Reload the modal.Volume to ensure the latest state is accessible.
        model_volume.reload()

        # Load the fine-tuned UNet weights from a training checkpoint on the
        # Volume and plug them into the base Stable Diffusion pipeline.
        unet = UNet2DConditionModel.from_pretrained(
            MODEL_DIR / "yirenlu/heroicons-without-hcon_128/checkpoint-3000/unet",
            torch_dtype=torch.float16,
        )
        pipe = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", unet=unet, torch_dtype=torch.float16
        )
        pipe.to("cuda")
        # pipe.enable_xformers_memory_efficient_attention()

        # Alternatively, load the full pipeline saved at the end of training:
        # pipe = StableDiffusionPipeline.from_pretrained(
        #     MODEL_DIR / "yirenlu/heroicons-without-hcon_128", torch_dtype=torch.float16
        # )
        # pipe.to("cuda")
        # pipe.enable_xformers_memory_efficient_attention()

        self.pipe = pipe
    @method()
    def inference(self, text, config):
        image = self.pipe(
            text,
            num_inference_steps=config.num_inference_steps,
            guidance_scale=config.guidance_scale,
        ).images[0]
        return image
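
# A minimal local smoke test for `Model.inference` -- a sketch, not part of the
# original post. The default prompt and output path below are illustrative
# assumptions, and deserializing the returned image locally requires Pillow in
# your local environment. Invoke with `modal run <this_file>.py::generate`.
@app.local_entrypoint()
def generate(prompt: str = "an icon of a rocket ship"):
    config = AppConfig()
    image = Model().inference.remote(prompt, config)  # returns a PIL image
    image.save("/tmp/icon.png")
    print("saved generated icon to /tmp/icon.png")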
# Directory holding the custom stylesheet referenced below; note that for
# `/assets/index.css` to resolve inside the container, this directory must also
# be made available to the Function (e.g. via a modal.Mount).
assets_path = Path(__file__).parent / "assets"
@app.function(
    image=image,
    concurrency_limit=3,
)
@asgi_app()
def fastapi_app():
    import gradio as gr
    from gradio.routes import mount_gradio_app

    # set up AppConfig
    config = AppConfig()

    # call the GPU inference method on Modal
    def go(text):
        return Model().inference.remote(text, config)

    HCON_prefix = "an icon of"
    example_prompts = [
        f"{HCON_prefix} a movie ticket",
        f"{HCON_prefix} Barack Obama",
        f"{HCON_prefix} a castle",
        f"{HCON_prefix} a German Shepherd",
    ]

    modal_docs_url = "https://modal.com/docs/guide"
    modal_example_url = f"{modal_docs_url}/examples/train_and_serve_diffusers_script"

    description = """Describe a concept that you would like drawn as a [Heroicon](https://heroicons.com/). Try the examples below for inspiration."""

    # wrap the inference function in a Gradio UI
    interface = gr.Interface(
        fn=go,
        inputs="text",
        outputs=gr.Image(shape=(512, 512)),
        title="Generate custom heroicons",
        examples=example_prompts,
        description=description,
        css="/assets/index.css",
        allow_flagging="never",
    )

    # mount the Gradio app onto the FastAPI app for execution on Modal
    return mount_gradio_app(
        app=web_app,
        blocks=interface,
        path="/",
    )
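
# During development, serve the web app with `modal serve <this_file>.py` to
# get a temporary URL that hot-reloads as you edit; use
# `modal deploy <this_file>.py` to deploy it persistently.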