git clone git@github.com:filipstrand/mflux.git
cd mflux && pip install -r requirements.txt
Name this anything, maybe flux.py. Make sure to update the two paths marked below.
from huggingface_hub import snapshot_download | |
import mlx.core as mx | |
import mlx.nn as nn | |
import time | |
class Block(nn.Module): | |
def __init__(self, in_dims, dims, stride=1): | |
super().__init__() |
import av | |
import numpy as np | |
import mlx.core as mx | |
def conway(a: mx.array): | |
source = """ | |
uint i = thread_position_in_grid.x; | |
uint j = thread_position_in_grid.y; | |
uint n = threads_per_grid.x; |
# Ask an LLM about the MLX API by stuffing the mlx.core type stubs into the prompt.
import os

import mlx.core as mx
from mlx_lm import load, generate

# The .pyi stub file documents the full public mlx.core API in one place.
filename = os.path.join(os.path.dirname(mx.__file__), "core/__init__.pyi")
with open(filename, 'r') as fid:
    prompt = fid.read()
prompt += "\nHow do you write a self-attention layer using the above API in MLX?"

# 4-bit quantized Llama 3.1 8B Instruct from the MLX community hub.
# NOTE(review): the original snippet is truncated here — a generate(...) call
# presumably follows; only the visible statements are reproduced.
model, tokenizer = load("mlx-community/meta-Llama-3.1-8B-Instruct-4bit")
""" | |
A minimal, fast example generating text with Llama 3.1 in MLX. | |
To run, install the requirements: | |
pip install -U mlx transformers fire | |
Then generate text with: | |
python l3min.py "How tall is K2?" |
# Minimal Metal bootstrap via PyObjC: grab the GPU and a command queue.
# Requires:
#   pip install pyobjc-framework-Metal
import numpy as np
import Metal

# Get the default GPU device
device = Metal.MTLCreateSystemDefaultDevice()

# Make a command queue to encode command buffers to
command_queue = device.newCommandQueue()
from typing import Callable, Tuple | |
import operator | |
from functools import reduce | |
from itertools import product | |
import mlx.core as mx | |
def _interpolate( | |
x: mx.array, scale_factor: Tuple, indices_fn: Callable, align_corners: bool = False | |
): |