Awni Hannun (awni)

awni / MLX_0_17_3.pdf
Last active September 15, 2024 22:02
MLX Documentation PDF Versions
awni / resnet_mlx.py
Created September 7, 2024 20:02
MLX ResNet18 Inference Benchmark
from huggingface_hub import snapshot_download
import mlx.core as mx
import mlx.nn as nn
import time
class Block(nn.Module):
    def __init__(self, in_dims, dims, stride=1):
        super().__init__()
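The preview cuts off inside Block.__init__. A minimal sketch of how such a residual basic block is typically written in MLX (the layer names and structure here are assumptions, not the gist's exact code):

import mlx.core as mx
import mlx.nn as nn

class Block(nn.Module):
    # Sketch of an assumed ResNet basic block, not the gist's exact code.
    def __init__(self, in_dims, dims, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_dims, dims, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm(dims)
        self.conv2 = nn.Conv2d(dims, dims, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm(dims)
        # Project the input when the shape changes so the residual add is valid
        if stride != 1 or in_dims != dims:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_dims, dims, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm(dims),
            )
        else:
            self.shortcut = lambda x: x

    def __call__(self, x):
        out = nn.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return nn.relu(out + self.shortcut(x))
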
awni / fast_conway_mlx.py
Last active September 1, 2024 08:37
Conway's Game of Life Accelerated with Custom Kernels in MLX
import av
import numpy as np
import mlx.core as mx
def conway(a: mx.array):
    source = """
        uint i = thread_position_in_grid.x;
        uint j = thread_position_in_grid.y;
        uint n = threads_per_grid.x;
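The preview stops inside the Metal source. A minimal sketch of how such a source string is compiled and launched with mx.fast.metal_kernel (the kernel body below is a placeholder, not the gist's real update rule, and the grid sizes are assumptions):

import mlx.core as mx

def conway_step(a: mx.array) -> mx.array:
    # Placeholder body: copies input to output instead of the real Conway rule
    source = """
        uint i = thread_position_in_grid.x;
        uint j = thread_position_in_grid.y;
        uint n = threads_per_grid.x;
        out[i * n + j] = a[i * n + j];
    """
    kernel = mx.fast.metal_kernel(
        name="conway",
        input_names=["a"],
        output_names=["out"],
        source=source,
    )
    # Launch one thread per cell; threadgroup size is an arbitrary choice
    (out,) = kernel(
        inputs=[a],
        grid=(a.shape[0], a.shape[1], 1),
        threadgroup=(16, 16, 1),
        output_shapes=[a.shape],
        output_dtypes=[a.dtype],
    )
    return out
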
awni / mlx_api_prompt.py
Created August 20, 2024 15:43
Meta Llama 3.1 with MLX LM and the MLX Python API as Context
import os
import mlx.core as mx
from mlx_lm import load, generate
filename = os.path.join(os.path.dirname(mx.__file__), "core/__init__.pyi")
with open(filename, 'r') as fid:
    prompt = fid.read()
prompt += "\nHow do you write a self-attention layer using the above API in MLX?"
model, tokenizer = load("mlx-community/meta-Llama-3.1-8B-Instruct-4bit")
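The preview ends before the generation call. A minimal sketch of the final step, assuming mlx_lm's generate API (the max_tokens budget is an arbitrary choice):

# Sketch of the final step, assuming mlx_lm's generate API
response = generate(
    model,
    tokenizer,
    prompt=prompt,
    max_tokens=512,  # arbitrary budget for the answer
    verbose=True,    # stream tokens to stdout as they are produced
)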

Set up the repo

git clone git@github.com:filipstrand/mflux.git
cd mflux && pip install -r requirements.txt

Make a run script

Name the file anything you like, e.g. flux.py. Make sure to update the two paths marked below.

awni / l3min.py
Last active August 23, 2024 22:35
A minimal, fast implementation of Llama 3.1 in MLX.
"""
A minimal, fast example generating text with Llama 3.1 in MLX.
To run, install the requirements:
pip install -U mlx transformers fire
Then generate text with:
python l3min.py "How tall is K2?"
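The script itself is the gist; independent of it, the core of a minimal greedy decode loop in MLX looks roughly like this sketch (no KV cache shown, and the model interface is an assumption):

import mlx.core as mx

def greedy_generate(model, prompt_tokens, max_tokens):
    # Assumes `model` maps a (1, T) token array to (1, T, vocab) logits
    tokens = mx.array(prompt_tokens)[None]
    for _ in range(max_tokens):
        logits = model(tokens)
        next_token = mx.argmax(logits[:, -1, :], axis=-1)
        mx.eval(next_token)  # force the computation one token at a time
        tokens = mx.concatenate([tokens, next_token[None]], axis=1)
    return tokens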
awni / metal_in_python.py
Last active August 12, 2024 20:56
Compile and call a Metal GPU kernel from Python
# Requires:
# pip install pyobjc-framework-Metal
import numpy as np
import Metal
# Get the default GPU device
device = Metal.MTLCreateSystemDefaultDevice()
# Make a command queue to encode command buffers to
command_queue = device.newCommandQueue()
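The preview stops after the command queue. A minimal sketch of the remaining steps, assuming a kernel function named "my_kernel" inside a Metal source string `source` (the names, buffer contents, and dispatch sizes are illustrative; selector names follow PyObjC's colon-to-underscore rule):

# Compile the Metal source and look up the kernel (hypothetical name "my_kernel")
library, error = device.newLibraryWithSource_options_error_(source, None, None)
kernel_fn = library.newFunctionWithName_("my_kernel")
pipeline, error = device.newComputePipelineStateWithFunction_error_(kernel_fn, None)

# Copy input data into a GPU-visible buffer
data = np.zeros(1024, dtype=np.float32)
input_buffer = device.newBufferWithBytes_length_options_(
    data.tobytes(), data.nbytes, Metal.MTLResourceStorageModeShared
)

# Encode the dispatch, run it, and wait for completion
command_buffer = command_queue.commandBuffer()
encoder = command_buffer.computeCommandEncoder()
encoder.setComputePipelineState_(pipeline)
encoder.setBuffer_offset_atIndex_(input_buffer, 0, 0)
encoder.dispatchThreads_threadsPerThreadgroup_(
    Metal.MTLSizeMake(1024, 1, 1), Metal.MTLSizeMake(32, 1, 1)
)
encoder.endEncoding()
command_buffer.commit()
command_buffer.waitUntilCompleted()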

Avoid Overly Frequent Graph Evaluations

MLX is lazy: no actual computation happens until you explicitly or implicitly evaluate the graph. Here are some of the ways that can happen (a minimal sketch follows the list):

  • An explicit call to mx.eval
  • Calling a.item() on a scalar array
  • Converting an array to NumPy, e.g. np.array(a)
  • Printing an array
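A minimal sketch of the difference between building the graph and forcing these evaluations:

import mlx.core as mx

a = mx.random.normal((1024, 1024))
b = a @ a  # lazy: only records the computation, nothing runs yet

mx.eval(b)          # explicit evaluation: the matmul actually executes here
s = b.sum().item()  # .item() also forces evaluation (implicit)
print(b)            # printing evaluates too
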
from typing import Callable, Tuple
import operator
from functools import reduce
from itertools import product
import mlx.core as mx
def _interpolate(
    x: mx.array, scale_factor: Tuple, indices_fn: Callable, align_corners: bool = False
):
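The helper's body is cut off; independent of it, a minimal sketch of nearest-neighbor upsampling in MLX shows the indexing idea (the function name and (H, W, C) layout are assumptions):

import mlx.core as mx

def upsample_nearest(x: mx.array, scale: int) -> mx.array:
    # x has shape (H, W, C); repeat each row and column `scale` times
    H, W, _ = x.shape
    rows = mx.arange(H * scale) // scale  # source row for each output row
    cols = mx.arange(W * scale) // scale  # source column for each output column
    return x[rows][:, cols]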

MLX LM with the OpenAI Python Package

1. Install

Install MLX LM and openai:

pip install mlx-lm openai
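After installing, the usual pattern is to start the MLX LM server, which exposes an OpenAI-compatible HTTP API, and point the openai client at it. A minimal sketch, reusing the model name from above (the port is mlx-lm's default; treat the exact flags as assumptions):

mlx_lm.server --model mlx-community/meta-Llama-3.1-8B-Instruct-4bit

Then query it from Python:

from openai import OpenAI

# The local server ignores the API key, but the client requires one
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="mlx-community/meta-Llama-3.1-8B-Instruct-4bit",
    messages=[{"role": "user", "content": "How tall is K2?"}],
)
print(response.choices[0].message.content)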