import TensorFlow

// MARK: - Protocols

/// Element of an abstract vector space.
protocol Vector {
  static func * (lhs: Float, rhs: Self) -> Self
  static func + (lhs: Self, rhs: Self) -> Self
  static var zero: Self { get }
}

/// An environment containing all the types and linear operations required for defining
/// derivatives.
protocol Derivative {
  /// The derivative of a `Float`-valued function.
  associatedtype DFloat: Vector

  /// The derivative of a `(Float, Float)`-valued function.
  associatedtype DFloat2: Vector

  /// Returns the derivative of `f(_).0`, given the derivative `t` of `f(_)`.
  static func d0(_ t: DFloat2) -> DFloat

  /// Returns the derivative of `f(_).1`, given the derivative `t` of `f(_)`.
  static func d1(_ t: DFloat2) -> DFloat

  /// Returns the derivative of `(f0(_), f1(_))` given the derivative `t0` of `f0` and the
  /// derivative `t1` of `f1`.
  static func dInit(_ t0: DFloat, _ t1: DFloat) -> DFloat2
}

// It is unfortunate that I have to define `associatedtype D*` for all types that I want to use
// in differentiation.

// MARK: - Derivative functions

// These look almost exactly like JVPs.

// It should be easy to automatically produce them with a forward mode autodiff transform.

// The exciting thing is that we can evaluate them in any differentiation mode we want, by
// defining different `Derivative`s.

extension Derivative {
  static func sum(_ x: Float, _ y: Float, _ tIn: DFloat2) -> (value: Float, tOut: DFloat) {
    (x + y, d0(tIn) + d1(tIn))
  }

  static func product(_ x: Float, _ y: Float, _ tIn: DFloat2) -> (value: Float, tOut: DFloat) {
    (x * y, y * d0(tIn) + x * d1(tIn))
  }

  static func square(_ x: Float, _ tIn: DFloat) -> (value: Float, tOut: DFloat) {
    product(x, x, dInit(tIn, tIn))
  }

  /// x^2 + 3x
  static func example1(_ x: Float, _ tIn: DFloat) -> (value: Float, tOut: DFloat) {
    let (v1, t1) = square(x, tIn)
    let (v2, t2) = product(3, x, dInit(DFloat.zero, tIn))
    return sum(v1, v2, dInit(t1, t2))
  }

  /// x^2 + 10y^2 + xy
  static func example2(_ x: Float, _ y: Float, _ tIn: DFloat2) -> (value: Float, tOut: DFloat) {
    let dx = d0(tIn)
    let dy = d1(tIn)
    let (xSq, dxSq) = square(x, dx)
    let (ySq, dySq) = square(y, dy)
    let (ySq10, dySq10) = product(10, ySq, dInit(DFloat.zero, dySq))
    let (xy, dxy) = product(x, y, dInit(dx, dy))
    let (r1, dr1) = sum(xSq, ySq10, dInit(dxSq, dySq10))
    return sum(r1, xy, dInit(dr1, dxy))
  }
}

// MARK: - Derivative modes

/// Forward mode: a tangent is just the scalar directional derivative along a single seed.
enum ForwardTangent: Derivative {
  struct DFloat: Vector {
    var t: Float
    init(_ t: Float) { self.t = t }
    static func * (lhs: Float, rhs: Self) -> Self { .init(lhs * rhs.t) }
    static func + (lhs: Self, rhs: Self) -> Self { .init(lhs.t + rhs.t) }
    static var zero: Self { .init(0) }
  }

  struct DFloat2: Vector {
    var t0: Float
    var t1: Float
    init(_ t0: Float, _ t1: Float) { self.t0 = t0; self.t1 = t1 }
    static func * (lhs: Float, rhs: Self) -> Self { .init(lhs * rhs.t0, lhs * rhs.t1) }
    static func + (lhs: Self, rhs: Self) -> Self { .init(lhs.t0 + rhs.t0, lhs.t1 + rhs.t1) }
    static var zero: Self { .init(0, 0) }
  }

  static func d0(_ t: DFloat2) -> DFloat { .init(t.t0) }
  static func d1(_ t: DFloat2) -> DFloat { .init(t.t1) }
  static func dInit(_ t0: DFloat, _ t1: DFloat) -> DFloat2 { .init(t0.t, t1.t) }
}

/// Forward mode with tensor-valued tangents: each scalar's tangent is a `[1, n]` row of
/// partial derivatives, so seeding the inputs with an identity matrix computes a full
/// Jacobian in one pass.
enum ForwardJacobian: Derivative {
  struct DFloat: Vector {
    // Shape [1, n], or the zero scalar.
    var t: Tensor<Float>
    init(_ t: Tensor<Float>) {
      precondition(t == Tensor(0) || (t.rank == 2 && t.shape[0] == 1))
      self.t = t
    }
    static func * (lhs: Float, rhs: Self) -> Self { .init(lhs * rhs.t) }
    static func + (lhs: Self, rhs: Self) -> Self { .init(lhs.t + rhs.t) }
    static var zero: Self { .init(Tensor(0)) }
  }

  struct DFloat2: Vector {
    // Shape [2, n], or the zero scalar.
    var t: Tensor<Float>
    init(_ t: Tensor<Float>) {
      precondition(t == Tensor(0) || (t.rank == 2 && t.shape[0] == 2))
      self.t = t
    }
    static func * (lhs: Float, rhs: Self) -> Self { .init(lhs * rhs.t) }
    static func + (lhs: Self, rhs: Self) -> Self { .init(lhs.t + rhs.t) }
    static var zero: Self { .init(Tensor(0)) }
  }

  static func d0(_ t: DFloat2) -> DFloat {
    .init(t.t[0].expandingShape(at: 0))
  }
  static func d1(_ t: DFloat2) -> DFloat {
    .init(t.t[1].expandingShape(at: 0))
  }
  static func dInit(_ t0: DFloat, _ t1: DFloat) -> DFloat2 {
    if t0.t == Tensor(0) && t1.t == Tensor(0) { return .init(Tensor(0)) }
    let row0 = t0.t == Tensor(0) ? Tensor(zerosLike: t1.t) : t0.t
    let row1 = t1.t == Tensor(0) ? Tensor(zerosLike: t0.t) : t1.t
    return .init(Tensor(concatenating: [row0, row1]))
  }
}

/// Reverse mode over any base mode: a derivative is represented as a pullback closure, i.e.
/// the transpose of the linear map, taking a derivative of the output back to a derivative
/// of `In`.
enum Reverse<Base: Derivative, In: Vector>: Derivative {
  struct Pullback<Out: Vector>: Vector {
    var f: (Out) -> In
    init(_ f: @escaping (Out) -> In) { self.f = f }
    static func * (lhs: Float, rhs: Self) -> Self {
      .init({ rhs.f(lhs * $0) })
    }
    static func + (lhs: Self, rhs: Self) -> Self {
      .init({ lhs.f($0) + rhs.f($0) })
    }
    static var zero: Self { .init({ _ in .zero }) }
  }

  typealias DFloat = Pullback<Base.DFloat>
  typealias DFloat2 = Pullback<Base.DFloat2>

  static func d0(_ t: DFloat2) -> DFloat {
    .init({ t.f(Base.dInit($0, .zero)) })
  }
  static func d1(_ t: DFloat2) -> DFloat {
    .init({ t.f(Base.dInit(.zero, $0)) })
  }
  static func dInit(_ t0: DFloat, _ t1: DFloat) -> DFloat2 {
    .init({ t0.f(Base.d0($0)) + t1.f(Base.d1($0)) })
  }
}

// MARK: - Example invocations

// Optimizes to plain float operations.
func example1ForwardDerivative(_ x: Float) -> Float {
  let (_, d) = ForwardTangent.example1(x, .init(1))
  return d.t
}

// Optimizes to:
// - plain float operations that produce the result
// - a bunch of closure allocations and deallocations that are not used in the result (!?)
func example1ReverseDerivative(_ x: Float) -> Float {
  typealias System = Reverse<ForwardTangent, ForwardTangent.DFloat>
  let (_, d) = System.example1(x, .init({ $0 }))
  return d.f(.init(1)).t
}

func example2ReverseDerivative(_ x: Float, _ y: Float) -> ForwardTangent.DFloat2 {
  typealias System = Reverse<ForwardTangent, ForwardTangent.DFloat2>
  let (_, d) = System.example2(x, y, .init({ $0 }))
  return d.f(.init(1))
}

func example2ForwardJacobian(_ x: Float, _ y: Float) -> Tensor<Float> {
  let (_, d) = ForwardJacobian.example2(x, y, .init(eye(rowCount: 2)))
  return d.t
}

func example2ReverseJacobian(_ x: Float, _ y: Float) -> Tensor<Float> {
  typealias System = Reverse<ForwardJacobian, ForwardJacobian.DFloat2>
  let (_, d) = System.example2(x, y, .init({ $0 }))
  return d.f(.init(eye(rowCount: 1))).t
}

print(example1ForwardDerivative(2))
print(example1ReverseDerivative(2))
print(example2ReverseDerivative(1, 5))
print(example2ForwardJacobian(1, 5))
print(example2ReverseJacobian(1, 5))
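
// Expected values, worked by hand: example1 is f(x) = x^2 + 3x, so f'(x) = 2x + 3 and
// f'(2) = 7. example2 is f(x, y) = x^2 + 10y^2 + xy, so the gradient is (2x + y, 20y + x),
// which at (1, 5) is (7, 101); the Jacobian is that gradient as a single row.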

I don't remember why I am reading this. I just remember encountering it and adding it to my "To Read" list.
Do excuse me if I am commenting unwelcomely. I am almost certainly going to say things you already know.
This is very neat. I haven't seen this overall approach before, but I am not that well-versed.
I have seen the approach of computing Jacobians via propagating a matrix before, e.g. in ForwardDiff.jl; it is sometimes called "chunked mode", though I don't know if that is a standard name.
It's legit and should be faster, because a matmul is faster than a loop of matrix-vector multiplications (and once things are big enough, Strassen's algorithm applies).
It further improves cache locality, and makes redundant work more obvious to the compiler for common subexpression elimination, etc.
It pairs nicely with work on sparsity, which computes the Jacobian on a compressed basis (though you can do that via looping also).
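
To make that concrete with the code above (nothing new here, just re-seeding your own `ForwardJacobian`; the `Tensor(shape:scalars:)` unit seeds are my addition): the 2x2 identity seed propagates both Jacobian columns in one pass, while [2, 1] unit seeds recover them one column at a time, which is the usual JVP loop that chunked mode batches.

// Whole 1x2 Jacobian in one chunked pass.
let chunked = ForwardJacobian.example2(1, 5, .init(eye(rowCount: 2))).tOut.t
// Column-at-a-time (plain JVP) mode: seed one unit direction per pass.
let dfdx = ForwardJacobian.example2(1, 5, .init(Tensor(shape: [2, 1], scalars: [1, 0]))).tOut.t
let dfdy = ForwardJacobian.example2(1, 5, .init(Tensor(shape: [2, 1], scalars: [0, 1]))).tOut.t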
In general it makes sense, because one is constructing a functional form of the Jacobian matrix. Basis transforms are applied to matrices via matrix-matrix products, and the pushforward function is the functional form of that linear operator: it can be written as `pushforward(x) = J * x`, which is identical code regardless of whether `x` is a matrix or a vector.
Could this do higher-order derivatives? I think so: it seems you just need `zero`, `+`, and `*` defined, and those are well defined on polynomials.
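Concretely: pushing a single seed column `e_i` through the pushforward gives one column `J * e_i` (one partial derivative), while pushing the stacked basis, i.e. the identity, gives `J * I = J` all at once, which is exactly what seeding with `eye(rowCount: 2)` does in `example2ForwardJacobian` above.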
Full details of that are in chapter 13 of Andreas Griewank and Andrea Walther, Evaluating Derivatives: Principles and Techniques of Algorithmic Differentiation (2nd ed., 2008), but the actual operations are not obscure: addition is termwise addition of coefficients, and multiplication is convolution of coefficients (or just multiply the polynomials by hand and throw away the higher-order terms).
See e.g. TaylorSeries.jl, a Julia package that does higher-order AD like this.
So one can define a `HigherOrderTangent` that does that, and it looks like that would just work with your code? Something like the sketch below.
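
A minimal sketch of that tangent type against the `Vector` protocol above (the coefficient representation and the `convolve` helper are my inventions, so treat this as illustrative; note also that the derivative functions in the gist only ever scale tangents by the primal `Float` value, so genuinely higher-order propagation would additionally need the primal to carry its own coefficients):

// Sketch: a truncated Taylor polynomial in one perturbation, as a `Vector`.
// `coefficients[k]` holds the coefficient of eps^k.
struct HigherOrderTangent: Vector {
  var coefficients: [Float]
  init(_ coefficients: [Float]) { self.coefficients = coefficients }

  // Scaling by a Float scales every coefficient.
  static func * (lhs: Float, rhs: Self) -> Self {
    .init(rhs.coefficients.map { lhs * $0 })
  }

  // Addition is termwise addition of coefficients, padding the shorter with zeros.
  static func + (lhs: Self, rhs: Self) -> Self {
    var result = [Float](repeating: 0, count: max(lhs.coefficients.count, rhs.coefficients.count))
    for (i, c) in lhs.coefficients.enumerated() { result[i] += c }
    for (i, c) in rhs.coefficients.enumerated() { result[i] += c }
    return .init(result)
  }

  static var zero: Self { .init([]) }

  // The polynomial product: convolution of coefficients, truncated at `order`.
  // This is the extra operation beyond what `Vector` requires.
  static func convolve(_ lhs: Self, _ rhs: Self, order: Int) -> Self {
    var result = [Float](repeating: 0, count: order + 1)
    for (i, a) in lhs.coefficients.enumerated() where i <= order {
      for (j, b) in rhs.coefficients.enumerated() where i + j <= order {
        result[i + j] += a * b
      }
    }
    return .init(result)
  }
}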
Everything gets fiddly, though, when extending this to work not on scalars but on vectors or structs.