Sequential layer shape inference

Keras-style Sequential in Swift for TensorFlow.


The current Sequential in Swift for TensorFlow does not support shape inference. Previous layers' output sizes are redundantly repeated as next layers' input sizes. This increases user burden and adds room for error.

let modelOld = Sequential {
  Dense<Float>(inputSize: 10000, outputSize: 784)
  Dense<Float>(inputSize: 784, outputSize: 400, useBias: true)
  Dense<Float>(inputSize: 400, outputSize: 100)
  Dense<Float>(inputSize: 100, outputSize: 10, activation: relu)
}

The new Sequential in this Gist supports shape inference:

let input = Tensor<Float>(randomNormal: [10000, 784])
let model = Sequential(inputShape: input.shape) {
  Dense<Float>.make(.init(outputSize: 784))
  Dense<Float>.make(.init(outputSize: 400, useBias: true))
  Dense<Float>.make(.init(outputSize: 100))
  Dense<Float>.make(.init(outputSize: 10, activation: relu))
}
print(model(input).shape) // [10000, 10]

The syntax above could be improved: Dense<Float>.make is an implementation detail that would ideally be hidden from users.

Idea:

let input = Tensor<Float>(randomNormal: [10000, 784])
let model = Sequential(inputShape: input.shape) {
  .dense(outputSize: 784)
  .dense(outputSize: 400, useBias: true)
  .dense(outputSize: 100)
  .dense(outputSize: 10, activation: relu)
}
print(model(input).shape) // [10000, 10]
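
The leading-dot syntax above needs a contextual type that the generic function builder does not provide, so it does not work as-is. A free function comes close under the design in this Gist. A minimal sketch; the dense helper is hypothetical and not part of the Gist's code:

func dense(
  outputSize: Int, useBias: Bool = false,
  activation: @escaping Dense<Float>.Activation = identity
) -> Dense<Float>.Initializer {
  // Hypothetical helper: wraps `Dense<Float>.make` so call sites read close
  // to the idea above.
  Dense<Float>.make(.init(
    outputSize: outputSize, useBias: useBias, activation: activation))
}

let model = Sequential(inputShape: input.shape) {
  dense(outputSize: 784)
  dense(outputSize: 400, useBias: true)
  dense(outputSize: 100)
  dense(outputSize: 10, activation: relu)
}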

Alternative fluent API idea from @dabrahams:

let leNet = Sequence<Float>()
  .conv2D(filterShape: (5, 5, 1, 6), padding: .same, activation: relu)
  .avgPool2D(poolSize: (2, 2), strides: (2, 2))
  .conv2D(filterShape: (5, 5, 6, 16), activation: relu)
  .avgPool2D(poolSize: (2, 2), strides: (2, 2))
  .flatten()
  .dense(outputSize: 120, activation: relu)
  .dense(outputSize: 84, activation: relu)
  .dense(outputSize: 10)
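
One way such chaining could type-check against the machinery below: each step wraps the previous initializer in a Sequential initializer. A minimal sketch of a single step, assuming the chain's seed type is worked out separately; the dense method and its constraint are assumptions, not a worked-out design:

extension LayerInitializer where LayerType.Output == Tensor<Float> {
  // Hypothetical: chains a `Dense` layer onto any initializer whose layer
  // outputs a `Tensor<Float>`. The seed type for the chain is left open.
  func dense(
    outputSize: Int, useBias: Bool = false,
    activation: @escaping Dense<Float>.Activation = identity
  ) -> Sequential<LayerType, Dense<Float>>.Initializer {
    .init(self, Dense<Float>.make(.init(
      outputSize: outputSize, useBias: useBias, activation: activation)))
  }
}

The full implementation follows.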

import TensorFlow

/// A type whose instances define a shape.
protocol Shaped {
  associatedtype Shape
  var shape: Shape { get }
}

// `Tensor` already has a `shape: TensorShape` property, so the conformance is
// empty and `Shape` is inferred to be `TensorShape`.
extension Tensor: Shaped {}

/// A layer whose inputs and outputs are shaped.
///
/// `ShapedLayer`-conforming types can be initialized given hyperparameters and
/// an input shape.
///
/// `ShapedLayer`-conforming types also implement shape propagation: they can
/// compute output shapes given input shapes.
protocol ShapedLayer: Layer
where Input: Shaped, Output: Shaped {
  associatedtype Initializer: LayerInitializer
    where Initializer.LayerType == Self

  typealias Hyperparameters = Initializer.Hyperparameters

  /// Returns the output shape for the given input shape.
  func outputShape(for inputShape: Input.Shape) -> Output.Shape

  /// Returns a layer initializer for the given hyperparameters.
  static func make(_ hyperparameters: Hyperparameters) -> Initializer
}

/// A type that creates layers, given hyperparameters and an input shape.
///
/// Used for staged layer initialization:
/// `(Layer.Hyperparameters) -> (Layer.Input.Shape) -> Layer`.
protocol LayerInitializer {
  /// The type of layer hyperparameters.
  associatedtype Hyperparameters
  var parameters: Hyperparameters { get }
  init(parameters: Hyperparameters)

  /// The associated layer type.
  associatedtype LayerType: ShapedLayer where LayerType.Initializer == Self

  /// Creates a layer given an input shape and hyperparameters.
  func makeLayer(inputShape: LayerType.Input.Shape) -> LayerType
}

extension Dense: ShapedLayer {
  struct Initializer: LayerInitializer {
    struct Hyperparameters {
      var outputSize: Int
      var useBias: Bool = false
      var activation: Activation = identity
    }
    var parameters: Hyperparameters

    func makeLayer(inputShape: TensorShape) -> Dense {
      precondition(inputShape.rank == 2, "The input shape must be 2-D.")
      // The input size is inferred from the last dimension of the input shape.
      return Dense(
        inputSize: inputShape[1], outputSize: parameters.outputSize,
        activation: parameters.activation, useBias: parameters.useBias)
    }
  }

  func outputShape(for inputShape: TensorShape) -> TensorShape {
    // Propagate shapes by applying the layer to a zero-filled dummy tensor.
    let dummy = Tensor<Scalar>(zeros: inputShape)
    return self(dummy).shape
  }

  static func make(_ parameters: Hyperparameters) -> Initializer {
    return .init(parameters: parameters)
  }
}
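
// Illustrative staged initialization of a single `Dense` layer:
//   let denseInitializer = Dense<Float>.make(.init(outputSize: 10))
//   let dense = denseInitializer.makeLayer(inputShape: [32, 4])
//   dense.outputShape(for: [32, 4])  // [32, 10]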

extension Sequential: ShapedLayer
where Layer1: ShapedLayer, Layer2: ShapedLayer {
  struct Initializer: LayerInitializer {
    var layer1Initializer: Layer1.Initializer
    var layer2Initializer: Layer2.Initializer

    typealias Hyperparameters =
      (layer1: Layer1.Hyperparameters, layer2: Layer2.Hyperparameters)
    var parameters: Hyperparameters {
      (layer1Initializer.parameters, layer2Initializer.parameters)
    }

    init(_ layer1Initializer: Layer1.Initializer,
         _ layer2Initializer: Layer2.Initializer) {
      self.layer1Initializer = layer1Initializer
      self.layer2Initializer = layer2Initializer
    }

    init(parameters: Hyperparameters) {
      self.layer1Initializer = .init(parameters: parameters.layer1)
      self.layer2Initializer = .init(parameters: parameters.layer2)
    }

    func makeLayer(inputShape: Input.Shape) -> Sequential {
      // Make the first layer, propagate the input shape through it, then use
      // the resulting shape to make the second layer.
      let layer1 = layer1Initializer.makeLayer(inputShape: inputShape)
      let middleShape = layer1.outputShape(for: inputShape)
      let layer2 = layer2Initializer.makeLayer(inputShape: middleShape)
      return Sequential(layer1, layer2)
    }
  }

  func outputShape(for inputShape: Input.Shape) -> Output.Shape {
    let middleShape = layer1.outputShape(for: inputShape)
    return layer2.outputShape(for: middleShape)
  }

  static func make(_ parameters: Hyperparameters) -> Initializer {
    return .init(parameters: parameters)
  }

  /// Creates a sequential layer from the given shaped layers, inferring shapes
  /// from the given input shape.
  ///
  /// - Note: this performs Keras-style `Sequential` shape inference.
  init(inputShape: Input.Shape, @ShapedLayerBuilder layers: () -> Initializer) {
    let layerInitializer = layers()
    self = layerInitializer.makeLayer(inputShape: inputShape)
  }
}

/// Function builder to support a shape-inferring initializer for `Sequential`.
@_functionBuilder
struct ShapedLayerBuilder {
  static func buildBlock<L1Init: LayerInitializer, L2Init: LayerInitializer>(
    _ l1: L1Init, _ l2: L2Init
  ) -> Sequential<L1Init.LayerType, L2Init.LayerType>.Initializer
  where L1Init.LayerType.Output == L2Init.LayerType.Input {
    .init(l1, l2)
  }

  static func buildBlock<
    L1Init: LayerInitializer, L2Init: LayerInitializer, L3Init: LayerInitializer
  >(
    _ l1: L1Init, _ l2: L2Init, _ l3: L3Init
  ) -> Sequential<L1Init.LayerType,
                  Sequential<L2Init.LayerType, L3Init.LayerType>>.Initializer
  where L1Init.LayerType.Output == L2Init.LayerType.Input,
        L2Init.LayerType.Output == L3Init.LayerType.Input {
    .init(l1, .init(l2, l3))
  }

  static func buildBlock<
    L1Init: LayerInitializer, L2Init: LayerInitializer,
    L3Init: LayerInitializer, L4Init: LayerInitializer
  >(
    _ l1: L1Init, _ l2: L2Init, _ l3: L3Init, _ l4: L4Init
  ) -> Sequential<L1Init.LayerType, Sequential<L2Init.LayerType,
                  Sequential<L3Init.LayerType, L4Init.LayerType>>>.Initializer
  where L1Init.LayerType.Output == L2Init.LayerType.Input,
        L2Init.LayerType.Output == L3Init.LayerType.Input,
        L3Init.LayerType.Output == L4Init.LayerType.Input {
    .init(l1, .init(l2, .init(l3, l4)))
  }
}

// Example!
let input = Tensor<Float>(randomNormal: [10000, 784])
let model = Sequential(inputShape: input.shape) {
  Dense<Float>.make(.init(outputSize: 784))
  Dense<Float>.make(.init(outputSize: 400, useBias: true))
  Dense<Float>.make(.init(outputSize: 100))
  Dense<Float>.make(.init(outputSize: 10, activation: relu))
}
print(model(input).shape) // [10000, 10]

// Old `Sequential` without shape inference.
// Layer output sizes are redundantly repeated as next layers' input sizes.
// This increases user burden and adds room for error.
let modelOld = Sequential {
  Dense<Float>(inputSize: 10000, outputSize: 784)
  Dense<Float>(inputSize: 784, outputSize: 400, useBias: true)
  Dense<Float>(inputSize: 400, outputSize: 100)
  Dense<Float>(inputSize: 100, outputSize: 10, activation: relu)
}
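
// Illustrative only: the inferred `model` is an ordinary `Layer`, so it works
// with the usual differentiation APIs. `labels` here is a placeholder.
let labels = Tensor<Int32>(zeros: [10000])
let grads = gradient(at: model) { model in
  softmaxCrossEntropy(logits: model(input), labels: labels)
}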