module natcomp where
open import Data.Nat using (ℕ; _+_)
open import Data.List using (List; []; _∷_; _++_)
open import Data.List.Properties using (++-assoc)
open import Data.Maybe using (Maybe; just; nothing; _>>=_)
open import Relation.Binary.PropositionalEquality using (_≡_; refl; cong)
open Relation.Binary.PropositionalEquality.≡-Reasoning
My goal in this project is to learn about compiler correctness. I will write a small compiler from a minimalistic natural-numbers language down to a stack-based machine language, then prove forward simulation.
Begin by defining the source language, where terms consist of nat literals and addition
data Tm : Set where
lit : ℕ → Tm
add : Tm → Tm → Tm
Since all terms in this language are closed, we can interpret them directly to numbers
eval : Tm → ℕ
eval (lit n) = n
eval (add tm₁ tm₂) = eval tm₁ + eval tm₂
But to make things more interesting let's give small-step semantics
data _↦_ : Tm → Tm → Set where
redₗ : ∀{tm₁ tm₁′ tm₂ : Tm}
→ tm₁ ↦ tm₁′
→ add tm₁ tm₂ ↦ add tm₁′ tm₂
redᵣ : ∀{tm₁ tm₂ tm₂′ : Tm}
→ tm₂ ↦ tm₂′
→ add tm₁ tm₂ ↦ add tm₁ tm₂′
red₊ : ∀{n₁ n₂ : ℕ}
→ add (lit n₁) (lit n₂) ↦ lit (n₁ + n₂)
And the symmetric-transitive closure...
data _↦*_ : Tm → Tm → Set where
refl* : ∀{tm : Tm}
→ tm ↦* tm
trans* : ∀{tm₁ tm₂ tm₃ : Tm}
→ tm₁ ↦ tm₂
→ tm₂ ↦* tm₃
→ tm₁ ↦* tm₃
single* : ∀{tm₁ tm₂ : Tm} → tm₁ ↦ tm₂ → tm₁ ↦* tm₂
single* s = trans* s refl*
Next we define what our language will compile to. This is a simple stack language
data Instr : Set where
push : ℕ → Instr
add : Instr
Prog = List Instr
We can execute those as RPN programs. Their execution is defined in terms of a relation on stacks of numbers
Stack = List ℕ
data _∣_↦_ : Instr → Stack → Stack → Set where
red-push : ∀{Σ : Stack} {n : ℕ}
→ (push n) ∣ Σ ↦ (n ∷ Σ)
red-add : ∀{Σ : Stack} {n₁ n₂ : ℕ}
→ add ∣ (n₁ ∷ n₂ ∷ Σ) ↦ ((n₁ + n₂) ∷ Σ)
Execution of entire programs (starting from the "head" instruction) follows from that
data _∥_↦*_ : Prog → Stack → Stack → Set where
done* : ∀{Σ : Stack}
→ [] ∥ Σ ↦* Σ
step* : ∀{i : Instr} {p : Prog} {Σ Σ′ Σ″ : Stack}
→ i ∣ Σ ↦ Σ′
→ p ∥ Σ′ ↦* Σ″
→ (i ∷ p) ∥ Σ ↦* Σ″
As an aside, we can prove that the result of evaluating a stack program directly, if doesn't fail, will argee with the reduction relation
run : Stack → Prog → Maybe Stack
run stk [] = just stk
run stk (push n ∷ prog) = run (n ∷ stk) prog
run stk (add ∷ prog) with stk
... | n₁ ∷ n₂ ∷ stk = run ((n₁ + n₂) ∷ stk) prog
... | _ = nothing
run-soundness : ∀(prog : Prog) (stk stk′ : Stack)
→ run stk prog ≡ just stk′
→ prog ∥ stk ↦* stk′
run-soundness [] stk .stk refl = done*
run-soundness (add ∷ prog) (n₁ ∷ n₂ ∷ stk) stk′ success₊ =
step* red-add (run-soundness prog ((n₁ + n₂) ∷ stk) stk′ success₊)
run-soundness (push x ∷ prog) stk stk′ success =
step* red-push (run-soundness prog (x ∷ stk) stk′ success)
Another crucial property of program execution is the following characterization of concatenated programs: executing a concatenated program is the same as executing both sub-programs sequentially, assuming the first one doesn't fail.
run-++ : ∀(prog₁ prog₂ : Prog) (stk : Stack)
→ run stk (prog₁ ++ prog₂) ≡ (run stk prog₁ >>= (λ stk′ → run stk′ prog₂))
run-++ [] prog₂ stk = refl
run-++ (push n ∷ prog₁) prog₂ stk = run-++ prog₁ prog₂ (n ∷ stk)
run-++ (add ∷ prog₁) prog₂ (n₁ ∷ n₂ ∷ stk) = run-++ prog₁ prog₂ ((n₁ + n₂) ∷ stk)
run-++ (add ∷ prog₁) prog₂ [] = refl
run-++ (add ∷ prog₁) prog₂ (_ ∷ []) = refl
Compilation is a behavior-preserving translation from the source language to the target language
compile : Tm → Prog
compile (lit x) = (push x) ∷ []
compile (add tm₁ tm₂) = compile tm₂ ++ (compile tm₁ ++ (add ∷ []))
The simplest safety guarantee we'll prove for compilation is that it never "goes wrong". By that I mean, running a compiled stack program will always return a just stk
. In fact, we can prove something even stronger: a compiled program will always add just a single new element onto the stack, and that single element is precisely the result of evaluating the term we started from.
compiler-correctness : ∀(tm : Tm) (stk : Stack)
→ run stk (compile tm) ≡ just (eval tm ∷ stk)
compiler-correctness (lit n) stk = refl
compiler-correctness (add tm₁ tm₂) stk = begin
run stk (compile tm₂ ++ compile tm₁ ++ (add ∷ []))
≡⟨ run-++ (compile tm₂) (compile tm₁ ++ (add ∷ [])) stk ⟩
((run stk (compile tm₂)) >>= (λ stk′ → run stk′ (compile tm₁ ++ (add ∷ []))))
≡⟨ cong (_>>= _) (compiler-correctness tm₂ stk) ⟩
run (eval tm₂ ∷ stk) (compile tm₁ ++ (add ∷ []))
≡⟨ run-++ (compile tm₁) (add ∷ []) (eval tm₂ ∷ stk) ⟩
(run (eval tm₂ ∷ stk) (compile tm₁) >>= (λ stk′ → run stk′ (add ∷ [])))
≡⟨ cong (_>>= _) (compiler-correctness tm₁ (eval tm₂ ∷ stk)) ⟩
just ((eval tm₁ + eval tm₂) ∷ stk)
∎
As a corollary, running any compiled term on an empty stack returns a singleton stack containing the value of that term
main-result : ∀(tm : Tm)
→ run [] (compile tm) ≡ just (eval tm ∷ [])
main-result tm = compiler-correctness tm []