Skip to content

Instantly share code, notes, and snippets.

@rrbutani
Last active July 28, 2024 19:11
Show Gist options
  • Save rrbutani/da6f9ab08a84c3610fbff6774929d6b2 to your computer and use it in GitHub Desktop.
Save rrbutani/da6f9ab08a84c3610fbff6774929d6b2 to your computer and use it in GitHub Desktop.
An alternative approach to `mitm-cache` and `buildGradleApplication`'s use of verification-metadata.xml (https://github.com/NixOS/nixpkgs/tree/master/pkgs/development/tools/build-managers/gradle, https://github.com/raphiz/buildGradleApplication)
# Builds a fixed-output derivation holding gradle's dependency cache for a
# project, wrapped in an outer derivation whose setup hook links that cache into
# `GRADLE_USER_HOME` of consuming builds.
#
# First argument set: the packages this expression itself needs.
{ stdenvNoCC
, writeScript
, lib
, gradle
, python312
}:
# NOTE: prior art re: packaging Gradle projects + dealing with external
# dependencies:
# - https://github.com/NixOS/nixpkgs/blob/99dec1f6b06290f64a8d1711c41e5e13653a14c7/pkgs/development/tools/build-managers/gradle/README.md
# - https://github.com/NixOS/nixpkgs/blob/c12b2a0b196a42a20949cf648c86acfbe6418ad3/doc/languages-frameworks/gradle.section.md
# - https://github.com/raphiz/buildGradleApplication
#
# Our use case is not as general so we just use the gradle dependency cache as
# our mechanism for feeding pre-fetched external deps into the build:
# - https://docs.gradle.org/6.1.1/userguide/dependency_resolution.html#sec:dependency_cache
#
# We follow the same pattern used for many dependency managers modeled in
# nixpkgs: have a FOD that fetches the external deps and produces the cache
# and then a main build that consumes the cache.
#
# As per the docs linked above, `$GRADLE_USER_HOME/caches/modules-...` is what
# we're persisting.
#
# Corresponding main derivations should add this derivation to their
# `nativeBuildInputs`; the setup + configure hooks below will take care of the
# rest.
#
# Second argument set: per-project settings.
{ pname
, src
# Extra arguments appended (shell-escaped) to the gradle invocation that
# fetches the dependencies.
, gradleArgs ? []
# Appended to the fetch derivation's `nativeBuildInputs` (after gradle and
# python).
, extraNativeBuildInputs ? []
# Used as the fetch derivation's `buildInputs`.
, extraBuildInputs ? []
# Expected output hash, in SRI form.
, hash
# Task to run to get gradle to fetch the dependencies that will be used.
, task ? "properties"
# Whether to make a copy of `src` in this derivation or not.
, needsMutableSrc ? false
# Unix epoch milliseconds (64 bits) to use in gradle's metadata cache entries.
# Passing `null` leaves `REPLACEMENT_TIMESTAMP` unset for the fixup script.
#
# Note that this does not appear to need to be newer than the current time;
# for cache hits there doesn't seem to be a TTL.. to be safe we default to
# using a far-in-the-future timestamp anyways.
, replacementTimestamp ? 8000000000000 # July 6th, 2223
# Whether to have `make-gradle-dependency-cache-metadata-reproducible.py`
# print information about the replacements it is making.
, debugFixups ? true
}: stdenvNoCC.mkDerivation (finalAttrs: let
# The fixed-output derivation that actually runs gradle (online) and captures
# the resulting dependency cache.
drv = stdenvNoCC.mkDerivation {
# Having a different name but with an existing hash will still cause the
# derivation to be rebuilt.
#
# We intentionally place the gradle version in this derivation's name so
# that it'll be refetched if the gradle version changes — as per the docs,
# different gradle versions have different module/file/metadata cache
# versions: https://docs.gradle.org/current/userguide/dependency_resolution.html#sub:cache_copy
#
# We could technically model the table in the docs linked above so that we're
# only sensitive to differences in gradle version that actually alter the
# dependency cache schema versions but this is currently not worth the effort.
#
# Note that we do not place the main derivation's name in this derivation's
# name: the idea is that mismatches in the fetched deps vs. what the build
# needs will quickly result in loud and visible errors.
name = "gradle-${lib.getVersion gradle}-deps-for-" + pname;
nativeBuildInputs = [ gradle python312 ] ++ extraNativeBuildInputs;
buildInputs = extraBuildInputs;
inherit src;
# When no mutable copy is requested, `src` is not unpacked; gradle is instead
# pointed at it via `--project-dir` and given a temporary project cache dir
# (see `gradleFlags` and `buildPhase` below).
dontUnpack = !needsMutableSrc;
# Relative path; the install phase below moves the cache out of here.
GRADLE_USER_HOME = "./gradle-user-home";
gradleFlags = [
"--no-daemon"
"--no-parallel" # see ./make-gradle-dependency-cache-metadata-reproducible.py
# "--quiet"
] ++ lib.optional (!needsMutableSrc) "--project-dir ${src}";
buildPhase = lib.optionalString (!needsMutableSrc) ''
gradleFlagsArray+=(--project-cache-dir "$(mktemp -d)")
'' +
# Remove `--offline` from the default gradle setup hook.
#
# (we want the other flags from the setup hook though)
''
gradleFlagsArrayCopy=("''${gradleFlagsArray[@]}")
gradleFlagsArray=()
for flag in "''${gradleFlagsArrayCopy[@]}"; do
if [[ "$flag" == "--offline" ]]; then continue; fi
gradleFlagsArray+=("$flag")
done
'' + ''
set -x
gradle ${task} ${lib.strings.escapeShellArgs gradleArgs}
set +x
'';
# Only the `modules-*` directories of the gradle cache are kept as the output.
installPhase = ''
mkdir $out
mv $GRADLE_USER_HOME/caches/modules-* $out/
'';
# Rewrites timestamps in the cache metadata so the output is reproducible;
# the script reads `REPLACEMENT_TIMESTAMP` and `DEBUG` from the environment.
fixupPhase = lib.optionalString (replacementTimestamp != null) ''
export REPLACEMENT_TIMESTAMP="${builtins.toString replacementTimestamp}"
'' + lib.optionalString debugFixups ''
export DEBUG=true
'' + ''
python3 ${./make-gradle-dependency-cache-metadata-reproducible.py} \
--patch $out/*
'' +
# .lock files aren't reproducible and also aren't required:
''
rm $out/modules-*/*.lock
'';
# https://nix.dev/manual/nix/2.22/language/advanced-attributes.html?highlight=outputHash#adv-attr-outputHash
outputHashMode = "recursive";
outputHashAlgo = null; # users must provide hashes in SRI form
outputHash = hash;
};
in {
# The outer derivation shares the fetch derivation's name and only carries the
# setup hook.
inherit (drv) name;
# Depends on what's being fetched of course. Assuming it's typically JARs +
# scripts.
meta.sourceProvenance = with lib.sourceTypes; [
# binaryNativeCode
binaryBytecode
fromSource
];
# Both attributes expose the same fetch derivation.
passthru = {
depsDrv = drv;
inner = drv;
};
dontUnpack = true; dontBuild = true;
# the main derivation should include this drv in `nativeBuildInputs` so that
# the following configure hook can make `GRADLE_USER_HOME` have the dependency
# cache we produced symlinked into it (a temporary `GRADLE_USER_HOME` is created
# if none is set; setting `dontUseGradleDepsConfigure` skips registration):
setupHook = writeScript "gradle-deps-setup-hook" ''
gradleDepsConfigureHook() {
if [ -z "''${GRADLE_USER_HOME-}" ]; then
GRADLE_USER_HOME="$(mktemp -d)"
fi
export GRADLE_USER_HOME
mkdir -p "''${GRADLE_USER_HOME}/caches/"
for m in ${drv}/*; do
echo "gradle-deps: symlinking in dependency cache at '$m'" >&2
ln -s $m "''${GRADLE_USER_HOME}/caches/"
done
}
if [ -z "''${dontUseGradleDepsConfigure-}" ]; then
preConfigureHooks+=(gradleDepsConfigureHook)
fi
'';
})
#!/usr/bin/env python3
# NOTE: there's a reproducibility issue with the metadata stored in:
# - modules-<ver>/metadata-<ver>/module-artifact.bin
# - modules-<ver>/metadata-<ver>/module-metadata.bin
# - modules-<ver>/metadata-<ver>/resource-at-url.bin
#
# This stems from Gradle encoding timestamps in these files:
# - https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L25-L55
#
# Unfortunately there does not appear to be a way to inhibit/override this at
# the application level; the closest thing available is
# `org.gradle.internal.test.clockoffset` but this doesn't help in this case:
# https://github.com/gradle/gradle/blob/dc6e12baff8fbfbe75ee3f5a238831e594d95cb8/subprojects/core/src/main/java/org/gradle/util/internal/BuildCommencedTimeProvider.java#L23
#
# Intercepting at a level below (i.e. using libfaketime:
# https://github.com/wolfcw/libfaketime) also does not work in this case because
# there's network I/O involved; TLS/cert verification complains if system time
# isn't approximately correct.
#
# So, rather than try to get gradle to produce reproducible metadata files we
# just fix them up after the fact.
#
# This script implements a parser for the metadata files and patches timestamps
# to fix the reproducibility issue.
#
# NOTE: there's another source of reproducibility issues: non-determinism about
# which repository dependencies are fetched from...
# - for now, running with `--no-parallel` seems to sufficiently mitigate this
#
# NOTE: there's actually a third source of reproducibility issues that I
# haven't gotten to the bottom of yet: sometimes some entries are listed
# multiple times...
# - in ~100 gradle runs I've only observed this once
# - to mitigate, perhaps it's worth having this script error if it sees
# duplicated entries?
# - "resolving" the issue would entail actually removing the duplicates but
# that's harder; would require having a real encoding flow rather than just
# decode + patch at offsets
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from io import BufferedRandom, BufferedReader
import os
from pathlib import Path
import sys
from typing import Any, ClassVar, Optional, Self, override
# Debug tracing is enabled by the mere presence of a `DEBUG` environment
# variable; its value is not inspected.
DEBUG = os.environ.get("DEBUG") is not None
# NOTE: the layout of the metadata files is not stable; metadata versions are
# listed here:
# - https://github.com/gradle/gradle/blob/497dffa06e9a0769c5e0e2ba866d0fbd88f7a1d5/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/CacheLayout.java#L37-L89
#
# For now we only support the latest version as of this writing (106):
#
# Only the keys are consulted (membership test); every value is `None`.
SUPPORTED_METADATA_LAYOUT_VERSIONS = dict.fromkeys(["106"])
################################################################################
################################################################################
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/Block.java#L19-L21
class Block:
    """Widths, in bytes, of the fixed-size integers used by the block format."""
    LONG_SIZE: int = 8
    INT_SIZE: int = 4
    SHORT_SIZE: int = 2


# Every reader below takes a buffered binary stream; the `peek_*` helpers rely
# on its `peek()` method.
Stream = BufferedReader


def read_byte(stream: Stream) -> int:
    """Read one unsigned byte. NOTE: yields 0 (not an error) at end-of-stream."""
    return int.from_bytes(stream.read(1), byteorder="big")


def read_short(stream: Stream) -> int:
    """Read a big-endian unsigned 16-bit integer."""
    return int.from_bytes(stream.read(Block.SHORT_SIZE), byteorder="big")


def read_int(stream: Stream) -> int:
    """Read a big-endian unsigned 32-bit integer."""
    return int.from_bytes(stream.read(Block.INT_SIZE), byteorder="big")


def read_long(stream: Stream) -> int:
    """Read a big-endian unsigned 64-bit integer."""
    return int.from_bytes(stream.read(Block.LONG_SIZE), byteorder="big")


def read_boolean(stream: Stream) -> bool:
    """Read a one-byte boolean.

    Raises:
        ValueError: if the byte is neither 0 nor 1; the reported offset is the
            stream position *after* the offending byte.
    """
    byte = read_byte(stream)
    if byte == 0:
        return False
    elif byte == 1:
        return True
    else:
        raise ValueError(f"Got {hex(byte)} for boolean at {hex(stream.tell())}")


# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractDecoder.java#L40-L46
def read_byte_array(stream: Stream) -> bytes:
    """Read a byte array prefixed by its length as a 32-bit big-endian int."""
    size = read_int(stream)
    return stream.read(size)


# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/InputStreamBackedDecoder.java#L76-L79
# DataInputStream.readUTF: https://stackoverflow.com/a/59691602
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/OutputStreamBackedEncoder.java#L61-L67
# DataOutputStream.writeUTF: https://docs.oracle.com/javase/8/docs/api/java/io/DataOutputStream.html#writeUTF-java.lang.String-
def read_string(stream: Stream) -> str:
    """Read a string prefixed by its byte length as a 16-bit big-endian int.

    The payload is decoded as strict UTF-8.
    """
    size = read_short(stream)
    raw = stream.read(size)
    return raw.decode()


# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractDecoder.java#L68-L75
def read_nullable_string(stream: Stream) -> Optional['DecodedString']:
    """Read a presence flag followed, when set, by a `DecodedString`."""
    if read_boolean(stream):
        return DecodedString().decode(stream)
    else:
        return None


# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L524-L578
# https://github.com/gradle/gradle/blob/e035e3763e7f954eb155a18fb8bf86972df6a57c/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/kryo/KryoBackedEncoder.java#L67-L70
def read_kryo_varint_positive(stream: Stream) -> int:
    """Read a variable-length int of at most five bytes.

    The first byte contributes its low 6 bits and uses bit 0x40 as the
    "more bytes follow" marker (bit 0x80 is ignored here); each later byte
    contributes 7 bits and uses bit 0x80 as the continuation marker.

    NOTE(review): this is the layout with a flag bit in the first byte. It is
    also used below for byte-array sizes, where it only agrees with a plain
    7-bit varint for values below 64 -- confirm against the encoder.
    """
    first = read_byte(stream)
    out = first & 0x3F
    more = (first & 0x40) != 0
    shift = 6
    while more and shift <= 27:
        byte = read_byte(stream)
        out |= (byte & 0x7F) << shift
        more = (byte & 0x80) != 0
        shift += 7
    return out


# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L829-L848
def read_kryo_string(stream: Stream) -> Optional[str]:
    """Read a Kryo-encoded string.

    Returns:
        The decoded string, `""` for an empty string, or `None` for an encoded
        null.

    Raises:
        EOFError: if the stream ends inside an ASCII string.
    """
    first = peek_byte(stream)
    # https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L850-L941
    if (first & 0x80) == 0:
        # ASCII: every byte is a character; the last one has bit 0x80 set.
        chars = bytearray()
        while True:
            raw = stream.read(1)
            if not raw:
                # Without this check a truncated file would loop forever,
                # since reads at end-of-stream come back empty.
                raise EOFError(
                    f"unterminated ASCII string at {hex(stream.tell())}"
                )
            byte = raw[0]
            if byte & 0x80:
                chars.append(byte & 0x7F)
                break
            chars.append(byte)
        return chars.decode("ascii")

    # Length-prefixed: the prefix stores (number of chars + 1); 0 means null.
    num_chars = read_kryo_varint_positive(stream)
    starting_pos = stream.tell()
    if num_chars == 0:
        return None
    if num_chars == 1:
        return ""
    num_chars -= 1

    # UTF-8: walk the characters to find out how many bytes they occupy.
    curr_char = 0
    while curr_char < num_chars:
        hi = read_byte(stream) >> 4
        if hi in (12, 13):
            # two-byte sequence
            read_byte(stream)
        elif hi == 14:
            # three-byte sequence
            read_byte(stream)
            read_byte(stream)
        # hi in 0..7 is a single byte; any other lead byte is counted as one
        # character without consuming continuation bytes.
        curr_char += 1
    byte_len = stream.tell() - starting_pos
    stream.seek(starting_pos)
    # Characters are encoded one UTF-16 unit at a time, so a supplementary
    # character shows up as two encoded surrogates; `surrogatepass` keeps those
    # from aborting the decode.
    return stream.read(byte_len).decode("utf-8", errors="surrogatepass")


# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L651-L697
def read_kryo_varlong_positive(stream: Stream) -> int:
    """Read a variable-length long of at most nine bytes (unsigned result).

    Bytes 1-8 carry 7 payload bits each plus a continuation bit (0x80); a ninth
    byte, when present, carries a full 8 bits.
    """
    out = 0
    for byte_num in range(9):
        byte = read_byte(stream)
        if byte_num == 8:
            # Final byte has no continuation bit: all 8 bits are payload.
            out |= byte << 56
            break
        out |= (byte & 0x7F) << (byte_num * 7)
        if (byte & 0x80) == 0:
            break
    return out


# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractEncoder.java#L44-L48
def read_kryo_byte_array(stream: Stream) -> bytes:
    """Read a byte array prefixed by its length as a variable-length int."""
    size = read_kryo_varint_positive(stream)
    return stream.read(size)


def _peek_exact(stream: Stream, count: int) -> bytes:
    """Return up to `count` upcoming bytes without moving the stream position.

    `peek()` may hand back fewer bytes than requested when the internal buffer
    is nearly drained, so fall back to read + seek in that case.
    """
    data = stream.peek(count)[:count]
    if len(data) < count:
        pos = stream.tell()
        data = stream.read(count)
        stream.seek(pos)
    return data


def peek_byte(stream: Stream) -> int:
    """Return the next byte without consuming it (0 at end-of-stream)."""
    return int.from_bytes(stream.peek(1)[:1], byteorder="big")


def peek_short(stream: Stream) -> int:
    """Return the next big-endian 16-bit integer without consuming it."""
    return int.from_bytes(_peek_exact(stream, Block.SHORT_SIZE), byteorder="big")


def peek_int(stream: Stream) -> int:
    """Return the next big-endian 32-bit integer without consuming it."""
    return int.from_bytes(_peek_exact(stream, Block.INT_SIZE), byteorder="big")


def peek_long(stream: Stream) -> int:
    """Return the next big-endian 64-bit integer without consuming it."""
    return int.from_bytes(_peek_exact(stream, Block.LONG_SIZE), byteorder="big")
def eq(a, b):
    """Compare two integers, reporting any mismatch on stderr.

    Returns `True` when the values match; otherwise prints both values (decimal
    and hex) to stderr and returns `False`. Intended for use inside `assert`
    so that a failing check shows what was compared.
    """
    mismatch = a != b
    if mismatch:
        print(f"a: {a} ({hex(a)}), b: {b} ({hex(b)})", file=sys.stderr)
    return not mismatch
class Decodable(ABC):
    """Base class for values that can be decoded from a `Stream`.

    `decode` records where in the stream the value started and how many bytes
    it spanned; subclasses implement the actual parsing in `_decode`.
    """

    # Stream offset at which decoding of this value began (set by `decode`).
    at_offset: Optional[int] = None
    # Number of bytes the stream advanced while decoding this value.
    decoded_len: Optional[int] = None
    # Guards against decoding into the same instance twice.
    already_decoded_into: bool = False

    @abstractmethod
    def size(self) -> int:
        """Return the encoded size of this value in bytes."""
        pass

    def decode(self, stream: Stream) -> 'Self':
        """Decode this value from `stream` and return `self`.

        Raises:
            RuntimeError: if this instance has already been decoded into.
        """
        if self.already_decoded_into:
            # Raising a bare string is itself a `TypeError` in Python 3; use a
            # real exception so the cause is visible.
            raise RuntimeError(
                f"{type(self).__name__} instance has already been decoded into"
            )
        self.at_offset = stream.tell()
        self.already_decoded_into = True
        self._decode(stream)
        offset_after = stream.tell()
        assert offset_after >= self.at_offset
        self.decoded_len = offset_after - self.at_offset
        return self

    @abstractmethod
    def _decode(self, stream: Stream):
        """Parse this value's fields from `stream`."""
        pass
class DecodableBlock(Decodable):
    """A `Decodable` framed as a block: header, payload, then a trailing int.

    The header is one type byte plus a 32-bit payload length; the tail is a
    32-bit count that must equal the number of bytes from the start of the
    block to the end of the decoded payload.
    """

    HEADER_SIZE: int = 1 + Block.INT_SIZE
    TAIL_SIZE: int = Block.INT_SIZE

    @abstractmethod
    def type_byte(self) -> Optional[int]:
        """Return the marker byte that identifies this kind of block."""
        pass

    @abstractmethod
    def inner_size(self) -> int:
        """Return the payload size (excluding header and tail) in bytes."""
        pass

    @abstractmethod
    def _inner_decode(self, stream: Stream):
        """Decode the payload; the stream is positioned just past the header."""
        pass

    def size(self) -> int:
        """Return the full on-disk size of the block: header + payload + tail."""
        payload = self.inner_size()
        return self.HEADER_SIZE + payload + self.TAIL_SIZE

    # https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/FileBackedBlockStore.java#L209-L229
    def _decode(self, stream: Stream):
        block_start = stream.tell()

        # Header: type marker, then the declared payload length.
        marker = read_byte(stream)
        assert eq(marker, self.type_byte())
        declared_len = read_int(stream)

        self._inner_decode(stream)
        payload_end = stream.tell()

        # Tail: bytes consumed so far, counted from the start of the block.
        self.actual_payload_length = read_int(stream)
        assert eq(payload_end - block_start, self.actual_payload_length)
        # Checked only after the payload is decoded because `inner_size()` may
        # depend on fields read by `_inner_decode`.
        assert eq(self.inner_size(), declared_len)

        # Skip whatever remains of the block's allotted space.
        stream.seek(block_start + self.size())
class Patchable(ABC): # NOTE: assumes implementors are also `Decodable`
def fixed_timestamp(self, override: Optional[Any] = None) -> bytes:
default = str(0x77777777777) # 2230
ts = override or os.environ.get("REPLACEMENT_TIMESTAMP") or default
ts = int(ts).to_bytes(Block.LONG_SIZE, byteorder="big")
return ts
def patch(self, writer: BufferedRandom, hint: Optional[Any] = None):
assert self.already_decoded_into
start, size = self.at_offset, self.decoded_len
end = start + size
if DEBUG:
print(
f"[PATCHING] {type(self)}: {size} ({hex(size)}) bytes at: "
f"{start} to {end} ({hex(start)} to {hex(end)}):\n"
f" - {self}",
file=sys.stderr,
)
writer.seek(start)
orig_bytes = writer.read(size)
new_bytes = self._patch(orig_bytes, hint)
if new_bytes == None:
return
assert len(orig_bytes) == len(new_bytes) == size
if DEBUG:
print(
f" + making replacement:\n"
f" * orig: {orig_bytes}\n"
f" * new: {new_bytes}\n",
file=sys.stderr,
)
writer.seek(start)
writer.write(new_bytes)
def _patch(self, _bytes: bytes, _hint: Optional[Any] = None) -> None | bytes:
return None
@dataclass
class Timestamp(Decodable, Patchable):
    """A 64-bit big-endian timestamp that is replaced when patched."""

    millis_since_unix_epoch: int = 0  # 64-bit int

    def size(self):
        return Block.LONG_SIZE

    def _decode(self, inp: Stream):
        self.millis_since_unix_epoch = read_long(inp)

    def __repr__(self):
        # Shown at whole-second precision, in local time.
        seconds = self.millis_since_unix_epoch // 1000
        return repr(datetime.fromtimestamp(seconds))

    @override
    def _patch(self, _orig: bytes, hint: Optional[Any] = None) -> bytes:
        # The original bytes are ignored; `hint`, when given, overrides the
        # replacement timestamp.
        return self.fixed_timestamp(hint)
@dataclass  # useful if you want to know the offset of a string
class DecodedString(Decodable):
    """A length-prefixed string (see `read_string`) that records its offset."""

    inner: str = ""

    def size(self):
        # not useful pre-decode
        return len(self.inner)

    def _decode(self, inp: Stream):
        self.inner = read_string(inp)
@dataclass
class DecodedKryoString(Decodable):
    """A Kryo-encoded string (see `read_kryo_string`) that records its offset.

    `inner` is `None` when the encoded value was null.
    """

    inner: Optional[str] = ""

    def size(self):
        """Not available: the encoded length is only known after decoding.

        Raises:
            NotImplementedError: always.
        """
        # Raising a bare string is a `TypeError` in Python 3.
        raise NotImplementedError("unknown")

    def _decode(self, inp: Stream):
        self.inner = read_kryo_string(inp)
################################################################################
################################################################################
@dataclass
class FreeListEntry(Decodable):
    """One free-list record: a 64-bit position followed by a 32-bit size."""

    pos: int = 0
    size_: int = 0

    def size(self) -> int:
        # Encoded width of the record itself, not the `size_` it stores.
        return Block.LONG_SIZE + Block.INT_SIZE

    def _decode(self, stream: Stream):
        self.pos, self.size_ = read_long(stream), read_int(stream)
# https://github.com/gradle/gradle/blob/bb763da97066c47f0dc2d0f119286320b4382401/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/FreeListBlockStore.java#L128-L256
@dataclass
class FreeListBlock(DecodableBlock):
    """Block (type byte 0x44) holding a list of `FreeListEntry` records."""

    max_free_list_entries: int
    next_block: int = None
    largest_in_next_block: int = None
    entries: Optional[list[FreeListEntry]] = None

    def type_byte(self) -> int:
        return 0x44

    def inner_size(self) -> int:
        # Space is sized for the maximum number of entries, regardless of how
        # many are actually stored.
        fixed_part = Block.LONG_SIZE + Block.INT_SIZE + Block.INT_SIZE
        per_entry = FreeListEntry().size()
        return fixed_part + self.max_free_list_entries * per_entry

    def _inner_decode(self, stream: Stream):
        self.next_block = read_long(stream)
        self.largest_in_next_block = read_int(stream)
        count = read_int(stream)
        self.entries = []
        self.entries.extend(
            FreeListEntry().decode(stream) for _ in range(count)
        )
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L328-L364
@dataclass
class BTreeHeaderBlock(DecodableBlock):
    """Block (type byte 0x55) holding the root position and the entry limit."""

    max_child_index_entries: int

    def type_byte(self) -> int:
        return 0x55

    def inner_size(self) -> int:
        return Block.LONG_SIZE + Block.SHORT_SIZE

    def _inner_decode(self, stream: Stream):
        self.root_pos = read_long(stream)
        # The stored limit must match the one this decoder was configured with.
        stored_limit = read_short(stream)
        assert eq(stored_limit, self.max_child_index_entries)
@dataclass
class IndexEntry(Decodable):
    """One index record made of three consecutive 64-bit values."""

    hash_code: int = 0
    data_block: int = 0
    child_index_block: int = 0

    def size(self) -> int:
        return 3 * Block.LONG_SIZE

    def _decode(self, stream: Stream):
        self.hash_code = read_long(stream)
        self.data_block = read_long(stream)
        self.child_index_block = read_long(stream)

    def __repr__(self):
        # An all-ones 64-bit value is displayed as `None`.
        no_child = 2 ** 64 - 1
        ch = self.child_index_block if self.child_index_block != no_child else None
        return f"IndexEntry(hash={hex(self.hash_code)}, data_block={hex(self.data_block)}, child_index_block = {ch})"
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L366-L396
@dataclass
class BTreeIndexBlock(DecodableBlock):
    """Block (type byte 0x77) holding `IndexEntry` records and a tail position."""

    max_child_index_entries: int
    # NOTE: populated with `IndexEntry` objects by `_inner_decode`.
    entries: list[int] = None
    tail_pos: int = None

    def type_byte(self) -> int:
        return 0x77

    def inner_size(self) -> int:
        # Sized for the maximum number of entries, not the stored count.
        entries_area = IndexEntry().size() * self.max_child_index_entries
        return Block.INT_SIZE + Block.LONG_SIZE + entries_area

    def _inner_decode(self, stream: Stream):
        count = read_int(stream)
        self.entries = []
        self.entries.extend(IndexEntry().decode(stream) for _ in range(count))
        self.tail_pos = read_long(stream)
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L657-L707
@dataclass
class BTreeDataBlock[V: Decodable](DecodableBlock, Patchable):
value: V
size_: int = 0
byte_len: int = 0
def type_byte(self) -> int: return 0x33
def inner_size(self): return 2 * Block.INT_SIZE + self.size_
def _inner_decode(self, stream: Stream):
self.size_ = read_int(stream)
self.byte_len = read_int(stream)
# We only model and deserialize values (not keys) because.. that's all
# that's actually stored; key values aren't serialized, only hashes:
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L141-L164
# previously:
# self.value = stream.read(self.byte_len)
pos_before = stream.tell()
self.value.decode(stream)
pos_after = stream.tell()
read = pos_after - pos_before
if read != self.byte_len:
msg = (
f"expected {type(self.value)} to read {self.byte_len} "
f"({hex(self.byte_len)}) bytes ({hex(pos_before)} to "
f"{hex(pos_before + self.byte_len)}); actually read {read} "
f"({hex(read)}) bytes (up to {hex(pos_after)})"
f"\n\nvalue: {self.value}"
)
if read > self.byte_len:
print(ValueError(msg), file=sys.stderr)
raise ValueError(msg) # NOTE: choosing to not exit for now..
stream.seek(pos_before + self.byte_len)
elif DEBUG:
print(f"warning: {msg}", file=sys.stderr)
stream.seek(pos_before + self.byte_len)
@override
def patch(self, writer: BufferedRandom, hint: Any | None = None):
if issubclass(type(self.value), Patchable):
self.value.patch(writer, hint)
@dataclass
class BTreePersistentIndexedCache[V: Decodable](Decodable):
value_type: type[V]
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L60-L62
max_child_index_entries: int = 512
max_free_list_entries: int = 512
entries: Optional[list[Decodable]] = None
def size(self): raise "unknown"
def _decode(self, inp: Stream):
# technically the cache is always supposed to start with a
# `FreeListBlock` but... it's fine if we're overly permissive
self.entries = []
dbg = lambda *a, **kw: print(*a, **kw) if DEBUG else None
while inp.peek():
byte = peek_byte(inp)
dbg(f"[{hex(inp.tell())}] {hex(byte)}", end=": ", file=sys.stderr)
if byte == 0x33:
x = BTreeDataBlock[V]((self.value_type())).decode(inp)
elif byte == 0x44:
x = FreeListBlock(self.max_free_list_entries).decode(inp)
elif byte == 0x55:
x = BTreeHeaderBlock(self.max_child_index_entries).decode(inp)
elif byte == 0x77:
x = BTreeIndexBlock(self.max_child_index_entries).decode(inp)
else:
raise f"uh-oh: {hex(byte)}"
dbg(x, end="\n\n", file=sys.stderr)
self.entries.append(x)
################################################################################
################################################################################
# metadata bin files layout information:
# - module-artifact.bin:
# + https://github.com/gradle/gradle/blob/26101a599b44b7fd49b2db9e0cf3b475058be3a6/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/DependencyManagementBuildTreeScopeServices.java#L232-L238
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L45-L48
# + key: ArtifactAtRepositoryKey
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L84-L103
# + value: CachedArtifact
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L105-L181
# - module-metadata.bin:
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/PersistentModuleMetadataCache.java#L67
# + key: RevisionKey
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/PersistentModuleMetadataCache.java#L106-L136
# + value: ModuleMetadataCacheEntry
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L23-L55
# - resource-at-url.bin:
# + https://github.com/gradle/gradle/blob/26101a599b44b7fd49b2db9e0cf3b475058be3a6/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/DependencyManagementBuildTreeScopeServices.java#L126-L134
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/ByUrlCachedExternalResourceIndex.java#L28
# + key: String
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/ByUrlCachedExternalResourceIndex.java#L28
# + value: CachedExternalResource
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L40
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L86-L135
#
# We only care about modeling the value types; key contents actually aren't
# saved, only their hashes: https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L141-L164
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L105-L181
@dataclass
class CachedArtifact(Decodable, Patchable):
    """Value type stored in `module-artifact.bin`.

    Layout: missing flag, cached-at timestamp, hash bytes, then either the
    artifact file path or a list of attempted locations.
    """

    is_missing: bool = False
    cached_at: Optional[Timestamp] = None  # timestamp, UNIX epoch milliseconds
    encoded_hash: Optional[bytes] = None
    # one of these will be present, depending on `is_missing`
    artifact_file: Optional[DecodedKryoString] = None  # if is_missing == False
    attempted: Optional[list[DecodedKryoString]] = None  # if is_missing == True

    def size(self):
        """Not available: the encoded length depends on the content.

        Raises:
            NotImplementedError: always.
        """
        # Raising a bare string is a `TypeError` in Python 3.
        raise NotImplementedError("not known ahead of time")

    def _decode(self, inp: Stream):
        self.is_missing = read_boolean(inp)
        self.cached_at = Timestamp().decode(inp)
        self.encoded_hash = read_kryo_byte_array(inp)
        if self.is_missing:
            self.attempted = []
            size = read_kryo_varint_positive(inp)
            for _ in range(size):
                self.attempted.append(DecodedKryoString().decode(inp))
        else:
            self.artifact_file = DecodedKryoString().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        """Rewrite only the cached-at timestamp; other fields are left alone."""
        self.cached_at.patch(writer)
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L23-L55
@dataclass
class ModuleMetadataCacheEntry(Decodable, Patchable):
type: int = 0
create_timestamp: Optional[Timestamp] = None
is_changing: Optional[bool] = None # only present if type == TYPE_PRESENT
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntry.java#L22-L23
TYPE_MISSING: ClassVar[int] = 0
TYPE_PRESENT: ClassVar[int] = 1
def is_missing(self) -> bool: self.type == ModuleMetadataCacheEntry.TYPE_MISSING
def is_present(self) -> bool: self.type == ModuleMetadataCacheEntry.TYPE_PRESENT
def size(self) -> int: raise "not static"
def _decode(self, inp: Stream):
self.type = read_byte(inp)
match self.type:
case ModuleMetadataCacheEntry.TYPE_MISSING:
self.create_timestamp = Timestamp().decode(inp)
case ModuleMetadataCacheEntry.TYPE_PRESENT:
self.is_changing = read_boolean(inp)
self.create_timestamp = Timestamp().decode(inp)
case other:
raise ValueError(f"invalid type: {hex(other)}")
@override
def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
# NOTE: we're adjusting missing entry cache entries to point to a date
# in the future... might be problematic if we want to actually attempt
# a refetch?
self.create_timestamp.patch(writer)
@dataclass
class ExternalResourceMetadata(Decodable, Patchable):
    """Metadata record nested inside a `CachedExternalResource`.

    Layout: URI, optional last-modified timestamp (behind a flag), content
    type, content length, etag, optional sha1 (behind a flag).
    """

    uri: Optional[DecodedKryoString] = None
    last_modified: Optional[Timestamp] = None
    content_type: Optional[DecodedKryoString] = None
    content_length: int = 0  # long
    etag: Optional[DecodedKryoString] = None
    sha1: Optional[DecodedKryoString] = None

    def size(self) -> int:
        """Not available: the encoded length depends on the content.

        Raises:
            NotImplementedError: always.
        """
        # Raising a bare string is a `TypeError` in Python 3.
        raise NotImplementedError("not known before-hand")

    def _decode(self, inp: Stream):
        self.uri = DecodedKryoString().decode(inp)
        has_last_modified = read_boolean(inp)
        if has_last_modified:
            self.last_modified = Timestamp().decode(inp)
        self.content_type = DecodedKryoString().decode(inp)
        self.content_length = read_kryo_varlong_positive(inp)
        self.etag = DecodedKryoString().decode(inp)
        if read_boolean(inp):
            self.sha1 = DecodedKryoString().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        # TODO: I think we may be able to leave these timestamps as is? they're
        # provided by the repository IIUC
        if self.last_modified:
            self.last_modified.patch(writer)
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L86-L135
@dataclass
class CachedExternalResource(Decodable, Patchable):
    """Value type stored in `resource-at-url.bin`.

    Layout: optional cached file path (behind a flag), cached-at timestamp,
    optional `ExternalResourceMetadata` (behind a flag).
    """

    cached_file: Optional[DecodedKryoString] = None
    cached_at: Optional[Timestamp] = None
    metadata: Optional[ExternalResourceMetadata] = None

    def size(self) -> int:
        """Not available: the encoded length depends on the content.

        Raises:
            NotImplementedError: always.
        """
        # Raising a bare string is a `TypeError` in Python 3.
        raise NotImplementedError("unknown")

    def _decode(self, inp: Stream):
        has_cached_file = read_boolean(inp)
        if has_cached_file:
            self.cached_file = DecodedKryoString().decode(inp)
        self.cached_at = Timestamp().decode(inp)
        has_metadata = read_boolean(inp)
        if has_metadata:
            self.metadata = ExternalResourceMetadata().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        """Rewrite the cached-at timestamp and, if present, the metadata's."""
        self.cached_at.patch(writer)
        if self.metadata:
            self.metadata.patch(writer)
################################################################################
################################################################################
def patch(metadata_file: BufferedRandom, decoded: BTreePersistentIndexedCache):
    """Apply in-place patches for every patchable block of a decoded cache.

    Blocks that are not `Patchable` are skipped.
    """
    patchable_blocks = (
        block for block in decoded.entries if isinstance(block, Patchable)
    )
    for block in patchable_blocks:
        block.patch(metadata_file)
################################################################################
################################################################################
def process_metadata_files_in_dir(dir: Path | str, do_patch: bool = True) -> int:
metadata_dirs = [
d for d in Path(dir).iterdir()
if d.name.startswith("metadata-")
if d.is_dir()
]
if not metadata_dirs:
print(f"No metadata directories found under {dir}; skipping..", file=sys.stderr)
return 0
for m in metadata_dirs:
version = m.name.removeprefix("metadata-")
if not version.startswith("2."):
print(
f"Error processing metadata dir {m}: don't know how to handle "
f"metadata version: {version}; expect metadata versions to "
f"start with '2.'",
file=sys.stderr,
)
return 1
version = version.removeprefix("2.")
if not version in SUPPORTED_METADATA_LAYOUT_VERSIONS:
print(
f"Error processing metadata dir {m}: metadata version "
f"{version} is unsupported. We only know how to handle these "
f"metadata versions: "
f"{SUPPORTED_METADATA_LAYOUT_VERSIONS.keys()}",
file=sys.stderr,
)
return 2
for file in sorted(m.iterdir()):
if not file.is_file(): continue
print(f"Processing '{file}'...", file=sys.stderr)
decoded = None
match file.name:
case "module-artifact.bin":
decoded = BTreePersistentIndexedCache(CachedArtifact)
case "module-metadata.bin":
decoded = BTreePersistentIndexedCache(ModuleMetadataCacheEntry)
case "resource-at-url.bin":
decoded = BTreePersistentIndexedCache(CachedExternalResource)
case _:
print(
f"Warning: unexpected file in metadata dir: {file}; "
f"ignoring...",
file=sys.stderr
)
continue
decoded.decode(open(file, "rb"))
if do_patch:
with open(file, "rb+") as w:
patch(w, decoded)
return 0
def process(dirs: list[str], patch: bool):
    """Process each directory in order, exiting on the first failure.

    The process exits with the non-zero status returned for the failing
    directory; later directories are not visited.
    """
    for directory in dirs:
        status = process_metadata_files_in_dir(directory, patch)
        if status != 0:
            sys.exit(status)
if __name__ == "__main__":
    # Command line: a mode flag followed by zero or more directories.
    cli_args = sys.argv[1:]
    mode = cli_args[0] if cli_args else None
    if mode == "--dump":
        # Dumping decodes only (no patching) and forces debug output on.
        DEBUG = True
        process(cli_args[1:], False)
    elif mode == "--patch":
        process(cli_args[1:], True)
    else:
        print("usage: [--dump | --patch] [... module metadata directories]")
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment