#!/bin/bash set -euo pipefail # normalize-app.sh — Normalize an unsigned .app bundle for reproducible comparison. # Zeros non-deterministic metadata (UUIDs, timestamps, build-machine info, code # signatures, temp paths) so that two builds from the same source produce identical # normalized output. # # Usage: ./scripts/normalize-app.sh # # Operates in-place on the .app bundle. Make a copy first if you need the original. APP_PATH="${1:?Usage: normalize-app.sh }" if [ ! -d "$APP_PATH" ]; then echo "Error: $APP_PATH is not a directory" >&2 exit 1 fi echo "==> Normalizing: $APP_PATH" # --------------------------------------------------------------------------- # 1. Canonicalize Assets.car files # Replace binary .car with sorted JSON dump (content-equivalent, order- # independent). Must happen BEFORE any binary zeroing that would corrupt # the BOM header. # --------------------------------------------------------------------------- echo " - Canonicalizing Assets.car..." find "$APP_PATH" -name "Assets.car" -type f | while read -r car_file; do json_file="${car_file}.json" if xcrun assetutil --info "$car_file" > "$json_file" 2>/dev/null; then # Sort JSON keys and strip non-deterministic fields for canonical representation python3 << PYEOF import json path = """$json_file""" with open(path) as f: data = json.load(f) def strip_timestamps(obj): if isinstance(obj, dict): return {k: strip_timestamps(v) for k, v in obj.items() if k not in ("Timestamp",)} elif isinstance(obj, list): return [strip_timestamps(item) for item in obj] return obj data = strip_timestamps(data) # Sort the list of asset entries by a stable key if isinstance(data, list): data.sort(key=lambda x: json.dumps(x, sort_keys=True)) with open(path, 'w') as f: json.dump(data, f, sort_keys=True, indent=2) PYEOF # Replace .car with canonical JSON mv "$json_file" "$car_file" else rm -f "$json_file" echo " Warning: assetutil failed for $car_file, zeroing known variable fields instead" # Fallback: zero the BOM tree 
# NOTE(review): this copy of the script is damaged and should be restored from
# version control rather than repaired by hand.
#   * Line breaks are missing. The first physical line starts with '#', so a shell
#     reading this file treats everything on it as comment text: the
#     `set -euo pipefail`, the APP_PATH directory check and the Assets.car loop
#     written there are never executed.
#   * The next line resumes mid-sentence ("ordering section" is the tail of the
#     "Fallback: zero the BOM tree" comment), so its first word is parsed as a
#     command name.
#   * Text has been cut out in several places. Examples: both usage strings lack
#     an argument placeholder; a struct.pack_into( call runs straight into
#     "/dev/null || true"; two struct.unpack_from( calls are truncated; the
#     section numbering jumps from 1 to 3; get_slice_offsets appears a second
#     time with no section header or heredoc opener before it. Each gap sits
#     where a '<' was followed by a letter and ends at a later '>', presumably
#     from a tag-stripping step -- TODO confirm how the file was produced.
#   * None of the `python3 << PYEOF` heredocs opened below has a closing PYEOF.
#   * For the restored file: the heredocs are unquoted, so "$json_file",
#     "$car_file" and "$file" are expanded by the shell into Python source; a path
#     containing quotes, backslashes or '$' would alter that source. Consider
#     passing paths as arguments with a quoted heredoc delimiter.
ordering section python3 << PYEOF import struct path = """$car_file""" with open(path, 'rb') as f: data = bytearray(f.read()) # Zero BOM tree metadata at known offsets # BOM header timestamp (offset 0x18) if len(data) > 0x1C: struct.pack_into('/dev/null || true fi done # --------------------------------------------------------------------------- # 3. Zero LC_UUID in all Mach-O binaries # --------------------------------------------------------------------------- echo " - Zeroing LC_UUID in Mach-O binaries..." find "$APP_PATH" -type f | while read -r file; do if file "$file" | grep -q "Mach-O"; then python3 << PYEOF import struct, sys path = """$file""" with open(path, 'rb') as f: data = bytearray(f.read()) def get_slice_offsets(data): magic = struct.unpack_from('I', data, 4)[0] return [struct.unpack_from('>I', data, 8 + i * 20 + 8)[0] for i in range(nfat)] elif magic in (0xFEEDFACE, 0xFEEDFACF, 0xCEFAEDFE, 0xCFFAEDFE): return [0] return [] def zero_uuids(data, base): m = struct.unpack_from(' len(data): break cmd = struct.unpack_from('I', data, 4)[0] return [struct.unpack_from('>I', data, 8 + i * 20 + 8)[0] for i in range(nfat)] elif magic in (0xFEEDFACE, 0xFEEDFACF, 0xCEFAEDFE, 0xCFFAEDFE): return [0] return [] for base in get_slice_offsets(data): m = struct.unpack_from(' len(data): break cmd = struct.unpack_from('/dev/null || true done fi echo "==> Normalization complete: $APP_PATH"