Task 365: .LZ File Format

Task 365: .LZ File Format

The intrinsic properties of the .LZ (lzip) file format, i.e. the fields stored in each member's header and trailer, are:

  • Magic Number: The 4-byte ASCII string "LZIP" (hex: 0x4C 0x5A 0x49 0x50).
  • Version Number: A 1-byte value, currently 1 (version 0 is obsolete and uses a different dictionary size encoding).
  • Coded Dictionary Size: A 1-byte value encoding the LZMA dictionary size (ranging from 4 KiB to 512 MiB). The lower 5 bits represent the base exponent (12 to 29 for 2^12 to 2^29), and the upper 3 bits represent a numerator (0 to 7) for subtraction: dictionary_size = 2^exponent - (numerator * 2^(exponent - 4)).
  • CRC32: A 4-byte unsigned integer (little-endian) representing the CRC32 checksum of the uncompressed data.
  • Uncompressed Data Size: An 8-byte unsigned integer (little-endian) representing the size of the uncompressed data (0 to 2^64 - 1 bytes).
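  • Member Size: An 8-byte unsigned integer (little-endian) representing the total size of the member (header + compressed data + trailer). Because the header takes 6 bytes and the trailer 20 bytes, the value is always at least 26; the field itself can hold values up to 2^64 - 1 bytes.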

Two direct download links for .LZ files:

Here is the HTML with embedded JavaScript that can be embedded in a Ghost blog post (or any HTML page). It creates a drag-and-drop area for a .LZ file and dumps the properties to the screen, assuming a single-member file for simplicity:

LZ File Properties Dumper
Drag and drop a .LZ file here
  1. Here is a Python class for handling .LZ files. It can open a file, decode/parse the properties (assuming single-member for simplicity), and print them to console. For full read/write (compress/decompress), it uses the built-in lzma module to verify and create files.
import lzma
import struct
import zlib
import os

class LZFile:
    """Inspect, verify and create single-member lzip (.lz) files.

    A member consists of a 6-byte header (4-byte magic, 1-byte version,
    1-byte coded dictionary size), a raw LZMA stream, and a 20-byte
    little-endian trailer (CRC32, uncompressed size, member size).
    """

    _MAGIC = b'LZIP'
    _HEADER_SIZE = 6
    _TRAILER_SIZE = 20
    _MIN_DICT_SIZE = 1 << 12  # 4 KiB
    _MAX_DICT_SIZE = 1 << 29  # 512 MiB

    def __init__(self, filename):
        """Remember *filename*; nothing is read until read_and_decode()."""
        self.filename = filename
        self.magic = None
        self.version = None
        self.dict_size = None
        self.crc32 = None
        self.uncompressed_size = None
        self.member_size = None

    @staticmethod
    def _decode_dict_size(ds):
        """Return the dictionary size described by the coded byte *ds*.

        Bits 4-0 hold the base-2 exponent; bits 7-5 hold how many
        sixteenths of the base size are subtracted from it.
        """
        base_size = 1 << (ds & 0x1F)
        return base_size - (base_size >> 4) * (ds >> 5)

    @staticmethod
    def _encode_dict_size(dict_size):
        """Return the coded byte for the smallest size >= *dict_size*.

        The exponent must be rounded *up* so that the base size is never
        smaller than the requested size; the numerator then removes as
        many sixteenths as possible without going below the request.
        """
        exponent = (dict_size - 1).bit_length()  # ceil(log2(dict_size))
        base_size = 1 << exponent
        numerator = (base_size - dict_size) // (base_size >> 4)  # 0..7
        return (numerator << 5) | exponent

    def read_and_decode(self):
        """Parse the header and trailer of the file into the attributes.

        Only the first 6 and last 20 bytes are read, so large archives
        are not loaded into memory.

        Raises:
            ValueError: if the file is too small, does not start with the
                "LZIP" magic, or its trailer's member size differs from
                the file size (multimember or damaged file).
            OSError: if the file cannot be opened or read.
        """
        with open(self.filename, 'rb') as f:
            file_size = f.seek(0, os.SEEK_END)
            if file_size < self._HEADER_SIZE + self._TRAILER_SIZE:
                raise ValueError("File too small to be a valid .LZ file.")
            f.seek(0)
            header = f.read(self._HEADER_SIZE)
            f.seek(file_size - self._TRAILER_SIZE)
            trailer = f.read(self._TRAILER_SIZE)

        # Parse header
        if header[:4] != self._MAGIC:
            raise ValueError("Bad magic number: not a .LZ file.")
        self.magic = header[:4].decode('ascii')
        self.version = header[4]
        self.dict_size = self._decode_dict_size(header[5])

        # Parse trailer
        self.crc32, self.uncompressed_size, self.member_size = struct.unpack('<IQQ', trailer)

        if self.member_size != file_size:
            raise ValueError("Multimember files not supported or invalid file.")

    def print_properties(self):
        """Print the decoded header and trailer fields to stdout."""
        print(f"Magic Number: {self.magic}")
        print(f"Version Number: {self.version}")
        print(f"Dictionary Size: {self.dict_size} bytes")
        print(f"CRC32: 0x{self.crc32:08X}")
        print(f"Uncompressed Data Size: {self.uncompressed_size} bytes")
        print(f"Member Size: {self.member_size} bytes")

    def verify_decompress(self):
        """Decompress the member and check it against the trailer.

        Must be called after read_and_decode(). The payload is decoded as
        a raw LZMA1 stream using the header's dictionary size and the
        fixed lc=3, lp=0, pb=2 parameters.

        Raises:
            ValueError: if the CRC32 or the size of the decompressed data
                differs from the values stored in the trailer.
            lzma.LZMAError: if the compressed stream itself is corrupt.
        """
        payload_size = self.member_size - self._HEADER_SIZE - self._TRAILER_SIZE
        with open(self.filename, 'rb') as f:
            f.seek(self._HEADER_SIZE)  # Skip header
            compressed = f.read(payload_size)
        filters = [{'id': lzma.FILTER_LZMA1, 'dict_size': self.dict_size, 'lc': 3, 'lp': 0, 'pb': 2}]
        decompressor = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=filters)
        uncompressed = decompressor.decompress(compressed)
        computed_crc = zlib.crc32(uncompressed) & 0xFFFFFFFF
        computed_size = len(uncompressed)
        if computed_crc != self.crc32 or computed_size != self.uncompressed_size:
            raise ValueError("Verification failed: CRC or size mismatch.")
        print("Decompression verification successful.")

    @staticmethod
    def write_new_file(output_filename, input_data, dict_size=1 << 23):  # Default 8 MiB
        """Compress *input_data* into a new single-member .lz file.

        Args:
            output_filename: path of the file to create (overwritten).
            input_data: bytes-like object holding the uncompressed data.
            dict_size: requested LZMA dictionary size in bytes, between
                4 KiB and 512 MiB. Sizes that cannot be represented
                exactly are recorded in the header as the next larger
                representable size, which is always safe for decoding.

        Raises:
            ValueError: if *dict_size* is outside the valid range.
        """
        if not LZFile._MIN_DICT_SIZE <= dict_size <= LZFile._MAX_DICT_SIZE:
            raise ValueError("Dictionary size must be between 4 KiB and 512 MiB.")

        filters = [{'id': lzma.FILTER_LZMA1, 'dict_size': dict_size, 'lc': 3, 'lp': 0, 'pb': 2}]
        compressor = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        compressed = compressor.compress(input_data) + compressor.flush()
        crc32 = zlib.crc32(input_data) & 0xFFFFFFFF
        uncompressed_size = len(input_data)
        member_size = LZFile._HEADER_SIZE + len(compressed) + LZFile._TRAILER_SIZE

        ds = LZFile._encode_dict_size(dict_size)

        header = LZFile._MAGIC + struct.pack('<BB', 1, ds)
        trailer = struct.pack('<IQQ', crc32, uncompressed_size, member_size)
        with open(output_filename, 'wb') as f:
            f.write(header + compressed + trailer)

Example usage:

# Inspect an existing archive: parse it, show its fields, then check that
# the payload really decompresses to what the trailer promises.
archive = LZFile('example.lz')
archive.read_and_decode()
archive.print_properties()
archive.verify_decompress()
# Creating an archive instead:
#   LZFile.write_new_file('new.lz', b'Hello world data')
  1. Here is a Java class for handling .LZ files. It can open a file, decode/parse the properties (assuming single-member), and print them to console. For full read/write, it assumes the XZ for Java library (org.tukaani.xz) is available for compress/decompress.
import java.io.*;
import java.math.BigInteger;
import java.util.zip.CRC32;
import org.tukaani.xz.*;  // Assume XZ for Java library is imported

/**
 * Reader/writer for single-member lzip (.lz) files.
 *
 * <p>A member is a 6-byte header (4-byte magic, version byte, coded
 * dictionary size byte), a raw LZMA stream terminated by an end marker,
 * and a 20-byte little-endian trailer (CRC32, uncompressed size, member
 * size). Compression and decompression are delegated to XZ for Java.
 */
public class LZFile {
    private static final int HEADER_SIZE = 6;
    private static final int TRAILER_SIZE = 20;
    private static final long MIN_DICT_SIZE = 1L << 12;  // 4 KiB
    private static final long MAX_DICT_SIZE = 1L << 29;  // 512 MiB

    // LZMA literal/position parameters used by this class for every member.
    private static final int LC = 3;
    private static final int LP = 0;
    private static final int PB = 2;

    private String filename;
    private String magic;
    private int version;
    private long dictSize;
    private long crc32;
    private BigInteger uncompressedSize;
    private BigInteger memberSize;

    /** Remembers the path; nothing is read until {@link #readAndDecode()}. */
    public LZFile(String filename) {
        this.filename = filename;
    }

    /**
     * Parses the header and trailer of the file into this object's fields.
     *
     * @throws IOException if the file is unreadable, too small, does not
     *         start with "LZIP", or its recorded member size differs from
     *         the file size (multimember or damaged file)
     */
    public void readAndDecode() throws IOException {
        File file = new File(filename);
        long fileSize = file.length();
        if (fileSize < HEADER_SIZE + TRAILER_SIZE) {
            throw new IOException("File too small to be a valid .LZ file.");
        }

        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
            // Read header
            byte[] header = new byte[HEADER_SIZE];
            raf.readFully(header);
            if (header[0] != 'L' || header[1] != 'Z' || header[2] != 'I' || header[3] != 'P') {
                throw new IOException("Bad magic number: not a .LZ file.");
            }
            magic = new String(header, 0, 4, java.nio.charset.StandardCharsets.US_ASCII);
            version = header[4] & 0xFF;
            dictSize = decodeDictSize(header[5] & 0xFF);

            // Read trailer
            raf.seek(fileSize - TRAILER_SIZE);
            byte[] trailer = new byte[TRAILER_SIZE];
            raf.readFully(trailer);
            crc32 = readLittleEndian(trailer, 0, 4);
            uncompressedSize = readBigLittleEndian(trailer, 4, 8);
            memberSize = readBigLittleEndian(trailer, 12, 8);

            if (!memberSize.equals(BigInteger.valueOf(fileSize))) {
                throw new IOException("Multimember files not supported or invalid file.");
            }
        }
    }

    /** Prints the decoded header and trailer fields to standard output. */
    public void printProperties() {
        System.out.println("Magic Number: " + magic);
        System.out.println("Version Number: " + version);
        System.out.println("Dictionary Size: " + dictSize + " bytes");
        // Zero-pad so the checksum is always shown as eight hex digits.
        System.out.println("CRC32: 0x" + String.format("%08X", crc32));
        System.out.println("Uncompressed Data Size: " + uncompressedSize + " bytes");
        System.out.println("Member Size: " + memberSize + " bytes");
    }

    /**
     * Decompresses the member and compares CRC32 and size with the trailer.
     * Must be called after {@link #readAndDecode()}.
     *
     * @throws IOException if decompression fails or the values do not match
     */
    public void verifyDecompress() throws IOException {
        byte[] uncompressed = decompress();
        CRC32 crc = new CRC32();
        crc.update(uncompressed);
        long computedCrc = crc.getValue();
        // Compare as BigInteger: the stored size is an unsigned 64-bit value.
        boolean sizeMatches = BigInteger.valueOf(uncompressed.length).equals(uncompressedSize);
        if (computedCrc != crc32 || !sizeMatches) {
            throw new IOException("Verification failed: CRC or size mismatch.");
        }
        System.out.println("Decompression verification successful.");
    }

    /** Decodes the raw LZMA payload that follows the 6-byte header. */
    private byte[] decompress() throws IOException {
        if (dictSize < MIN_DICT_SIZE || dictSize > MAX_DICT_SIZE) {
            throw new IOException("Invalid dictionary size in header: " + dictSize);
        }
        try (InputStream in = new BufferedInputStream(new FileInputStream(filename))) {
            // InputStream.skip may skip fewer bytes than asked, so loop.
            long remaining = HEADER_SIZE;
            while (remaining > 0) {
                long skipped = in.skip(remaining);
                if (skipped <= 0) {
                    throw new EOFException("Unexpected end of file while skipping header.");
                }
                remaining -= skipped;
            }
            // Unknown uncompressed size (-1): decoding stops at the end marker,
            // so the trailer that follows is never interpreted as LZMA data.
            LZMAInputStream lzmaIn = new LZMAInputStream(in, -1, LC, LP, PB, (int) dictSize);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int len;
            while ((len = lzmaIn.read(buffer)) != -1) {
                baos.write(buffer, 0, len);
            }
            return baos.toByteArray();
        }
    }

    /**
     * Compresses {@code inputData} into a new single-member .lz file.
     *
     * @param outputFilename path of the file to create (overwritten)
     * @param inputData      uncompressed data
     * @param dictSize       requested dictionary size, 4 KiB to 512 MiB; a
     *                       size that cannot be coded exactly is recorded as
     *                       the next larger representable size
     * @throws IllegalArgumentException if {@code dictSize} is out of range
     * @throws IOException if compression or writing fails
     */
    public static void writeNewFile(String outputFilename, byte[] inputData, long dictSize) throws IOException {
        if (dictSize < MIN_DICT_SIZE || dictSize > MAX_DICT_SIZE) {
            throw new IllegalArgumentException("Dictionary size must be between 4 KiB and 512 MiB.");
        }

        // Raw LZMA stream with an end marker, as required by the lzip format.
        LZMA2Options options = new LZMA2Options();
        options.setDictSize((int) dictSize);
        options.setLcLp(LC, LP);
        options.setPb(PB);
        ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
        try (LZMAOutputStream lzmaOut = new LZMAOutputStream(compressedStream, options, true)) {
            lzmaOut.write(inputData);
        }
        byte[] compressed = compressedStream.toByteArray();

        CRC32 crc = new CRC32();
        crc.update(inputData);
        long crcValue = crc.getValue();
        long uncompressedSize = inputData.length;
        long memberSize = (long) HEADER_SIZE + compressed.length + TRAILER_SIZE;

        int ds = encodeDictSize(dictSize);

        try (FileOutputStream fos = new FileOutputStream(outputFilename)) {
            fos.write("LZIP".getBytes(java.nio.charset.StandardCharsets.US_ASCII));
            fos.write(1);
            fos.write(ds);
            fos.write(compressed);
            writeLittleEndian(fos, crcValue, 4);
            writeLittleEndian(fos, uncompressedSize, 8);
            writeLittleEndian(fos, memberSize, 8);
        }
    }

    /** Bits 4-0: base-2 exponent; bits 7-5: sixteenths of the base to subtract. */
    private static long decodeDictSize(int ds) {
        long baseSize = 1L << (ds & 0x1F);
        return baseSize - (baseSize >> 4) * (ds >> 5);
    }

    /**
     * Returns the coded byte for the smallest representable size that is not
     * below {@code dictSize}. The exponent is rounded up so the base size is
     * never smaller than the request.
     */
    private static int encodeDictSize(long dictSize) {
        int exponent = 64 - Long.numberOfLeadingZeros(dictSize - 1);  // ceil(log2)
        long baseSize = 1L << exponent;
        int numerator = (int) ((baseSize - dictSize) / (baseSize >> 4));  // 0..7
        return (numerator << 5) | exponent;
    }

    /** Reads an unsigned little-endian integer of up to 7 bytes into a long. */
    private static long readLittleEndian(byte[] buf, int offset, int bytes) {
        long val = 0;
        for (int i = 0; i < bytes; i++) {
            val |= (buf[offset + i] & 0xFFL) << (i * 8);
        }
        return val;
    }

    /** Reads an unsigned little-endian integer of any width as a BigInteger. */
    private static BigInteger readBigLittleEndian(byte[] buf, int offset, int bytes) {
        // BigInteger expects big-endian magnitude bytes, so reverse the order.
        byte[] rev = new byte[bytes];
        for (int i = 0; i < bytes; i++) {
            rev[i] = buf[offset + (bytes - 1 - i)];
        }
        return new BigInteger(1, rev);
    }

    /** Writes the low {@code bytes} bytes of {@code val}, least significant first. */
    private static void writeLittleEndian(OutputStream os, long val, int bytes) throws IOException {
        for (int i = 0; i < bytes; i++) {
            os.write((int) (val & 0xFF));
            val >>>= 8;
        }
    }
}

Example usage:

// Inspect an existing archive: parse it, print its fields, then confirm the
// payload decompresses to what the trailer records.
final LZFile archive = new LZFile("example.lz");
archive.readAndDecode();
archive.printProperties();
archive.verifyDecompress();
// Creating an archive instead:
//   LZFile.writeNewFile("new.lz", "Hello world data".getBytes(), 1L << 23);
  1. Here is a JavaScript class for handling .LZ files. It can open a file (via File object), decode/parse the properties (assuming single-member), and print them to console. For browser use, pass a File from input or drop. Full decompress requires an LZMA library like lzma.js (not included here).
/**
 * Parser for single-member lzip (.lz) files.
 *
 * A member is a 6-byte header (4-byte magic, version byte, coded dictionary
 * size byte), the compressed payload, and a 20-byte little-endian trailer
 * (CRC32, uncompressed size, member size).
 */
class LZFile {
    /**
     * @param {Blob} file - File/Blob to inspect; read lazily by readAndDecode().
     */
    constructor(file) {
        this.file = file;
        this.magic = null;
        this.version = null;
        this.dictSize = null;
        this.crc32 = null;
        this.uncompressedSize = null;
        this.memberSize = null;
    }

    /**
     * Reads the file and fills in the header/trailer properties.
     * @throws {Error} if the file is too small, lacks the "LZIP" magic, or
     *   its recorded member size differs from the file size.
     */
    async readAndDecode() {
        const buffer = await this.file.arrayBuffer();
        this._decode(new Uint8Array(buffer));
    }

    /**
     * Parses header and trailer from the raw bytes of a complete file.
     * @param {Uint8Array} data - entire file contents.
     */
    _decode(data) {
        const fileSize = data.length;

        if (fileSize < 26) {
            throw new Error('File too small to be a valid .LZ file.');
        }

        // Parse header
        const magic = String.fromCharCode(data[0], data[1], data[2], data[3]);
        if (magic !== 'LZIP') {
            throw new Error('Bad magic number: not a .LZ file.');
        }
        this.magic = magic;
        this.version = data[4];
        const ds = data[5];
        const exponent = ds & 0x1F;
        const numerator = ds >> 5;
        // Use ** instead of <<: a 32-bit shift turns 1 << 31 into a negative number.
        const baseSize = 2 ** exponent;
        const fraction = Math.floor(baseSize / 16);
        this.dictSize = baseSize - fraction * numerator;

        // Parse trailer. DataView yields unsigned values directly, whereas
        // combining bytes with | gives a signed 32-bit result.
        const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
        const trailerOffset = fileSize - 20;
        this.crc32 = view.getUint32(trailerOffset, true);
        this.uncompressedSize = view.getBigUint64(trailerOffset + 4, true);
        this.memberSize = view.getBigUint64(trailerOffset + 12, true);

        // Compare as BigInt so large 64-bit values are not rounded by Number().
        if (this.memberSize !== BigInt(fileSize)) {
            throw new Error('Multimember files not supported or invalid file.');
        }
    }

    /** Logs the decoded properties to the console. */
    printProperties() {
        console.log(`Magic Number: ${this.magic}`);
        console.log(`Version Number: ${this.version}`);
        console.log(`Dictionary Size: ${this.dictSize} bytes`);
        console.log(`CRC32: 0x${this.crc32.toString(16).toUpperCase().padStart(8, '0')}`);
        console.log(`Uncompressed Data Size: ${this.uncompressedSize} bytes`);
        console.log(`Member Size: ${this.memberSize} bytes`);
    }

    // For full decompress/verify, use an external LZMA library like lzma.js
    // Example stub:
    // async verifyDecompress() {
    //     // Implement with LZMA.decompress(compressedData, options)
    // }

    // For write, use LZMA library to compress
    // static async writeNewFile(inputData, dictSize = 1 << 23) {
    //     // Implement compression, header, trailer
    // }
}

// Example usage (in browser):
// const input = document.getElementById('fileInput');
// input.addEventListener('change', async (e) => {
//     const file = e.target.files[0];
//     const lz = new LZFile(file);
//     await lz.readAndDecode();
//     lz.printProperties();
// });
  1. Here is a C "class" (using struct and functions, as C has no classes; for C++ you could use class). It can open a file, decode/parse the properties (assuming single-member), and print them to console. For full read/write, it would require an LZMA library like lzlib (not included).
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>  // For CRC32

// Decoded properties of a single-member .lz file, plus the path they came from.
typedef struct {
    char *filename;              // heap copy of the path, owned by this struct
    char magic[5];               // 4 magic bytes followed by a NUL terminator
    uint8_t version;             // header byte 4
    uint64_t dict_size;          // dictionary size decoded from header byte 5
    uint32_t crc32;              // trailer bytes 0-3, little-endian
    uint64_t uncompressed_size;  // trailer bytes 4-11, little-endian
    uint64_t member_size;        // trailer bytes 12-19, little-endian
} LZFile;

// Allocates a zero-initialised LZFile holding a private copy of `filename`.
// Returns NULL if `filename` is NULL or memory cannot be allocated.
// Release the result with lzfile_free().
LZFile* lzfile_new(const char *filename) {
    if (!filename) return NULL;

    // calloc so every field has a defined value before the file is decoded.
    LZFile *lz = calloc(1, sizeof(LZFile));
    if (!lz) return NULL;

    lz->filename = strdup(filename);
    if (!lz->filename) {
        free(lz);
        return NULL;
    }
    return lz;
}

// Releases an LZFile created by lzfile_new(). Passing NULL is a no-op,
// mirroring the behaviour of free().
void lzfile_free(LZFile *lz) {
    if (!lz) return;
    free(lz->filename);
    free(lz);
}

// Reads the 6-byte header and 20-byte trailer of lz->filename and stores the
// decoded properties in `lz`.
//
// Returns:
//    0  success
//   -1  `lz` is invalid, or the file could not be opened, seeked or read
//   -2  file is smaller than header + trailer (26 bytes)
//   -3  trailer member size differs from the file size (multimember/invalid)
//   -4  file does not start with the "LZIP" magic
int lzfile_read_and_decode(LZFile *lz) {
    if (!lz || !lz->filename) return -1;

    FILE *f = fopen(lz->filename, "rb");
    if (!f) return -1;

    if (fseek(f, 0, SEEK_END) != 0) {
        fclose(f);
        return -1;
    }
    long file_size = ftell(f);
    if (file_size < 0) {
        fclose(f);
        return -1;
    }
    if (file_size < 26) {
        fclose(f);
        return -2;
    }

    uint8_t header[6];
    uint8_t trailer[20];
    // Any short read or failed seek would leave the buffers partly
    // uninitialised, so treat it as an I/O error instead of decoding garbage.
    if (fseek(f, 0, SEEK_SET) != 0 ||
        fread(header, 1, sizeof header, f) != sizeof header ||
        fseek(f, file_size - (long)sizeof trailer, SEEK_SET) != 0 ||
        fread(trailer, 1, sizeof trailer, f) != sizeof trailer) {
        fclose(f);
        return -1;
    }
    fclose(f);

    if (memcmp(header, "LZIP", 4) != 0) return -4;
    memcpy(lz->magic, header, 4);
    lz->magic[4] = '\0';
    lz->version = header[4];

    // Bits 4-0: base-2 exponent; bits 7-5: sixteenths of the base to subtract.
    uint8_t ds = header[5];
    uint8_t exponent = ds & 0x1F;
    uint8_t numerator = ds >> 5;
    uint64_t base_size = (uint64_t)1 << exponent;
    lz->dict_size = base_size - (base_size >> 4) * numerator;

    // Cast before shifting: a uint8_t promotes to int, and shifting a value
    // >= 0x80 left by 24 would overflow the signed type.
    lz->crc32 = (uint32_t)trailer[0] | ((uint32_t)trailer[1] << 8) |
                ((uint32_t)trailer[2] << 16) | ((uint32_t)trailer[3] << 24);

    // Assemble the two little-endian 64-bit fields, most significant byte first.
    uint64_t uncompressed_size = 0;
    uint64_t member_size = 0;
    for (int i = 7; i >= 0; i--) {
        uncompressed_size = (uncompressed_size << 8) | trailer[4 + i];
        member_size = (member_size << 8) | trailer[12 + i];
    }
    lz->uncompressed_size = uncompressed_size;
    lz->member_size = member_size;

    if (lz->member_size != (uint64_t)file_size) {
        return -3;  // Multimember not supported
    }
    return 0;
}

// Prints the decoded properties of `lz` to stdout, one per line.
// Uses the <inttypes.h> format macros because uint64_t is not `unsigned long`
// on every platform, which makes "%lu" undefined behaviour there.
void lzfile_print_properties(LZFile *lz) {
    printf("Magic Number: %s\n", lz->magic);
    printf("Version Number: %u\n", (unsigned)lz->version);
    printf("Dictionary Size: %" PRIu64 " bytes\n", lz->dict_size);
    printf("CRC32: 0x%08" PRIX32 "\n", lz->crc32);
    printf("Uncompressed Data Size: %" PRIu64 " bytes\n", lz->uncompressed_size);
    printf("Member Size: %" PRIu64 " bytes\n", lz->member_size);
}

// For full decompress/verify or write, integrate lzlib or similar library
// Example stub for verify:
// int lzfile_verify_decompress(LZFile *lz) {
//     // Open file, skip header, decompress with lzlib, compute CRC and size, compare
// }

// Example driver: decodes "example.lz" and prints its properties.
// Exits with a non-zero status and a message on stderr if anything fails,
// instead of silently printing nothing.
int main(void) {
    LZFile *lz = lzfile_new("example.lz");
    if (!lz) {
        fprintf(stderr, "Out of memory.\n");
        return EXIT_FAILURE;
    }

    int status = lzfile_read_and_decode(lz);
    if (status == 0) {
        lzfile_print_properties(lz);
    } else {
        fprintf(stderr, "Failed to read example.lz (error %d).\n", status);
    }

    lzfile_free(lz);
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}