Task 365: .LZ File Format
Task 365: .LZ File Format
The properties intrinsic to the .LZ (lzip) file format, i.e. the fields stored in each member's 6-byte header and 20-byte trailer, are:
- Magic Number: The 4-byte ASCII string "LZIP" (hex: 0x4C 0x5A 0x49 0x50).
- Version Number: A 1-byte value, currently 1 (version 0 is obsolete and uses a different dictionary size encoding).
- Coded Dictionary Size: A 1-byte value encoding the LZMA dictionary size (ranging from 4 KiB to 512 MiB). The lower 5 bits represent the base exponent (12 to 29 for 2^12 to 2^29), and the upper 3 bits represent a numerator (0 to 7) for subtraction: dictionary_size = 2^exponent - (numerator * 2^(exponent - 4)).
- CRC32: A 4-byte unsigned integer (little-endian) representing the CRC32 checksum of the uncompressed data.
- Uncompressed Data Size: An 8-byte unsigned integer (little-endian) representing the size of the uncompressed data (0 to 2^64 - 1 bytes).
- Member Size: An 8-byte unsigned integer (little-endian) representing the total size of the member (6-byte header + compressed data + 20-byte trailer), so it is always larger than 26 bytes and at most 2^64 - 1 bytes.
Two direct download links for .LZ files:
- https://download.savannah.gnu.org/releases/lzip/lzip-1.24.tar.lz
- https://download.savannah.gnu.org/releases/lzip/clzip-1.14.tar.lz
Here is the HTML with embedded JavaScript that can be embedded in a Ghost blog post (or any HTML page). It creates a drag-and-drop area for a .LZ file and dumps the properties to the screen, assuming a single-member file for simplicity:
Drag and drop a .LZ file here
- Here is a Python class for handling .LZ files. It can open a file, decode/parse the properties (assuming a single-member file for simplicity), and print them to the console. For full read/write (compress/decompress), it uses the built-in `lzma` module to verify and create files.
import lzma
import struct
import zlib
import os
class LZFile:
    """Reader/writer for single-member lzip (.lz) files.

    A member is laid out as a 6-byte header (magic ``LZIP``, version byte,
    coded dictionary size byte), a raw LZMA stream, and a 20-byte trailer
    (CRC32, uncompressed size, member size; all little-endian).

    Attributes are ``None`` until :meth:`read_and_decode` has been called.
    """

    MAGIC = b'LZIP'
    HEADER_SIZE = 6
    TRAILER_SIZE = 20
    MIN_DICT_SIZE = 1 << 12  # 4 KiB
    MAX_DICT_SIZE = 1 << 29  # 512 MiB

    def __init__(self, filename):
        """Remember *filename*; no I/O happens until a method is called."""
        self.filename = filename
        self.magic = None
        self.version = None
        self.dict_size = None
        self.crc32 = None
        self.uncompressed_size = None
        self.member_size = None

    @staticmethod
    def _decode_dict_size(ds):
        """Return the dictionary size in bytes encoded by header byte *ds*."""
        base_size = 1 << (ds & 0x1F)
        # Upper three bits: how many sixteenths of the base size to subtract.
        return base_size - (base_size >> 4) * (ds >> 5)

    @staticmethod
    def _encode_dict_size(dict_size):
        """Return the header byte for the smallest coded size >= *dict_size*.

        Raises:
            ValueError: if *dict_size* is outside 4 KiB .. 512 MiB.
        """
        if not LZFile.MIN_DICT_SIZE <= dict_size <= LZFile.MAX_DICT_SIZE:
            raise ValueError(
                "Dictionary size must be between 4 KiB and 512 MiB.")
        # The base is the smallest power of two that is >= dict_size; the
        # format can only subtract from the base, never add to it.
        exponent = (dict_size - 1).bit_length()
        base_size = 1 << exponent
        numerator = 0
        if dict_size > LZFile.MIN_DICT_SIZE:
            # Largest fraction that still leaves at least dict_size bytes.
            numerator = min(7, (base_size - dict_size) // (base_size >> 4))
        return (numerator << 5) | exponent

    def read_and_decode(self):
        """Parse the header and trailer of ``self.filename``.

        Only the first 6 and last 20 bytes are read.

        Raises:
            ValueError: if the file is shorter than header + trailer, does
                not start with the ``LZIP`` magic, or its stored member size
                differs from the file size (multimember or damaged file).
        """
        min_size = self.HEADER_SIZE + self.TRAILER_SIZE
        with open(self.filename, 'rb') as f:
            file_size = f.seek(0, os.SEEK_END)
            if file_size < min_size:
                raise ValueError("File too small to be a valid .LZ file.")
            f.seek(0)
            header = f.read(self.HEADER_SIZE)
            f.seek(file_size - self.TRAILER_SIZE)
            trailer = f.read(self.TRAILER_SIZE)

        if header[:4] != self.MAGIC:
            raise ValueError("Not a .LZ file: bad magic number.")

        # Parse header
        self.magic = header[:4].decode('ascii')
        self.version = header[4]
        self.dict_size = self._decode_dict_size(header[5])

        # Parse trailer
        (self.crc32,
         self.uncompressed_size,
         self.member_size) = struct.unpack('<IQQ', trailer)
        if self.member_size != file_size:
            raise ValueError("Multimember files not supported or invalid file.")

    def print_properties(self):
        """Print the decoded header and trailer fields to stdout."""
        print(f"Magic Number: {self.magic}")
        print(f"Version Number: {self.version}")
        print(f"Dictionary Size: {self.dict_size} bytes")
        print(f"CRC32: 0x{self.crc32:08X}")
        print(f"Uncompressed Data Size: {self.uncompressed_size} bytes")
        print(f"Member Size: {self.member_size} bytes")

    def verify_decompress(self):
        """Decompress the payload and compare it with the trailer fields.

        Parses the file first if :meth:`read_and_decode` has not run yet.

        Raises:
            ValueError: if the CRC32 or the size of the decompressed data
                does not match the values stored in the trailer.
        """
        if self.member_size is None:
            self.read_and_decode()
        overhead = self.HEADER_SIZE + self.TRAILER_SIZE
        with open(self.filename, 'rb') as f:
            f.seek(self.HEADER_SIZE)  # Skip header
            compressed = f.read(self.member_size - overhead)
        # The LZMA stream is decoded with fixed lc=3, lp=0, pb=2.
        filters = [{'id': lzma.FILTER_LZMA1, 'dict_size': self.dict_size,
                    'lc': 3, 'lp': 0, 'pb': 2}]
        decompressor = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=filters)
        uncompressed = decompressor.decompress(compressed)
        computed_crc = zlib.crc32(uncompressed) & 0xFFFFFFFF
        computed_size = len(uncompressed)
        if (computed_crc != self.crc32
                or computed_size != self.uncompressed_size):
            raise ValueError("Verification failed: CRC or size mismatch.")
        print("Decompression verification successful.")

    @staticmethod
    def write_new_file(output_filename, input_data, dict_size=1 << 23):
        """Compress *input_data* into a new single-member .lz file.

        Args:
            output_filename: path of the file to create or overwrite.
            input_data: bytes to compress.
            dict_size: LZMA dictionary size in bytes (default 8 MiB). Sizes
                the header cannot express exactly are advertised as the next
                larger representable size, which a decoder can always use.

        Raises:
            ValueError: if *dict_size* is outside 4 KiB .. 512 MiB.
        """
        # Validate and encode first so nothing is written for a bad size.
        ds = LZFile._encode_dict_size(dict_size)
        filters = [{'id': lzma.FILTER_LZMA1, 'dict_size': dict_size,
                    'lc': 3, 'lp': 0, 'pb': 2}]
        compressor = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=filters)
        compressed = compressor.compress(input_data) + compressor.flush()
        crc32 = zlib.crc32(input_data) & 0xFFFFFFFF
        uncompressed_size = len(input_data)
        member_size = (LZFile.HEADER_SIZE + len(compressed)
                       + LZFile.TRAILER_SIZE)
        header = LZFile.MAGIC + struct.pack('<BB', 1, ds)
        trailer = struct.pack('<IQQ', crc32, uncompressed_size, member_size)
        with open(output_filename, 'wb') as f:
            f.write(header)
            f.write(compressed)
            f.write(trailer)
Example usage:
# Inspect an existing single-member file: read_and_decode() fills in the
# header/trailer fields, print_properties() shows them, and
# verify_decompress() decompresses the payload and checks the stored CRC32
# and uncompressed size against it.
lz = LZFile('example.lz')
lz.read_and_decode()
lz.print_properties()
lz.verify_decompress()
# To write: LZFile.write_new_file('new.lz', b'Hello world data')
- Here is a Java class for handling .LZ files. It can open a file, decode/parse the properties (assuming single-member), and print them to console. For full read/write, it assumes the XZ for Java library (org.tukaani.xz) is available for compress/decompress.
import java.io.*;
import java.math.BigInteger;
import java.util.zip.CRC32;
import org.tukaani.xz.*; // Assume XZ for Java library is imported
/**
 * Reader/writer for single-member lzip (.lz) files.
 *
 * <p>A member consists of a 6-byte header (magic "LZIP", version byte, coded
 * dictionary size byte), a raw LZMA stream and a 20-byte trailer (CRC32,
 * uncompressed size, member size; all little-endian). Compression and
 * decompression are delegated to XZ for Java (org.tukaani.xz).
 */
public class LZFile {
    private static final String MAGIC = "LZIP";
    private static final int HEADER_SIZE = 6;
    private static final int TRAILER_SIZE = 20;
    private static final long MIN_DICT_SIZE = 1L << 12; // 4 KiB
    private static final long MAX_DICT_SIZE = 1L << 29; // 512 MiB
    // LZMA literal-context, literal-position and position bits used for the stream.
    private static final int LC = 3;
    private static final int LP = 0;
    private static final int PB = 2;

    private String filename;
    private String magic;
    private int version;
    private long dictSize;
    private long crc32;
    private BigInteger uncompressedSize;
    private BigInteger memberSize;

    /** Remembers the path; no I/O happens until a method is called. */
    public LZFile(String filename) {
        this.filename = filename;
    }

    /**
     * Parses the header and trailer of the file.
     *
     * @throws IOException if the file is shorter than header + trailer, does not
     *         start with the "LZIP" magic, cannot be read, or its stored member
     *         size differs from the file size (multimember or damaged file)
     */
    public void readAndDecode() throws IOException {
        File file = new File(filename);
        long fileSize = file.length();
        if (fileSize < HEADER_SIZE + TRAILER_SIZE) {
            throw new IOException("File too small to be a valid .LZ file.");
        }
        byte[] header = new byte[HEADER_SIZE];
        byte[] trailer = new byte[TRAILER_SIZE];
        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
            raf.readFully(header);
            raf.seek(fileSize - TRAILER_SIZE);
            raf.readFully(trailer);
        }

        // Decode with a fixed single-byte charset so the result does not
        // depend on the platform default encoding.
        String headerMagic = new String(header, 0, 4, java.nio.charset.StandardCharsets.ISO_8859_1);
        if (!MAGIC.equals(headerMagic)) {
            throw new IOException("Not a .LZ file: bad magic number.");
        }
        magic = headerMagic;
        version = header[4] & 0xFF;

        // Lower 5 bits: log2 of the base size; upper 3 bits: sixteenths to subtract.
        int ds = header[5] & 0xFF;
        int exponent = ds & 0x1F;
        int numerator = ds >> 5;
        long baseSize = 1L << exponent;
        dictSize = baseSize - (baseSize >> 4) * numerator;

        crc32 = readLittleEndian(trailer, 0, 4);
        uncompressedSize = readBigLittleEndian(trailer, 4, 8);
        memberSize = readBigLittleEndian(trailer, 12, 8);
        if (!memberSize.equals(BigInteger.valueOf(fileSize))) {
            throw new IOException("Multimember files not supported or invalid file.");
        }
    }

    /** Prints the decoded header and trailer fields to standard output. */
    public void printProperties() {
        System.out.println("Magic Number: " + magic);
        System.out.println("Version Number: " + version);
        System.out.println("Dictionary Size: " + dictSize + " bytes");
        System.out.println("CRC32: 0x" + String.format("%08X", crc32));
        System.out.println("Uncompressed Data Size: " + uncompressedSize + " bytes");
        System.out.println("Member Size: " + memberSize + " bytes");
    }

    /**
     * Decompresses the payload and compares its CRC32 and length with the
     * trailer. Parses the file first if {@link #readAndDecode()} has not run.
     *
     * @throws IOException on read errors or if the CRC32 or size do not match
     */
    public void verifyDecompress() throws IOException {
        if (memberSize == null) {
            readAndDecode();
        }
        byte[] uncompressed = decompress();
        CRC32 crc = new CRC32();
        crc.update(uncompressed);
        long computedCrc = crc.getValue();
        BigInteger computedSize = BigInteger.valueOf(uncompressed.length);
        if (computedCrc != crc32 || !computedSize.equals(uncompressedSize)) {
            throw new IOException("Verification failed: CRC or size mismatch.");
        }
        System.out.println("Decompression verification successful.");
    }

    /** Decodes the raw LZMA stream that follows the header and returns the data. */
    private byte[] decompress() throws IOException {
        if (dictSize < MIN_DICT_SIZE || dictSize > MAX_DICT_SIZE) {
            throw new IOException("Unsupported dictionary size: " + dictSize);
        }
        try (DataInputStream in = new DataInputStream(
                new BufferedInputStream(new FileInputStream(filename)))) {
            // readFully guarantees the whole header is consumed; skip() may skip less.
            in.readFully(new byte[HEADER_SIZE]);
            // Uncompressed size -1: the stream is terminated by an end marker.
            LZMAInputStream lzmaIn = new LZMAInputStream(in, -1, LC, LP, PB, (int) dictSize, null);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int len;
            while ((len = lzmaIn.read(buffer)) != -1) {
                baos.write(buffer, 0, len);
            }
            return baos.toByteArray();
        }
    }

    /**
     * Compresses {@code inputData} into a new single-member .lz file.
     *
     * @param outputFilename path of the file to create or overwrite
     * @param inputData bytes to compress
     * @param dictSize LZMA dictionary size in bytes (4 KiB to 512 MiB); sizes the
     *        header cannot express exactly are advertised as the next larger
     *        representable size
     * @throws IllegalArgumentException if {@code dictSize} is out of range
     * @throws IOException if compression or writing fails
     */
    public static void writeNewFile(String outputFilename, byte[] inputData, long dictSize) throws IOException {
        int ds = encodeDictSize(dictSize);

        LZMA2Options options = new LZMA2Options();
        options.setDictSize((int) dictSize);
        options.setLcLp(LC, LP);
        options.setPb(PB);

        ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
        // Raw LZMA stream (no .lzma header) terminated by an end marker.
        try (LZMAOutputStream lzmaOut = new LZMAOutputStream(compressedStream, options, true)) {
            lzmaOut.write(inputData);
        }
        byte[] compressed = compressedStream.toByteArray();

        CRC32 crc = new CRC32();
        crc.update(inputData);
        long crcValue = crc.getValue();
        long uncompressedSize = inputData.length;
        long memberSize = (long) HEADER_SIZE + compressed.length + TRAILER_SIZE;

        try (FileOutputStream fos = new FileOutputStream(outputFilename)) {
            fos.write(MAGIC.getBytes(java.nio.charset.StandardCharsets.US_ASCII));
            fos.write(1);
            fos.write(ds);
            fos.write(compressed);
            writeLittleEndian(fos, crcValue, 4);
            writeLittleEndian(fos, uncompressedSize, 8);
            writeLittleEndian(fos, memberSize, 8);
        }
    }

    /** Returns the header byte for the smallest coded dictionary size >= dictSize. */
    private static int encodeDictSize(long dictSize) {
        if (dictSize < MIN_DICT_SIZE || dictSize > MAX_DICT_SIZE) {
            throw new IllegalArgumentException("Dictionary size must be between 4 KiB and 512 MiB.");
        }
        // Smallest power of two >= dictSize: the format only subtracts from the base.
        int exponent = 64 - Long.numberOfLeadingZeros(dictSize - 1);
        long baseSize = 1L << exponent;
        int numerator = 0;
        if (dictSize > MIN_DICT_SIZE) {
            numerator = (int) Math.min(7L, (baseSize - dictSize) / (baseSize >> 4));
        }
        return (numerator << 5) | exponent;
    }

    /** Reads an unsigned little-endian integer of up to 7 bytes into a long. */
    private static long readLittleEndian(byte[] buf, int offset, int bytes) {
        long val = 0;
        for (int i = 0; i < bytes; i++) {
            val |= (buf[offset + i] & 0xFFL) << (i * 8);
        }
        return val;
    }

    /** Reads an unsigned little-endian integer of any width as a BigInteger. */
    private static BigInteger readBigLittleEndian(byte[] buf, int offset, int bytes) {
        // BigInteger expects big-endian magnitude bytes, so reverse the field.
        byte[] rev = new byte[bytes];
        for (int i = 0; i < bytes; i++) {
            rev[i] = buf[offset + (bytes - 1 - i)];
        }
        return new BigInteger(1, rev);
    }

    /** Writes the low {@code bytes} bytes of {@code val}, least significant first. */
    private static void writeLittleEndian(OutputStream os, long val, int bytes) throws IOException {
        for (int i = 0; i < bytes; i++) {
            os.write((int) (val & 0xFF));
            val >>>= 8;
        }
    }
}
Example usage:
// Inspect an existing single-member file. readAndDecode() and
// verifyDecompress() declare IOException, so run these statements inside a
// method that declares or handles it.
LZFile lz = new LZFile("example.lz");
lz.readAndDecode();
lz.printProperties();
lz.verifyDecompress();
// To write: LZFile.writeNewFile("new.lz", "Hello world data".getBytes(), 1L << 23);
- Here is a JavaScript class for handling .LZ files. It can open a file (via File object), decode/parse the properties (assuming single-member), and print them to console. For browser use, pass a File from input or drop. Full decompress requires an LZMA library like lzma.js (not included here).
/**
 * Parser for the header and trailer of a single-member lzip (.lz) file.
 *
 * The constructor takes any object exposing an async `arrayBuffer()` method,
 * such as a browser `File`. All properties are `null` until
 * `readAndDecode()` resolves.
 */
class LZFile {
  constructor(file) {
    this.file = file;
    this.magic = null;
    this.version = null;
    this.dictSize = null;
    this.crc32 = null;
    this.uncompressedSize = null;
    this.memberSize = null;
  }

  /**
   * Reads the file and decodes the 6-byte header and 20-byte trailer.
   *
   * After it resolves, `crc32` is an unsigned 32-bit Number and
   * `uncompressedSize` / `memberSize` are BigInts.
   *
   * @throws {Error} if the file is shorter than header + trailer, does not
   *   start with "LZIP", or its stored member size differs from the file size.
   */
  async readAndDecode() {
    const buffer = await this.file.arrayBuffer();
    const data = new Uint8Array(buffer);
    const fileSize = data.length;
    if (fileSize < 26) {
      throw new Error('File too small to be a valid .LZ file.');
    }

    // Parse header
    const magic = String.fromCharCode(data[0], data[1], data[2], data[3]);
    if (magic !== 'LZIP') {
      throw new Error('Not a .LZ file: bad magic number.');
    }
    this.magic = magic;
    this.version = data[4];
    const ds = data[5];
    const exponent = ds & 0x1F;
    const numerator = ds >> 5;
    // 2 ** n instead of 1 << n: the shift wraps to a negative value at n = 31.
    const baseSize = 2 ** exponent;
    const subtract = Math.floor(baseSize / 16) * numerator;
    this.dictSize = baseSize - subtract;

    // Parse trailer: DataView reads are unsigned, unlike `|` and `<<`, which
    // produce signed 32-bit results and turned large CRCs negative.
    const trailerOffset = data.byteOffset + fileSize - 20;
    const trailer = new DataView(data.buffer, trailerOffset, 20);
    this.crc32 = trailer.getUint32(0, true);
    this.uncompressedSize = trailer.getBigUint64(4, true);
    this.memberSize = trailer.getBigUint64(12, true);

    // Compare as BigInt so sizes above 2^53 are not rounded before the check.
    if (this.memberSize !== BigInt(fileSize)) {
      throw new Error('Multimember files not supported or invalid file.');
    }
  }

  /** Logs the decoded header and trailer fields to the console. */
  printProperties() {
    const crcHex = this.crc32.toString(16).toUpperCase().padStart(8, '0');
    console.log(`Magic Number: ${this.magic}`);
    console.log(`Version Number: ${this.version}`);
    console.log(`Dictionary Size: ${this.dictSize} bytes`);
    console.log(`CRC32: 0x${crcHex}`);
    console.log(`Uncompressed Data Size: ${this.uncompressedSize} bytes`);
    console.log(`Member Size: ${this.memberSize} bytes`);
  }

  // For full decompress/verify, use an external LZMA library like lzma.js
  // Example stub:
  // async verifyDecompress() {
  //   // Implement with LZMA.decompress(compressedData, options)
  // }

  // For write, use LZMA library to compress
  // static async writeNewFile(inputData, dictSize = 1 << 23) {
  //   // Implement compression, header, trailer
  // }
}
// Example usage (in browser):
// const input = document.getElementById('fileInput');
// input.addEventListener('change', async (e) => {
// const file = e.target.files[0];
// const lz = new LZFile(file);
// await lz.readAndDecode();
// lz.printProperties();
// });
- Here is a C "class" (using struct and functions, as C has no classes; for C++ you could use class). It can open a file, decode/parse the properties (assuming single-member), and print them to console. For full read/write, it would require an LZMA library like lzlib (not included).
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <zlib.h> // For CRC32
/* Properties decoded from the header and trailer of an lzip (.lz) file.
 * Filled in by lzfile_read_and_decode(). */
typedef struct {
char *filename;             /* heap copy of the path (strdup in lzfile_new, freed in lzfile_free) */
char magic[5];              /* first 4 header bytes plus a terminating NUL */
uint8_t version;            /* header byte 4 */
uint64_t dict_size;         /* dictionary size in bytes, decoded from header byte 5 */
uint32_t crc32;             /* trailer bytes 0-3, little-endian */
uint64_t uncompressed_size; /* trailer bytes 4-11, little-endian */
uint64_t member_size;       /* trailer bytes 12-19, little-endian */
} LZFile;
/*
 * Allocate an LZFile for the given path.
 *
 * The path is copied, and every other field starts zeroed so the object is
 * safe to inspect before it has been decoded. Returns NULL if filename is
 * NULL or memory cannot be allocated. Release the result with lzfile_free().
 */
LZFile* lzfile_new(const char *filename) {
    LZFile *lz;

    if (!filename) {
        return NULL;
    }
    /* calloc zero-fills, so magic is an empty string and all sizes are 0. */
    lz = calloc(1, sizeof *lz);
    if (!lz) {
        return NULL;
    }
    lz->filename = strdup(filename);
    if (!lz->filename) {
        free(lz);
        return NULL;
    }
    return lz;
}
/*
 * Release an LZFile and the copy of the filename it owns.
 * Passing NULL is a no-op, mirroring free().
 */
void lzfile_free(LZFile *lz) {
    if (!lz) {
        return;
    }
    free(lz->filename);
    free(lz);
}
/*
 * Read the 6-byte header and 20-byte trailer of lz->filename and store the
 * decoded fields in *lz.
 *
 * Returns:
 *    0  success
 *   -1  lz is NULL or the file could not be opened
 *   -2  file is shorter than header + trailer (26 bytes)
 *   -3  stored member size differs from the file size (multimember or
 *       damaged file); the other fields have been filled in
 *   -4  seeking or reading failed
 *   -5  file does not start with the "LZIP" magic
 */
int lzfile_read_and_decode(LZFile *lz) {
    uint8_t header[6];
    uint8_t trailer[20];
    uint8_t ds, exponent, numerator;
    uint64_t base_size;
    long file_size;
    int io_ok;
    int i;
    FILE *f;

    if (!lz || !lz->filename) return -1;
    f = fopen(lz->filename, "rb");
    if (!f) return -1;

    if (fseek(f, 0, SEEK_END) != 0 || (file_size = ftell(f)) < 0) {
        fclose(f);
        return -4;
    }
    if (file_size < 26) {
        fclose(f);
        return -2;
    }

    io_ok = fseek(f, 0, SEEK_SET) == 0
         && fread(header, 1, sizeof header, f) == sizeof header
         && fseek(f, file_size - (long)sizeof trailer, SEEK_SET) == 0
         && fread(trailer, 1, sizeof trailer, f) == sizeof trailer;
    fclose(f);
    if (!io_ok) return -4;

    if (memcmp(header, "LZIP", 4) != 0) return -5;
    memcpy(lz->magic, header, 4);
    lz->magic[4] = '\0';
    lz->version = header[4];

    /* Lower 5 bits: log2 of the base size; upper 3 bits: sixteenths to subtract. */
    ds = header[5];
    exponent = ds & 0x1F;
    numerator = ds >> 5;
    base_size = (uint64_t)1 << exponent;
    lz->dict_size = base_size - (base_size >> 4) * numerator;

    /* Assemble the little-endian fields from the most significant byte down.
     * Accumulating in unsigned types avoids shifting a promoted int into its
     * sign bit, which trailer[3] << 24 did for bytes >= 0x80. */
    lz->crc32 = 0;
    for (i = 3; i >= 0; i--) {
        lz->crc32 = (lz->crc32 << 8) | trailer[i];
    }
    lz->uncompressed_size = 0;
    for (i = 11; i >= 4; i--) {
        lz->uncompressed_size = (lz->uncompressed_size << 8) | trailer[i];
    }
    lz->member_size = 0;
    for (i = 19; i >= 12; i--) {
        lz->member_size = (lz->member_size << 8) | trailer[i];
    }

    if (lz->member_size != (uint64_t)file_size) {
        return -3; /* Multimember not supported */
    }
    return 0;
}
/*
 * Print the decoded header and trailer fields of *lz to stdout.
 *
 * The fixed-width fields are cast to standard integer types before printing
 * because uint64_t is not unsigned long on every platform, so "%lu" was not
 * a portable match for it.
 */
void lzfile_print_properties(LZFile *lz) {
    printf("Magic Number: %s\n", lz->magic);
    printf("Version Number: %u\n", (unsigned int)lz->version);
    printf("Dictionary Size: %llu bytes\n", (unsigned long long)lz->dict_size);
    printf("CRC32: 0x%08lX\n", (unsigned long)lz->crc32);
    printf("Uncompressed Data Size: %llu bytes\n", (unsigned long long)lz->uncompressed_size);
    printf("Member Size: %llu bytes\n", (unsigned long long)lz->member_size);
}
// For full decompress/verify or write, integrate lzlib or similar library
// Example stub for verify:
// int lzfile_verify_decompress(LZFile *lz) {
// // Open file, skip header, decompress with lzlib, compute CRC and size, compare
// }
/*
 * Demo entry point: decode "example.lz" and print its properties.
 * Reports failures on stderr and returns a non-zero exit status for them
 * instead of exiting silently with success.
 */
int main(void) {
    int status;
    LZFile *lz = lzfile_new("example.lz");

    if (!lz) {
        fprintf(stderr, "example.lz: out of memory\n");
        return EXIT_FAILURE;
    }
    status = lzfile_read_and_decode(lz);
    if (status == 0) {
        lzfile_print_properties(lz);
    } else {
        fprintf(stderr, "example.lz: cannot decode file (error %d)\n", status);
    }
    lzfile_free(lz);
    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}