Task 180: .EMZ File Format

Task 180: .EMZ File Format

The .EMZ file format is a Windows Compressed Enhanced Metafile: an Enhanced Metafile (EMF) wrapped in a GZIP container, whose payload is compressed with the DEFLATE algorithm. It is primarily used by Microsoft applications such as Visio, Word, and PowerPoint to store vector graphics (and sometimes raster elements) in compressed form, reducing file size for storage and transfer. The container follows the GZIP specification (RFC 1952), and the uncompressed content is an EMF file as defined in the Microsoft Open Specifications [MS-EMF]. There are no .EMZ-specific structures beyond the GZIP wrapper; the format defines no additional headers or fields.

List of all the properties intrinsic to this file format (the GZIP header, payload, and trailer fields, in on-disk order; multi-byte integers are little-endian):

ID1: 1 byte, magic number, must be 0x1F.

ID2: 1 byte, magic number, must be 0x8B.

CM: 1 byte, compression method, must be 8 (Deflate).

FLG: 1 byte, flags, with bits indicating FTEXT (bit 0), FHCRC (bit 1), FEXTRA (bit 2), FNAME (bit 3), FCOMMENT (bit 4), reserved (bits 5-7 must be zero).

MTIME: 4 bytes, Unix time of last modification or compression start, 0 if no timestamp.

XFL: 1 byte, extra flags, values 0 (none), 2 (best compression, level 9), 4 (fastest compression, level 1).

OS: 1 byte, filesystem on which compression occurred.

XLEN: 0 or 2 bytes, size of extra field, present if FEXTRA flag is set.

Extra field: 0 or XLEN bytes, sequence of subfields with SI1 SI2 identifier and LEN value.

File name: variable length (0 bytes if absent), null-terminated, ISO 8859-1 encoded, present if FNAME flag is set.

Comment: variable length (0 bytes if absent), null-terminated, ISO 8859-1 encoded, present if FCOMMENT flag is set.

HCRC (CRC16): 0 or 2 bytes, two least significant bytes of CRC-32 of header, present if FHCRC flag is set.

Compressed data: variable length, the DEFLATE-compressed EMF payload.

CRC32: 4 bytes, CRC-32 of the uncompressed data.

ISIZE: 4 bytes, size of the uncompressed data modulo 2^32.

Two direct download links for files of format .EMZ:

  1. Ghost blog embeddable HTML/JavaScript for dragging and dropping a .EMZ file and dumping its properties to the screen:
EMZ File Properties Dumper
Drag and drop .EMZ file here

This HTML can be embedded in a Ghost blog post by pasting it into a custom HTML block or code injection.

  1. Python class for opening, decoding, reading, writing, and printing .EMZ properties:
import struct

class EMZParser:
    """Read the GZIP (RFC 1952) container fields of an .EMZ file.

    The whole file is loaded into ``self.data`` and the header/trailer fields
    are decoded into ``self.properties``. The DEFLATE payload itself is not
    decompressed; only its byte range (``self.compressed_start`` ..
    ``self.compressed_end``) and size are recorded.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the file is too short or a null-terminated field is
            missing its terminator.
    """

    # Sizes fixed by the GZIP member layout.
    _FIXED_HEADER_SIZE = 10  # ID1 ID2 CM FLG MTIME(4) XFL OS
    _TRAILER_SIZE = 8        # CRC32(4) ISIZE(4)

    def __init__(self, filename):
        """Load ``filename`` fully into memory and parse it immediately."""
        with open(filename, 'rb') as f:
            self.data = f.read()
        self.properties = {}
        self.parse()

    def _require(self, pos, count, what):
        """Raise ValueError unless ``count`` bytes are available at ``pos``."""
        if pos + count > len(self.data):
            raise ValueError(
                f"Truncated EMZ file: {what} needs {count} byte(s) at offset "
                f"{pos}, but the file is only {len(self.data)} byte(s) long"
            )

    def _read_cstring(self, pos, what):
        """Decode the null-terminated Latin-1 string starting at ``pos``.

        Returns:
            A ``(text, next_pos)`` tuple where ``next_pos`` is the offset just
            past the terminating NUL byte.
        """
        end = self.data.find(b'\x00', pos)
        if end < 0:
            raise ValueError(f"Truncated EMZ file: {what} is not null-terminated")
        return self.data[pos:end].decode('latin-1'), end + 1

    def parse(self):
        """Decode the header, optional fields and trailer from ``self.data``.

        ``self.properties`` is replaced only after the whole file has been
        validated, so a failed parse never leaves half-filled results behind.
        """
        data = self.data
        props = {}

        self._require(0, self._FIXED_HEADER_SIZE, 'fixed header')
        # '<' selects little-endian with no padding: 4*B + I + 2*B = 10 bytes.
        (props['ID1'], props['ID2'], props['CM'], props['FLG'],
         props['MTIME'], props['XFL'], props['OS']) = struct.unpack_from('<BBBBIBB', data, 0)
        pos = self._FIXED_HEADER_SIZE

        flg = props['FLG']

        if flg & 4:  # FEXTRA
            self._require(pos, 2, 'XLEN')
            xlen = struct.unpack_from('<H', data, pos)[0]
            pos += 2
            # Without this check the slice below would silently come up short.
            self._require(pos, xlen, 'extra field')
            props['Extra'] = data[pos:pos + xlen]
            pos += xlen
        else:
            props['Extra'] = None

        if flg & 8:  # FNAME
            props['Name'], pos = self._read_cstring(pos, 'file name')
        else:
            props['Name'] = None

        if flg & 16:  # FCOMMENT
            props['Comment'], pos = self._read_cstring(pos, 'comment')
        else:
            props['Comment'] = None

        if flg & 2:  # FHCRC
            self._require(pos, 2, 'HCRC')
            props['HCRC'] = struct.unpack_from('<H', data, pos)[0]
            pos += 2
        else:
            props['HCRC'] = None

        # The trailer must lie entirely after the header; otherwise the
        # compressed size would be negative and CRC32/ISIZE would be read
        # from header bytes.
        self._require(pos, self._TRAILER_SIZE, 'CRC32/ISIZE trailer')

        self.compressed_start = pos
        self.compressed_end = len(data) - self._TRAILER_SIZE
        props['Compressed data size'] = self.compressed_end - self.compressed_start

        props['CRC32'], props['ISIZE'] = struct.unpack_from('<II', data, self.compressed_end)

        self.properties = props

    def print_properties(self):
        """Print every parsed field to stdout; absent optional fields are skipped."""
        props = self.properties
        print(f"ID1: 0x{props['ID1']:02X}")
        print(f"ID2: 0x{props['ID2']:02X}")
        print(f"CM: 0x{props['CM']:02X}")
        print(f"FLG: 0x{props['FLG']:02X}")
        print(f"MTIME: {props['MTIME']}")
        print(f"XFL: 0x{props['XFL']:02X}")
        print(f"OS: 0x{props['OS']:02X}")
        if props['Extra'] is not None:
            print(f"Extra: {props['Extra']}")
        if props['Name'] is not None:
            print(f"Name: {props['Name']}")
        if props['Comment'] is not None:
            print(f"Comment: {props['Comment']}")
        if props['HCRC'] is not None:
            print(f"HCRC: 0x{props['HCRC']:04X}")
        print(f"Compressed data size: {props['Compressed data size']} bytes")
        print(f"CRC32: 0x{props['CRC32']:08X}")
        print(f"ISIZE: {props['ISIZE']}")

    def write(self, filename):
        """Write the original, unmodified file bytes to ``filename``."""
        with open(filename, 'wb') as f:
            f.write(self.data)

# Example usage:
# parser = EMZParser('example.emz')
# parser.print_properties()
# parser.write('output.emz')
  1. Java class for opening, decoding, reading, writing, and printing .EMZ properties:
import java.io.*;
import java.nio.*;
import java.nio.file.*;

public class EMZParser {
    private byte[] data;
    private ByteBuffer bb;
    private byte id1, id2, cm, flg, xfl, os;
    private int mtime;
    private byte[] extra;
    private String name;
    private String comment;
    private Short hcrc;
    private int compressedSize;
    private int crc32;
    private int isize;

    public EMZParser(String filename) throws IOException {
        data = Files.readAllBytes(Paths.get(filename));
        bb = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
        parse();
    }

    private void parse() {
        int pos = 0;
        id1 = bb.get(pos++);
        id2 = bb.get(pos++);
        cm = bb.get(pos++);
        flg = bb.get(pos++);
        mtime = bb.getInt(pos); pos += 4;
        xfl = bb.get(pos++);
        os = bb.get(pos++);

        if ((flg & 4) != 0) { // FEXTRA
            short xlen = bb.getShort(pos); pos += 2;
            extra = new byte[xlen];
            bb.position(pos);
            bb.get(extra);
            pos += xlen;
        }

        if ((flg & 8) != 0) { // FNAME
            int start = pos;
            while (bb.get(pos) != 0) pos++;
            byte[] nameBytes = new byte[pos - start];
            bb.position(start);
            bb.get(nameBytes);
            name = new String(nameBytes, java.nio.charset.StandardCharsets.ISO_8859_1);
            pos++;
        }

        if ((flg & 16) != 0) { // FCOMMENT
            int start = pos;
            while (bb.get(pos) != 0) pos++;
            byte[] commentBytes = new byte[pos - start];
            bb.position(start);
            bb.get(commentBytes);
            comment = new String(commentBytes, java.nio.charset.StandardCharsets.ISO_8859_1);
            pos++;
        }

        if ((flg & 2) != 0) { // FHCRC
            hcrc = bb.getShort(pos); pos += 2;
        }

        compressedSize = data.length - pos - 8;

        bb.position(data.length - 8);
        crc32 = bb.getInt();
        isize = bb.getInt();
    }

    public void printProperties() {
        System.out.printf("ID1: 0x%02X%n", id1);
        System.out.printf("ID2: 0x%02X%n", id2);
        System.out.printf("CM: 0x%02X%n", cm);
        System.out.printf("FLG: 0x%02X%n", flg);
        System.out.printf("MTIME: %d%n", mtime);
        System.out.printf("XFL: 0x%02X%n", xfl);
        System.out.printf("OS: 0x%02X%n", os);
        if (extra != null) {
            System.out.print("Extra: ");
            for (byte b : extra) System.out.printf("%02X ", b);
            System.out.println();
        }
        if (name != null) System.out.printf("Name: %s%n", name);
        if (comment != null) System.out.printf("Comment: %s%n", comment);
        if (hcrc != null) System.out.printf("HCRC: 0x%04X%n", hcrc);
        System.out.printf("Compressed data size: %d bytes%n", compressedSize);
        System.out.printf("CRC32: 0x%08X%n", crc32);
        System.out.printf("ISIZE: %d%n", isize);
    }

    public void write(String filename) throws IOException {
        Files.write(Paths.get(filename), data);
    }

    // Example usage:
    // public static void main(String[] args) throws IOException {
    //     EMZParser parser = new EMZParser("example.emz");
    //     parser.printProperties();
    //     parser.write("output.emz");
    // }
}
  1. JavaScript class for opening, decoding, reading, writing, and printing .EMZ properties (Node.js version, requires 'fs' module):
const fs = require('fs');

/**
 * Reads the GZIP (RFC 1952) container fields of an .EMZ file.
 *
 * The whole file is loaded into `this.data` (a Buffer) and the decoded fields
 * are stored in `this.properties`. The DEFLATE payload is not decompressed;
 * only its size is recorded.
 */
class EMZParser {
  /**
   * @param {string} filename Path of the .EMZ file to load and parse.
   * @throws {RangeError} If the file ends before all announced fields are present.
   */
  constructor(filename) {
    this.data = fs.readFileSync(filename);
    // A Buffer may be a window into a larger (pooled) ArrayBuffer, so the view
    // must be restricted to the Buffer's own offset and length.
    this.view = new DataView(this.data.buffer, this.data.byteOffset, this.data.byteLength);
    this.properties = {};
    this.parse();
  }

  /** Decodes the fixed header, the optional fields selected by FLG, and the trailer. */
  parse() {
    const FIXED_HEADER_SIZE = 10; // ID1 ID2 CM FLG MTIME(4) XFL OS
    const TRAILER_SIZE = 8; // CRC32(4) ISIZE(4)
    const total = this.data.length;

    const requireBytes = (pos, count, what) => {
      if (pos + count > total) {
        throw new RangeError(
          `Truncated EMZ file: ${what} needs ${count} byte(s) at offset ${pos}, file length is ${total}`
        );
      }
    };

    // Returns [text, offsetAfterTerminator] for a NUL-terminated Latin-1 string.
    const readCString = (pos, what) => {
      const end = this.data.indexOf(0, pos);
      if (end < 0) {
        throw new RangeError(`Truncated EMZ file: ${what} is not null-terminated`);
      }
      return [this.data.subarray(pos, end).toString('latin1'), end + 1];
    };

    let pos = 0;
    requireBytes(pos, FIXED_HEADER_SIZE, 'fixed header');
    this.properties.ID1 = this.view.getUint8(pos++);
    this.properties.ID2 = this.view.getUint8(pos++);
    this.properties.CM = this.view.getUint8(pos++);
    this.properties.FLG = this.view.getUint8(pos++);
    this.properties.MTIME = this.view.getUint32(pos, true); pos += 4;
    this.properties.XFL = this.view.getUint8(pos++);
    this.properties.OS = this.view.getUint8(pos++);

    const flg = this.properties.FLG;

    if (flg & 4) { // FEXTRA
      requireBytes(pos, 2, 'XLEN');
      const xlen = this.view.getUint16(pos, true); pos += 2;
      requireBytes(pos, xlen, 'extra field');
      // new Uint8Array(typedArray) copies, so Extra does not alias this.data.
      this.properties.Extra = new Uint8Array(this.data.subarray(pos, pos + xlen));
      pos += xlen;
    } else {
      this.properties.Extra = null;
    }

    if (flg & 8) { // FNAME
      [this.properties.Name, pos] = readCString(pos, 'file name');
    } else {
      this.properties.Name = null;
    }

    if (flg & 16) { // FCOMMENT
      [this.properties.Comment, pos] = readCString(pos, 'comment');
    } else {
      this.properties.Comment = null;
    }

    if (flg & 2) { // FHCRC
      requireBytes(pos, 2, 'HCRC');
      this.properties.HCRC = this.view.getUint16(pos, true); pos += 2;
    } else {
      this.properties.HCRC = null;
    }

    // The trailer must start at or after the end of the header; otherwise the
    // compressed size would be negative.
    requireBytes(pos, TRAILER_SIZE, 'CRC32/ISIZE trailer');
    this.properties['Compressed data size'] = total - pos - TRAILER_SIZE;

    pos = total - TRAILER_SIZE;
    this.properties.CRC32 = this.view.getUint32(pos, true); pos += 4;
    this.properties.ISIZE = this.view.getUint32(pos, true);
  }

  /** Logs every parsed field to the console; absent optional fields are skipped. */
  printProperties() {
    const p = this.properties;
    const hex = (value, width) => value.toString(16).padStart(width, '0').toUpperCase();

    console.log(`ID1: 0x${hex(p.ID1, 2)}`);
    console.log(`ID2: 0x${hex(p.ID2, 2)}`);
    console.log(`CM: 0x${hex(p.CM, 2)}`);
    console.log(`FLG: 0x${hex(p.FLG, 2)}`);
    console.log(`MTIME: ${p.MTIME}`);
    console.log(`XFL: 0x${hex(p.XFL, 2)}`);
    console.log(`OS: 0x${hex(p.OS, 2)}`);
    if (p.Extra !== null) console.log(`Extra: ${Array.from(p.Extra).map(b => hex(b, 2)).join(' ')}`);
    // Compare against null so a present-but-empty name/comment is still shown.
    if (p.Name !== null) console.log(`Name: ${p.Name}`);
    if (p.Comment !== null) console.log(`Comment: ${p.Comment}`);
    if (p.HCRC !== null) console.log(`HCRC: 0x${hex(p.HCRC, 4)}`);
    console.log(`Compressed data size: ${p['Compressed data size']} bytes`);
    console.log(`CRC32: 0x${hex(p.CRC32, 8)}`);
    console.log(`ISIZE: ${p.ISIZE}`);
  }

  /**
   * Writes the original, unmodified file bytes to `filename`.
   * @param {string} filename Destination path.
   */
  write(filename) {
    fs.writeFileSync(filename, this.data);
  }
}

// Example usage:
// const parser = new EMZParser('example.emz');
// parser.printProperties();
// parser.write('output.emz');
  1. C++ class (C has no classes, so the "C" version is implemented as a C++ class) for opening, decoding, reading, writing, and printing .EMZ properties:
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

/// @brief Reads the GZIP (RFC 1952) container fields of an .EMZ file.
///
/// The whole file is kept in memory. Header and trailer fields are decoded
/// by parse(); the DEFLATE payload is not decompressed, only its size is
/// recorded.
class EMZParser {
private:
    std::vector<uint8_t> data;
    uint8_t id1 = 0, id2 = 0, cm = 0, flg = 0, xfl = 0, os = 0;
    uint32_t mtime = 0;
    std::vector<uint8_t> extra;
    std::string name;
    std::string comment;
    uint16_t hcrc = 0;
    bool has_hcrc = false;
    std::size_t compressed_size = 0;
    uint32_t crc32 = 0;
    uint32_t isize = 0;

    /// Throws unless `count` bytes are available starting at `pos`.
    void requireBytes(std::size_t pos, std::size_t count, const char* what) const {
        // Written as a subtraction so that pos + count cannot overflow.
        if (pos > data.size() || count > data.size() - pos) {
            throw std::runtime_error(std::string("Truncated EMZ file: missing ") + what);
        }
    }

    /// Reads a little-endian 16-bit value; the caller has already bounds-checked.
    uint16_t readLE16(std::size_t pos) const {
        return static_cast<uint16_t>(data[pos] | (data[pos + 1] << 8));
    }

    /// Reads a little-endian 32-bit value; the caller has already bounds-checked.
    uint32_t readLE32(std::size_t pos) const {
        return static_cast<uint32_t>(data[pos])
             | (static_cast<uint32_t>(data[pos + 1]) << 8)
             | (static_cast<uint32_t>(data[pos + 2]) << 16)
             | (static_cast<uint32_t>(data[pos + 3]) << 24);
    }

    /// Reads the NUL-terminated string at `pos` and advances `pos` past the NUL.
    std::string readCString(std::size_t& pos, const char* what) const {
        const auto first = data.begin() + static_cast<std::ptrdiff_t>(pos);
        const auto terminator = std::find(first, data.end(), uint8_t{0});
        if (terminator == data.end()) {
            throw std::runtime_error(std::string("Truncated EMZ file: unterminated ") + what);
        }
        const std::size_t length = static_cast<std::size_t>(terminator - first);
        std::string text(reinterpret_cast<const char*>(data.data()) + pos, length);
        pos += length + 1;
        return text;
    }

    /// Writes `label` followed by `value` as zero-padded upper-case hex and a newline.
    static void printHexField(std::ostream& out, const char* label, uint32_t value, int width) {
        out << label << std::hex << std::uppercase << std::setfill('0') << std::setw(width)
            << value << std::dec << std::nouppercase << '\n';
    }

public:
    /// @brief Loads `filename` fully into memory and parses it.
    /// @throws std::runtime_error if the file cannot be opened or is truncated.
    EMZParser(const std::string& filename) {
        std::ifstream file(filename, std::ios::binary);
        if (!file) {
            throw std::runtime_error("Failed to open file");
        }
        data.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        parse();
    }

    /// @brief Decodes the fixed header, the optional fields selected by FLG, and the trailer.
    /// @throws std::runtime_error if the data ends before an announced field.
    void parse() {
        const std::size_t fixed_header_size = 10;  // ID1 ID2 CM FLG MTIME(4) XFL OS
        const std::size_t trailer_size = 8;        // CRC32(4) ISIZE(4)

        // Reset optional state so that calling parse() again gives the same result.
        extra.clear();
        name.clear();
        comment.clear();
        has_hcrc = false;
        hcrc = 0;

        std::size_t pos = 0;
        requireBytes(pos, fixed_header_size, "fixed header");
        id1 = data[pos++];
        id2 = data[pos++];
        cm = data[pos++];
        flg = data[pos++];
        // Explicit byte-wise reads: the on-disk order is little-endian
        // whatever the host is, and this avoids unaligned type-punned loads.
        mtime = readLE32(pos); pos += 4;
        xfl = data[pos++];
        os = data[pos++];

        if (flg & 4) { // FEXTRA
            requireBytes(pos, 2, "XLEN");
            const std::size_t xlen = readLE16(pos); pos += 2;
            requireBytes(pos, xlen, "extra field");
            const auto first = data.begin() + static_cast<std::ptrdiff_t>(pos);
            extra.assign(first, first + static_cast<std::ptrdiff_t>(xlen));
            pos += xlen;
        }

        if (flg & 8) { // FNAME
            name = readCString(pos, "file name");
        }

        if (flg & 16) { // FCOMMENT
            comment = readCString(pos, "comment");
        }

        if (flg & 2) { // FHCRC
            requireBytes(pos, 2, "HCRC");
            hcrc = readLE16(pos); pos += 2;
            has_hcrc = true;
        }

        // The trailer must start at or after the end of the header; otherwise
        // the unsigned subtraction below would wrap around.
        requireBytes(pos, trailer_size, "CRC32/ISIZE trailer");
        compressed_size = data.size() - pos - trailer_size;

        pos = data.size() - trailer_size;
        crc32 = readLE32(pos); pos += 4;
        isize = readLE32(pos);
    }

    /// @brief Prints every parsed field to std::cout.
    ///
    /// Extra, Name and Comment are printed only when non-empty. The stream's
    /// format flags and fill character are restored before returning.
    void printProperties() const {
        std::ostream& out = std::cout;
        const std::ios_base::fmtflags saved_flags = out.flags();
        const char saved_fill = out.fill();

        printHexField(out, "ID1: 0x", id1, 2);
        printHexField(out, "ID2: 0x", id2, 2);
        printHexField(out, "CM: 0x", cm, 2);
        printHexField(out, "FLG: 0x", flg, 2);
        out << "MTIME: " << mtime << '\n';
        printHexField(out, "XFL: 0x", xfl, 2);
        printHexField(out, "OS: 0x", os, 2);
        if (!extra.empty()) {
            out << "Extra: " << std::hex << std::uppercase << std::setfill('0');
            for (const uint8_t b : extra) {
                out << std::setw(2) << static_cast<unsigned>(b) << ' ';
            }
            out << std::dec << std::nouppercase << '\n';
        }
        if (!name.empty()) out << "Name: " << name << '\n';
        if (!comment.empty()) out << "Comment: " << comment << '\n';
        if (has_hcrc) printHexField(out, "HCRC: 0x", hcrc, 4);
        out << "Compressed data size: " << compressed_size << " bytes" << '\n';
        printHexField(out, "CRC32: 0x", crc32, 8);
        out << "ISIZE: " << isize << '\n';

        out.flags(saved_flags);
        out.fill(saved_fill);
        out.flush();
    }

    /// @brief Writes the original, unmodified file bytes to `filename`.
    /// @throws std::runtime_error if the file cannot be opened or written.
    void write(const std::string& filename) const {
        std::ofstream file(filename, std::ios::binary);
        if (!file) {
            throw std::runtime_error("Failed to write file");
        }
        file.write(reinterpret_cast<const char*>(data.data()),
                   static_cast<std::streamsize>(data.size()));
        if (!file) {
            throw std::runtime_error("Failed to write file");
        }
    }
};

// Example usage:
// int main() {
//     try {
//         EMZParser parser("example.emz");
//         parser.printProperties();
//         parser.write("output.emz");
//     } catch (const std::exception& e) {
//         std::cerr << e.what() << std::endl;
//     }
//     return 0;
// }