Task 180: .EMZ File Format
Task 180: .EMZ File Format
The .EMZ file format is a Windows Compressed Enhanced Metafile: an Enhanced Metafile (EMF) wrapped in a GZIP container whose payload is compressed with the DEFLATE algorithm. It is primarily used by Microsoft applications such as Visio, Word, and PowerPoint to store vector graphics (and sometimes raster elements) in compressed form, reducing file size for storage and transfer. The format follows the GZIP specification (RFC 1952), and the uncompressed content is an EMF file (as defined in the Microsoft Open Specifications [MS-EMF]). There are no .EMZ-specific structures beyond the GZIP wrapper; the format defines no additional headers or fields.
List of all properties intrinsic to this file format (the fields of the GZIP member, in the order they appear in the file):
ID1: 1 byte, magic number, must be 0x1F.
ID2: 1 byte, magic number, must be 0x8B.
CM: 1 byte, compression method, must be 8 (Deflate).
FLG: 1 byte, flags, with bits indicating FTEXT (bit 0), FHCRC (bit 1), FEXTRA (bit 2), FNAME (bit 3), FCOMMENT (bit 4), reserved (bits 5-7 must be zero).
MTIME: 4 bytes, Unix time of last modification or compression start, 0 if no timestamp.
XFL: 1 byte, extra flags, values 0 (none), 2 (best compression, level 9), 4 (fastest compression, level 1).
OS: 1 byte, filesystem on which compression occurred.
XLEN: 0 or 2 bytes, size of extra field, present if FEXTRA flag is set.
Extra field: 0 or XLEN bytes, sequence of subfields with SI1 SI2 identifier and LEN value.
File name: 0 or a variable number of bytes, null-terminated, ISO 8859-1 encoded, present if FNAME flag is set.
Comment: 0 or a variable number of bytes, null-terminated, ISO 8859-1 encoded, present if FCOMMENT flag is set.
HCRC (CRC16): 0 or 2 bytes, two least significant bytes of CRC-32 of header, present if FHCRC flag is set.
Compressed data: variable length, the DEFLATE-compressed EMF payload.
CRC32: 4 bytes, CRC-32 of the uncompressed data.
ISIZE: 4 bytes, size of the uncompressed data modulo 2^32.
Two direct download links for files of format .EMZ:
- https://example-files.online-convert.com/raster%20image/emz/example.emz
- https://example-files.online-convert.com/raster%20image/emz/example.emz (Note: an extensive search found only this one publicly available sample; a second, distinct sample could not be located, so the same link is repeated here and can be used for testing.)
- Ghost blog embeddable HTML/JavaScript that lets a user drag and drop an .EMZ file and dumps its properties to the screen:
This HTML can be embedded in a Ghost blog post by pasting it into a custom HTML block or by using code injection.
- Python class for opening, decoding, reading, writing, and printing .EMZ properties:
import struct
class EMZParser:
    """Read an .EMZ file and expose the fields of its GZIP (RFC 1952) wrapper.

    The whole file is loaded into ``self.data`` and the header/trailer fields
    are stored in ``self.properties`` under the keys ``ID1``, ``ID2``, ``CM``,
    ``FLG``, ``MTIME``, ``XFL``, ``OS``, ``Extra``, ``Name``, ``Comment``,
    ``HCRC``, ``Compressed data size``, ``CRC32`` and ``ISIZE``.  Optional
    fields that are absent from the file are stored as ``None``.

    The DEFLATE payload itself is not decompressed; only its byte range
    (``self.compressed_start`` .. ``self.compressed_end``) is recorded.
    Field values are reported as found and are not validated (e.g. the magic
    bytes are not checked).

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the file is too short to contain a field that the
            header says is present, or a name/comment is not NUL-terminated.
    """

    # FLG bit masks, as listed in RFC 1952.
    _FHCRC = 0x02
    _FEXTRA = 0x04
    _FNAME = 0x08
    _FCOMMENT = 0x10

    _FIXED_HEADER_SIZE = 10  # ID1 ID2 CM FLG MTIME(4) XFL OS
    _TRAILER_SIZE = 8        # CRC32(4) ISIZE(4)

    def __init__(self, filename):
        """Load ``filename`` into memory and parse it immediately."""
        with open(filename, 'rb') as f:
            self.data = f.read()
        self.properties = {}
        self.parse()

    def _require(self, pos, count, what):
        """Raise ValueError unless ``count`` bytes are available at ``pos``."""
        if pos + count > len(self.data):
            raise ValueError(f"Truncated EMZ file: missing {what}")

    def _read_cstring(self, pos, what):
        """Return (decoded text, position after the NUL) for the string at ``pos``."""
        end = self.data.find(b'\x00', pos)
        if end < 0:
            raise ValueError(f"Malformed EMZ file: unterminated {what}")
        # RFC 1952 specifies ISO 8859-1 for the name and comment fields.
        return self.data[pos:end].decode('latin-1'), end + 1

    def parse(self):
        """Decode the GZIP header and trailer of ``self.data`` into ``self.properties``."""
        data = self.data
        props = self.properties

        self._require(0, self._FIXED_HEADER_SIZE, 'fixed 10-byte header')
        id1, id2, cm, flg, mtime, xfl, os_code = struct.unpack_from('<BBBBIBB', data, 0)
        props['ID1'] = id1
        props['ID2'] = id2
        props['CM'] = cm
        props['FLG'] = flg
        props['MTIME'] = mtime
        props['XFL'] = xfl
        props['OS'] = os_code
        pos = self._FIXED_HEADER_SIZE

        if flg & self._FEXTRA:
            self._require(pos, 2, 'XLEN')
            xlen = struct.unpack_from('<H', data, pos)[0]
            pos += 2
            self._require(pos, xlen, 'extra field')
            props['Extra'] = data[pos:pos + xlen]
            pos += xlen
        else:
            props['Extra'] = None

        if flg & self._FNAME:
            props['Name'], pos = self._read_cstring(pos, 'file name')
        else:
            props['Name'] = None

        if flg & self._FCOMMENT:
            props['Comment'], pos = self._read_cstring(pos, 'comment')
        else:
            props['Comment'] = None

        if flg & self._FHCRC:
            self._require(pos, 2, 'header CRC16')
            props['HCRC'] = struct.unpack_from('<H', data, pos)[0]
            pos += 2
        else:
            props['HCRC'] = None

        # Everything between the header and the 8-byte trailer is payload.
        # Without this check a short file would yield a negative size.
        self._require(pos, self._TRAILER_SIZE, 'CRC32/ISIZE trailer')
        self.compressed_start = pos
        self.compressed_end = len(data) - self._TRAILER_SIZE
        props['Compressed data size'] = self.compressed_end - self.compressed_start

        crc32, isize = struct.unpack_from('<II', data, self.compressed_end)
        props['CRC32'] = crc32
        props['ISIZE'] = isize

    def print_properties(self):
        """Print every parsed field to stdout; absent optional fields are skipped."""
        props = self.properties
        print(f"ID1: 0x{props['ID1']:02X}")
        print(f"ID2: 0x{props['ID2']:02X}")
        print(f"CM: 0x{props['CM']:02X}")
        print(f"FLG: 0x{props['FLG']:02X}")
        print(f"MTIME: {props['MTIME']}")
        print(f"XFL: 0x{props['XFL']:02X}")
        print(f"OS: 0x{props['OS']:02X}")
        if props['Extra'] is not None:
            print(f"Extra: {props['Extra']}")
        if props['Name'] is not None:
            print(f"Name: {props['Name']}")
        if props['Comment'] is not None:
            print(f"Comment: {props['Comment']}")
        if props['HCRC'] is not None:
            print(f"HCRC: 0x{props['HCRC']:04X}")
        print(f"Compressed data size: {props['Compressed data size']} bytes")
        print(f"CRC32: 0x{props['CRC32']:08X}")
        print(f"ISIZE: {props['ISIZE']}")

    def write(self, filename):
        """Write the original, unmodified file bytes to ``filename``."""
        with open(filename, 'wb') as f:
            f.write(self.data)
# Example usage:
# parser = EMZParser('example.emz')
# parser.print_properties()
# parser.write('output.emz')
- Java class for opening, decoding, reading, writing, and printing .EMZ properties:
import java.io.*;
import java.nio.*;
import java.nio.file.*;
public class EMZParser {
private byte[] data;
private ByteBuffer bb;
private byte id1, id2, cm, flg, xfl, os;
private int mtime;
private byte[] extra;
private String name;
private String comment;
private Short hcrc;
private int compressedSize;
private int crc32;
private int isize;
public EMZParser(String filename) throws IOException {
data = Files.readAllBytes(Paths.get(filename));
bb = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
parse();
}
private void parse() {
int pos = 0;
id1 = bb.get(pos++);
id2 = bb.get(pos++);
cm = bb.get(pos++);
flg = bb.get(pos++);
mtime = bb.getInt(pos); pos += 4;
xfl = bb.get(pos++);
os = bb.get(pos++);
if ((flg & 4) != 0) { // FEXTRA
short xlen = bb.getShort(pos); pos += 2;
extra = new byte[xlen];
bb.position(pos);
bb.get(extra);
pos += xlen;
}
if ((flg & 8) != 0) { // FNAME
int start = pos;
while (bb.get(pos) != 0) pos++;
byte[] nameBytes = new byte[pos - start];
bb.position(start);
bb.get(nameBytes);
name = new String(nameBytes, java.nio.charset.StandardCharsets.ISO_8859_1);
pos++;
}
if ((flg & 16) != 0) { // FCOMMENT
int start = pos;
while (bb.get(pos) != 0) pos++;
byte[] commentBytes = new byte[pos - start];
bb.position(start);
bb.get(commentBytes);
comment = new String(commentBytes, java.nio.charset.StandardCharsets.ISO_8859_1);
pos++;
}
if ((flg & 2) != 0) { // FHCRC
hcrc = bb.getShort(pos); pos += 2;
}
compressedSize = data.length - pos - 8;
bb.position(data.length - 8);
crc32 = bb.getInt();
isize = bb.getInt();
}
public void printProperties() {
System.out.printf("ID1: 0x%02X%n", id1);
System.out.printf("ID2: 0x%02X%n", id2);
System.out.printf("CM: 0x%02X%n", cm);
System.out.printf("FLG: 0x%02X%n", flg);
System.out.printf("MTIME: %d%n", mtime);
System.out.printf("XFL: 0x%02X%n", xfl);
System.out.printf("OS: 0x%02X%n", os);
if (extra != null) {
System.out.print("Extra: ");
for (byte b : extra) System.out.printf("%02X ", b);
System.out.println();
}
if (name != null) System.out.printf("Name: %s%n", name);
if (comment != null) System.out.printf("Comment: %s%n", comment);
if (hcrc != null) System.out.printf("HCRC: 0x%04X%n", hcrc);
System.out.printf("Compressed data size: %d bytes%n", compressedSize);
System.out.printf("CRC32: 0x%08X%n", crc32);
System.out.printf("ISIZE: %d%n", isize);
}
public void write(String filename) throws IOException {
Files.write(Paths.get(filename), data);
}
// Example usage:
// public static void main(String[] args) throws IOException {
// EMZParser parser = new EMZParser("example.emz");
// parser.printProperties();
// parser.write("output.emz");
// }
}
- JavaScript class for opening, decoding, reading, writing, and printing .EMZ properties (Node.js version, requires 'fs' module):
const fs = require('fs');
/**
 * Reads an .EMZ file and exposes the fields of its GZIP (RFC 1952) wrapper.
 *
 * The whole file is kept in `this.data` and the parsed fields are stored in
 * `this.properties` (ID1, ID2, CM, FLG, MTIME, XFL, OS, Extra, Name, Comment,
 * HCRC, 'Compressed data size', CRC32, ISIZE). Optional fields that are absent
 * are stored as `null`. The DEFLATE payload is not decompressed, and field
 * values are reported as found without validation.
 */
class EMZParser {
  /**
   * @param {string} filename Path of the .EMZ file to load and parse.
   * @throws {Error} If the file cannot be read.
   * @throws {RangeError} If the file is truncated or a name/comment is unterminated.
   */
  constructor(filename) {
    this.data = fs.readFileSync(filename);
    // A Buffer can be a window into a larger (pooled) ArrayBuffer, so the view
    // must be restricted to the Buffer's own offset and length; otherwise
    // offset 0 of the view is not necessarily offset 0 of the file.
    this.view = new DataView(this.data.buffer, this.data.byteOffset, this.data.byteLength);
    this.properties = {};
    this.parse();
  }

  /** Throws unless `count` bytes are available starting at `pos`. */
  _require(pos, count, what) {
    if (pos + count > this.data.length) {
      throw new RangeError(`Truncated EMZ file: missing ${what}`);
    }
  }

  /** Returns `[text, nextPos]` for the NUL-terminated latin1 string at `pos`. */
  _readCString(pos, what) {
    const end = this.data.indexOf(0, pos);
    if (end === -1) {
      throw new RangeError(`Malformed EMZ file: unterminated ${what}`);
    }
    return [this.data.toString('latin1', pos, end), end + 1];
  }

  /** Decodes the GZIP header and trailer into `this.properties`. */
  parse() {
    const FHCRC = 0x02, FEXTRA = 0x04, FNAME = 0x08, FCOMMENT = 0x10;
    const TRAILER_SIZE = 8; // CRC32(4) + ISIZE(4)
    const props = this.properties;
    const view = this.view;

    this._require(0, 10, 'fixed 10-byte header');
    let pos = 0;
    props.ID1 = view.getUint8(pos++);
    props.ID2 = view.getUint8(pos++);
    props.CM = view.getUint8(pos++);
    props.FLG = view.getUint8(pos++);
    props.MTIME = view.getUint32(pos, true); pos += 4;
    props.XFL = view.getUint8(pos++);
    props.OS = view.getUint8(pos++);
    const flg = props.FLG;

    if (flg & FEXTRA) {
      this._require(pos, 2, 'XLEN');
      const xlen = view.getUint16(pos, true); pos += 2;
      this._require(pos, xlen, 'extra field');
      // Copy so the stored field does not alias the file buffer.
      props.Extra = new Uint8Array(this.data.subarray(pos, pos + xlen));
      pos += xlen;
    } else {
      props.Extra = null;
    }

    if (flg & FNAME) {
      [props.Name, pos] = this._readCString(pos, 'file name');
    } else {
      props.Name = null;
    }

    if (flg & FCOMMENT) {
      [props.Comment, pos] = this._readCString(pos, 'comment');
    } else {
      props.Comment = null;
    }

    if (flg & FHCRC) {
      this._require(pos, 2, 'header CRC16');
      props.HCRC = view.getUint16(pos, true); pos += 2;
    } else {
      props.HCRC = null;
    }

    // Everything between the header and the 8-byte trailer is payload.
    this._require(pos, TRAILER_SIZE, 'CRC32/ISIZE trailer');
    props['Compressed data size'] = this.data.length - pos - TRAILER_SIZE;
    const trailer = this.data.length - TRAILER_SIZE;
    props.CRC32 = view.getUint32(trailer, true);
    props.ISIZE = view.getUint32(trailer + 4, true);
  }

  /** Prints every parsed field to the console; absent optional fields are skipped. */
  printProperties() {
    const props = this.properties;
    const hex = (value, digits) => value.toString(16).padStart(digits, '0').toUpperCase();

    console.log(`ID1: 0x${hex(props.ID1, 2)}`);
    console.log(`ID2: 0x${hex(props.ID2, 2)}`);
    console.log(`CM: 0x${hex(props.CM, 2)}`);
    console.log(`FLG: 0x${hex(props.FLG, 2)}`);
    console.log(`MTIME: ${props.MTIME}`);
    console.log(`XFL: 0x${hex(props.XFL, 2)}`);
    console.log(`OS: 0x${hex(props.OS, 2)}`);
    if (props.Extra !== null) console.log(`Extra: ${Array.from(props.Extra).map(b => hex(b, 2)).join(' ')}`);
    // Compare against null so a present-but-empty name/comment is still shown.
    if (props.Name !== null) console.log(`Name: ${props.Name}`);
    if (props.Comment !== null) console.log(`Comment: ${props.Comment}`);
    if (props.HCRC !== null) console.log(`HCRC: 0x${hex(props.HCRC, 4)}`);
    console.log(`Compressed data size: ${props['Compressed data size']} bytes`);
    console.log(`CRC32: 0x${hex(props.CRC32, 8)}`);
    console.log(`ISIZE: ${props.ISIZE}`);
  }

  /**
   * Writes the original, unmodified file bytes to `filename`.
   * @param {string} filename Destination path.
   */
  write(filename) {
    fs.writeFileSync(filename, this.data);
  }
}
// Example usage:
// const parser = new EMZParser('example.emz');
// parser.printProperties();
// parser.write('output.emz');
- C++ class (C has no classes, so the C implementation is written as a C++ class) for opening, decoding, reading, writing, and printing .EMZ properties:
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>
/// Reads an .EMZ file and exposes the fields of its GZIP (RFC 1952) wrapper.
///
/// The whole file is kept in memory. The DEFLATE payload is not decompressed;
/// only its size is computed. Field values are reported as found and are not
/// validated (for example, the magic bytes are not checked).
class EMZParser {
private:
    // FLG bit masks, as listed in RFC 1952.
    static constexpr uint8_t kFlagHcrc = 0x02;
    static constexpr uint8_t kFlagExtra = 0x04;
    static constexpr uint8_t kFlagName = 0x08;
    static constexpr uint8_t kFlagComment = 0x10;

    static constexpr size_t kFixedHeaderSize = 10;  // ID1 ID2 CM FLG MTIME(4) XFL OS
    static constexpr size_t kTrailerSize = 8;       // CRC32(4) ISIZE(4)

    std::vector<uint8_t> data;
    uint8_t id1 = 0, id2 = 0, cm = 0, flg = 0, xfl = 0, os = 0;
    uint32_t mtime = 0;
    std::vector<uint8_t> extra;
    std::string name;
    std::string comment;
    uint16_t hcrc = 0;
    bool has_hcrc = false;
    size_t compressed_size = 0;
    uint32_t crc32 = 0;
    uint32_t isize = 0;

    /// Throws unless `count` bytes are available starting at `pos`.
    void require(size_t pos, size_t count, const char* what) const {
        // Written as a subtraction so that pos + count cannot wrap around.
        if (pos > data.size() || count > data.size() - pos) {
            throw std::runtime_error(std::string("Truncated EMZ file: missing ") + what);
        }
    }

    /// Assembles a little-endian 16-bit value byte by byte, so the result does
    /// not depend on host endianness or on the alignment of `pos`.
    uint16_t readLe16(size_t pos) const {
        return static_cast<uint16_t>(data[pos] | (data[pos + 1] << 8));
    }

    /// Little-endian 32-bit counterpart of readLe16.
    uint32_t readLe32(size_t pos) const {
        return static_cast<uint32_t>(data[pos]) |
               (static_cast<uint32_t>(data[pos + 1]) << 8) |
               (static_cast<uint32_t>(data[pos + 2]) << 16) |
               (static_cast<uint32_t>(data[pos + 3]) << 24);
    }

    /// Reads the NUL-terminated string at `pos` and advances `pos` past the NUL.
    std::string readCString(size_t& pos, const char* what) const {
        size_t end = pos;
        while (end < data.size() && data[end] != 0) {
            ++end;
        }
        if (end == data.size()) {
            throw std::runtime_error(std::string("Malformed EMZ file: unterminated ") + what);
        }
        std::string text(data.begin() + pos, data.begin() + end);
        pos = end + 1;
        return text;
    }

    /// Prints `label`, then `value` as zero-padded upper-case hex of `width` digits.
    static void printHex(const char* label, unsigned long value, int width) {
        std::cout << label << "0x" << std::hex << std::uppercase << std::setfill('0')
                  << std::setw(width) << value << std::dec << std::nouppercase << '\n';
    }

public:
    /// Loads `filename` into memory and parses it.
    /// @throws std::runtime_error if the file cannot be opened, is too short to
    ///         contain a field its header declares, or has an unterminated
    ///         name/comment.
    EMZParser(const std::string& filename) {
        std::ifstream file(filename, std::ios::binary);
        if (!file) {
            throw std::runtime_error("Failed to open file");
        }
        data.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        parse();
    }

    /// Decodes the GZIP header and trailer from the loaded bytes.
    /// @throws std::runtime_error on truncated or malformed input.
    void parse() {
        // Reset optional fields so that a repeated call starts from a clean state.
        extra.clear();
        name.clear();
        comment.clear();
        has_hcrc = false;
        hcrc = 0;

        size_t pos = 0;
        require(pos, kFixedHeaderSize, "fixed 10-byte header");
        id1 = data[pos++];
        id2 = data[pos++];
        cm = data[pos++];
        flg = data[pos++];
        mtime = readLe32(pos);
        pos += 4;
        xfl = data[pos++];
        os = data[pos++];

        if (flg & kFlagExtra) {
            require(pos, 2, "XLEN");
            const size_t xlen = readLe16(pos);
            pos += 2;
            require(pos, xlen, "extra field");
            extra.assign(data.begin() + pos, data.begin() + pos + xlen);
            pos += xlen;
        }
        if (flg & kFlagName) {
            name = readCString(pos, "file name");
        }
        if (flg & kFlagComment) {
            comment = readCString(pos, "comment");
        }
        if (flg & kFlagHcrc) {
            require(pos, 2, "header CRC16");
            hcrc = readLe16(pos);
            pos += 2;
            has_hcrc = true;
        }

        // Everything between the header and the 8-byte trailer is payload.
        // Without this check the unsigned subtraction below would wrap around.
        require(pos, kTrailerSize, "CRC32/ISIZE trailer");
        compressed_size = data.size() - pos - kTrailerSize;
        const size_t trailer = data.size() - kTrailerSize;
        crc32 = readLe32(trailer);
        isize = readLe32(trailer + 4);
    }

    /// Prints every parsed field to std::cout; empty optional fields are skipped.
    void printProperties() const {
        printHex("ID1: ", id1, 2);
        printHex("ID2: ", id2, 2);
        printHex("CM: ", cm, 2);
        printHex("FLG: ", flg, 2);
        std::cout << "MTIME: " << mtime << '\n';
        printHex("XFL: ", xfl, 2);
        printHex("OS: ", os, 2);
        if (!extra.empty()) {
            std::cout << "Extra: ";
            for (const auto b : extra) {
                std::cout << std::hex << std::uppercase << std::setfill('0') << std::setw(2)
                          << static_cast<int>(b) << " ";
            }
            std::cout << std::dec << std::nouppercase << '\n';
        }
        if (!name.empty()) std::cout << "Name: " << name << '\n';
        if (!comment.empty()) std::cout << "Comment: " << comment << '\n';
        if (has_hcrc) printHex("HCRC: ", hcrc, 4);
        std::cout << "Compressed data size: " << compressed_size << " bytes" << '\n';
        printHex("CRC32: ", crc32, 8);
        std::cout << "ISIZE: " << isize << '\n';
    }

    /// Writes the original, unmodified file bytes to `filename`.
    /// @throws std::runtime_error if the file cannot be opened or fully written.
    void write(const std::string& filename) const {
        std::ofstream file(filename, std::ios::binary);
        if (!file) {
            throw std::runtime_error("Failed to write file");
        }
        file.write(reinterpret_cast<const char*>(data.data()),
                   static_cast<std::streamsize>(data.size()));
        if (!file) {
            throw std::runtime_error("Failed to write file");
        }
    }
};
// Example usage:
// int main() {
// try {
// EMZParser parser("example.emz");
// parser.printProperties();
// parser.write("output.emz");
// } catch (const std::exception& e) {
// std::cerr << e.what() << std::endl;
// }
// return 0;
// }