Task 433: .MSU File Format
Task 433: .MSU File Format
- The .MSU file format is based on the Microsoft Cabinet (CAB) format, which is a compressed archive format. The properties intrinsic to the file format include the fields and structures from the CAB specification. Here is a list of all key properties (fields) from the structures:
CFHEADER (Cabinet Header):
- signature (4 bytes: 'M','S','C','F')
- reserved1 (4 bytes)
- cbCabinet (4 bytes: total size of cabinet)
- reserved2 (4 bytes)
- coffFiles (4 bytes: offset to first CFFILE)
- reserved3 (4 bytes)
- versionMinor (1 byte)
- versionMajor (1 byte)
- cFolders (2 bytes: number of folders)
- cFiles (2 bytes: number of files)
- flags (2 bytes: option flags)
- setID (2 bytes: set identifier)
- iCabinet (2 bytes: cabinet number)
- cbCFHeader (2 bytes, optional: per-cabinet reserved size)
- cbCFFolder (1 byte, optional: per-folder reserved size)
- cbCFData (1 byte, optional: per-data block reserved size)
- abReserve (variable, optional: reserved data)
- szCabinetPrev (variable, optional: previous cabinet name)
- szDiskPrev (variable, optional: previous disk label)
- szCabinetNext (variable, optional: next cabinet name)
- szDiskNext (variable, optional: next disk label)
CFFOLDER (Folder Entry):
- coffCabStart (4 bytes: offset to first CFDATA)
- cCFData (2 bytes: number of CFDATA blocks)
- typeCompress (2 bytes: compression type)
- abReserve (variable, optional: per-folder reserved data)
CFFILE (File Entry):
- cbFile (4 bytes: uncompressed file size)
- uoffFolderStart (4 bytes: offset in folder)
- iFolder (2 bytes: folder index)
- date (2 bytes: file date)
- time (2 bytes: file time)
- attribs (2 bytes: file attributes)
- szName (variable: filename)
CFDATA (Data Block Entry):
- csum (4 bytes: checksum)
- cbData (2 bytes: compressed bytes)
- cbUncomp (2 bytes: uncompressed bytes)
- abReserve (variable, optional: per-block reserved data)
- ab (variable: compressed data)
These properties define the structure, compression, and content organization of the .MSU file.
- Two direct download links for .MSU files:
- http://download.windowsupdate.com/c/msdownload/update/software/secu/2021/11/windows10.0-kb5007215-x64_31a2971850c333e5a0b20568ede502f0df623d9e.msu (Windows 11 x64 KB5007215)
- http://download.windowsupdate.com/c/msdownload/update/software/secu/2021/11/windows10.0-kb5007215-arm64_a85b12ae03dc9bc3681ef243eb14577bcb3f891f.msu (Windows 11 ARM64 KB5007215)
- Here is an HTML page with embedded JavaScript for a Ghost blog (or any web page) that allows drag-and-drop of an .MSU file and dumps the CAB properties to the screen:
Drag and drop .MSU file here
This code parses the CAB header, the folder entries, and the file entries, and displays their properties. It does not read the CFDATA blocks or decompress file contents, because that is beyond the scope of a simple in-browser script.
- Python class for .MSU (CAB) handling:
import struct
import os
class MSUParser:
    """Minimal reader for the Cabinet (CAB) container used by .MSU packages.

    The parser loads the whole file into memory, walks the CFHEADER,
    CFFOLDER and CFFILE structures and stores every decoded field in
    ``self.properties``. CFDATA blocks are not read and nothing is
    decompressed.
    """

    # CFHEADER.flags bits that control which optional header fields follow.
    FLAG_PREV_CABINET = 0x0001
    FLAG_NEXT_CABINET = 0x0002
    FLAG_RESERVE_PRESENT = 0x0004

    def __init__(self, filename):
        """Remember the path; no I/O happens until ``open()`` is called."""
        self.filename = filename
        self.data = None
        self.offset = 0
        self.properties = {}

    def open(self):
        """Read the entire file into ``self.data`` and rewind the cursor."""
        with open(self.filename, 'rb') as f:
            self.data = f.read()
        self.offset = 0

    def read_u1(self):
        """Return the unsigned byte at the cursor and advance by one."""
        val = self.data[self.offset]
        self.offset += 1
        return val

    def read_u2(self):
        """Return the little-endian u16 at the cursor and advance by two."""
        val, = struct.unpack_from('<H', self.data, self.offset)
        self.offset += 2
        return val

    def read_u4(self):
        """Return the little-endian u32 at the cursor and advance by four."""
        val, = struct.unpack_from('<I', self.data, self.offset)
        self.offset += 4
        return val

    def read_string(self):
        """Read a NUL-terminated string and advance PAST the terminator.

        Raises:
            ValueError: if no NUL byte follows the cursor.
        """
        start = self.offset
        end = self.data.find(b'\x00', start)
        if end < 0:
            raise ValueError('Unterminated string at offset %d' % start)
        # Consume the terminator too; otherwise the next field would be
        # read starting at the NUL byte and every later value would be
        # shifted by one.
        self.offset = end + 1
        # Names are not guaranteed to be valid UTF-8, so never let one
        # undecodable byte abort the whole parse.
        return self.data[start:end].decode('utf-8', errors='replace')

    def decode(self):
        """Parse CFHEADER, all CFFOLDER and all CFFILE entries.

        Parsing always starts at the beginning of ``self.data`` and the
        result replaces the previous contents of ``self.properties``.

        Raises:
            ValueError: if the data does not start with the 'MSCF' signature
                or a string is not terminated.
        """
        self.offset = 0
        self.properties.clear()
        props = self.properties

        sig = ''.join(chr(self.read_u1()) for _ in range(4))
        if sig != 'MSCF':
            raise ValueError('Invalid CAB signature')
        props['signature'] = sig
        props['reserved1'] = self.read_u4()
        props['cbCabinet'] = self.read_u4()
        props['reserved2'] = self.read_u4()
        props['coffFiles'] = self.read_u4()
        props['reserved3'] = self.read_u4()
        props['versionMinor'] = self.read_u1()
        props['versionMajor'] = self.read_u1()
        props['cFolders'] = self.read_u2()
        props['cFiles'] = self.read_u2()
        props['flags'] = self.read_u2()
        props['setID'] = self.read_u2()
        props['iCabinet'] = self.read_u2()

        if props['flags'] & self.FLAG_RESERVE_PRESENT:
            props['cbCFHeader'] = self.read_u2()
            props['cbCFFolder'] = self.read_u1()
            props['cbCFData'] = self.read_u1()
            self.offset += props['cbCFHeader']  # skip abReserve
        if props['flags'] & self.FLAG_PREV_CABINET:
            props['szCabinetPrev'] = self.read_string()
            props['szDiskPrev'] = self.read_string()
        if props['flags'] & self.FLAG_NEXT_CABINET:
            props['szCabinetNext'] = self.read_string()
            props['szDiskNext'] = self.read_string()

        # Per-folder reserved bytes exist only when the reserve flag is set.
        folder_reserve = props.get('cbCFFolder', 0)
        props['folders'] = []
        for _ in range(props['cFolders']):
            folder = {
                'coffCabStart': self.read_u4(),
                'cCFData': self.read_u2(),
                'typeCompress': self.read_u2(),
            }
            self.offset += folder_reserve
            props['folders'].append(folder)

        # The file table is located by absolute offset, not by position.
        self.offset = props['coffFiles']
        props['files'] = []
        for _ in range(props['cFiles']):
            file_prop = {
                'cbFile': self.read_u4(),
                'uoffFolderStart': self.read_u4(),
                'iFolder': self.read_u2(),
                'date': self.read_u2(),
                'time': self.read_u2(),
                'attribs': self.read_u2(),
                'szName': self.read_string(),
            }
            props['files'].append(file_prop)

    def print_properties(self):
        """Print the decoded property dictionary to stdout."""
        print(self.properties)

    def write(self, new_filename):
        """Write ``self.data`` unchanged to ``new_filename``."""
        with open(new_filename, 'wb') as f:
            f.write(self.data)  # Simple write; modify self.data for changes
# Usage example:
# parser = MSUParser('example.msu')
# parser.open()
# parser.decode()
# parser.print_properties()
# parser.write('modified.msu')
This class opens the file, decodes the CAB structures, prints the decoded properties, and writes the data back out (a plain byte-for-byte copy; extend it if you need to modify the contents).
- Java class for .MSU (CAB) handling:
import java.io.*;
import java.nio.*;
import java.nio.file.*;
public class MSUParser {
private String filename;
private byte[] data;
private int offset = 0;
private ByteBuffer bb;
private StringBuilder properties = new StringBuilder();
public MSUParser(String filename) {
this.filename = filename;
}
public void open() throws IOException {
data = Files.readAllBytes(Paths.get(filename));
bb = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
offset = 0;
}
private int readU1() {
return bb.get(offset++) & 0xFF;
}
private int readU2() {
int val = bb.getShort(offset) & 0xFFFF;
offset += 2;
return val;
}
private int readU4() {
int val = bb.getInt(offset);
offset += 4;
return val;
}
private String readString() {
int start = offset;
while (readU1() != 0);
return new String(data, start, offset - start - 1);
}
public void decode() {
String sig = "" + (char)readU1() + (char)readU1() + (char)readU1() + (char)readU1();
if (!sig.equals("MSCF")) throw new RuntimeException("Invalid CAB signature");
properties.append("signature: ").append(sig).append("\n");
properties.append("reserved1: ").append(readU4()).append("\n");
properties.append("cbCabinet: ").append(readU4()).append("\n");
properties.append("reserved2: ").append(readU4()).append("\n");
properties.append("coffFiles: ").append(readU4()).append("\n");
properties.append("reserved3: ").append(readU4()).append("\n");
properties.append("versionMinor: ").append(readU1()).append("\n");
properties.append("versionMajor: ").append(readU1()).append("\n");
int cFolders = readU2();
properties.append("cFolders: ").append(cFolders).append("\n");
properties.append("cFiles: ").append(readU2()).append("\n");
int flags = readU2();
properties.append("flags: 0x").append(Integer.toHexString(flags)).append("\n");
properties.append("setID: ").append(readU2()).append("\n");
properties.append("iCabinet: ").append(readU2()).append("\n");
int cbCFHeader = 0, cbCFFolder = 0, cbCFData = 0;
if ((flags & 0x0004) != 0) {
cbCFHeader = readU2();
cbCFFolder = readU1();
cbCFData = readU1();
offset += cbCFHeader;
properties.append("cbCFHeader: ").append(cbCFHeader).append("\n");
properties.append("cbCFFolder: ").append(cbCFFolder).append("\n");
properties.append("cbCFData: ").append(cbCFData).append("\n");
}
if ((flags & 0x0001) != 0) {
properties.append("szCabinetPrev: ").append(readString()).append("\n");
properties.append("szDiskPrev: ").append(readString()).append("\n");
}
if ((flags & 0x0002) != 0) {
properties.append("szCabinetNext: ").append(readString()).append("\n");
properties.append("szDiskNext: ").append(readString()).append("\n");
}
for (int i = 0; i < cFolders; i++) {
properties.append("CFFOLDER ").append(i).append(":\n");
properties.append(" coffCabStart: ").append(readU4()).append("\n");
properties.append(" cCFData: ").append(readU2()).append("\n");
properties.append(" typeCompress: 0x").append(Integer.toHexString(readU2())).append("\n");
offset += cbCFFolder;
}
offset = bb.getInt(16); // coffFiles offset from spec
for (int i = 0; i < bb.getShort(22); i++) { // cFiles
properties.append("CFFILE ").append(i).append(":\n");
properties.append(" cbFile: ").append(readU4()).append("\n");
properties.append(" uoffFolderStart: ").append(readU4()).append("\n");
properties.append(" iFolder: ").append(readU2()).append("\n");
properties.append(" date: 0x").append(Integer.toHexString(readU2())).append("\n");
properties.append(" time: 0x").append(Integer.toHexString(readU2())).append("\n");
properties.append(" attribs: 0x").append(Integer.toHexString(readU2())).append("\n");
properties.append(" szName: ").append(readString()).append("\n");
}
}
public void printProperties() {
System.out.println(properties.toString());
}
public void write(String newFilename) throws IOException {
Files.write(Paths.get(newFilename), data); // Simple copy; modify data for changes
}
// Usage example:
// MSUParser parser = new MSUParser("example.msu");
// parser.open();
// parser.decode();
// parser.printProperties();
// parser.write("modified.msu");
}
- JavaScript class for .MSU (CAB) handling (Node.js, using fs):
const fs = require('fs');
/**
 * Minimal reader for the Cabinet (CAB) container used by .MSU packages.
 *
 * The whole file is loaded into a Buffer; decode() walks the CFHEADER,
 * CFFOLDER and CFFILE structures and stores every field in `properties`.
 * CFDATA blocks are not read and nothing is decompressed.
 */
class MSUParser {
  constructor(filename) {
    this.filename = filename;
    this.data = null;
    this.offset = 0;
    this.properties = {};
  }

  /** Read the entire file into memory and rewind the read cursor. */
  open() {
    this.data = fs.readFileSync(this.filename);
    this.offset = 0;
  }

  /** Return the unsigned byte at the cursor; throws RangeError past the end. */
  readU1() {
    // Indexing a Buffer out of range yields undefined instead of throwing,
    // so the bound has to be checked explicitly.
    if (this.offset >= this.data.length) {
      throw new RangeError('Read past end of data');
    }
    return this.data[this.offset++];
  }

  /** Return the little-endian u16 at the cursor and advance by two. */
  readU2() {
    const value = this.data.readUInt16LE(this.offset);
    this.offset += 2;
    return value;
  }

  /** Return the little-endian u32 at the cursor and advance by four. */
  readU4() {
    const value = this.data.readUInt32LE(this.offset);
    this.offset += 4;
    return value;
  }

  /**
   * Read a NUL-terminated string and leave the cursor after the NUL.
   * Throws instead of scanning forever when no terminator is present.
   */
  readString() {
    const start = this.offset;
    const end = this.data.indexOf(0, start);
    if (end === -1) {
      throw new RangeError('Unterminated string at offset ' + start);
    }
    this.offset = end + 1;
    return this.data.toString('utf8', start, end);
  }

  /**
   * Parse the cabinet from the start of the loaded data and replace
   * `properties` with the decoded fields.
   * @throws {Error} if the data does not start with the 'MSCF' signature
   * @throws {RangeError} if a field lies outside the loaded data
   */
  decode() {
    this.offset = 0;
    this.properties = {};
    const props = this.properties;

    const sig = String.fromCharCode(this.readU1(), this.readU1(), this.readU1(), this.readU1());
    if (sig !== 'MSCF') throw new Error('Invalid CAB signature');
    props.signature = sig;
    props.reserved1 = this.readU4();
    props.cbCabinet = this.readU4();
    props.reserved2 = this.readU4();
    props.coffFiles = this.readU4();
    props.reserved3 = this.readU4();
    props.versionMinor = this.readU1();
    props.versionMajor = this.readU1();
    const cFolders = this.readU2();
    props.cFolders = cFolders;
    props.cFiles = this.readU2();
    const flags = this.readU2();
    props.flags = flags;
    props.setID = this.readU2();
    props.iCabinet = this.readU2();

    let cbCFFolder = 0;
    if (flags & 0x0004) {
      // Reserve sizes are present only when this flag bit is set.
      const cbCFHeader = this.readU2();
      cbCFFolder = this.readU1();
      const cbCFData = this.readU1();
      this.offset += cbCFHeader; // skip abReserve
      props.cbCFHeader = cbCFHeader;
      props.cbCFFolder = cbCFFolder;
      props.cbCFData = cbCFData;
    }
    if (flags & 0x0001) {
      props.szCabinetPrev = this.readString();
      props.szDiskPrev = this.readString();
    }
    if (flags & 0x0002) {
      props.szCabinetNext = this.readString();
      props.szDiskNext = this.readString();
    }

    props.folders = [];
    for (let i = 0; i < cFolders; i++) {
      const folder = {};
      folder.coffCabStart = this.readU4();
      folder.cCFData = this.readU2();
      folder.typeCompress = this.readU2();
      this.offset += cbCFFolder; // per-folder reserved bytes
      props.folders.push(folder);
    }

    // The file table is located by absolute offset, not by position.
    this.offset = props.coffFiles;
    props.files = [];
    for (let i = 0; i < props.cFiles; i++) {
      const file = {};
      file.cbFile = this.readU4();
      file.uoffFolderStart = this.readU4();
      file.iFolder = this.readU2();
      file.date = this.readU2();
      file.time = this.readU2();
      file.attribs = this.readU2();
      file.szName = this.readString();
      props.files.push(file);
    }
  }

  /** Log the decoded property object to the console. */
  printProperties() {
    console.log(this.properties);
  }

  /** Write the loaded bytes unchanged to `newFilename`. */
  write(newFilename) {
    fs.writeFileSync(newFilename, this.data); // Simple copy
  }
}
// Usage example:
// const parser = new MSUParser('example.msu');
// parser.open();
// parser.decode();
// parser.printProperties();
// parser.write('modified.msu');
- C++ class for .MSU (CAB) handling:
#include <cstdint>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
/// Fixed-size fields of one CFFOLDER entry (reserved bytes are skipped).
struct FolderProp {
    uint32_t coffCabStart = 0;  ///< offset of the folder's first CFDATA block
    uint16_t cCFData = 0;       ///< number of CFDATA blocks in the folder
    uint16_t typeCompress = 0;  ///< compression type
};

/// Fields of one CFFILE entry.
struct FileProp {
    uint32_t cbFile = 0;           ///< uncompressed file size
    uint32_t uoffFolderStart = 0;  ///< offset of the file inside its folder
    uint16_t iFolder = 0;          ///< folder index
    uint16_t date = 0;             ///< file date
    uint16_t time = 0;             ///< file time
    uint16_t attribs = 0;          ///< file attributes
    std::string szName;            ///< file name without the NUL terminator
};

/// Minimal reader for the Cabinet (CAB) container used by .MSU packages.
///
/// The whole file is loaded into memory; decode() walks the CFHEADER,
/// CFFOLDER and CFFILE structures and keeps the result for getFolders(),
/// getFiles() and printProperties(). CFDATA blocks are not read and nothing
/// is decompressed. Every read is bounds-checked and throws
/// std::runtime_error instead of reading outside the buffer.
class MSUParser {
private:
    std::string filename;
    std::vector<uint8_t> data;
    size_t offset = 0;

    // Results of the last successful decode().
    uint32_t cbCabinet = 0;
    uint32_t coffFiles = 0;
    unsigned versionMinor = 0;
    unsigned versionMajor = 0;
    uint16_t flags = 0;
    uint16_t setID = 0;
    uint16_t iCabinet = 0;
    std::vector<FolderProp> parsedFolders;
    std::vector<FileProp> parsedFiles;

    /// Throws unless `count` bytes are available at the cursor.
    void require(size_t count) const {
        if (offset > data.size() || count > data.size() - offset) {
            throw std::runtime_error("Unexpected end of cabinet data");
        }
    }

    /// Advances the cursor by `count` bytes after checking they exist.
    void skip(size_t count) {
        require(count);
        offset += count;
    }

public:
    MSUParser(const std::string& fn) : filename(fn) {}

    /// Loads the entire file into memory and rewinds the read cursor.
    /// @throws std::runtime_error if the file cannot be opened or read.
    void open() {
        std::ifstream f(filename, std::ios::binary);
        if (!f) throw std::runtime_error("Cannot open file: " + filename);
        f.seekg(0, std::ios::end);
        const std::streamoff size = f.tellg();
        if (size < 0) throw std::runtime_error("Cannot determine size of: " + filename);
        f.seekg(0, std::ios::beg);
        std::vector<uint8_t> buffer(static_cast<size_t>(size));
        if (size > 0 && !f.read(reinterpret_cast<char*>(buffer.data()), size)) {
            throw std::runtime_error("Cannot read file: " + filename);
        }
        data.swap(buffer);
        offset = 0;
    }

    /// Returns the byte at the cursor and advances by one.
    uint8_t readU1() {
        require(1);
        return data[offset++];
    }

    /// Returns the little-endian u16 at the cursor and advances by two.
    /// Assembled byte by byte: no unaligned access, independent of host
    /// endianness.
    uint16_t readU2() {
        require(2);
        const uint16_t val = static_cast<uint16_t>(data[offset] | (data[offset + 1] << 8));
        offset += 2;
        return val;
    }

    /// Returns the little-endian u32 at the cursor and advances by four.
    uint32_t readU4() {
        require(4);
        uint32_t val = 0;
        for (int i = 3; i >= 0; --i) {
            val = (val << 8) | data[offset + static_cast<size_t>(i)];
        }
        offset += 4;
        return val;
    }

    /// Reads a NUL-terminated string and leaves the cursor after the NUL.
    /// @throws std::runtime_error if the data ends before a NUL byte.
    std::string readString() {
        const size_t start = offset;
        size_t end = start;
        while (end < data.size() && data[end] != 0) ++end;
        if (end >= data.size()) {
            throw std::runtime_error("Unterminated string in cabinet data");
        }
        offset = end + 1;
        return std::string(reinterpret_cast<const char*>(data.data() + start), end - start);
    }

    /// Parses the cabinet from the start of the loaded data.
    /// Stored results are replaced only when the whole parse succeeds.
    /// @throws std::runtime_error on a bad signature or truncated data.
    void decode() {
        offset = 0;
        std::string sig;
        for (int i = 0; i < 4; i++) sig += static_cast<char>(readU1());
        if (sig != "MSCF") throw std::runtime_error("Invalid CAB signature");
        readU4();  // reserved1
        const uint32_t cabinetSize = readU4();
        readU4();  // reserved2
        const uint32_t fileTableOffset = readU4();
        readU4();  // reserved3
        const unsigned minorVersion = readU1();
        const unsigned majorVersion = readU1();
        const uint16_t cFolders = readU2();
        const uint16_t cFiles = readU2();
        const uint16_t headerFlags = readU2();
        const uint16_t setId = readU2();
        const uint16_t cabinetIndex = readU2();

        uint16_t cbCFFolder = 0;
        if (headerFlags & 0x0004) {
            const uint16_t cbCFHeader = readU2();
            cbCFFolder = readU1();
            readU1();          // cbCFData
            skip(cbCFHeader);  // abReserve
        }
        // The optional names sit between the header and the folder table, so
        // they must be consumed or the folder entries are read from the
        // wrong position.
        if (headerFlags & 0x0001) {
            readString();  // szCabinetPrev
            readString();  // szDiskPrev
        }
        if (headerFlags & 0x0002) {
            readString();  // szCabinetNext
            readString();  // szDiskNext
        }

        std::vector<FolderProp> folders;
        folders.reserve(cFolders);
        for (uint16_t i = 0; i < cFolders; i++) {
            FolderProp fp;
            fp.coffCabStart = readU4();
            fp.cCFData = readU2();
            fp.typeCompress = readU2();
            skip(cbCFFolder);  // per-folder reserved bytes
            folders.push_back(fp);
        }

        // The file table is located by absolute offset, not by position.
        if (fileTableOffset > data.size()) {
            throw std::runtime_error("coffFiles points past the end of the file");
        }
        offset = fileTableOffset;
        std::vector<FileProp> files;
        files.reserve(cFiles);
        for (uint16_t i = 0; i < cFiles; i++) {
            FileProp fp;
            fp.cbFile = readU4();
            fp.uoffFolderStart = readU4();
            fp.iFolder = readU2();
            fp.date = readU2();
            fp.time = readU2();
            fp.attribs = readU2();
            fp.szName = readString();
            files.push_back(fp);
        }

        cbCabinet = cabinetSize;
        coffFiles = fileTableOffset;
        versionMinor = minorVersion;
        versionMajor = majorVersion;
        flags = headerFlags;
        setID = setId;
        iCabinet = cabinetIndex;
        parsedFolders.swap(folders);
        parsedFiles.swap(files);
    }

    /// Folder entries from the last successful decode().
    const std::vector<FolderProp>& getFolders() const { return parsedFolders; }

    /// File entries from the last successful decode().
    const std::vector<FileProp>& getFiles() const { return parsedFiles; }

    /// Prints the header fields, folders and files from the last decode().
    void printProperties() {
        std::cout << "signature: MSCF\n"
                  << "cbCabinet: " << cbCabinet << '\n'
                  << "coffFiles: " << coffFiles << '\n'
                  << "versionMinor: " << versionMinor << '\n'
                  << "versionMajor: " << versionMajor << '\n'
                  << "cFolders: " << parsedFolders.size() << '\n'
                  << "cFiles: " << parsedFiles.size() << '\n'
                  << "flags: 0x" << std::hex << flags << std::dec << '\n'
                  << "setID: " << setID << '\n'
                  << "iCabinet: " << iCabinet << '\n';
        for (size_t i = 0; i < parsedFolders.size(); i++) {
            const FolderProp& fp = parsedFolders[i];
            std::cout << "CFFOLDER " << i << ":\n"
                      << " coffCabStart: " << fp.coffCabStart << '\n'
                      << " cCFData: " << fp.cCFData << '\n'
                      << " typeCompress: 0x" << std::hex << fp.typeCompress << std::dec << '\n';
        }
        for (size_t i = 0; i < parsedFiles.size(); i++) {
            const FileProp& fp = parsedFiles[i];
            std::cout << "CFFILE " << i << ":\n"
                      << " cbFile: " << fp.cbFile << '\n'
                      << " uoffFolderStart: " << fp.uoffFolderStart << '\n'
                      << " iFolder: " << fp.iFolder << '\n'
                      << " date: 0x" << std::hex << fp.date << '\n'
                      << " time: 0x" << fp.time << '\n'
                      << " attribs: 0x" << fp.attribs << std::dec << '\n'
                      << " szName: " << fp.szName << '\n';
        }
        std::cout.flush();
    }

    /// Writes the loaded bytes unchanged to `newFilename`.
    /// @throws std::runtime_error if the file cannot be created or written.
    void write(const std::string& newFilename) {
        std::ofstream f(newFilename, std::ios::binary);
        if (!f) throw std::runtime_error("Cannot create file: " + newFilename);
        f.write(reinterpret_cast<const char*>(data.data()),
                static_cast<std::streamsize>(data.size()));
        if (!f) throw std::runtime_error("Cannot write file: " + newFilename);
    }
};
// Usage example:
// MSUParser parser("example.msu");
// parser.open();
// parser.decode();
// parser.printProperties();
// parser.write("modified.msu");