Task 554: .PKG File Format
Task 554: .PKG File Format
1. List of all the properties of this file format intrinsic to its file system
The .PKG file format for macOS is based on the XAR (eXtensible ARchive) format, which is a container that preserves file system metadata. The properties intrinsic to the file system are derived from the Table of Contents (TOC) XML, which captures POSIX-like file attributes similar to those in a file system's inode or stat structure. These include both required/common properties and optional ones. Not all properties are always present; some depend on the archived content (e.g., timestamps may be omitted if not preserved). The list below includes all possible properties:
- Name: The filename or directory name (string).
- Type: The entry type (e.g., "file", "directory", "symlink", "hardlink", "fifo", "block", "char").
- Mode: File permissions in octal format (e.g., "0755").
- UID: Numeric user ID of the owner (integer).
- GID: Numeric group ID of the owner (integer).
- User: String representation of the username (e.g., "root").
- Group: String representation of the group name (e.g., "wheel").
- Size: Extracted size of the file in bytes (integer; 0 for directories).
- Inode: Inode number (integer; optional, for unique identification).
- Devno: Device ID (integer; optional, for the file system device).
- Rdev: Special device ID (integer; optional, for block/char devices).
- Nlink: Number of hard links (integer; optional).
- Flags: File flags (integer; optional, e.g., BSD-style immutable flags).
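- Ctime: Inode status-change time, i.e. when the file's metadata (mode, owner, link count) last changed; on POSIX systems this is not the creation time (timestamp in Unix epoch seconds or ISO format; optional).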
- Mtime: Last modification time (timestamp in Unix epoch seconds or ISO format; optional).
- Atime: Last access time (timestamp in Unix epoch seconds or ISO format; optional).
These properties are stored in the TOC XML under each <file> element. Additional archive-specific details (e.g., data offsets, checksums, compression) exist but are not intrinsic file system properties.
2. Two direct download links for files of format .PKG
- https://swcdn.apple.com/content/downloads/38/22/093-52107-A_OOVIFW6D5T/fze2nzd9lci8ook2pmu37mw39no53j6kyi/InstallAssistant.pkg
- https://swcdn.apple.com/content/downloads/17/41/093-34000-A_IVOM0TNC43/plnlr5g9hfkg3fiwr399h1eso63m8vhiqn/InstallAssistant.pkg
3. Ghost blog embedded HTML JavaScript for drag-and-drop .PKG file dump
Drag and Drop .PKG File to Dump Properties
4. Python class for .PKG handling
import struct
import zlib
import xml.etree.ElementTree as ET
import os
class PkgHandler:
    """Read, inspect and re-write the table of contents (TOC) of a .pkg file.

    A flat macOS .pkg is a XAR archive: a big-endian header, a
    zlib-compressed XML TOC, then a heap holding the TOC checksum and the
    archived file data.

    Attributes:
        filepath: Path of the archive to read, or None.
        header: Tuple ``(magic, size, version, toc_comp_len, toc_uncomp_len,
            cksum_alg)`` from the last successful ``read()``, or None.
        toc_xml: Root ``Element`` of the parsed TOC, or None.
        properties: One dict per top-level ``<file>`` entry; nested entries
            are stored under the ``'sub_files'`` key.
        heap: Raw bytes that followed the compressed TOC in the source file.
    """

    XAR_MAGIC = 0x78617221  # ASCII "xar!"
    # magic, size, version, toc_comp_len, toc_uncomp_len, cksum_alg
    HEADER_FORMAT = '>IHHQQI'
    PROPERTY_NAMES = (
        'name', 'type', 'mode', 'uid', 'gid', 'user', 'group', 'size',
        'inode', 'devno', 'rdev', 'nlink', 'flags', 'ctime', 'mtime', 'atime',
    )

    def __init__(self, filepath=None):
        self.filepath = filepath
        self.header = None
        self.toc_xml = None
        self.properties = []  # List of dicts for each file's properties
        self.heap = b''
        # Header bytes beyond the fixed fields (e.g. a checksum-algorithm
        # name); kept verbatim so write() can reproduce them.
        self._header_extra = b''

    def read(self):
        """Parse the archive at ``self.filepath`` and populate the handler.

        State is only replaced once the whole TOC has been parsed, so a
        failed read leaves the previously loaded data untouched.

        Raises:
            ValueError: No filepath was given, the file is not a XAR
                archive, or its header/TOC is truncated or inconsistent.
            zlib.error: The TOC bytes are not a valid zlib stream.
            xml.etree.ElementTree.ParseError: The TOC is not well-formed XML.
        """
        if not self.filepath:
            raise ValueError("No filepath provided")
        with open(self.filepath, 'rb') as f:
            data = f.read()

        base_size = struct.calcsize(self.HEADER_FORMAT)
        if len(data) < base_size:
            raise ValueError("File too short to contain a XAR header")
        header = struct.unpack_from(self.HEADER_FORMAT, data, 0)
        magic, hsize, _version, toc_comp_len, toc_uncomp_len, _alg = header
        if magic != self.XAR_MAGIC:
            raise ValueError("Invalid PKG/XAR magic")

        toc_end = hsize + toc_comp_len
        if hsize < base_size or toc_end > len(data):
            raise ValueError("Truncated or corrupt PKG/XAR header")

        toc_uncomp = zlib.decompress(data[hsize:toc_end])
        if len(toc_uncomp) != toc_uncomp_len:
            raise ValueError("TOC decompression failed")

        # NOTE: xml.etree does not guard against entity-expansion attacks;
        # use a hardened parser if archives come from untrusted sources.
        toc_xml = ET.fromstring(toc_uncomp)
        properties = [
            self._extract_props(file_elem)
            for file_elem in toc_xml.findall('./toc/file')
        ]

        self.header = header
        self.toc_xml = toc_xml
        self.properties = properties
        self.heap = data[toc_end:]
        self._header_extra = data[base_size:hsize]

    @classmethod
    def _extract_props(cls, elem):
        """Return the known properties of one ``<file>`` element as a dict.

        Only direct children are consulted, so a directory never picks up
        values that belong to the entries nested inside it.
        """
        props = {}
        for prop in cls.PROPERTY_NAMES:
            sub = elem.find(prop)
            if sub is not None:
                props[prop] = sub.text
        sub_files = [cls._extract_props(child) for child in elem.findall('file')]
        if sub_files:
            props['sub_files'] = sub_files
        return props

    def print_properties(self):
        """Print every loaded entry, nested entries indented below parents."""
        if not self.properties:
            print("No properties loaded. Call read() first.")
            return
        for i, props in enumerate(self.properties, 1):
            print(f"File {i}:")
            self._print_props(props)

    @classmethod
    def _print_props(cls, props, indent=''):
        """Print one property dict, recursing into ``'sub_files'``."""
        for key, value in props.items():
            if key == 'sub_files':
                for sub in value:
                    cls._print_props(sub, indent + ' ')
            else:
                print(f"{indent}{key.capitalize()}: {value}")
        print()

    def write(self, new_filepath):
        """Write the (possibly modified) TOC plus the original heap.

        The TOC is re-serialised and re-compressed, the header lengths are
        updated, and the heap read by ``read()`` is appended unchanged apart
        from the TOC checksum, which is recomputed when the TOC declares a
        supported ``<checksum>``. File data in the heap is not rebuilt, so
        TOC edits must not change data offsets or lengths. Any package
        signature stored in the archive is not regenerated.

        Raises:
            ValueError: No TOC has been loaded or assigned.
        """
        # An Element's truth value reflects whether it has children, so the
        # "is it set" test has to be an identity check.
        if self.toc_xml is None:
            raise ValueError("No TOC loaded. Call read() first or set manually.")

        toc_str = ET.tostring(self.toc_xml, encoding='utf-8', method='xml')
        toc_comp = zlib.compress(toc_str)

        # Fall back to sane defaults when the TOC was assigned by hand and
        # no header was ever read.
        version = self.header[2] if self.header else 1
        cksum_alg = self.header[5] if self.header else 0
        base_size = struct.calcsize(self.HEADER_FORMAT)
        header = struct.pack(
            self.HEADER_FORMAT,
            self.XAR_MAGIC,
            base_size + len(self._header_extra),
            version,
            len(toc_comp),
            len(toc_str),
            cksum_alg,
        )
        heap = self._heap_with_fresh_checksum(toc_comp)

        with open(new_filepath, 'wb') as f:
            f.write(header)
            f.write(self._header_extra)
            f.write(toc_comp)
            f.write(heap)

    def _heap_with_fresh_checksum(self, toc_comp):
        """Return the heap with the TOC checksum recomputed over *toc_comp*.

        The checksum location and algorithm come from ``<toc><checksum>``.
        The heap is returned unchanged when there is no heap, no checksum
        entry, an unsupported algorithm, or a location that does not fit.
        """
        import hashlib

        heap = self.heap
        checksum = self.toc_xml.find('./toc/checksum')
        if not heap or checksum is None:
            return heap
        try:
            offset = int(checksum.findtext('offset', ''))
            size = int(checksum.findtext('size', ''))
            style = checksum.get('style', '').lower()
            digest = hashlib.new(style, toc_comp).digest()
        except (TypeError, ValueError):
            return heap
        if offset < 0 or len(digest) != size or offset + size > len(heap):
            return heap
        return heap[:offset] + digest + heap[offset + size:]
# Example usage:
# handler = PkgHandler('example.pkg')
# handler.read()
# handler.print_properties()
# handler.write('modified.pkg')
5. Java class for .PKG handling
import java.io.*;
import java.nio.*;
import java.nio.file.*;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.zip.*;
import javax.xml.XMLConstants;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/**
 * Reads, prints and re-writes the table of contents (TOC) of a macOS .pkg file.
 *
 * <p>A flat .pkg is a XAR archive: a big-endian header, a zlib-compressed XML
 * TOC, then a heap holding the TOC checksum and the archived file data.
 */
public class PkgHandler {
    /** Magic number ("xar!") that opens every XAR archive. */
    private static final int XAR_MAGIC = 0x78617221;
    /** Size in bytes of the fixed header fields. */
    private static final int BASE_HEADER_SIZE = 28;
    /** Child elements of {@code <file>} that are reported as properties. */
    private static final String[] PROPERTY_NAMES = {
        "name", "type", "mode", "uid", "gid", "user", "group", "size",
        "inode", "devno", "rdev", "nlink", "flags", "ctime", "mtime", "atime"
    };

    private String filepath;
    private ByteBuffer buffer;
    private Document tocDoc;
    private List<Map<String, String>> properties = new ArrayList<>();
    private int version = 1;
    private int cksumAlg = 0;
    /** Header bytes beyond the fixed fields, kept verbatim for write(). */
    private byte[] headerExtra = new byte[0];
    /** Everything that followed the compressed TOC in the source file. */
    private byte[] heap = new byte[0];

    public PkgHandler(String filepath) {
        this.filepath = filepath;
    }

    /**
     * Parses the archive and loads one property map per {@code <file>} element
     * (nested entries are flattened in document order).
     *
     * @throws Exception if the file cannot be read, is not a XAR archive, has a
     *         truncated or inconsistent header, or its TOC cannot be
     *         decompressed or parsed
     */
    public void read() throws Exception {
        byte[] data = Files.readAllBytes(Paths.get(filepath));
        if (data.length < BASE_HEADER_SIZE) {
            throw new Exception("File too short to contain a XAR header");
        }
        buffer = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN);

        int magic = buffer.getInt(0);
        if (magic != XAR_MAGIC) throw new Exception("Invalid PKG/XAR magic");
        // The 16-bit header fields are unsigned; mask off the sign extension.
        int hsize = buffer.getShort(4) & 0xFFFF;
        int fileVersion = buffer.getShort(6) & 0xFFFF;
        long tocCompLen = buffer.getLong(8);
        long tocUncompLen = buffer.getLong(16);
        int fileCksumAlg = buffer.getInt(24);

        long remaining = (long) data.length - hsize;
        if (hsize < BASE_HEADER_SIZE || tocCompLen < 0 || tocCompLen > remaining
                || tocUncompLen < 0 || tocUncompLen > Integer.MAX_VALUE - 8) {
            throw new Exception("Truncated or corrupt PKG/XAR header");
        }
        int tocEnd = hsize + (int) tocCompLen;

        byte[] tocUncomp = inflateExactly(data, hsize, (int) tocCompLen, (int) tocUncompLen);

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // The TOC comes from the file being inspected; cap entity expansion.
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new ByteArrayInputStream(tocUncomp)));

        List<Map<String, String>> parsed = new ArrayList<>();
        NodeList fileNodes = doc.getElementsByTagName("file");
        for (int i = 0; i < fileNodes.getLength(); i++) {
            parsed.add(directProperties((Element) fileNodes.item(i)));
        }

        // Replace state only after everything parsed successfully.
        tocDoc = doc;
        properties.clear();
        properties.addAll(parsed);
        version = fileVersion;
        cksumAlg = fileCksumAlg;
        headerExtra = Arrays.copyOfRange(data, BASE_HEADER_SIZE, hsize);
        heap = Arrays.copyOfRange(data, tocEnd, data.length);
    }

    /** Inflates {@code len} bytes of zlib data and requires exactly {@code expected} bytes out. */
    private static byte[] inflateExactly(byte[] src, int off, int len, int expected) throws Exception {
        Inflater inflater = new Inflater();
        try {
            inflater.setInput(src, off, len);
            byte[] out = new byte[expected];
            int total = 0;
            // One inflate() call is not guaranteed to produce all the output.
            while (total < expected) {
                int n = inflater.inflate(out, total, expected - total);
                if (n == 0) {
                    break; // finished, out of input, or a preset dictionary is required
                }
                total += n;
            }
            // A full buffer with more data pending means the header understated the size.
            boolean overflow = total == expected && !inflater.finished()
                    && inflater.inflate(new byte[1]) > 0;
            if (total != expected || overflow) {
                throw new Exception("TOC decompression failed");
            }
            return out;
        } finally {
            inflater.end();
        }
    }

    /** Returns the first direct child element of {@code parent} named {@code tag}, or null. */
    private static Element firstChildElement(Node parent, String tag) {
        for (Node n = parent.getFirstChild(); n != null; n = n.getNextSibling()) {
            if (n.getNodeType() == Node.ELEMENT_NODE && tag.equals(n.getNodeName())) {
                return (Element) n;
            }
        }
        return null;
    }

    /**
     * Collects the known properties of one {@code <file>} element. Only direct
     * children are read, so a directory never reports values that belong to
     * entries nested inside it.
     */
    private static Map<String, String> directProperties(Element fileElem) {
        // LinkedHashMap keeps PROPERTY_NAMES order so output is deterministic.
        Map<String, String> props = new LinkedHashMap<>();
        for (String prop : PROPERTY_NAMES) {
            Element sub = firstChildElement(fileElem, prop);
            if (sub != null) props.put(prop, sub.getTextContent());
        }
        return props;
    }

    /** Prints the properties of every loaded entry to standard output. */
    public void printProperties() {
        if (properties.isEmpty()) {
            System.out.println("No properties loaded. Call read() first.");
            return;
        }
        for (int i = 0; i < properties.size(); i++) {
            System.out.println("File " + (i + 1) + ":");
            Map<String, String> props = properties.get(i);
            for (Map.Entry<String, String> entry : props.entrySet()) {
                System.out.println(entry.getKey().substring(0, 1).toUpperCase() + entry.getKey().substring(1) + ": " + entry.getValue());
            }
            System.out.println();
        }
    }

    /**
     * Writes the loaded TOC and the original heap to {@code newFilepath}.
     *
     * <p>The TOC is re-serialised and re-compressed and the header lengths are
     * updated. The heap is appended unchanged apart from the TOC checksum,
     * which is recomputed when the TOC declares a supported one. File data is
     * not rebuilt and any package signature is not regenerated.
     *
     * @throws Exception if no TOC is loaded or the file cannot be written
     */
    public void write(String newFilepath) throws Exception {
        if (tocDoc == null) throw new Exception("No TOC loaded. Call read() first.");

        ByteArrayOutputStream xmlOut = new ByteArrayOutputStream();
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.transform(new DOMSource(tocDoc), new StreamResult(xmlOut));
        byte[] tocBytes = xmlOut.toByteArray();
        byte[] tocComp = deflate(tocBytes);

        ByteBuffer header = ByteBuffer.allocate(BASE_HEADER_SIZE).order(ByteOrder.BIG_ENDIAN);
        header.putInt(XAR_MAGIC)
              .putShort((short) (BASE_HEADER_SIZE + headerExtra.length))
              .putShort((short) version)
              .putLong(tocComp.length)
              .putLong(tocBytes.length)
              .putInt(cksumAlg);

        byte[] heapOut = heapWithFreshChecksum(tocComp);
        try (OutputStream out = Files.newOutputStream(Paths.get(newFilepath))) {
            out.write(header.array());
            out.write(headerExtra);
            out.write(tocComp);
            out.write(heapOut);
        }
    }

    /** Compresses {@code input} into a zlib stream. */
    private static byte[] deflate(byte[] input) {
        Deflater deflater = new Deflater();
        try {
            deflater.setInput(input);
            deflater.finish();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            while (!deflater.finished()) {
                int n = deflater.deflate(chunk);
                out.write(chunk, 0, n);
            }
            return out.toByteArray();
        } finally {
            deflater.end();
        }
    }

    /**
     * Returns the heap with the TOC checksum recomputed over {@code tocComp}.
     * The location and algorithm come from {@code <toc><checksum>}; the heap is
     * returned unchanged when that entry is missing, unsupported or does not fit.
     */
    private byte[] heapWithFreshChecksum(byte[] tocComp) {
        if (heap.length == 0 || tocDoc.getDocumentElement() == null) return heap;
        Element toc = firstChildElement(tocDoc.getDocumentElement(), "toc");
        Element checksum = toc == null ? null : firstChildElement(toc, "checksum");
        if (checksum == null) return heap;
        Element offsetElem = firstChildElement(checksum, "offset");
        Element sizeElem = firstChildElement(checksum, "size");
        String algorithm = digestNameFor(checksum.getAttribute("style"));
        if (offsetElem == null || sizeElem == null || algorithm == null) return heap;
        try {
            long offset = Long.parseLong(offsetElem.getTextContent().trim());
            long size = Long.parseLong(sizeElem.getTextContent().trim());
            byte[] digest = MessageDigest.getInstance(algorithm).digest(tocComp);
            if (offset < 0 || digest.length != size || offset + size > heap.length) {
                return heap;
            }
            byte[] patched = heap.clone();
            System.arraycopy(digest, 0, patched, (int) offset, digest.length);
            return patched;
        } catch (NumberFormatException | NoSuchAlgorithmException e) {
            return heap;
        }
    }

    /** Maps a TOC checksum style to a MessageDigest algorithm name, or null if unsupported. */
    private static String digestNameFor(String style) {
        switch (style.toLowerCase(Locale.ROOT)) {
            case "sha1": return "SHA-1";
            case "md5": return "MD5";
            case "sha256": return "SHA-256";
            case "sha512": return "SHA-512";
            default: return null;
        }
    }

    // Example usage:
    // public static void main(String[] args) throws Exception {
    // PkgHandler handler = new PkgHandler("example.pkg");
    // handler.read();
    // handler.printProperties();
    // handler.write("modified.pkg");
    // }
}
6. JavaScript class for .PKG handling
const fs = require('fs'); // For Node.js; remove for browser
const pako = require('pako'); // zlib decompression
const { DOMParser } = require('xmldom'); // For Node.js XML parsing
/**
 * Reads, prints and re-writes the table of contents (TOC) of a macOS .pkg
 * file. A flat .pkg is a XAR archive: a big-endian header, a zlib-compressed
 * XML TOC, then a heap holding the TOC checksum and the archived file data.
 */
class PkgHandler {
  /** Size in bytes of the fixed XAR header fields. */
  static get BASE_HEADER_SIZE() {
    return 28;
  }

  /** Child elements of <file> that are reported as properties. */
  static get PROPERTY_NAMES() {
    return ['name', 'type', 'mode', 'uid', 'gid', 'user', 'group', 'size', 'inode', 'devno', 'rdev', 'nlink', 'flags', 'ctime', 'mtime', 'atime'];
  }

  /**
   * @param {?string} filepath Path of the archive to read.
   */
  constructor(filepath = null) {
    this.filepath = filepath;
    this.header = null;
    this.tocXml = null;
    this.properties = [];
    // Header bytes beyond the fixed fields, kept verbatim for write().
    this.headerExtra = Buffer.alloc(0);
    // Everything that followed the compressed TOC in the source file.
    this.heap = Buffer.alloc(0);
  }

  /**
   * Parses the archive and loads one property object per <file> element
   * (nested entries are flattened in document order).
   * @throws {Error} If no path was given, the file is not a XAR archive, its
   *     header is truncated or inconsistent, or the TOC cannot be inflated.
   */
  read() {
    if (!this.filepath) throw new Error('No filepath provided');
    const data = fs.readFileSync(this.filepath);
    const baseSize = PkgHandler.BASE_HEADER_SIZE;
    if (data.length < baseSize) throw new Error('File too short to contain a XAR header');
    // A Buffer may be a window onto a larger pooled ArrayBuffer, so the view
    // must be anchored at the Buffer's own offset and length.
    const view = new DataView(data.buffer, data.byteOffset, data.byteLength);

    // Parse header (big-endian)
    const magic = view.getUint32(0, false);
    if (magic !== 0x78617221) throw new Error('Invalid PKG/XAR magic');
    const hsize = view.getUint16(4, false);
    const version = view.getUint16(6, false);
    const tocCompLen = Number(view.getBigUint64(8, false));
    const tocUncompLen = Number(view.getBigUint64(16, false));
    const cksumAlg = view.getUint32(24, false);

    const tocEnd = hsize + tocCompLen;
    if (hsize < baseSize || tocEnd > data.length) {
      throw new Error('Truncated or corrupt PKG/XAR header');
    }

    const tocUncomp = pako.inflate(data.subarray(hsize, tocEnd));
    if (tocUncomp.length !== tocUncompLen) throw new Error('TOC decompression failed');

    // pako returns a plain Uint8Array, whose toString() ignores the encoding
    // argument and joins the byte values with commas; decode via Buffer.
    const xmlText = Buffer.from(tocUncomp).toString('utf-8');
    const tocXml = new DOMParser().parseFromString(xmlText, 'text/xml');

    const properties = [];
    const fileElems = tocXml.getElementsByTagName('file');
    // Index loop: xmldom node lists are array-like but not reliably iterable.
    for (let i = 0; i < fileElems.length; i++) {
      properties.push(PkgHandler._directProperties(fileElems.item(i)));
    }

    // Replace state only after everything parsed successfully.
    this.header = { magic, hsize, version, tocCompLen, tocUncompLen, cksumAlg };
    this.tocXml = tocXml;
    this.properties = properties;
    this.headerExtra = Buffer.from(data.subarray(baseSize, hsize));
    this.heap = Buffer.from(data.subarray(tocEnd));
  }

  /**
   * Returns the first direct child element of `parent` named `tag`, or null.
   */
  static _firstChildElement(parent, tag) {
    for (let node = parent.firstChild; node; node = node.nextSibling) {
      if (node.nodeType === 1 && node.nodeName === tag) return node;
    }
    return null;
  }

  /**
   * Collects the known properties of one <file> element. Only direct children
   * are read, so a directory never reports values that belong to entries
   * nested inside it.
   */
  static _directProperties(fileElem) {
    const props = {};
    for (const prop of PkgHandler.PROPERTY_NAMES) {
      const sub = PkgHandler._firstChildElement(fileElem, prop);
      if (sub) props[prop] = sub.textContent;
    }
    return props;
  }

  /** Prints the properties of every loaded entry to the console. */
  printProperties() {
    if (this.properties.length === 0) {
      console.log('No properties loaded. Call read() first.');
      return;
    }
    this.properties.forEach((props, i) => {
      console.log(`File ${i + 1}:`);
      Object.entries(props).forEach(([key, value]) => {
        console.log(`${key.charAt(0).toUpperCase() + key.slice(1)}: ${value}`);
      });
      console.log('');
    });
  }

  /**
   * Writes the loaded TOC and the original heap to `newFilepath`.
   *
   * The TOC is re-serialised and re-compressed and the header lengths are
   * updated. The heap is appended unchanged apart from the TOC checksum,
   * which is recomputed when the TOC declares a supported one. File data is
   * not rebuilt and any package signature is not regenerated.
   * @throws {Error} If no TOC is loaded.
   */
  write(newFilepath) {
    if (!this.tocXml) throw new Error('No TOC loaded. Call read() first.');
    const { XMLSerializer } = require('xmldom');

    const tocBytes = Buffer.from(new XMLSerializer().serializeToString(this.tocXml), 'utf-8');
    const tocComp = Buffer.from(pako.deflate(tocBytes));

    const prior = this.header || {};
    const headerExtra = this.headerExtra || Buffer.alloc(0);
    const header = Buffer.alloc(PkgHandler.BASE_HEADER_SIZE);
    header.writeUInt32BE(0x78617221, 0);
    header.writeUInt16BE(PkgHandler.BASE_HEADER_SIZE + headerExtra.length, 4);
    header.writeUInt16BE(prior.version || 1, 6);
    // 64-bit lengths written as two 32-bit halves (high word first).
    header.writeUInt32BE(Math.floor(tocComp.length / 0x100000000), 8);
    header.writeUInt32BE(tocComp.length % 0x100000000, 12);
    header.writeUInt32BE(Math.floor(tocBytes.length / 0x100000000), 16);
    header.writeUInt32BE(tocBytes.length % 0x100000000, 20);
    header.writeUInt32BE(prior.cksumAlg || 0, 24);

    const heap = this._heapWithFreshChecksum(tocComp);
    fs.writeFileSync(newFilepath, Buffer.concat([header, headerExtra, tocComp, heap]));
  }

  /**
   * Returns the heap with the TOC checksum recomputed over `tocComp`. The
   * location and algorithm come from <toc><checksum>; the heap is returned
   * unchanged when that entry is missing, unsupported or does not fit.
   */
  _heapWithFreshChecksum(tocComp) {
    const heap = this.heap || Buffer.alloc(0);
    const root = this.tocXml.documentElement;
    const toc = root && PkgHandler._firstChildElement(root, 'toc');
    const checksum = toc && PkgHandler._firstChildElement(toc, 'checksum');
    if (heap.length === 0 || !checksum) return heap;

    const offsetElem = PkgHandler._firstChildElement(checksum, 'offset');
    const sizeElem = PkgHandler._firstChildElement(checksum, 'size');
    const offset = offsetElem ? parseInt(offsetElem.textContent, 10) : NaN;
    const size = sizeElem ? parseInt(sizeElem.textContent, 10) : NaN;
    const style = (checksum.getAttribute('style') || '').toLowerCase();

    let digest;
    try {
      digest = require('crypto').createHash(style).update(tocComp).digest();
    } catch (err) {
      return heap; // unknown or absent digest algorithm
    }
    if (!Number.isInteger(offset) || !Number.isInteger(size) || offset < 0 ||
        digest.length !== size || offset + size > heap.length) {
      return heap;
    }
    const patched = Buffer.from(heap);
    digest.copy(patched, offset);
    return patched;
  }
}
// Example usage:
// const handler = new PkgHandler('example.pkg');
// handler.read();
// handler.printProperties();
// handler.write('modified.pkg');
7. C "class" for .PKG handling
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <zlib.h>
// No XML parser is linked in this demo: the decompressed TOC is kept as a raw string and printed as-is.
// In production, link with libxml2 or similar to parse the TOC and extract the per-file properties.
// State for one .pkg (XAR) archive: the source path, the header fields
// decoded by pkg_read, and the decompressed TOC XML.
typedef struct {
// Heap-allocated copy of the archive path (made with strdup in pkg_create).
char* filepath;
// Header fields, decoded big-endian from the first 28 bytes of the file.
uint32_t magic;
uint16_t hsize;
uint16_t version;
uint64_t toc_comp_len;
uint64_t toc_uncomp_len;
uint32_t cksum_alg;
// NUL-terminated, heap-allocated TOC; NULL until pkg_read has decompressed one.
char* toc_xml; // Raw XML string
// Properties would be parsed into a list of structs; stubbed
} PkgHandler;
/*
 * Allocate a handler for the archive at `filepath`.
 *
 * The path is copied, every header field starts at zero and toc_xml starts
 * as NULL. Returns NULL if `filepath` is NULL or memory cannot be allocated.
 * Release the result with pkg_destroy().
 */
PkgHandler* pkg_create(const char* filepath) {
    if (filepath == NULL) {
        return NULL;
    }
    /* calloc zeroes the header fields so they are never read uninitialised. */
    PkgHandler* handler = (PkgHandler*)calloc(1, sizeof(PkgHandler));
    if (handler == NULL) {
        return NULL;
    }
    handler->toc_xml = NULL;
    handler->filepath = strdup(filepath);
    if (handler->filepath == NULL) {
        free(handler);
        return NULL;
    }
    return handler;
}
/* Decode an unsigned big-endian integer of `nbytes` (<= 8) bytes at `p`. */
static uint64_t pkg_read_be(const uint8_t* p, size_t nbytes) {
    uint64_t value = 0;
    for (size_t i = 0; i < nbytes; i++) {
        value = (value << 8) | p[i];
    }
    return value;
}

/*
 * Load the archive named by handler->filepath: decode the header fields into
 * the handler and decompress the TOC into handler->toc_xml (NUL-terminated).
 *
 * Errors are reported on stdout/stderr and the function returns early. A TOC
 * loaded by an earlier call is released once a valid header has been read,
 * so toc_xml is NULL after a failed allocation or decompression.
 */
void pkg_read(PkgHandler* handler) {
    if (handler == NULL || handler->filepath == NULL) {
        printf("No handler or file path provided\n");
        return;
    }
    FILE* f = fopen(handler->filepath, "rb");
    if (!f) {
        perror("Failed to open file");
        return;
    }
    long filesize = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        filesize = ftell(f);
    }
    if (filesize < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("Failed to determine file size");
        fclose(f);
        return;
    }
    if (filesize < 28) {
        printf("File too short to contain a XAR header\n");
        fclose(f);
        return;
    }
    uint8_t* data = (uint8_t*)malloc((size_t)filesize);
    if (data == NULL) {
        printf("Out of memory reading file\n");
        fclose(f);
        return;
    }
    size_t got = fread(data, 1, (size_t)filesize, f);
    fclose(f);
    if (got != (size_t)filesize) {
        printf("Failed to read file\n");
        free(data);
        return;
    }

    /* Parse header (big-endian). Decoding through uint64_t avoids shifting
     * a promoted signed int into its sign bit. */
    handler->magic = (uint32_t)pkg_read_be(data, 4);
    if (handler->magic != 0x78617221) {
        printf("Invalid PKG/XAR magic\n");
        free(data);
        return;
    }
    handler->hsize = (uint16_t)pkg_read_be(data + 4, 2);
    handler->version = (uint16_t)pkg_read_be(data + 6, 2);
    handler->toc_comp_len = pkg_read_be(data + 8, 8);
    handler->toc_uncomp_len = pkg_read_be(data + 16, 8);
    handler->cksum_alg = (uint32_t)pkg_read_be(data + 24, 4);

    /* The compressed TOC must lie inside the file, and the declared
     * uncompressed size (plus the terminator) must fit zlib's length type. */
    uint64_t avail = (uint64_t)filesize;
    uLongf uncomp_len = (uLongf)handler->toc_uncomp_len;
    if (handler->hsize < 28 || handler->hsize > avail ||
        handler->toc_comp_len > avail - handler->hsize ||
        (uint64_t)uncomp_len != handler->toc_uncomp_len ||
        uncomp_len == (uLongf)-1) {
        printf("Truncated or corrupt PKG/XAR header\n");
        free(data);
        return;
    }

    /* Drop any TOC left over from an earlier call before replacing it. */
    free(handler->toc_xml);
    handler->toc_xml = NULL;

    char* xml = (char*)malloc((size_t)uncomp_len + 1);
    if (xml == NULL) {
        printf("Out of memory decompressing TOC\n");
        free(data);
        return;
    }
    int ret = uncompress((Bytef*)xml, &uncomp_len,
                         (const Bytef*)(data + handler->hsize),
                         (uLongf)handler->toc_comp_len);
    free(data);
    if (ret != Z_OK) {
        printf("TOC decompression failed: %d\n", ret);
        free(xml);
        return;
    }
    xml[uncomp_len] = '\0'; /* Null-terminate */
    handler->toc_xml = xml;
}
/*
 * Print the loaded TOC to stdout.
 *
 * This demo has no XML parser, so the raw TOC XML is dumped under a heading
 * instead of individual properties. If no TOC is loaded, a hint is printed.
 */
void pkg_print_properties(PkgHandler* handler) {
    const char* xml = handler->toc_xml;
    if (xml == NULL) {
        fputs("No TOC loaded. Call pkg_read first.\n", stdout);
        return;
    }
    /* Stub: a full implementation would parse the XML (e.g. with libxml2)
     * and print name, type, etc. for each entry. */
    fputs("TOC XML (properties embedded):\n", stdout);
    fputs(xml, stdout);
    fputc('\n', stdout);
}
/*
 * Placeholder for writing the archive to `new_filepath`.
 *
 * Nothing is written yet: the function only reports what it would do, or
 * that no TOC has been loaded. A real implementation would compress the TOC,
 * build the header and write the file (the heap is not kept by this handler).
 */
void pkg_write(PkgHandler* handler, const char* new_filepath) {
    if (handler->toc_xml != NULL) {
        printf("Write stub: Would write to %s\n", new_filepath);
    } else {
        printf("No TOC loaded. Call pkg_read first.\n");
    }
}
/*
 * Release a handler together with its path copy and TOC buffer.
 * Passing NULL is a no-op, mirroring free().
 */
void pkg_destroy(PkgHandler* handler) {
    if (handler == NULL) {
        return;
    }
    free(handler->filepath);
    free(handler->toc_xml);
    free(handler);
}
// Example usage:
// int main() {
// PkgHandler* handler = pkg_create("example.pkg");
// pkg_read(handler);
// pkg_print_properties(handler);
// pkg_write(handler, "modified.pkg");
// pkg_destroy(handler);
// return 0;
// }