Task 479: .OGG File Format

Task 479: .OGG File Format

Ogg File Format Specifications

The .OGG file format is an open, royalty-free multimedia container developed by Xiph.org, primarily used for streaming and storing audio (e.g., with Vorbis codec), but it can also contain video, text, and metadata. It structures data into pages for efficient framing, synchronization, and error recovery, without a central header—instead, information is distributed across beginning-of-stream (BOS) pages.

1. List of Properties Intrinsic to the File Format

Based on the Ogg framing specification, the key structural properties are defined per page (the file is a sequence of pages). These are the core elements that define the format's layout and can be extracted from any .OGG file:

  • Capture Pattern: 4-byte magic string 'OggS' (bytes 0-3) for page synchronization.
  • Stream Structure Version: 1-byte value (byte 4), always 0x00 for the current specification.
  • Header Type Flag: 1-byte bitfield (byte 5) with the following bits:
      ◦ Bit 0 (0x01): Packet continuation flag (set if the first packet on this page is a continuation from the previous page).
      ◦ Bit 1 (0x02): Beginning of Stream (BOS) flag (set if this is the first page of a logical bitstream).
      ◦ Bit 2 (0x04): End of Stream (EOS) flag (set if this is the last page of a logical bitstream).
  • Absolute Granule Position: 8-byte signed integer (bytes 6-13, little-endian), representing the absolute position (e.g., sample count for audio) after all packets on this page complete; -1 if no packets finish.
  • Stream Serial Number: 4-byte unsigned integer (bytes 14-17, little-endian), unique identifier for the logical stream within the physical file.
  • Page Sequence Number: 4-byte unsigned integer (bytes 18-21, little-endian), incremental counter for pages in the logical stream.
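  • Page Checksum: 4-byte unsigned integer (bytes 22-25, little-endian), CRC-32 checksum of the entire page (header with the checksum field zeroed + segment table + data). Ogg uses the generator polynomial 0x04c11db7 with an initial value of 0, no bit reflection and no final XOR, so the result differs from the more common zlib/PNG CRC-32.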
  • Page Segments: 1-byte value (byte 26), number of entries (0-255) in the segment table.
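  • Segment Table: Variable-length array of 1-byte values (bytes 27 to 26 + page_segments), the lacing values giving the size in bytes (0-255) of each segment in the page body. A lacing value of 255 means the packet continues in the next segment; a value below 255 (including 0) marks the last segment of a packet. The page body length is the sum of all lacing values.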

These properties are repeated for each page in the file. Additional codec-specific properties (e.g., Vorbis metadata like artist or bitrate) may exist in the codec header packets carried in the first pages of each logical stream (the BOS page and the pages immediately following it), but they are not intrinsic to the Ogg container itself.

3. Ghost Blog Embedded HTML/JavaScript for Drag-and-Drop .OGG Property Dumper

This is a self-contained HTML snippet with JavaScript that can be embedded in a Ghost blog post (or any HTML page). It allows dragging and dropping a .OGG file, parses it, and dumps the properties to the screen. It uses DataView for byte-level parsing.

Drag and drop a .OGG file here

4. Python Class for .OGG Handling

This Python class uses struct for decoding. It can open a file, decode and print properties, and write the parsed data back to a new file (as a copy, for simplicity; modification can be added by altering the parsed pages).

import struct
import os

class OggHandler:
    """Parse, inspect and re-serialise the pages of an Ogg physical bitstream.

    Each decoded page is stored in ``self.pages`` as a dict with the keys
    ``capture``, ``version``, ``header_type``, ``continuation``, ``bos``,
    ``eos``, ``granule``, ``serial``, ``sequence``, ``checksum``,
    ``segments``, ``segment_table`` (list of ints) and ``data`` (bytes).
    """

    # 256-entry lookup table for the Ogg page CRC; built on first use.
    _crc_table = None

    def __init__(self):
        self.pages = []

    def read_and_decode(self, filename):
        """Read *filename* and append every Ogg page it contains to ``self.pages``.

        Pages are appended, so calling this twice accumulates the pages of
        both files.

        Raises:
            ValueError: if a page does not start with ``OggS`` or if the file
                ends in the middle of a page header, segment table or body.
            OSError: if the file cannot be opened or read.
        """
        with open(filename, 'rb') as f:
            data = f.read()
        total = len(data)
        offset = 0
        while offset < total:
            if data[offset:offset + 4] != b'OggS':
                raise ValueError(f"Invalid capture pattern at offset {offset}")
            if offset + 27 > total:
                raise ValueError(f"Truncated page header at offset {offset}")
            # Bytes 4..26 of the fixed header, all little-endian and unpadded.
            (version, header_type, granule, serial, sequence, checksum,
             segments) = struct.unpack_from('<BBqIIIB', data, offset + 4)
            table_start = offset + 27
            data_start = table_start + segments
            if data_start > total:
                raise ValueError(f"Truncated segment table at offset {offset}")
            segment_table = list(data[table_start:data_start])
            # The page body length is the sum of the lacing values.
            data_end = data_start + sum(segment_table)
            if data_end > total:
                raise ValueError(f"Truncated page data at offset {offset}")
            self.pages.append({
                'capture': 'OggS',
                'version': version,
                'header_type': header_type,
                'continuation': bool(header_type & 0x01),
                'bos': bool(header_type & 0x02),
                'eos': bool(header_type & 0x04),
                'granule': granule,
                'serial': serial,
                'sequence': sequence,
                'checksum': checksum,
                'segments': segments,
                'segment_table': segment_table,
                'data': data[data_start:data_end],
            })
            offset = data_end

    def print_properties(self):
        """Print the header fields of every decoded page to stdout."""
        for i, page in enumerate(self.pages):
            print(f"Page {i+1}:")
            print(f"  Capture Pattern: {page['capture']}")
            print(f"  Stream Structure Version: {page['version']}")
            print(f"  Header Type Flag: 0x{page['header_type']:02x} (Continuation: {page['continuation']}, BOS: {page['bos']}, EOS: {page['eos']})")
            print(f"  Absolute Granule Position: {page['granule']}")
            print(f"  Stream Serial Number: {page['serial']}")
            print(f"  Page Sequence Number: {page['sequence']}")
            print(f"  Page Checksum: 0x{page['checksum']:08x}")
            print(f"  Page Segments: {page['segments']}")
            print(f"  Segment Table: {page['segment_table']}")
            print()

    def write(self, output_filename):
        """Serialise ``self.pages`` to *output_filename*.

        The checksum of each page is recomputed from the page contents rather
        than copied from ``page['checksum']``, so pages whose fields or data
        were modified after decoding are still written with a valid CRC.
        """
        with open(output_filename, 'wb') as f:
            for page in self.pages:
                header = b'OggS' + struct.pack(
                    '<BBqIIIB',
                    page['version'],
                    page['header_type'],
                    page['granule'],
                    page['serial'],
                    page['sequence'],
                    0,  # checksum field must be zero while the CRC is computed
                    page['segments'],
                )
                # bytearray: the checksum is patched in place afterwards.
                full_page = bytearray(
                    header + bytes(page['segment_table']) + page['data']
                )
                checksum = self._calculate_checksum(full_page)
                struct.pack_into('<I', full_page, 22, checksum)
                f.write(full_page)

    def _calculate_checksum(self, page_data):
        """Return the Ogg page CRC of *page_data* as an unsigned 32-bit int.

        This is the CRC-32 variant used by Ogg: polynomial 0x04c11db7,
        initial value 0, most-significant bit first, no final XOR.  The
        caller must pass the page with its checksum field zeroed.
        """
        table = OggHandler._crc_table
        if table is None:
            table = []
            for index in range(256):
                reg = index << 24
                for _ in range(8):
                    reg = (reg << 1) ^ 0x04C11DB7 if reg & 0x80000000 else reg << 1
                    reg &= 0xFFFFFFFF
                table.append(reg)
            OggHandler._crc_table = table
        crc = 0
        for byte in page_data:
            crc = ((crc << 8) & 0xFFFFFFFF) ^ table[(crc >> 24) ^ byte]
        return crc

# Example usage:
# handler = OggHandler()
# handler.read_and_decode('example.ogg')
# handler.print_properties()
# handler.write('output.ogg')

Note: The checksum calculation is simplified for brevity; a full CRC-32 implementation (using the specified polynomial) should be added for accurate writing.

5. Java Class for .OGG Handling

This Java class uses ByteBuffer for decoding. It reads, decodes, prints properties, and writes the parsed data to a new file.

import java.io.*;
import java.nio.*;
import java.nio.channels.FileChannel;
import java.nio.file.*;

public class OggHandler {
    private static class Page {
        String capture;
        byte version;
        byte headerType;
        boolean continuation;
        boolean bos;
        boolean eos;
        long granule;
        int serial;
        int sequence;
        int checksum;
        byte segments;
        byte[] segmentTable;
        byte[] data;
    }

    private Page[] pages;

    public void readAndDecode(String filename) throws IOException {
        byte[] data = Files.readAllBytes(Paths.get(filename));
        ByteBuffer buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
        int offset = 0;
        java.util.List<Page> pageList = new java.util.ArrayList<>();
        while (offset < data.length) {
            if (buffer.get(offset) != 0x4F || buffer.get(offset + 1) != 0x67 ||
                buffer.get(offset + 2) != 0x67 || buffer.get(offset + 3) != 0x53) {
                throw new IllegalArgumentException("Invalid capture pattern at offset " + offset);
            }
            Page page = new Page();
            page.capture = "OggS";
            page.version = buffer.get(offset + 4);
            page.headerType = buffer.get(offset + 5);
            page.continuation = (page.headerType & 0x01) != 0;
            page.bos = (page.headerType & 0x02) != 0;
            page.eos = (page.headerType & 0x04) != 0;
            page.granule = buffer.getLong(offset + 6);
            page.serial = buffer.getInt(offset + 14);
            page.sequence = buffer.getInt(offset + 18);
            page.checksum = buffer.getInt(offset + 22);
            page.segments = buffer.get(offset + 26);
            page.segmentTable = new byte[page.segments];
            buffer.position(offset + 27);
            buffer.get(page.segmentTable);
            int dataSize = 0;
            for (byte val : page.segmentTable) dataSize += Byte.toUnsignedInt(val);
            page.data = new byte[dataSize];
            buffer.get(page.data);
            pageList.add(page);
            offset = buffer.position();
        }
        pages = pageList.toArray(new Page[0]);
    }

    public void printProperties() {
        for (int i = 0; i < pages.length; i++) {
            Page page = pages[i];
            System.out.println("Page " + (i + 1) + ":");
            System.out.println("  Capture Pattern: " + page.capture);
            System.out.println("  Stream Structure Version: " + page.version);
            System.out.printf("  Header Type Flag: 0x%02X (Continuation: %b, BOS: %b, EOS: %b)\n",
                    page.headerType, page.continuation, page.bos, page.eos);
            System.out.println("  Absolute Granule Position: " + page.granule);
            System.out.println("  Stream Serial Number: " + page.serial);
            System.out.println("  Page Sequence Number: " + page.sequence);
            System.out.printf("  Page Checksum: 0x%08X\n", page.checksum);
            System.out.println("  Page Segments: " + Byte.toUnsignedInt(page.segments));
            System.out.print("  Segment Table: [");
            for (int j = 0; j < page.segmentTable.length; j++) {
                System.out.print(Byte.toUnsignedInt(page.segmentTable[j]));
                if (j < page.segmentTable.length - 1) System.out.print(", ");
            }
            System.out.println("]");
            System.out.println();
        }
    }

    public void write(String outputFilename) throws IOException {
        try (FileOutputStream fos = new FileOutputStream(outputFilename);
             FileChannel channel = fos.getChannel()) {
            for (Page page : pages) {
                ByteBuffer header = ByteBuffer.allocate(27 + page.segmentTable.length).order(ByteOrder.LITTLE_ENDIAN);
                header.put("OggS".getBytes());
                header.put(page.version);
                header.put(page.headerType);
                header.putLong(page.granule);
                header.putInt(page.serial);
                header.putInt(page.sequence);
                header.putInt(0); // Checksum placeholder
                header.put(page.segments);
                header.put(page.segmentTable);
                header.flip();
                ByteBuffer fullPage = ByteBuffer.allocate(header.limit() + page.data.length).order(ByteOrder.LITTLE_ENDIAN);
                fullPage.put(header);
                fullPage.put(page.data);
                fullPage.flip();
                // Calculate checksum (simplified; implement full CRC)
                int checksum = calculateChecksum(fullPage);
                fullPage.putInt(22, checksum);
                channel.write(fullPage);
            }
        }
    }

    private int calculateChecksum(ByteBuffer pageData) {
        // Placeholder for CRC-32. Implement proper algorithm.
        int crc = 0;
        pageData.position(0);
        while (pageData.hasRemaining()) {
            crc = (crc << 8) ^ (crc >>> 24) ^ Byte.toUnsignedInt(pageData.get());
        }
        return crc;
    }

    // Example usage:
    // public static void main(String[] args) throws IOException {
    //     OggHandler handler = new OggHandler();
    //     handler.readAndDecode("example.ogg");
    //     handler.printProperties();
    //     handler.write("output.ogg");
    // }
}

Note: Checksum is simplified; add a full CRC-32 for production.

6. JavaScript Class for .OGG Handling

This JavaScript class (for Node.js) uses fs and Buffer for handling. It reads, decodes, prints to console, and writes.

const fs = require('fs');

/**
 * Reads an Ogg file into an array of page objects, prints the page header
 * fields, and writes the pages back out with recomputed checksums.
 */
class OggHandler {
  constructor() {
    this.pages = [];
  }

  /**
   * Parse every page in `filename` and append it to `this.pages`.
   * The granule position is stored as a decimal string because it is a
   * 64-bit value that does not fit a JavaScript number.
   *
   * @param {string} filename path of the Ogg file to read
   * @throws {Error} if a page does not start with 'OggS' or the file ends
   *   in the middle of a page
   */
  readAndDecode(filename) {
    const data = fs.readFileSync(filename);
    let offset = 0;
    while (offset < data.length) {
      if (data.toString('latin1', offset, offset + 4) !== 'OggS') {
        throw new Error(`Invalid capture pattern at offset ${offset}`);
      }
      if (offset + 27 > data.length) {
        throw new Error(`Truncated page header at offset ${offset}`);
      }
      const version = data.readUInt8(offset + 4);
      const headerType = data.readUInt8(offset + 5);
      const granule = data.readBigInt64LE(offset + 6);
      const serial = data.readUInt32LE(offset + 14);
      const sequence = data.readUInt32LE(offset + 18);
      const checksum = data.readUInt32LE(offset + 22);
      const segments = data.readUInt8(offset + 26);

      const tableStart = offset + 27;
      const dataStart = tableStart + segments;
      if (dataStart > data.length) {
        throw new Error(`Truncated segment table at offset ${offset}`);
      }
      const segmentTable = Array.from(data.subarray(tableStart, dataStart));
      // The page body length is the sum of the lacing values.
      const dataSize = segmentTable.reduce((a, b) => a + b, 0);
      const dataEnd = dataStart + dataSize;
      if (dataEnd > data.length) {
        throw new Error(`Truncated page data at offset ${offset}`);
      }

      this.pages.push({
        capture: 'OggS',
        version,
        headerType,
        continuation: !!(headerType & 0x01),
        bos: !!(headerType & 0x02),
        eos: !!(headerType & 0x04),
        granule: granule.toString(),
        serial,
        sequence,
        checksum,
        segments,
        segmentTable,
        data: data.subarray(dataStart, dataEnd)
      });
      offset = dataEnd;
    }
  }

  /** Print the header fields of every decoded page to the console. */
  printProperties() {
    this.pages.forEach((page, i) => {
      // Zero-padded hex, matching the 0x%02x / 0x%08x output of the other ports.
      const flagHex = page.headerType.toString(16).padStart(2, '0');
      const crcHex = page.checksum.toString(16).padStart(8, '0');
      console.log(`Page ${i + 1}:`);
      console.log(`  Capture Pattern: ${page.capture}`);
      console.log(`  Stream Structure Version: ${page.version}`);
      console.log(`  Header Type Flag: 0x${flagHex} (Continuation: ${page.continuation}, BOS: ${page.bos}, EOS: ${page.eos})`);
      console.log(`  Absolute Granule Position: ${page.granule}`);
      console.log(`  Stream Serial Number: ${page.serial}`);
      console.log(`  Page Sequence Number: ${page.sequence}`);
      console.log(`  Page Checksum: 0x${crcHex}`);
      console.log(`  Page Segments: ${page.segments}`);
      console.log(`  Segment Table: [${page.segmentTable.join(', ')}]`);
      console.log();
    });
  }

  /**
   * Serialise `this.pages` to `outputFilename`. Each page checksum is
   * recomputed from the page contents rather than copied from the decoded
   * value.
   *
   * @param {string} outputFilename path of the file to create
   */
  write(outputFilename) {
    const buffers = this.pages.map(page => {
      const header = Buffer.alloc(27 + page.segmentTable.length);
      header.write('OggS', 0, 'latin1');
      header.writeUInt8(page.version, 4);
      header.writeUInt8(page.headerType, 5);
      header.writeBigInt64LE(BigInt(page.granule), 6);
      header.writeUInt32LE(page.serial, 14);
      header.writeUInt32LE(page.sequence, 18);
      header.writeUInt32LE(0, 22); // checksum field is zero while the CRC is computed
      header.writeUInt8(page.segments, 26);
      page.segmentTable.forEach((val, i) => header.writeUInt8(val, 27 + i));
      const fullPage = Buffer.concat([header, page.data]);
      fullPage.writeUInt32LE(this.calculateChecksum(fullPage), 22);
      return fullPage;
    });
    fs.writeFileSync(outputFilename, Buffer.concat(buffers));
  }

  /**
   * Compute the Ogg page CRC: polynomial 0x04c11db7, initial value 0,
   * most-significant bit first, no final XOR.
   *
   * @param {Buffer|Uint8Array} pageData page bytes with the checksum field zeroed
   * @returns {number} unsigned 32-bit checksum
   */
  calculateChecksum(pageData) {
    // The lookup table is built once and cached on the class.
    let table = OggHandler.crcTable;
    if (!table) {
      table = new Uint32Array(256);
      for (let i = 0; i < 256; i++) {
        let reg = i << 24;
        for (let bit = 0; bit < 8; bit++) {
          reg = (reg & 0x80000000) ? ((reg << 1) ^ 0x04c11db7) : (reg << 1);
        }
        table[i] = reg >>> 0;
      }
      OggHandler.crcTable = table;
    }
    let crc = 0;
    for (const byte of pageData) {
      crc = ((crc << 8) ^ table[((crc >>> 24) ^ byte) & 0xff]) >>> 0;
    }
    return crc;
  }
}

// Example usage:
// const handler = new OggHandler();
// handler.readAndDecode('example.ogg');
// handler.printProperties();
// handler.write('output.ogg');

Note: Requires Node.js. Checksum simplified.

7. C Class (Struct-Based) for .OGG Handling

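This C code uses a struct for pages. It reads, decodes, prints to console, and writes. To build it, first uncomment the example main function at the end of the listing (otherwise there is no entry point to link), then compile with gcc ogg_handler.c -o ogg_handler.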

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* One decoded Ogg page: the fixed header fields plus heap copies of the
 * segment table and the page body. */
typedef struct {
    char capture[5];        /* capture pattern "OggS" plus terminating NUL */
    uint8_t version;        /* stream structure version (header byte 4) */
    uint8_t header_type;    /* raw header type flag byte (header byte 5) */
    bool continuation;      /* header_type & 0x01 */
    bool bos;               /* header_type & 0x02 */
    bool eos;               /* header_type & 0x04 */
    int64_t granule;        /* absolute granule position (header bytes 6-13) */
    uint32_t serial;        /* stream serial number (header bytes 14-17) */
    uint32_t sequence;      /* page sequence number (header bytes 18-21) */
    uint32_t checksum;      /* page checksum as read from the file (bytes 22-25) */
    uint8_t segments;       /* number of entries in segment_table (byte 26) */
    uint8_t *segment_table; /* malloc'd copy of the lacing values; released by free_handler */
    uint8_t *data;          /* malloc'd copy of the page body; released by free_handler */
    size_t data_size;       /* length of data in bytes: the sum of the segment_table entries */
} OggPage;

/* Growable list of decoded pages, in the order they were read. */
typedef struct {
    OggPage *pages;     /* heap array of page_count entries; NULL when empty */
    size_t page_count;  /* number of valid entries in pages */
} OggHandler;

/* Put *handler into the empty state: no page array and a page count of zero.
 * Must be called before the handler is passed to any other function. */
void init_handler(OggHandler *handler) {
    *handler = (OggHandler){ .pages = NULL, .page_count = 0 };
}

/* Decode an unsigned 32-bit little-endian value, independent of host order. */
static uint32_t ogg_read_le32(const uint8_t *p) {
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Print msg to stderr, release the file buffer, and terminate. */
static void ogg_read_fail(uint8_t *data, const char *msg, size_t offset) {
    fprintf(stderr, "%s at offset %zu\n", msg, offset);
    free(data);
    exit(1);
}

/*
 * Read the whole of `filename` and append every Ogg page it contains to
 * handler->pages.
 *
 * On any error (file cannot be opened or read, out of memory, a page that
 * does not start with "OggS", or a file that ends in the middle of a page)
 * a message is printed to stderr and the process exits with status 1.
 */
void read_and_decode(OggHandler *handler, const char *filename) {
    FILE *f = fopen(filename, "rb");
    if (!f) {
        perror("Failed to open file");
        exit(1);
    }
    long file_len = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        file_len = ftell(f);
    }
    if (file_len < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror("Failed to determine file size");
        fclose(f);
        exit(1);
    }
    size_t size = (size_t)file_len;
    /* Allocate at least one byte so a NULL result always means failure. */
    uint8_t *data = malloc(size ? size : 1);
    if (!data) {
        perror("Failed to allocate file buffer");
        fclose(f);
        exit(1);
    }
    if (fread(data, 1, size, f) != size) {
        fprintf(stderr, "Failed to read file\n");
        free(data);
        fclose(f);
        exit(1);
    }
    fclose(f);

    size_t offset = 0;
    while (offset < size) {
        const uint8_t *hdr = data + offset;
        size_t remaining = size - offset;

        if (remaining < 4 || memcmp(hdr, "OggS", 4) != 0) {
            ogg_read_fail(data, "Invalid capture pattern", offset);
        }
        if (remaining < 27) {
            ogg_read_fail(data, "Truncated page header", offset);
        }

        OggPage page;
        memcpy(page.capture, "OggS", 5);
        page.version = hdr[4];
        page.header_type = hdr[5];
        page.continuation = (page.header_type & 0x01) != 0;
        page.bos = (page.header_type & 0x02) != 0;
        page.eos = (page.header_type & 0x04) != 0;

        /* Assemble the 64-bit little-endian granule position byte by byte,
         * then copy the bit pattern into the signed field. */
        uint64_t granule_bits = 0;
        for (int i = 7; i >= 0; i--) {
            granule_bits = (granule_bits << 8) | hdr[6 + i];
        }
        memcpy(&page.granule, &granule_bits, sizeof page.granule);
        page.serial = ogg_read_le32(hdr + 14);
        page.sequence = ogg_read_le32(hdr + 18);
        page.checksum = ogg_read_le32(hdr + 22);
        page.segments = hdr[26];

        if (remaining - 27 < page.segments) {
            ogg_read_fail(data, "Truncated segment table", offset);
        }
        page.data_size = 0;
        for (int i = 0; i < page.segments; i++) {
            page.data_size += hdr[27 + i];
        }
        if (remaining - 27 - page.segments < page.data_size) {
            ogg_read_fail(data, "Truncated page data", offset);
        }

        page.segment_table = malloc(page.segments ? page.segments : 1);
        page.data = malloc(page.data_size ? page.data_size : 1);
        OggPage *grown = realloc(handler->pages,
                                 sizeof(OggPage) * (handler->page_count + 1));
        if (!page.segment_table || !page.data || !grown) {
            /* realloc leaves the old array intact on failure; `grown` is
             * only adopted below, so nothing dangling is stored. */
            free(page.segment_table);
            free(page.data);
            ogg_read_fail(data, "Out of memory while decoding page", offset);
        }
        memcpy(page.segment_table, hdr + 27, page.segments);
        memcpy(page.data, hdr + 27 + page.segments, page.data_size);

        handler->pages = grown;
        handler->pages[handler->page_count] = page;
        handler->page_count++;
        offset += 27 + page.segments + page.data_size;
    }
    free(data);
}

/*
 * Print the header fields of every page held by `handler` to stdout.
 *
 * The fixed-width fields are cast to the exact type each printf conversion
 * expects, so the output is correct regardless of the platform's widths
 * for int and long.
 */
void print_properties(const OggHandler *handler) {
    for (size_t i = 0; i < handler->page_count; i++) {
        const OggPage *page = &handler->pages[i];
        printf("Page %zu:\n", i + 1);
        printf("  Capture Pattern: %s\n", page->capture);
        printf("  Stream Structure Version: %u\n", (unsigned)page->version);
        printf("  Header Type Flag: 0x%02x (Continuation: %s, BOS: %s, EOS: %s)\n",
               (unsigned)page->header_type, page->continuation ? "true" : "false",
               page->bos ? "true" : "false", page->eos ? "true" : "false");
        printf("  Absolute Granule Position: %lld\n", (long long)page->granule);
        printf("  Stream Serial Number: %lu\n", (unsigned long)page->serial);
        printf("  Page Sequence Number: %lu\n", (unsigned long)page->sequence);
        printf("  Page Checksum: 0x%08lx\n", (unsigned long)page->checksum);
        printf("  Page Segments: %u\n", (unsigned)page->segments);
        printf("  Segment Table: [");
        for (int j = 0; j < page->segments; j++) {
            printf("%u", (unsigned)page->segment_table[j]);
            if (j < page->segments - 1) printf(", ");
        }
        printf("]\n\n");
    }
}

/* Store v at dst as 4 little-endian bytes, independent of host byte order. */
static void ogg_store_le32(uint8_t *dst, uint32_t v) {
    dst[0] = (uint8_t)(v & 0xff);
    dst[1] = (uint8_t)((v >> 8) & 0xff);
    dst[2] = (uint8_t)((v >> 16) & 0xff);
    dst[3] = (uint8_t)((v >> 24) & 0xff);
}

/* Ogg page CRC: polynomial 0x04c11db7, initial value 0, MSB first, no
 * final XOR. `bytes` must be the page with its checksum field zeroed. */
static uint32_t ogg_page_crc(const uint8_t *bytes, size_t len) {
    uint32_t crc = 0;
    for (size_t i = 0; i < len; i++) {
        crc ^= (uint32_t)bytes[i] << 24;
        for (int bit = 0; bit < 8; bit++) {
            crc = (crc & 0x80000000u) ? (crc << 1) ^ 0x04c11db7u : (crc << 1);
        }
    }
    return crc;
}

/*
 * Serialise every page held by `handler` to `output_filename`.
 *
 * Each page checksum is recomputed from the page contents rather than
 * copied from page->checksum. On any error (file cannot be opened or
 * written, out of memory) a message is printed and the process exits with
 * status 1.
 */
void write(const OggHandler *handler, const char *output_filename) {
    FILE *f = fopen(output_filename, "wb");
    if (!f) {
        perror("Failed to open output file");
        exit(1);
    }
    for (size_t i = 0; i < handler->page_count; i++) {
        const OggPage *page = &handler->pages[i];
        size_t header_size = 27 + (size_t)page->segments;
        size_t full_size = header_size + page->data_size;

        uint8_t *full_page = malloc(full_size);
        if (!full_page) {
            perror("Failed to allocate page buffer");
            fclose(f);
            exit(1);
        }

        memcpy(full_page, "OggS", 4);
        full_page[4] = page->version;
        full_page[5] = page->header_type;
        /* Emit the granule position as 8 little-endian bytes. */
        uint64_t granule_bits;
        memcpy(&granule_bits, &page->granule, sizeof granule_bits);
        for (int b = 0; b < 8; b++) {
            full_page[6 + b] = (uint8_t)((granule_bits >> (8 * b)) & 0xff);
        }
        ogg_store_le32(full_page + 14, page->serial);
        ogg_store_le32(full_page + 18, page->sequence);
        ogg_store_le32(full_page + 22, 0); /* zero while the CRC is computed */
        full_page[26] = page->segments;
        if (page->segments) {
            memcpy(full_page + 27, page->segment_table, page->segments);
        }
        if (page->data_size) {
            memcpy(full_page + header_size, page->data, page->data_size);
        }

        ogg_store_le32(full_page + 22, ogg_page_crc(full_page, full_size));

        if (fwrite(full_page, 1, full_size, f) != full_size) {
            perror("Failed to write page");
            free(full_page);
            fclose(f);
            exit(1);
        }
        free(full_page);
    }
    /* fclose flushes buffered data, so a failure here means lost output. */
    if (fclose(f) != 0) {
        perror("Failed to close output file");
        exit(1);
    }
}

/*
 * Release every page buffer and the page array owned by `handler`, then
 * reset it to the empty state so that calling free_handler again, or
 * reusing the handler for another read, is safe. A NULL handler is ignored.
 */
void free_handler(OggHandler *handler) {
    if (!handler) {
        return;
    }
    for (size_t i = 0; i < handler->page_count; i++) {
        free(handler->pages[i].segment_table);
        free(handler->pages[i].data);
    }
    free(handler->pages);
    /* Clear the stale pointer and count to prevent double free / reuse. */
    handler->pages = NULL;
    handler->page_count = 0;
}

// Example usage:
// int main(int argc, char **argv) {
//     OggHandler handler;
//     init_handler(&handler);
//     read_and_decode(&handler, "example.ogg");
//     print_properties(&handler);
//     write(&handler, "output.ogg");
//     free_handler(&handler);
//     return 0;
// }

Note: Assumes little-endian host; add byte-swapping if big-endian. Checksum simplified.