Task 722: .TBZ2 File Format

Task 722: .TBZ2 File Format

1. List of Properties

The .TBZ2 file format is a bzip2-compressed tar archive. Its intrinsic properties come from two layers, the bzip2 stream header and the per-entry tar headers:

  • bzip2 magic bytes: 'BZh'
  • bzip2 block size level: 1-9 (indicating block size of 100-900 kB)
  • For each item in the tar archive (one tar header per item):
      - Name (file or directory name)
      - Mode (permissions in octal, e.g., '0644')
      - UID (user ID, numeric)
      - GID (group ID, numeric)
      - Size (file size in bytes)
      - Mtime (modification time as Unix timestamp)
      - Typeflag (file type: '0' regular file, '1' hard link, '2' symlink, '3' char device, '4' block device, '5' directory, '6' FIFO, etc.)
      - Linkname (target name if link)
      - Uname (user name)
      - Gname (group name)
      - Devmajor (major device number, for special files)
      - Devminor (minor device number, for special files)
      - Prefix (path prefix for long names)

3. Ghost Blog Embedded HTML JavaScript

TBZ2 File Analyzer
Drag and drop .tbz2 file here


    

4. Python Class

import bz2
import tarfile
import os
import io
from datetime import datetime

class TBZ2Handler:
    """Inspect and create .tbz2 (bzip2-compressed tar) archives.

    ``properties`` holds what :meth:`read_properties` gathered:
    ``bzip2_magic``, ``bzip2_block_level`` and ``items`` (one dict per tar
    member). It stays empty until a file has been read successfully.
    """

    def __init__(self, filepath=None):
        """Remember ``filepath`` and, when one is given, read it immediately."""
        self.filepath = filepath
        self.properties = {}
        if filepath:
            self.read_properties()

    def read_properties(self):
        """Load the bzip2 header fields and every tar member's header fields.

        Raises:
            ValueError: the file does not start with 'BZh' followed by a
                block-size digit '1'..'9'.
            OSError: the file cannot be opened, or bz2 rejects the data.
            tarfile.TarError: the decompressed payload is not a tar archive.
        """
        with open(self.filepath, 'rb') as f:
            data = f.read()

        # Validate the 4-byte header up front so truncated or foreign files
        # fail with one clear error instead of IndexError / int() noise.
        if len(data) < 4 or data[:3] != b'BZh' or not (0x31 <= data[3] <= 0x39):
            raise ValueError(
                f"{self.filepath!r} does not start with a valid bzip2 header "
                "('BZh' followed by a digit 1-9)"
            )
        magic = data[:3].decode('ascii')
        block_level = data[3] - 0x30  # ASCII digit -> int

        # Decompress the whole stream, then walk the tar members in memory.
        decompressed = bz2.decompress(data)

        items = []
        with io.BytesIO(decompressed) as tar_io:
            with tarfile.open(fileobj=tar_io, mode='r') as tar:
                for member in tar:
                    items.append({
                        'name': member.name,
                        'mode': oct(member.mode),
                        'uid': member.uid,
                        'gid': member.gid,
                        'size': member.size,
                        'mtime': member.mtime,
                        # Rendered in the local timezone of the running process.
                        'mtime_str': datetime.fromtimestamp(member.mtime).isoformat(),
                        'typeflag': member.type,
                        'linkname': member.linkname,
                        'uname': member.uname,
                        'gname': member.gname,
                        'devmajor': member.devmajor,
                        'devminor': member.devminor,
                    })

        # Publish everything at once so a failure above never leaves
        # self.properties half-populated.
        self.properties.update({
            'bzip2_magic': magic,
            'bzip2_block_level': block_level,
            'items': items,
        })

    def print_properties(self):
        """Print the loaded properties, or a short notice if nothing is loaded."""
        if 'items' not in self.properties:
            print("No properties loaded.")
            return

        print(f"bzip2 Magic: {self.properties['bzip2_magic']}")
        print(f"bzip2 Block Size Level: {self.properties['bzip2_block_level']}\n")
        for item in self.properties['items']:
            print(f"Item: {item['name']}")
            print(f"  Mode: {item['mode']}")
            print(f"  UID: {item['uid']}")
            print(f"  GID: {item['gid']}")
            print(f"  Size: {item['size']}")
            print(f"  Mtime: {item['mtime']} ({item['mtime_str']})")
            print(f"  Typeflag: {item['typeflag']}")
            print(f"  Linkname: {item['linkname']}")
            print(f"  Uname: {item['uname']}")
            print(f"  Gname: {item['gname']}")
            print(f"  Devmajor: {item['devmajor']}")
            print(f"  Devminor: {item['devminor']}\n")

    def write(self, output_path, files_to_add):
        """Create a .tbz2 at ``output_path`` containing ``files_to_add``.

        Each path is stored under its base name only, so two inputs with the
        same file name in different directories end up as duplicate members.
        """
        # Build the tar archive in memory first.
        tar_io = io.BytesIO()
        with tarfile.open(fileobj=tar_io, mode='w') as tar:
            for file_path in files_to_add:
                tar.add(file_path, arcname=os.path.basename(file_path))

        # bz2.compress uses compresslevel=9 unless told otherwise.
        compressed = bz2.compress(tar_io.getvalue())

        with open(output_path, 'wb') as f:
            f.write(compressed)

# Example usage:
# handler = TBZ2Handler('example.tbz2')
# handler.print_properties()
# handler.write('new.tbz2', ['file1.txt', 'file2.txt'])

5. Java Class

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.*;

/**
 * Reads, prints and writes the properties of a .tbz2 (bzip2-compressed tar) archive.
 *
 * <p>The constructor reads the archive eagerly. I/O failures are reported on
 * {@code System.err} and leave the item list empty rather than throwing.
 */
public class TBZ2Handler {
    /** Length of the bzip2 stream header: the "BZh" magic plus one block-size digit. */
    private static final int BZIP2_HEADER_LENGTH = 4;

    private final String filepath;
    private final Map<String, Object> properties = new HashMap<>();

    /**
     * Creates a handler and immediately reads the properties of {@code filepath}.
     *
     * @param filepath path of the .tbz2 file to inspect
     */
    public TBZ2Handler(String filepath) {
        this.filepath = filepath;
        readProperties();
    }

    /** Fills {@link #properties} with the bzip2 header fields and one map per tar entry. */
    private void readProperties() {
        // Publish the list up front so printProperties() never sees null after a failed read.
        List<Map<String, Object>> items = new ArrayList<>();
        properties.put("items", items);

        try (InputStream in = new BufferedInputStream(new FileInputStream(filepath))) {
            // Peek at the header with mark/reset so the decompressor below still
            // sees the stream from its first byte.
            in.mark(BZIP2_HEADER_LENGTH);
            byte[] header = readHeader(in);
            in.reset();

            properties.put("bzip2_magic",
                    new String(header, 0, 3, java.nio.charset.StandardCharsets.US_ASCII));
            properties.put("bzip2_block_level", header[3] - '0');

            try (TarArchiveInputStream tarIn =
                         new TarArchiveInputStream(new BZip2CompressorInputStream(in))) {
                TarArchiveEntry entry;
                while ((entry = tarIn.getNextTarEntry()) != null) {
                    Map<String, Object> item = new HashMap<>();
                    item.put("name", entry.getName());
                    item.put("mode", Integer.toOctalString(entry.getMode()));
                    item.put("uid", entry.getUserId());
                    item.put("gid", entry.getGroupId());
                    item.put("size", entry.getSize());
                    item.put("mtime", entry.getModTime().getTime() / 1000);
                    item.put("mtime_str",
                            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(entry.getModTime()));
                    item.put("typeflag", typeFlagOf(entry));
                    item.put("linkname", entry.getLinkName());
                    item.put("uname", entry.getUserName());
                    item.put("gname", entry.getGroupName());
                    item.put("devmajor", entry.getDevMajor());
                    item.put("devminor", entry.getDevMinor());
                    items.add(item);
                }
            }
        } catch (IOException e) {
            System.err.println("Failed to read " + filepath + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Reads exactly {@link #BZIP2_HEADER_LENGTH} bytes from {@code in}.
     *
     * @throws EOFException if the stream ends before the header is complete
     */
    private static byte[] readHeader(InputStream in) throws IOException {
        byte[] header = new byte[BZIP2_HEADER_LENGTH];
        int filled = 0;
        // A single read() may legally return fewer bytes than requested.
        while (filled < header.length) {
            int n = in.read(header, filled, header.length - filled);
            if (n < 0) {
                throw new EOFException("File is shorter than the bzip2 header");
            }
            filled += n;
        }
        return header;
    }

    /**
     * Maps an entry to its tar typeflag character ("0".."6") using the entry-type
     * predicates. Flag-based checks run before {@code isDirectory()} so the more
     * specific types are matched first; anything unmatched is reported as "0".
     */
    private static String typeFlagOf(TarArchiveEntry entry) {
        if (entry.isSymbolicLink()) {
            return "2";
        }
        if (entry.isLink()) {
            return "1";
        }
        if (entry.isCharacterDevice()) {
            return "3";
        }
        if (entry.isBlockDevice()) {
            return "4";
        }
        if (entry.isFIFO()) {
            return "6";
        }
        if (entry.isDirectory()) {
            return "5";
        }
        return "0";
    }

    /** Prints the bzip2 header fields followed by every tar entry that was read. */
    public void printProperties() {
        System.out.println("bzip2 Magic: " + properties.get("bzip2_magic"));
        System.out.println("bzip2 Block Size Level: " + properties.get("bzip2_block_level") + "\n");
        // Safe: readProperties() is the only writer of "items" and always stores this type.
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> items = (List<Map<String, Object>>) properties.get("items");
        for (Map<String, Object> item : items) {
            System.out.println("Item: " + item.get("name"));
            System.out.println("  Mode: " + item.get("mode"));
            System.out.println("  UID: " + item.get("uid"));
            System.out.println("  GID: " + item.get("gid"));
            System.out.println("  Size: " + item.get("size"));
            System.out.println("  Mtime: " + item.get("mtime") + " (" + item.get("mtime_str") + ")");
            System.out.println("  Typeflag: " + item.get("typeflag"));
            System.out.println("  Linkname: " + item.get("linkname"));
            System.out.println("  Uname: " + item.get("uname"));
            System.out.println("  Gname: " + item.get("gname"));
            System.out.println("  Devmajor: " + item.get("devmajor"));
            System.out.println("  Devminor: " + item.get("devminor") + "\n");
        }
    }

    /**
     * Writes a new .tbz2 archive containing the given files, each stored under its
     * base name. I/O failures are reported on {@code System.err}.
     *
     * @param outputPath path of the archive to create
     * @param filesToAdd paths of the files (or directories, stored as bare entries) to add
     */
    public void write(String outputPath, List<String> filesToAdd) {
        try (OutputStream fos = new FileOutputStream(outputPath);
             BufferedOutputStream bos = new BufferedOutputStream(fos);
             BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(bos);
             TarArchiveOutputStream tarOut = new TarArchiveOutputStream(bzOut)) {

            for (String filePath : filesToAdd) {
                File file = new File(filePath);
                TarArchiveEntry entry = new TarArchiveEntry(file, file.getName());
                tarOut.putArchiveEntry(entry);
                // A directory entry has no body; copying it would fail.
                if (!file.isDirectory()) {
                    Files.copy(Paths.get(filePath), tarOut);
                }
                tarOut.closeArchiveEntry();
            }
        } catch (IOException e) {
            System.err.println("Failed to write " + outputPath + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    // Example usage:
    // public static void main(String[] args) {
    //     TBZ2Handler handler = new TBZ2Handler("example.tbz2");
    //     handler.printProperties();
    //     handler.write("new.tbz2", Arrays.asList("file1.txt", "file2.txt"));
    // }
}

6. JavaScript Class

// For Node.js; requires the third-party 'bz2' and 'tar-stream' modules.
// Install with: npm install bz2 tar-stream   ('fs', 'util' and 'stream' are built into Node.js)

const fs = require('fs');
const bz2 = require('bz2');
const tar = require('tar-stream');
const { promisify } = require('util');
const pipeline = promisify(require('stream').pipeline);

/**
 * Reads, prints and writes the properties of a .tbz2 (bzip2-compressed tar) archive.
 * `properties` stays empty until readProperties() has completed.
 */
class TBZ2Handler {
    /**
     * @param {string} filepath path of the .tbz2 file to inspect
     */
    constructor(filepath) {
        this.filepath = filepath;
        this.properties = {};
    }

    /**
     * Loads the bzip2 header fields and one object per tar entry into `properties`.
     * @returns {Promise<void>} rejects if the header is invalid or parsing fails
     */
    async readProperties() {
        const data = fs.readFileSync(this.filepath);

        // The header is 'B' 'Z' 'h' followed by one ASCII digit '1'..'9'.
        const magic = data.slice(0, 3).toString('ascii');
        if (data.length < 4 || magic !== 'BZh' || data[3] < 0x31 || data[3] > 0x39) {
            throw new Error(`${this.filepath} does not start with a valid bzip2 header`);
        }
        const blockLevel = data[3] - 0x30; // ASCII '0' is 48
        this.properties.bzip2_magic = magic;
        this.properties.bzip2_block_level = blockLevel;

        // Decompress the whole file into memory.
        const decompressed = bz2.decompress(data);

        // Parse tar
        const items = [];
        const extract = tar.extract();
        extract.on('entry', (header, stream, next) => {
            items.push({
                name: header.name,
                mode: header.mode.toString(8),
                uid: header.uid,
                gid: header.gid,
                size: header.size,
                mtime: Math.floor(header.mtime.getTime() / 1000),
                mtime_str: header.mtime.toISOString(),
                typeflag: header.type, // 'file', 'directory', etc.
                linkname: header.linkname,
                uname: header.uname,
                gname: header.gname,
                devmajor: header.devmajor,
                devminor: header.devminor,
            });
            // Only move to the next entry once this entry's body has been drained.
            stream.on('end', next);
            stream.resume();
        });

        // The decompressed data is a byte array, not a stream, so write it
        // straight into the extractor and wait for it to finish.
        await new Promise((resolve, reject) => {
            extract.on('finish', resolve);
            extract.on('error', reject);
            extract.end(Buffer.from(decompressed));
        });

        this.properties.items = items;
    }

    /** Prints the loaded properties; prints no items if none have been loaded. */
    printProperties() {
        console.log(`bzip2 Magic: ${this.properties.bzip2_magic}`);
        console.log(`bzip2 Block Size Level: ${this.properties.bzip2_block_level}\n`);
        const items = this.properties.items || [];
        items.forEach(item => {
            console.log(`Item: ${item.name}`);
            console.log(`  Mode: ${item.mode}`);
            console.log(`  UID: ${item.uid}`);
            console.log(`  GID: ${item.gid}`);
            console.log(`  Size: ${item.size}`);
            console.log(`  Mtime: ${item.mtime} (${item.mtime_str})`);
            console.log(`  Typeflag: ${item.typeflag}`);
            console.log(`  Linkname: ${item.linkname}`);
            console.log(`  Uname: ${item.uname}`);
            console.log(`  Gname: ${item.gname}`);
            console.log(`  Devmajor: ${item.devmajor}`);
            console.log(`  Devminor: ${item.devminor}\n`);
        });
    }

    /**
     * Writes a new .tbz2 containing the given files, each stored under its base name.
     * @param {string} outputPath path of the archive to create
     * @param {string[]} filesToAdd paths of the files to add
     * @returns {Promise<void>}
     */
    async write(outputPath, filesToAdd) {
        // NOTE(review): this relies on the loaded 'bz2' module exposing a streaming
        // compress(); verify against the installed package, which may only decompress.
        if (typeof bz2.compress !== 'function') {
            throw new Error("The loaded 'bz2' module has no compress(); cannot write .tbz2 files");
        }

        const path = require('path');
        const pack = tar.pack();
        for (const filePath of filesToAdd) {
            const contents = fs.readFileSync(filePath);
            // Take the size from the bytes actually read, and store only the base name.
            pack.entry({ name: path.basename(filePath), size: contents.length }, contents);
        }
        pack.finalize();

        const compressedStream = bz2.compress(pack);
        await pipeline(compressedStream, fs.createWriteStream(outputPath));
    }
}

// Example usage:
// (async () => {
//     const handler = new TBZ2Handler('example.tbz2');
//     await handler.readProperties();
//     handler.printProperties();
//     await handler.write('new.tbz2', ['file1.txt', 'file2.txt']);
// })();

7. C Implementation (C has no classes; a struct plus functions is used instead)

// Compile with: gcc tbz2_handler.c -o tbz2_handler -lbz2
// Note: the tar layer is parsed by a basic hand-written ustar parser; for full tar support consider a library such as libtar.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <bzlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

#define BLOCK_SIZE 512

typedef struct {
    char name[100];
    char mode[8];
    char uid[8];
    char gid[8];
    char size[12];
    char mtime[12];
    char chksum[8];
    char typeflag;
    char linkname[100];
    char magic[6];
    char version[2];
    char uname[32];
    char gname[32];
    char devmajor[8];
    char devminor[8];
    char prefix[155];
    char pad[12];
} TarHeader;

/*
 * Container for the bzip2-level properties of an archive.
 * NOTE(review): nothing in the code shown here uses this type; main() prints
 * the header fields directly.
 */
typedef struct {
    char* bzip2_magic;      /* bzip2 magic string */
    int bzip2_block_level;  /* bzip2 block size level */
    // List of items would be dynamic array in full impl
} TBZ2Properties;

/*
 * Parse a fixed-width octal header field. The field is copied into a local
 * NUL-terminated buffer first, because a field that uses its full width has
 * no terminator of its own and strtol() would run on into the next field.
 */
static long parse_octal_field(const char* field, size_t width) {
    char buf[32];
    if (width >= sizeof(buf)) {
        width = sizeof(buf) - 1;
    }
    memcpy(buf, field, width);
    buf[width] = '\0';
    return strtol(buf, NULL, 8);
}

/*
 * Walk the ustar headers in a decompressed tar image and print every entry's
 * header fields to stdout.
 *
 * decompressed: start of the tar image
 * decomp_size:  number of valid bytes at decompressed
 *
 * The walk stops at the first block whose first byte is zero, at the first
 * block without the "ustar" magic, or when fewer than BLOCK_SIZE bytes remain.
 */
void print_properties(unsigned char* decompressed, size_t decomp_size) {
    size_t offset = 0;

    /* Only look at a header when a complete block is actually available. */
    while (offset <= decomp_size && decomp_size - offset >= BLOCK_SIZE) {
        if (decompressed[offset] == 0) break;

        const TarHeader* header = (const TarHeader*)(decompressed + offset);
        if (strncmp(header->magic, "ustar", 5) != 0) break;

        /* prefix + '/' + name + NUL; precisions bound the reads because the
         * fields need not be NUL-terminated. */
        char full_name[sizeof(header->prefix) + 1 + sizeof(header->name) + 1];
        snprintf(full_name, sizeof(full_name), "%.*s%s%.*s",
                 (int)sizeof(header->prefix), header->prefix,
                 header->prefix[0] ? "/" : "",
                 (int)sizeof(header->name), header->name);

        long size = parse_octal_field(header->size, sizeof(header->size));
        if (size < 0) break; /* malformed size: cannot locate the next header */

        time_t mtime = (time_t)parse_octal_field(header->mtime, sizeof(header->mtime));
        char time_str[32] = "";
        struct tm* tm = localtime(&mtime);
        if (tm != NULL) {
            strftime(time_str, sizeof(time_str), "%Y-%m-%dT%H:%M:%S", tm);
        }

        printf("Item: %s\n", full_name);
        printf("  Mode: %lo\n", parse_octal_field(header->mode, sizeof(header->mode)));
        printf("  UID: %ld\n", parse_octal_field(header->uid, sizeof(header->uid)));
        printf("  GID: %ld\n", parse_octal_field(header->gid, sizeof(header->gid)));
        printf("  Size: %ld\n", size);
        printf("  Mtime: %ld (%s)\n", (long)mtime, time_str);
        printf("  Typeflag: %c\n", header->typeflag);
        printf("  Linkname: %.*s\n", (int)sizeof(header->linkname), header->linkname);
        printf("  Uname: %.*s\n", (int)sizeof(header->uname), header->uname);
        printf("  Gname: %.*s\n", (int)sizeof(header->gname), header->gname);
        printf("  Devmajor: %ld\n", parse_octal_field(header->devmajor, sizeof(header->devmajor)));
        printf("  Devminor: %ld\n", parse_octal_field(header->devminor, sizeof(header->devminor)));
        printf("\n");

        /* Skip the header block plus the data rounded up to whole blocks. */
        size_t data_blocks = ((size_t)size + BLOCK_SIZE - 1) / BLOCK_SIZE;
        size_t advance = BLOCK_SIZE + data_blocks * BLOCK_SIZE;
        if (advance > decomp_size - offset) break;
        offset += advance;
    }
}

int main(int argc, char** argv) {
    if (argc < 2) {
        printf("Usage: %s <file.tbz2>\n", argv[0]);
        return 1;
    }
    
    // Read file
    FILE* file = fopen(argv[1], "rb");
    fseek(file, 0, SEEK_END);
    size_t file_size = ftell(file);
    fseek(file, 0, SEEK_SET);
    unsigned char* compressed = malloc(file_size);
    fread(compressed, 1, file_size, file);
    fclose(file);
    
    // bzip2 header
    printf("bzip2 Magic: %c%c%c\n", compressed[0], compressed[1], compressed[2]);
    int block_level = compressed[3] - '0';
    printf("bzip2 Block Size Level: %d\n\n", block_level);
    
    // Decompress
    unsigned int decomp_size = file_size * 10; // Estimate
    unsigned char* decompressed = malloc(decomp_size);
    int ret = BZ2_bzBuffToBuffDecompress((char*)decompressed, &decomp_size, (char*)compressed, file_size, 0, 0);
    if (ret != BZ_OK) {
        printf("Decompression failed: %d\n", ret);
        free(compressed);
        free(decompressed);
        return 1;
    }
    
    // Print tar properties
    print_properties(decompressed, decomp_size);
    
    free(compressed);
    free(decompressed);
    
    // For write: Implement similar with BZ2_bzBuffToBuffCompress and tar header creation (omitted for brevity)
    
    return 0;
}