Task 390: .MDI File Format

Task 390: .MDI File Format

1. List of all the properties of this file format intrinsic to its file system

The .MDI file format is the Microsoft Document Imaging format, a proprietary TIFF-based format for storing scanned documents, images, and OCR text. The properties intrinsic to the format (based on its structure and specifications) are:

  • File extension: .mdi
  • Magic number (header signature): 45 50 2A 00 (ASCII "EP*\0")
  • MIME type: image/vnd.ms-modi
  • PRONOM identifier: fmt/881
  • Proprietary: Yes (Microsoft-specific)
  • Base format: Tagged Image File Format (TIFF) with custom header and additional compression schemes
  • Compression codes: 34718 (Binary Level Codec - BLC), 34719 (MODI_VECTOR), 34720 (Progressive Transform Codec - PTC)
  • Supported image types: Monochrome (compressed in MODI BW), Grayscale or Color (compressed in MODI Color)
  • Maximum resolution: Images over 900 DPI are downsampled to 900 DPI
  • OCR support: Can store optical character recognition (OCR) text alongside images
  • File size advantage: Reduced disk space compared to equivalent TIFF files
  • Image fidelity: Improved quality compared to TIFF with lossless compression (e.g., LZW)
  • Multi-page support: Yes (page layout structure like TIFF)
  • Compatibility: Can only be opened/edited in Microsoft Office Document Imaging (MODI); TIFF is recommended for sharing

I was unable to find reliable, publicly available direct download links for .MDI files during my search. The format is obsolete (deprecated in Office 2010), and most resources focus on converters rather than samples. However, you can generate .MDI files with older versions of Microsoft Office that include MODI, or by converting existing TIFF files. Two pages that discuss sample .MDI files and offer related downloads (e.g., .rar archives containing samples or converters) were identified for reference, but their links are not included in this document.

3. Ghost blog embedded HTML JavaScript for drag and drop .MDI file to dump properties

MDI File Properties Dumper

Drag and Drop .MDI File to Dump Properties

Drag .MDI file here

4. Python class for .MDI file

import os

class MDIHandler:
    """Validate a Microsoft Document Imaging (.MDI) file and copy it.

    The handler only inspects the four-byte header signature; it does not
    parse the TIFF-like structure that follows. ``PROPERTIES`` is a static
    description of the format, not data extracted from the file.
    """

    # Expected header signature: ASCII "EP*" followed by a NUL byte.
    _MAGIC = b'\x45\x50\x2A\x00'

    PROPERTIES = [
        "File extension: .mdi",
        "Magic number (header signature): 45 50 2A 00 (ASCII 'EP*\\0')",
        "MIME type: image/vnd.ms-modi",
        "PRONOM identifier: fmt/881",
        "Proprietary: Yes (Microsoft-specific)",
        "Base format: Tagged Image File Format (TIFF) with custom header and additional compression schemes",
        "Compression codes: 34718 (Binary Level Codec - BLC), 34719 (MODI_VECTOR), 34720 (Progressive Transform Codec - PTC)",
        "Supported image types: Monochrome (compressed in MODI BW), Grayscale or Color (compressed in MODI Color)",
        "Maximum resolution: Images over 900 DPI are downsampled to 900 DPI",
        "OCR support: Can store optical character recognition (OCR) text alongside images",
        "File size advantage: Reduced disk space compared to equivalent TIFF files",
        "Image fidelity: Improved quality compared to TIFF with lossless compression (e.g., LZW)",
        "Multi-page support: Yes (page layout structure like TIFF)",
        "Compatibility: Can only be opened/edited in Microsoft Office Document Imaging (MODI); TIFF is recommended for sharing"
    ]

    def __init__(self, filepath: str) -> None:
        """Remember *filepath*; the file is not opened until ``read()``."""
        self.filepath = filepath
        # True only after the most recent read() matched the magic number.
        self.is_valid = False

    def read(self) -> bool:
        """Check the file's header signature.

        Returns:
            True when the first four bytes equal the .MDI magic number,
            False otherwise (a message is printed in that case).

        Raises:
            OSError: if the file cannot be opened or read.
        """
        # Reset first so a failed (or raising) re-read can never leave a
        # stale True behind for write() to act on.
        self.is_valid = False
        with open(self.filepath, 'rb') as f:
            header = f.read(len(self._MAGIC))
        if header != self._MAGIC:
            print("Invalid .MDI file: Magic number does not match.")
            return False
        self.is_valid = True
        return True

    def decode_and_print_properties(self) -> None:
        """Validate the file and, if valid, print the format properties."""
        if self.read():
            print("Valid .MDI file detected. Properties:")
            for prop in self.PROPERTIES:
                print(f"- {prop}")

    def write(self, output_path: str) -> None:
        """Copy the validated source file byte-for-byte to *output_path*.

        Nothing is written unless the last ``read()`` succeeded.

        Raises:
            OSError: if the source cannot be read or the target written.
        """
        if not self.is_valid:
            print("Cannot write: No valid .MDI file loaded.")
            return
        # The source is read completely before the target is opened, so
        # writing back onto the source path does not truncate the input.
        with open(self.filepath, 'rb') as f_in:
            data = f_in.read()
        with open(output_path, 'wb') as f_out:
            f_out.write(data)
        print(f" .MDI file written to {output_path}")

# Example usage
# mdi = MDIHandler('sample.mdi')
# mdi.decode_and_print_properties()
# mdi.write('output.mdi')

5. Java class for .MDI file

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Validates a Microsoft Document Imaging (.MDI) file by its four-byte header
 * signature and can copy a validated file to another path.
 *
 * <p>{@code PROPERTIES} is a static description of the format; it is not
 * extracted from the file being inspected.
 */
public class MDIHandler {
    /** Expected header signature: ASCII "EP*" followed by a NUL byte. */
    private static final byte[] MAGIC = {0x45, 0x50, 0x2A, 0x00};

    private static final String[] PROPERTIES = {
        "File extension: .mdi",
        "Magic number (header signature): 45 50 2A 00 (ASCII 'EP*\\0')",
        "MIME type: image/vnd.ms-modi",
        "PRONOM identifier: fmt/881",
        "Proprietary: Yes (Microsoft-specific)",
        "Base format: Tagged Image File Format (TIFF) with custom header and additional compression schemes",
        "Compression codes: 34718 (Binary Level Codec - BLC), 34719 (MODI_VECTOR), 34720 (Progressive Transform Codec - PTC)",
        "Supported image types: Monochrome (compressed in MODI BW), Grayscale or Color (compressed in MODI Color)",
        "Maximum resolution: Images over 900 DPI are downsampled to 900 DPI",
        "OCR support: Can store optical character recognition (OCR) text alongside images",
        "File size advantage: Reduced disk space compared to equivalent TIFF files",
        "Image fidelity: Improved quality compared to TIFF with lossless compression (e.g., LZW)",
        "Multi-page support: Yes (page layout structure like TIFF)",
        "Compatibility: Can only be opened/edited in Microsoft Office Document Imaging (MODI); TIFF is recommended for sharing"
    };

    private String filepath;
    /** True only after the most recent {@link #read()} matched the magic number. */
    private boolean isValid = false;

    /**
     * @param filepath path of the file to inspect; it is not opened until {@link #read()}
     */
    public MDIHandler(String filepath) {
        this.filepath = filepath;
    }

    /**
     * Checks the file's header signature.
     *
     * @return {@code true} when the first four bytes equal the .MDI magic number
     * @throws IOException if the file cannot be opened or read
     */
    public boolean read() throws IOException {
        // Reset first so a failed re-read never leaves a stale "valid" flag.
        isValid = false;

        byte[] header = new byte[MAGIC.length];
        int filled = 0;
        try (FileInputStream fis = new FileInputStream(filepath)) {
            // read() may return fewer bytes than requested; keep going until
            // the header is full or the stream ends.
            while (filled < header.length) {
                int n = fis.read(header, filled, header.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        }

        // A short file must not pass: the buffer is zero-initialised, so an
        // unread trailing byte would otherwise look like the expected 0x00.
        boolean matches = filled == header.length;
        for (int i = 0; matches && i < MAGIC.length; i++) {
            matches = header[i] == MAGIC[i];
        }

        if (!matches) {
            System.out.println("Invalid .MDI file: Magic number does not match.");
            return false;
        }
        isValid = true;
        return true;
    }

    /**
     * Validates the file and, if valid, prints the static format properties.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public void decodeAndPrintProperties() throws IOException {
        if (read()) {
            System.out.println("Valid .MDI file detected. Properties:");
            for (String prop : PROPERTIES) {
                System.out.println("- " + prop);
            }
        }
    }

    /**
     * Copies the validated source file byte-for-byte to {@code outputPath}.
     * Nothing is written unless the last {@link #read()} succeeded.
     *
     * @param outputPath destination path
     * @throws IOException if the source cannot be read or the target written
     */
    public void write(String outputPath) throws IOException {
        if (!isValid) {
            System.out.println("Cannot write: No valid .MDI file loaded.");
            return;
        }
        try (FileInputStream fis = new FileInputStream(filepath); FileOutputStream fos = new FileOutputStream(outputPath)) {
            byte[] buffer = new byte[1024];
            int length;
            // -1 is the documented end-of-stream marker.
            while ((length = fis.read(buffer)) != -1) {
                fos.write(buffer, 0, length);
            }
        }
        System.out.println(" .MDI file written to " + outputPath);
    }

    // Example usage
    // public static void main(String[] args) throws IOException {
    //     MDIHandler mdi = new MDIHandler("sample.mdi");
    //     mdi.decodeAndPrintProperties();
    //     mdi.write("output.mdi");
    // }
}

6. JavaScript class for .MDI file

/**
 * Validates a Microsoft Document Imaging (.MDI) file by its four-byte header
 * signature and can copy a validated file to another path (Node.js only).
 *
 * `properties` is a static description of the format; it is not extracted
 * from the file being inspected.
 */
class MDIHandler {
    /**
     * @param {string} filepath Path of the file to inspect; not opened until read().
     */
    constructor(filepath) {
        this.filepath = filepath;
        // True only after the most recent read() matched the magic number.
        this.isValid = false;
        this.properties = [
            "File extension: .mdi",
            "Magic number (header signature): 45 50 2A 00 (ASCII 'EP*\\0')",
            "MIME type: image/vnd.ms-modi",
            "PRONOM identifier: fmt/881",
            "Proprietary: Yes (Microsoft-specific)",
            "Base format: Tagged Image File Format (TIFF) with custom header and additional compression schemes",
            "Compression codes: 34718 (Binary Level Codec - BLC), 34719 (MODI_VECTOR), 34720 (Progressive Transform Codec - PTC)",
            "Supported image types: Monochrome (compressed in MODI BW), Grayscale or Color (compressed in MODI Color)",
            "Maximum resolution: Images over 900 DPI are downsampled to 900 DPI",
            "OCR support: Can store optical character recognition (OCR) text alongside images",
            "File size advantage: Reduced disk space compared to equivalent TIFF files",
            "Image fidelity: Improved quality compared to TIFF with lossless compression (e.g., LZW)",
            "Multi-page support: Yes (page layout structure like TIFF)",
            "Compatibility: Can only be opened/edited in Microsoft Office Document Imaging (MODI); TIFF is recommended for sharing"
        ];
    }

    /**
     * Checks the file's header signature.
     *
     * @returns {Promise<boolean>} true when the first four bytes equal the
     *   .MDI magic number; rejects if the file cannot be opened or read.
     */
    async read() {
        // Note: In Node.js, require 'fs' for file reading
        const fs = require('fs');
        const magic = [0x45, 0x50, 0x2A, 0x00];

        // Reset first so a failed re-read never leaves a stale "valid" flag.
        this.isValid = false;

        // Only the header is needed, so read four bytes instead of the whole file.
        const header = Buffer.alloc(magic.length);
        const fd = fs.openSync(this.filepath, 'r');
        let bytesRead;
        try {
            bytesRead = fs.readSync(fd, header, 0, header.length, 0);
        } finally {
            fs.closeSync(fd);
        }

        const matches = bytesRead === magic.length
            && magic.every((byte, i) => header[i] === byte);
        if (!matches) {
            console.log('Invalid .MDI file: Magic number does not match.');
            return false;
        }
        this.isValid = true;
        return true;
    }

    /**
     * Validates the file and, if valid, prints the static format properties.
     *
     * @returns {Promise<void>}
     */
    async decodeAndPrintProperties() {
        if (await this.read()) {
            console.log('Valid .MDI file detected. Properties:');
            this.properties.forEach(prop => console.log(`- ${prop}`));
        }
    }

    /**
     * Copies the validated source file to outputPath. Nothing is written
     * unless the last read() succeeded.
     *
     * @param {string} outputPath Destination path.
     * @returns {Promise<void>}
     */
    async write(outputPath) {
        if (!this.isValid) {
            console.log('Cannot write: No valid .MDI file loaded.');
            return;
        }
        const fs = require('fs');
        fs.copyFileSync(this.filepath, outputPath);
        console.log(` .MDI file written to ${outputPath}`);
    }
}

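// Example usage (in Node.js)
// Both methods are async, so await them in order; otherwise write() runs
// before decodeAndPrintProperties() has finished printing, and any
// rejection (e.g. a missing file) goes unhandled.
// (async () => {
//     const mdi = new MDIHandler('sample.mdi');
//     await mdi.decodeAndPrintProperties();
//     await mdi.write('output.mdi');
// })().catch(console.error);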

7. C class for .MDI file

Note: C doesn't have built-in "classes" like object-oriented languages, so this is implemented as a struct with functions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* State for one .MDI file being inspected. */
typedef struct {
    char *filepath;  /* heap copy of the path (strdup'd in mdi_create, freed in mdi_destroy) */
    int is_valid;    /* 0 initially; set to 1 by mdi_read when the magic number matches */
} MDIHandler;

/*
 * Static, human-readable description of the .MDI format. These strings are
 * printed as-is by mdi_decode_and_print_properties(); they are not derived
 * from the file being inspected. The array is NULL-terminated so it can be
 * walked without a separate length.
 */
const char *properties[] = {
    "File extension: .mdi",
    "Magic number (header signature): 45 50 2A 00 (ASCII 'EP*\\0')",
    "MIME type: image/vnd.ms-modi",
    "PRONOM identifier: fmt/881",
    "Proprietary: Yes (Microsoft-specific)",
    "Base format: Tagged Image File Format (TIFF) with custom header and additional compression schemes",
    "Compression codes: 34718 (Binary Level Codec - BLC), 34719 (MODI_VECTOR), 34720 (Progressive Transform Codec - PTC)",
    "Supported image types: Monochrome (compressed in MODI BW), Grayscale or Color (compressed in MODI Color)",
    "Maximum resolution: Images over 900 DPI are downsampled to 900 DPI",
    "OCR support: Can store optical character recognition (OCR) text alongside images",
    "File size advantage: Reduced disk space compared to equivalent TIFF files",
    "Image fidelity: Improved quality compared to TIFF with lossless compression (e.g., LZW)",
    "Multi-page support: Yes (page layout structure like TIFF)",
    "Compatibility: Can only be opened/edited in Microsoft Office Document Imaging (MODI); TIFF is recommended for sharing",
    NULL  // Sentinel: marks the end of the list for iteration
};

/*
 * Allocate a handler for the file at `filepath`.
 *
 * The path is copied, so the caller keeps ownership of its own string.
 * The file itself is not opened here.
 *
 * Returns a new handler with is_valid == 0, or NULL if `filepath` is NULL
 * or memory could not be allocated. Release it with mdi_destroy().
 */
MDIHandler* mdi_create(const char *filepath) {
    MDIHandler *handler;

    if (filepath == NULL) {
        return NULL;
    }

    handler = malloc(sizeof *handler);
    if (handler == NULL) {
        return NULL;
    }

    handler->filepath = strdup(filepath);
    if (handler->filepath == NULL) {
        /* Do not hand back a half-initialised handler. */
        free(handler);
        return NULL;
    }

    handler->is_valid = 0;
    return handler;
}

/*
 * Check whether the handler's file starts with the .MDI magic number
 * (45 50 2A 00, ASCII "EP*\0").
 *
 * Sets handler->is_valid to 1 on a match and to 0 in every other case, so
 * the flag always reflects the most recent call.
 *
 * Returns 1 when the signature matches, 0 when the file cannot be opened,
 * is shorter than four bytes, or has a different signature. A message is
 * printed to stdout for each failure.
 */
int mdi_read(MDIHandler *handler) {
    static const unsigned char magic[4] = {0x45, 0x50, 0x2A, 0x00};
    unsigned char header[sizeof(magic)];
    size_t got;
    FILE *f;

    if (!handler || !handler->filepath) {
        printf("Cannot open file.\n");
        return 0;
    }

    /* Reset first so a failed re-read never leaves a stale "valid" flag. */
    handler->is_valid = 0;

    f = fopen(handler->filepath, "rb");
    if (!f) {
        printf("Cannot open file.\n");
        return 0;
    }
    got = fread(header, 1, sizeof(header), f);
    fclose(f);

    /* A short read leaves part of `header` uninitialised; reject it before
     * comparing any bytes. */
    if (got != sizeof(header) || memcmp(header, magic, sizeof(magic)) != 0) {
        printf("Invalid .MDI file: Magic number does not match.\n");
        return 0;
    }

    handler->is_valid = 1;
    return 1;
}

/*
 * Validate the handler's file via mdi_read() and, when it is a valid .MDI
 * file, print every entry of the NULL-terminated `properties` table, one
 * per line. Prints nothing further when validation fails.
 */
void mdi_decode_and_print_properties(MDIHandler *handler) {
    const char **entry;

    if (!mdi_read(handler)) {
        return;
    }

    printf("Valid .MDI file detected. Properties:\n");
    /* Walk the table by pointer until the NULL sentinel is reached. */
    for (entry = properties; *entry != NULL; ++entry) {
        printf("- %s\n", *entry);
    }
}

/*
 * Copy the handler's file byte-for-byte to `output_path`.
 *
 * Nothing is written unless a previous mdi_read() marked the handler valid.
 * Success or failure is reported on stdout; success is only reported when
 * every read, write and the final close of the output succeeded.
 *
 * NOTE: the output is opened with "wb" while the source is still to be
 * read, so passing the source's own path as `output_path` truncates the
 * source before it is copied.
 */
void mdi_write(MDIHandler *handler, const char *output_path) {
    FILE *f_in;
    FILE *f_out;
    char buffer[1024];
    size_t length;
    int ok = 1;

    if (!handler || !handler->is_valid) {
        printf("Cannot write: No valid .MDI file loaded.\n");
        return;
    }

    f_in = fopen(handler->filepath, "rb");
    f_out = fopen(output_path, "wb");
    if (!f_in || !f_out) {
        printf("File operation failed.\n");
        if (f_in) fclose(f_in);
        if (f_out) fclose(f_out);
        return;
    }

    while ((length = fread(buffer, 1, sizeof(buffer), f_in)) > 0) {
        /* A short write means the copy is incomplete (e.g. disk full). */
        if (fwrite(buffer, 1, length, f_out) != length) {
            ok = 0;
            break;
        }
    }
    /* fread returns 0 for both EOF and error; tell them apart. */
    if (ferror(f_in)) {
        ok = 0;
    }

    fclose(f_in);
    /* Buffered data is flushed on close, so a failure here also means the
     * output is incomplete. */
    if (fclose(f_out) != 0) {
        ok = 0;
    }

    if (!ok) {
        printf("File operation failed.\n");
        return;
    }
    printf(" .MDI file written to %s\n", output_path);
}

/*
 * Release a handler and the path string it holds.
 * Passing NULL is a no-op, mirroring free().
 */
void mdi_destroy(MDIHandler *handler) {
    if (handler == NULL) {
        return;
    }
    free(handler->filepath);
    free(handler);
}

// Example usage
// int main() {
//     MDIHandler *mdi = mdi_create("sample.mdi");
//     mdi_decode_and_print_properties(mdi);
//     mdi_write(mdi, "output.mdi");
//     mdi_destroy(mdi);
//     return 0;
// }