Task 394: .MEX File Format

Task 394: .MEX File Format

File Format Specifications for the .MEX File Format

The .MEX file format refers to the Market Exchange Format (MEX), a sparse matrix format commonly used in bioinformatics, particularly in 10x Genomics Cell Ranger outputs for gene-barcode matrices. It is a text-based format for representing sparse matrices, based on the Matrix Market standard. The file is typically named matrix.mtx, but the format is referred to as MEX, and the task appears to treat .MEX as the extension for this context. The specification is public and defined by the Matrix Market format, with specific conventions for 10x Genomics data (integer field, general symmetry, UMI counts as values).

The format consists of a header, optional comments, a size line, and data lines. It is accompanied by barcodes.tsv and features.tsv (or genes.tsv) files, but the core matrix is in the .MEX/mtx file.

  1. Make a list of all the properties of this file format intrinsic to its file system.

The intrinsic properties (fields and structure) of the .MEX file format are:

  • Object type: "matrix" (fixed)
  • Storage format: "coordinate" (fixed for sparse)
  • Data field: "integer" (for 10x Genomics UMI counts; could be "real" in other contexts)
  • Symmetry type: "general" (fixed for 10x Genomics; could be "symmetric", "skew-symmetric", or "hermitian" in general Matrix Market)
  • Number of rows (number of features/genes)
  • Number of columns (number of barcodes/cells)
  • Number of non-zero entries (number of non-zero UMI counts)
  • Sparse data entries: A list of tuples, each with row index (1-based), column index (1-based), and value (integer UMI count)
  2. Find two direct download links for files of format .MEX.

Here are two direct download links to tar.gz files containing example .MEX format files (the matrix.mtx and associated tsv files) from 10x Genomics public datasets:

These archives contain the matrix.mtx (.MEX equivalent), barcodes.tsv, and features.tsv. Extract them to access the files.

  3. Write HTML with embedded JavaScript, suitable for a Ghost blog post, that lets a user drag and drop a file of format .MEX and dumps all of these properties to the screen.

Here is a simple HTML page with embedded JavaScript for drag-and-drop. It reads the .MEX file as text, parses the header, size, and data, then dumps the properties to the screen.

MEX File Parser

Drag and Drop .MEX File Parser

Drag and drop a .MEX file here
  4. Write a Python class that can open any file of format .MEX, decode, read, and write it, and print to the console all the properties from the above list.
class MexFile:
    """In-memory representation of a Matrix Market coordinate (.MEX/.mtx) file.

    Attributes:
        object, format, field, symmetry: the four tokens that follow
            ``%%MatrixMarket`` on the header line, stored as read.
        rows, cols, entries: the three integers of the size line.
        data: list of ``(row, col, value)`` tuples, one per data line, with
            indices kept exactly as written in the file.

    Only entries of the form ``row col value`` are supported; data lines with
    fewer than three tokens (e.g. a "pattern" matrix) are rejected.
    """

    def __init__(self, filename=None):
        """Create an empty matrix description, or load *filename* if given."""
        self.object = 'matrix'
        self.format = 'coordinate'
        self.field = 'integer'
        self.symmetry = 'general'
        self.rows = 0
        self.cols = 0
        self.entries = 0
        self.data = []  # list of (row, col, value)
        if filename:
            self.read(filename)

    @staticmethod
    def _parse_value(token):
        """Convert a value token to ``int`` when possible, otherwise ``float``."""
        try:
            return int(token)
        except ValueError:
            # "real" matrices carry values such as 1.5 or 2e-3.
            return float(token)

    def read(self, filename):
        """Parse *filename* and replace this object's contents with it.

        Blank lines and ``%`` comment lines are skipped. The first remaining
        line is taken as the size line and every later one as a data entry.

        Raises:
            OSError: if the file cannot be opened.
            ValueError: if the header line, the size line, or a data line is
                missing or malformed. The object is left unchanged then.
        """
        header = None
        size_line = None
        data_lines = []
        with open(filename, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                if line.startswith('%%MatrixMarket'):
                    header = line
                elif line.startswith('%'):
                    continue
                elif size_line is None:
                    size_line = line
                else:
                    data_lines.append(line)

        header_parts = header.split() if header else []
        if len(header_parts) < 5:
            raise ValueError(
                f"{filename}: missing or malformed %%MatrixMarket header line"
            )
        size_parts = size_line.split() if size_line else []
        if len(size_parts) < 3:
            raise ValueError(f"{filename}: missing or malformed size line")
        rows, cols, entries = (int(part) for part in size_parts[:3])

        data = []
        for line in data_lines:
            parts = line.split()
            if len(parts) < 3:
                raise ValueError(
                    f"{filename}: expected 'row col value', got {line!r}"
                )
            data.append(
                (int(parts[0]), int(parts[1]), self._parse_value(parts[2]))
            )

        # Commit only after everything parsed, so a failed read never leaves
        # the object holding a mix of old and new state.
        self.object, self.format, self.field, self.symmetry = header_parts[1:5]
        self.rows, self.cols, self.entries = rows, cols, entries
        self.data = data

    def print_properties(self):
        """Print the header fields, the sizes, and up to ten data entries."""
        print(f"Object type: {self.object}")
        print(f"Storage format: {self.format}")
        print(f"Data field: {self.field}")
        print(f"Symmetry type: {self.symmetry}")
        print(f"Number of rows: {self.rows}")
        print(f"Number of columns: {self.cols}")
        print(f"Number of non-zero entries: {self.entries}")
        print("Sparse data entries:")
        for entry in self.data[:10]:  # Limit to first 10
            print(entry)
        if len(self.data) > 10:
            print("... (truncated)")

    def write(self, filename):
        """Write the header, one comment line, the size line, and all entries."""
        with open(filename, 'w') as f:
            f.write(
                f"%%MatrixMarket {self.object} {self.format} "
                f"{self.field} {self.symmetry}\n"
            )
            f.write("% Generated MEX file\n")
            f.write(f"{self.rows} {self.cols} {self.entries}\n")
            f.writelines(
                f"{row} {col} {value}\n" for row, col, value in self.data
            )
# Example usage:
# mex = MexFile('example.mex')
# mex.print_properties()
# mex.write('output.mex')
  5. Write a Java class that can open any file of format .MEX, decode, read, and write it, and print to the console all the properties from the above list.
import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * In-memory representation of a Matrix Market coordinate (.MEX/.mtx) file
 * whose entries are integer triples {@code row col value}.
 */
public class MexFile {
    // The four tokens that follow "%%MatrixMarket" on the header line.
    private String object = "matrix";
    private String format = "coordinate";
    private String field = "integer";
    private String symmetry = "general";
    // The three integers of the size line.
    private int rows = 0;
    private int cols = 0;
    private int entries = 0;
    private List<int[]> data = new ArrayList<>();  // each int[3] = {row, col, value}

    /**
     * Creates an instance populated from {@code filename}.
     *
     * @throws IOException if the file cannot be read or is malformed
     */
    public MexFile(String filename) throws IOException {
        read(filename);
    }

    /** Creates an empty instance holding the default header values. */
    public MexFile() {}

    /**
     * Parses {@code filename} and replaces this object's contents with it.
     * Blank lines and {@code %} comment lines are skipped; the first remaining
     * line is the size line and every later one is a data entry.
     *
     * @throws IOException if the file cannot be read, or if the header line,
     *         the size line, or a data line has too few tokens
     * @throws NumberFormatException if a size or entry token is not an integer
     */
    public void read(String filename) throws IOException {
        String header = "";
        String sizeLine = "";
        List<String> dataLines = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            // The assignment must be parenthesised: "!=" binds tighter than "=".
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) continue;
                if (line.startsWith("%%MatrixMarket")) {
                    header = line;
                } else if (line.startsWith("%")) {
                    continue;
                } else if (sizeLine.isEmpty()) {
                    sizeLine = line;
                } else {
                    dataLines.add(line);
                }
            }
        }

        String[] headerParts = header.split("\\s+");
        if (headerParts.length < 5) {
            throw new IOException(filename + ": missing or malformed %%MatrixMarket header line");
        }
        String[] sizeParts = sizeLine.split("\\s+");
        if (sizeParts.length < 3) {
            throw new IOException(filename + ": missing or malformed size line");
        }
        int parsedRows = Integer.parseInt(sizeParts[0]);
        int parsedCols = Integer.parseInt(sizeParts[1]);
        int parsedEntries = Integer.parseInt(sizeParts[2]);

        List<int[]> parsedData = new ArrayList<>(dataLines.size());
        for (String dataLine : dataLines) {
            String[] parts = dataLine.split("\\s+");
            if (parts.length < 3) {
                throw new IOException(filename + ": expected 'row col value', got '" + dataLine + "'");
            }
            parsedData.add(new int[]{
                Integer.parseInt(parts[0]),
                Integer.parseInt(parts[1]),
                Integer.parseInt(parts[2])
            });
        }

        // Commit only after the whole file parsed, so a failed read leaves
        // the previous contents intact.
        object = headerParts[1];
        format = headerParts[2];
        field = headerParts[3];
        symmetry = headerParts[4];
        rows = parsedRows;
        cols = parsedCols;
        entries = parsedEntries;
        data = parsedData;
    }

    /** Prints the header fields, the sizes, and up to ten data entries. */
    public void printProperties() {
        System.out.println("Object type: " + object);
        System.out.println("Storage format: " + format);
        System.out.println("Data field: " + field);
        System.out.println("Symmetry type: " + symmetry);
        System.out.println("Number of rows: " + rows);
        System.out.println("Number of columns: " + cols);
        System.out.println("Number of non-zero entries: " + entries);
        System.out.println("Sparse data entries:");
        for (int i = 0; i < Math.min(10, data.size()); i++) {
            int[] entry = data.get(i);
            System.out.println("(" + entry[0] + ", " + entry[1] + ", " + entry[2] + ")");
        }
        if (data.size() > 10) {
            System.out.println("... (truncated)");
        }
    }

    /**
     * Writes the header, one comment line, the size line, and all entries.
     *
     * @throws IOException if the file cannot be written
     */
    public void write(String filename) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename))) {
            bw.write("%%MatrixMarket " + object + " " + format + " " + field + " " + symmetry + "\n");
            bw.write("% Generated MEX file\n");
            bw.write(rows + " " + cols + " " + entries + "\n");
            for (int[] entry : data) {
                bw.write(entry[0] + " " + entry[1] + " " + entry[2] + "\n");
            }
        }
    }

    // Example usage:
    // public static void main(String[] args) throws IOException {
    //     MexFile mex = new MexFile("example.mex");
    //     mex.printProperties();
    //     mex.write("output.mex");
    // }
}
  6. Write a JavaScript class that can open any file of format .MEX, decode, read, and write it, and print to the console all the properties from the above list.

Note: JavaScript in browser can't directly open files from disk without user input, so this assumes a Node.js environment with 'fs' module.

const fs = require('fs');

/**
 * In-memory representation of a Matrix Market coordinate (.MEX/.mtx) file.
 * `data` holds one [row, col, value] array per data line.
 */
class MexFile {
    /**
     * @param {?string} filename - file to load immediately; omit for an
     *     empty instance holding the default header values.
     */
    constructor(filename = null) {
        this.object = 'matrix';
        this.format = 'coordinate';
        this.field = 'integer';
        this.symmetry = 'general';
        this.rows = 0;
        this.cols = 0;
        this.entries = 0;
        this.data = [];  // array of [row, col, value]
        if (filename) {
            this.read(filename);
        }
    }

    /**
     * Parse `filename` and replace this object's contents with it.
     * Blank lines and `%` comment lines are skipped; the first remaining
     * line is the size line and every later one is a data entry.
     *
     * @param {string} filename
     * @throws {Error} if the header line, the size line, or a data line is
     *     missing or malformed; the object is left unchanged in that case.
     */
    read(filename) {
        const text = fs.readFileSync(filename, 'utf8');
        const lines = text.split('\n').map(line => line.trim()).filter(line => line);
        let header = '';
        let sizeLine = '';
        const dataLines = [];
        for (const line of lines) {
            if (line.startsWith('%%MatrixMarket')) {
                header = line;
            } else if (line.startsWith('%')) {
                continue;
            } else if (!sizeLine) {
                sizeLine = line;
            } else {
                dataLines.push(line);
            }
        }

        const headerParts = header.split(/\s+/);
        if (headerParts.length < 5) {
            throw new Error(`${filename}: missing or malformed %%MatrixMarket header line`);
        }
        // Explicit radix 10 so the sizes are always read as decimal.
        const sizes = sizeLine.split(/\s+/).slice(0, 3).map(token => parseInt(token, 10));
        if (sizes.length < 3 || sizes.some(Number.isNaN)) {
            throw new Error(`${filename}: missing or malformed size line`);
        }

        const data = dataLines.map(line => {
            const parts = line.split(/\s+/);
            const row = parseInt(parts[0], 10);
            const col = parseInt(parts[1], 10);
            // Number() keeps fractional values of "real" matrices, which
            // parseInt would silently truncate.
            const value = Number(parts[2]);
            if (parts.length < 3 || [row, col, value].some(Number.isNaN)) {
                throw new Error(`${filename}: expected 'row col value', got '${line}'`);
            }
            return [row, col, value];
        });

        // Commit only after the whole file parsed successfully.
        [, this.object, this.format, this.field, this.symmetry] = headerParts;
        [this.rows, this.cols, this.entries] = sizes;
        this.data = data;
    }

    /** Print the header fields, the sizes, and up to ten data entries. */
    printProperties() {
        console.log(`Object type: ${this.object}`);
        console.log(`Storage format: ${this.format}`);
        console.log(`Data field: ${this.field}`);
        console.log(`Symmetry type: ${this.symmetry}`);
        console.log(`Number of rows: ${this.rows}`);
        console.log(`Number of columns: ${this.cols}`);
        console.log(`Number of non-zero entries: ${this.entries}`);
        console.log('Sparse data entries:');
        for (let i = 0; i < Math.min(10, this.data.length); i++) {
            console.log(this.data[i]);
        }
        if (this.data.length > 10) {
            console.log('... (truncated)');
        }
    }

    /**
     * Write the header, one comment line, the size line, and all entries.
     *
     * @param {string} filename
     */
    write(filename) {
        const out = [
            `%%MatrixMarket ${this.object} ${this.format} ${this.field} ${this.symmetry}\n`,
            `% Generated MEX file\n`,
            `${this.rows} ${this.cols} ${this.entries}\n`,
        ];
        for (const entry of this.data) {
            out.push(`${entry[0]} ${entry[1]} ${entry[2]}\n`);
        }
        fs.writeFileSync(filename, out.join(''));
    }
}

// Example usage:
// const mex = new MexFile('example.mex');
// mex.printProperties();
// mex.write('output.mex');
  7. Write a C "class" that can open any file of format .MEX, decode, read, and write it, and print to the console all the properties from the above list.

Note: C doesn't have "classes" in the OO sense, but we can use a struct with functions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LINE 256
#define MAX_DATA 100000  // Arbitrary max for data entries

// Parsed contents of a Matrix Market coordinate (.MEX/.mtx) file.
typedef struct {
    // The four tokens that follow "%%MatrixMarket" on the header line.
    char object[10];
    char format[20];
    char field[10];
    char symmetry[16];  // must fit "skew-symmetric" (14 chars) plus the NUL
    // The three integers of the size line.
    int rows;
    int cols;
    int entries;
    int (*data)[3];  // array of [row, col, value]
    int data_count;  // number of triples stored in data
} MexFile;

// Reset *mex to an empty integer/general coordinate matrix with no data.
void mex_init(MexFile *mex) {
    // One aggregate assignment sets every member: the header strings, the
    // zero sizes, and the empty data array.
    *mex = (MexFile){
        .object = "matrix",
        .format = "coordinate",
        .field = "integer",
        .symmetry = "general",
        .rows = 0,
        .cols = 0,
        .entries = 0,
        .data = NULL,
        .data_count = 0,
    };
}

// Load the Matrix Market coordinate file `filename` into *mex.
//
// Blank lines and '%' comment lines are skipped; the first remaining line is
// the size line and every later one is a "row col value" entry. Lines that do
// not parse are reported on stderr and skipped. The process exits with status
// 1 if the file cannot be opened or memory runs out.
//
// Any buffer already referenced by mex->data is NOT released here; call
// mex_free() first when re-reading into the same struct.
void mex_read(MexFile *mex, const char *filename) {
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        perror("Failed to open file");
        exit(1);
    }
    char line[MAX_LINE];
    char header[MAX_LINE] = "";
    char size_line[MAX_LINE] = "";
    size_t data_capacity = 1000;
    int (*data)[3] = malloc(data_capacity * sizeof *data);
    int data_count = 0;
    if (!data) {
        perror("Failed to allocate data buffer");
        fclose(fp);
        exit(1);
    }
    while (fgets(line, sizeof line, fp)) {
        line[strcspn(line, "\r\n")] = 0;  // Trim newline (and a CR from CRLF files)
        const char *text = line + strspn(line, " \t");
        if (*text == '\0') {
            continue;  // Blank line
        }
        // In a plain string literal "%%" is two characters, so this compares
        // against the 14-character banner prefix.
        if (strncmp(text, "%%MatrixMarket", 14) == 0) {
            strcpy(header, text);
        } else if (*text == '%') {
            continue;
        } else if (size_line[0] == '\0') {
            strcpy(size_line, text);
        } else {
            int row, col, value;
            if (sscanf(text, "%d %d %d", &row, &col, &value) != 3) {
                fprintf(stderr, "Skipping malformed data line: %s\n", text);
                continue;
            }
            if ((size_t)data_count >= data_capacity) {
                size_t grown_capacity = data_capacity * 2;
                // Keep the old pointer until realloc succeeds so it is not
                // leaked on failure.
                int (*grown)[3] = realloc(data, grown_capacity * sizeof *grown);
                if (!grown) {
                    perror("Failed to grow data buffer");
                    free(data);
                    fclose(fp);
                    exit(1);
                }
                data = grown;
                data_capacity = grown_capacity;
            }
            data[data_count][0] = row;
            data[data_count][1] = col;
            data[data_count][2] = value;
            data_count++;
        }
    }
    fclose(fp);
    mex->data = data;
    mex->data_count = data_count;

    // Scratch buffers are as large as a whole line, so the %s conversions
    // cannot overflow them; snprintf then bounds the copy into the struct.
    char object[MAX_LINE], format[MAX_LINE], field[MAX_LINE], symmetry[MAX_LINE];
    // In a scanf format "%%" matches ONE '%', so the two-character banner
    // prefix needs "%%%%".
    if (sscanf(header, "%%%%MatrixMarket %s %s %s %s", object, format, field, symmetry) == 4) {
        snprintf(mex->object, sizeof mex->object, "%s", object);
        snprintf(mex->format, sizeof mex->format, "%s", format);
        snprintf(mex->field, sizeof mex->field, "%s", field);
        snprintf(mex->symmetry, sizeof mex->symmetry, "%s", symmetry);
    } else {
        fprintf(stderr, "Missing or malformed %%%%MatrixMarket header; header fields left unchanged\n");
    }
    if (sscanf(size_line, "%d %d %d", &mex->rows, &mex->cols, &mex->entries) != 3) {
        fprintf(stderr, "Missing or malformed size line\n");
    }
}

// Print the header fields, the sizes, and at most the first ten entries of
// *mex to stdout.
void mex_print_properties(const MexFile *mex) {
    const int preview_max = 10;

    printf("Object type: %s\n"
           "Storage format: %s\n"
           "Data field: %s\n"
           "Symmetry type: %s\n",
           mex->object, mex->format, mex->field, mex->symmetry);
    printf("Number of rows: %d\n"
           "Number of columns: %d\n"
           "Number of non-zero entries: %d\n",
           mex->rows, mex->cols, mex->entries);
    printf("Sparse data entries:\n");

    int shown = mex->data_count;
    if (shown > preview_max) {
        shown = preview_max;
    }
    int idx = 0;
    while (idx < shown) {
        const int *triple = mex->data[idx];
        printf("(%d, %d, %d)\n", triple[0], triple[1], triple[2]);
        ++idx;
    }
    if (mex->data_count > preview_max) {
        printf("... (truncated)\n");
    }
}

// Write *mex to `filename`: the header line, one comment line, the size line,
// then one "row col value" line per stored entry. The process exits with
// status 1 if the file cannot be opened or the data cannot be fully written.
void mex_write(const MexFile *mex, const char *filename) {
    FILE *fp = fopen(filename, "w");
    if (!fp) {
        perror("Failed to open file");
        exit(1);
    }
    // In a printf format "%%" emits ONE '%', so the two-character banner
    // prefix needs "%%%%".
    fprintf(fp, "%%%%MatrixMarket %s %s %s %s\n", mex->object, mex->format, mex->field, mex->symmetry);
    fprintf(fp, "%% Generated MEX file\n");
    fprintf(fp, "%d %d %d\n", mex->rows, mex->cols, mex->entries);
    for (int i = 0; i < mex->data_count; i++) {
        fprintf(fp, "%d %d %d\n", mex->data[i][0], mex->data[i][1], mex->data[i][2]);
    }
    // Check both the stream's error flag and fclose, which flushes buffered
    // output: either can reveal a write failure (e.g. disk full).
    int write_failed = ferror(fp);
    if (fclose(fp) != 0 || write_failed) {
        perror("Failed to write file");
        exit(1);
    }
}

// Release the entry array owned by *mex. Safe to call with NULL and safe to
// call more than once on the same struct.
void mex_free(MexFile *mex) {
    if (!mex) {
        return;
    }
    free(mex->data);
    // Clear the pointer and count so a second call, or a later print/write,
    // never touches freed memory.
    mex->data = NULL;
    mex->data_count = 0;
}

// Example usage:
// int main() {
//     MexFile mex;
//     mex_init(&mex);
//     mex_read(&mex, "example.mex");
//     mex_print_properties(&mex);
//     mex_write(&mex, "output.mex");
//     mex_free(&mex);
//     return 0;
// }