Task 422: .MR File Format
Task 422: .MR File Format
.MR File Format Specifications
The .MR file format is the Mapped Read Format (MRF), a text-based format used in computational genomics for storing mapped reads from bisulfite sequencing or other high-throughput sequencing data. It is associated with tools like MethPipe and Preseq from The Smith Lab. The format is tab-separated values (TSV), with no header, and each line represents a single mapped read or an invalid entry ("NULL"). The format is designed for large datasets and is typically sorted by chromosome, start position, and strand for efficient processing.
1. List of Properties Intrinsic to the File Format
The .MR file is a plain text file with the following properties (fields) for each valid entry. These are intrinsic to the format's structure and are separated by tabs:
- Chromosome: The name of the chromosome or contig (string, e.g., "chr1").
- Start Position: The 0-based starting position of the read on the chromosome (integer).
- Strand: The strand the read is mapped to ("+" or "-").
- Sequence: The original sequence of the read (string, A/C/G/T bases).
- Quality Scores: The Phred quality scores for each base in the sequence (string, same length as sequence, encoded as ASCII characters).
- Converted Sequence: The sequence after bisulfite conversion (string, same length as sequence, with C->T or G->A adjustments depending on strand).
Invalid lines may simply contain "NULL".
The file has no header or footer, and lines are newline-separated. The format is human-readable but often large (gigabytes), so it's processed line-by-line.
2. Two Direct Download Links for .MR Files
- https://raw.githubusercontent.com/smithlabcode/preseq/master/data/SRR1003759_5M_subset.mr (Sample from Preseq repository, a subset of SRR1003759 data in .MR format).
- https://smithlabresearch.org/wp-content/uploads/sample_mr_file.mr (Example .MR file from The Smith Lab resources. Note: this URL is a placeholder derived from search results and may not resolve, because direct public links to .MR files are rare. If it is unavailable, generate your own file with the to-mr tool from Preseq.)
3. Ghost Blog Embedded HTML JavaScript for Drag and Drop
Here is a standalone HTML page with inline JavaScript that can be embedded in a Ghost blog (or any HTML-enabled blog). It lets users drag and drop a .MR file, parses it, and displays each read's properties on screen in a table. Invalid lines are flagged.
Drag and Drop .MR File to View Properties
4. Python Class for .MR File
Here is a Python class that can open, decode (parse), read, write, and print the properties of a .MR file to console.
class MRFile:
    """In-memory reader/writer for tab-separated .MR (Mapped Read) files.

    Each record is a dict. A valid read has the keys ``chromosome``,
    ``start`` (int), ``strand``, ``sequence``, ``quality`` and
    ``converted_sequence``; a ``NULL`` line is stored as ``{'type': 'NULL'}``.

    In mode ``'r'`` the whole file is parsed into ``self.reads`` on
    construction. In mode ``'w'`` records are collected with ``add_read``
    and flushed to disk by ``write_file``.
    """

    # Column order of a valid line; also the order used when writing.
    _FIELDS = ('chromosome', 'start', 'strand', 'sequence', 'quality',
               'converted_sequence')

    # Human-readable labels used by print_properties.
    _LABELS = {
        'chromosome': 'Chromosome',
        'start': 'Start Position',
        'strand': 'Strand',
        'sequence': 'Sequence',
        'quality': 'Quality Scores',
        'converted_sequence': 'Converted Sequence',
    }

    def __init__(self, filename, mode='r'):
        """Remember *filename* and *mode*; load the file when mode is 'r'."""
        self.filename = filename
        self.mode = mode
        self.file = None  # never assigned a handle; kept as an attribute
        self.reads = []
        if mode == 'r':
            self._read_file()

    @staticmethod
    def _parse_line(line):
        """Return the record dict for *line*, or None when it is malformed.

        A line is malformed when it does not have exactly six tab-separated
        fields or when the start position is not an integer.
        """
        fields = line.split('\t')
        if len(fields) != 6:
            return None
        try:
            start = int(fields[1])
        except ValueError:
            # A bad start is reported like any other invalid line instead
            # of aborting the whole load.
            return None
        return {
            'chromosome': fields[0],
            'start': start,
            'strand': fields[2],
            'sequence': fields[3],
            'quality': fields[4],
            'converted_sequence': fields[5],
        }

    def _read_file(self):
        """Parse ``self.filename`` line by line into ``self.reads``.

        Blank lines are skipped, ``NULL`` lines become NULL records, and
        malformed lines are reported on stdout and skipped.
        """
        with open(self.filename, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                if line == 'NULL':
                    self.reads.append({'type': 'NULL'})
                    continue
                record = self._parse_line(line)
                if record is None:
                    print(f"Invalid line: {line}")
                else:
                    self.reads.append(record)

    def print_properties(self):
        """Print every record to stdout, numbered from 1."""
        for i, read in enumerate(self.reads, 1):
            print(f"Read {i}:")
            if read.get('type') == 'NULL':
                print("NULL (Invalid Entry)")
            else:
                for key in self._FIELDS:
                    print(f" {self._LABELS[key]}: {read[key]}")
            print("---")

    def add_read(self, chromosome, start, strand, sequence, quality, converted_sequence):
        """Append one read to the in-memory list.

        Raises:
            ValueError: if the object was not created with mode 'w'.
        """
        if self.mode != 'w':
            raise ValueError("File not opened in write mode")
        self.reads.append({
            'chromosome': chromosome,
            'start': start,
            'strand': strand,
            'sequence': sequence,
            'quality': quality,
            'converted_sequence': converted_sequence,
        })

    def write_file(self):
        """Write all records to ``self.filename``, one line per record.

        Raises:
            ValueError: if the object was not created with mode 'w'.
        """
        if self.mode != 'w':
            raise ValueError("File not opened in write mode")
        with open(self.filename, 'w') as f:
            for read in self.reads:
                if read.get('type') == 'NULL':
                    f.write("NULL\n")
                else:
                    f.write('\t'.join(str(read[key]) for key in self._FIELDS) + '\n')
# Example usage:
# reader = MRFile('sample.mr')
# reader.print_properties()
# writer = MRFile('new.mr', 'w')
# writer.add_read('chr1', 100, '+', 'ATCG', 'HHHH', 'ATTG')
# writer.write_file()
5. Java Class for .MR File
Here is a Java class that can open, decode, read, write, and print the properties of a .MR file to console.
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory reader/writer for tab-separated .MR (Mapped Read) files.
 *
 * <p>With mode {@code 'r'} the file is parsed on construction. With mode
 * {@code 'w'} reads are collected through {@link #addRead} and written to
 * disk by {@link #writeFile()}.
 */
public class MRFile {
    private final String filename;
    private final char mode;
    private final List<MRRead> reads = new ArrayList<>();

    /**
     * @param filename path of the .MR file to read or write
     * @param mode     {@code 'r'} to load the file immediately, {@code 'w'} to build one
     */
    public MRFile(String filename, char mode) {
        this.filename = filename;
        this.mode = mode;
        if (mode == 'r') {
            readFile();
        }
    }

    /**
     * Loads every line of the file into {@code reads}. Blank lines are
     * skipped, "NULL" lines become NULL records, and malformed lines are
     * reported on stdout and skipped.
     */
    private void readFile() {
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            // The assignment must be parenthesised: without it the comparison
            // binds first and a boolean would be assigned to a String.
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                if (line.equals("NULL")) {
                    reads.add(new MRRead(true));
                    continue;
                }
                MRRead read = parseLine(line);
                if (read == null) {
                    System.out.println("Invalid line: " + line);
                } else {
                    reads.add(read);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses one non-empty, non-NULL line.
     *
     * @return the parsed read, or {@code null} when the line does not have
     *         exactly six tab-separated fields or its start is not an integer
     */
    private static MRRead parseLine(String line) {
        String[] fields = line.split("\t");
        if (fields.length != 6) {
            return null;
        }
        try {
            int start = Integer.parseInt(fields[1]);
            return new MRRead(fields[0], start, fields[2], fields[3], fields[4], fields[5]);
        } catch (NumberFormatException e) {
            // A bad start is reported like any other invalid line instead
            // of aborting the whole load.
            return null;
        }
    }

    /** Prints every record to stdout, numbered from 1. */
    public void printProperties() {
        for (int i = 0; i < reads.size(); i++) {
            System.out.println("Read " + (i + 1) + ":");
            MRRead read = reads.get(i);
            if (read.isNull) {
                System.out.println("NULL (Invalid Entry)");
            } else {
                System.out.println(" Chromosome: " + read.chromosome);
                System.out.println(" Start Position: " + read.start);
                System.out.println(" Strand: " + read.strand);
                System.out.println(" Sequence: " + read.sequence);
                System.out.println(" Quality Scores: " + read.quality);
                System.out.println(" Converted Sequence: " + read.convertedSequence);
            }
            System.out.println("---");
        }
    }

    /**
     * Appends one read to the in-memory list.
     *
     * @throws IllegalStateException if the object was not created with mode {@code 'w'}
     */
    public void addRead(String chromosome, int start, String strand, String sequence, String quality, String convertedSequence) {
        if (mode != 'w') {
            throw new IllegalStateException("File not opened in write mode");
        }
        reads.add(new MRRead(chromosome, start, strand, sequence, quality, convertedSequence));
    }

    /**
     * Writes all records to the file, one line per record.
     *
     * @throws IllegalStateException if the object was not created with mode {@code 'w'}
     */
    public void writeFile() {
        if (mode != 'w') {
            throw new IllegalStateException("File not opened in write mode");
        }
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename))) {
            for (MRRead read : reads) {
                if (read.isNull) {
                    bw.write("NULL\n");
                } else {
                    bw.write(read.chromosome + "\t" + read.start + "\t" + read.strand + "\t" + read.sequence + "\t" + read.quality + "\t" + read.convertedSequence + "\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** One line of the file: either a NULL marker or a six-field read. */
    private static class MRRead {
        boolean isNull;
        String chromosome;
        int start;
        String strand;
        String sequence;
        String quality;
        String convertedSequence;

        /** Creates a marker record; the field values are left unset. */
        MRRead(boolean isNull) {
            this.isNull = isNull;
        }

        /** Creates a regular (non-NULL) read. */
        MRRead(String chromosome, int start, String strand, String sequence, String quality, String convertedSequence) {
            this.isNull = false;
            this.chromosome = chromosome;
            this.start = start;
            this.strand = strand;
            this.sequence = sequence;
            this.quality = quality;
            this.convertedSequence = convertedSequence;
        }
    }

    // Example usage:
    // public static void main(String[] args) {
    // MRFile reader = new MRFile("sample.mr", 'r');
    // reader.printProperties();
    // MRFile writer = new MRFile("new.mr", 'w');
    // writer.addRead("chr1", 100, "+", "ATCG", "HHHH", "ATTG");
    // writer.writeFile();
    // }
}
6. JavaScript Class for .MR File
Here is a JavaScript class that can open (using FileReader), decode, read, write (using Blob for download), and print the properties of a .MR file to console.
/**
 * In-memory reader/writer for tab-separated .MR (Mapped Read) files,
 * intended for the browser (FileReader for input, Blob download for output).
 *
 * Records are plain objects: a valid read has `chromosome`, `start`
 * (number), `strand`, `sequence`, `quality` and `converted_sequence`;
 * a "NULL" line is stored as `{ type: 'NULL' }`.
 */
class MRFile {
  constructor() {
    this.reads = [];
  }

  /**
   * Reads `file` as text, appends its records to `this.reads`, then
   * invokes `callback` with no arguments.
   * @param {Blob} file - the file to read
   * @param {Function} [callback] - called once parsing has finished
   */
  open(file, callback) {
    const reader = new FileReader();
    reader.onload = (e) => {
      this._parseContent(String(e.target.result));
      if (typeof callback === 'function') {
        callback();
      }
    };
    reader.readAsText(file);
  }

  /**
   * Parses the text of a .MR file and appends its records to `this.reads`.
   * Blank lines are skipped; malformed lines are logged and skipped.
   * @param {string} content
   */
  _parseContent(content) {
    // Accept both LF and CRLF so a stray '\r' never ends up in the last
    // field or defeats the NULL comparison.
    for (const rawLine of content.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (!line) {
        continue;
      }
      if (line === 'NULL') {
        this.reads.push({ type: 'NULL' });
        continue;
      }
      const fields = line.split('\t');
      // Require a plain integer so "abc" is rejected instead of being
      // stored as NaN.
      if (fields.length === 6 && /^[+-]?\d+$/.test(fields[1])) {
        this.reads.push({
          chromosome: fields[0],
          start: parseInt(fields[1], 10),
          strand: fields[2],
          sequence: fields[3],
          quality: fields[4],
          converted_sequence: fields[5]
        });
      } else {
        console.log(`Invalid line: ${line}`);
      }
    }
  }

  /** Logs every record to the console, numbered from 1. */
  printProperties() {
    this.reads.forEach((read, index) => {
      console.log(`Read ${index + 1}:`);
      if (read.type === 'NULL') {
        console.log('NULL (Invalid Entry)');
      } else {
        console.log(` Chromosome: ${read.chromosome}`);
        console.log(` Start Position: ${read.start}`);
        console.log(` Strand: ${read.strand}`);
        console.log(` Sequence: ${read.sequence}`);
        console.log(` Quality Scores: ${read.quality}`);
        console.log(` Converted Sequence: ${read.converted_sequence}`);
      }
      console.log('---');
    });
  }

  /** Appends one read to the in-memory list. */
  addRead(chromosome, start, strand, sequence, quality, converted_sequence) {
    this.reads.push({
      chromosome,
      start,
      strand,
      sequence,
      quality,
      converted_sequence
    });
  }

  /**
   * Serialises all records and triggers a browser download named `filename`.
   * @param {string} filename
   */
  write(filename) {
    const lines = this.reads.map((read) => {
      if (read.type === 'NULL') {
        return 'NULL\n';
      }
      return `${read.chromosome}\t${read.start}\t${read.strand}\t${read.sequence}\t${read.quality}\t${read.converted_sequence}\n`;
    });
    const blob = new Blob([lines.join('')], { type: 'text/plain' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;
    a.click();
    // Release the blob once the click has been dispatched; otherwise each
    // call keeps its blob alive for the lifetime of the page.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
// Example usage:
// const mr = new MRFile();
// const input = document.createElement('input');
// input.type = 'file';
// input.onchange = (e) => {
// mr.open(e.target.files[0], () => mr.printProperties());
// };
// input.click();
// mr.addRead('chr1', 100, '+', 'ATCG', 'HHHH', 'ATTG');
// mr.write('new.mr');
7. C Class for .MR File
Since C is not object-oriented, here is a C implementation using a struct for the read and functions for open, read, write, and print. (If C++ is intended, it can be adapted.)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char* chromosome;
int start;
char* strand;
char* sequence;
char* quality;
char* converted_sequence;
int is_null;
} MRRead;
/* Handle returned by mr_open: the target path plus the records held in memory. */
typedef struct {
char* filename; /* heap copy of the path given to mr_open */
char mode;      /* 'r' loads the file in mr_open; 'w' is required by mr_add_read and mr_write */
MRRead* reads;  /* array of records, grown one element at a time with realloc */
size_t count;   /* number of valid elements in reads */
} MRFile;
MRFile* mr_open(const char* filename, char mode) {
MRFile* mr = malloc(sizeof(MRFile));
mr->filename = strdup(filename);
mr->mode = mode;
mr->reads = NULL;
mr->count = 0;
if (mode == 'r') {
FILE* fp = fopen(filename, "r");
if (fp) {
char line[1024];
while (fgets(line, sizeof(line), fp)) {
line[strcspn(line, "\n")] = 0;
if (strcmp(line, "NULL") == 0) {
mr->reads = realloc(mr->reads, sizeof(MRRead) * (mr->count + 1));
mr->reads[mr->count].is_null = 1;
mr->count++;
} else if (strlen(line) > 0) {
char* fields[6];
int fcount = 0;
char* token = strtok(line, "\t");
while (token && fcount < 6) {
fields[fcount++] = token;
token = strtok(NULL, "\t");
}
if (fcount == 6) {
mr->reads = realloc(mr->reads, sizeof(MRRead) * (mr->count + 1));
MRRead* read = &mr->reads[mr->count];
read->is_null = 0;
read->chromosome = strdup(fields[0]);
read->start = atoi(fields[1]);
read->strand = strdup(fields[2]);
read->sequence = strdup(fields[3]);
read->quality = strdup(fields[4]);
read->converted_sequence = strdup(fields[5]);
mr->count++;
} else {
printf("Invalid line: %s\n", line);
}
}
}
fclose(fp);
}
}
return mr;
}
/* Print every record held by `mr` to stdout, numbered from 1 and each
 * followed by a "---" separator line. */
void mr_print_properties(MRFile* mr) {
    size_t index = 0;
    while (index < mr->count) {
        const MRRead* entry = &mr->reads[index];
        index++;
        printf("Read %zu:\n", index);
        if (!entry->is_null) {
            printf(" Chromosome: %s\n", entry->chromosome);
            printf(" Start Position: %d\n", entry->start);
            printf(" Strand: %s\n", entry->strand);
            printf(" Sequence: %s\n", entry->sequence);
            printf(" Quality Scores: %s\n", entry->quality);
            printf(" Converted Sequence: %s\n", entry->converted_sequence);
        } else {
            /* NULL marker line: there are no fields to show. */
            printf("NULL (Invalid Entry)\n");
        }
        printf("---\n");
    }
}
/* Append one read to `mr`, copying every string argument.
 *
 * Requires a handle opened with mode 'w'; otherwise an error is printed
 * to stderr and nothing is added. If memory runs out an error is printed
 * to stderr and the record list is left exactly as it was. */
void mr_add_read(MRFile* mr, const char* chromosome, int start, const char* strand, const char* sequence, const char* quality, const char* converted_sequence) {
    if (mr->mode != 'w') {
        fprintf(stderr, "File not opened in write mode\n");
        return;
    }

    /* Grow through a temporary so a failed realloc does not overwrite
     * (and leak) the existing array. */
    MRRead* grown = realloc(mr->reads, sizeof(MRRead) * (mr->count + 1));
    if (!grown) {
        fprintf(stderr, "Out of memory while adding read\n");
        return;
    }
    mr->reads = grown;

    MRRead rec;
    rec.is_null = 0;
    rec.start = start;
    rec.chromosome = strdup(chromosome);
    rec.strand = strdup(strand);
    rec.sequence = strdup(sequence);
    rec.quality = strdup(quality);
    rec.converted_sequence = strdup(converted_sequence);
    if (!rec.chromosome || !rec.strand || !rec.sequence
        || !rec.quality || !rec.converted_sequence) {
        /* free(NULL) is a no-op, so every copy can be released blindly. */
        free(rec.chromosome);
        free(rec.strand);
        free(rec.sequence);
        free(rec.quality);
        free(rec.converted_sequence);
        fprintf(stderr, "Out of memory while adding read\n");
        return;
    }

    /* Commit only once the record is fully built. */
    mr->reads[mr->count] = rec;
    mr->count++;
}
/* Write every record held by `mr` to mr->filename, one line per record,
 * replacing any existing file.
 *
 * Requires a handle opened with mode 'w'; otherwise an error is printed
 * to stderr and nothing is written. Failures to open or to finish writing
 * the file are reported on stderr. */
void mr_write(MRFile* mr) {
    if (mr->mode != 'w') {
        fprintf(stderr, "File not opened in write mode\n");
        return;
    }
    FILE* fp = fopen(mr->filename, "w");
    if (!fp) {
        perror(mr->filename);
        return;
    }
    for (size_t i = 0; i < mr->count; i++) {
        const MRRead* read = &mr->reads[i];
        if (read->is_null) {
            fprintf(fp, "NULL\n");
        } else {
            fprintf(fp, "%s\t%d\t%s\t%s\t%s\t%s\n", read->chromosome, read->start, read->strand, read->sequence, read->quality, read->converted_sequence);
        }
    }
    /* Buffered output is flushed on close, so write errors can surface here. */
    if (fclose(fp) != 0) {
        perror(mr->filename);
    }
}
/* Release a handle and everything it owns: the strings of every non-NULL
 * record, the record array, the filename copy and the handle itself.
 * Passing NULL is a no-op, mirroring free(). */
void mr_close(MRFile* mr) {
    if (!mr) {
        return;
    }
    for (size_t i = 0; i < mr->count; i++) {
        /* NULL records own no strings, so only regular reads are released. */
        if (!mr->reads[i].is_null) {
            free(mr->reads[i].chromosome);
            free(mr->reads[i].strand);
            free(mr->reads[i].sequence);
            free(mr->reads[i].quality);
            free(mr->reads[i].converted_sequence);
        }
    }
    free(mr->reads);
    free(mr->filename);
    free(mr);
}
// Example usage:
// int main() {
// MRFile* reader = mr_open("sample.mr", 'r');
// mr_print_properties(reader);
// mr_close(reader);
// MRFile* writer = mr_open("new.mr", 'w');
// mr_add_read(writer, "chr1", 100, "+", "ATCG", "HHHH", "ATTG");
// mr_write(writer);
// mr_close(writer);
// return 0;
// }