Task 395: .MGF File Format
Task 395: .MGF File Format
The .MGF file format refers to the Mascot Generic Format, a plain-text format commonly used in mass spectrometry to store MS/MS peak lists and associated metadata for proteomics searches.
- Based on the format specification, the properties (key-value parameters) intrinsic to the format are as follows. These can appear either globally (at the file level, before any query blocks) or locally (within individual spectrum blocks delimited by BEGIN IONS and END IONS). Not all are required; most are optional depending on the data and search type. The list includes:
- ACCESSION: List of database accession numbers to search against.
- CHARGE: Precursor charge state(s), e.g., "2+" or "1- and 2-".
- CLE: Cleavage enzyme, e.g., "Trypsin".
- COM: Search title or comment (free text).
- COMP: Amino acid composition of the peptide.
- CROSSLINKING: Crosslinking method definition.
- CUTOUT: Precursor removal parameters (pair of integers).
- DB: Database name(s) to search.
- DECOY: Flag for decoy database search (0 or 1).
- ERRORTOLERANT: Flag for error-tolerant search (0 or 1).
- ET_CLASSIFICATIONS: Restrictions for error-tolerant search classifications.
- ETAG: Error-tolerant sequence tag.
- FORMAT: Data file format declaration (e.g., "Mascot generic").
- FRAMES: Nucleic acid translation frames (comma-separated list).
- INSTRUMENT: Instrument type for fragmentation rules (e.g., "ESI-QUAD-TOF").
- ION_MOBILITY: Ion mobility value.
- DRIFT_TIME: Drift time for ion mobility spectrometry.
- CCS: Collision cross-section value.
- INVERSE_ION_MOBILITY: Inverse reduced ion mobility.
- IT_MODS: Variable modifications.
- ITOL: MS/MS ion tolerance value.
- ITOLU: MS/MS ion tolerance units (e.g., "Da" or "ppm").
- MASS: Mass type (e.g., "Monoisotopic").
- MODS: Fixed modifications.
- MULTI_SITE: Multi-site modification handling.
- OVERWRITE: Flag to overwrite existing results (0 or 1).
- PEPMASS: Precursor m/z value and optional intensity.
- PFA: Allowed missed cleavages (partial cleavages factor).
- PRECURSOR: Precursor m/z (alternative to PEPMASS).
- QUANTITATION: Quantitation method.
- RAWSCANS: Raw scan number(s).
- REPORT: Report type (e.g., "AUTO").
- RTINSECONDS: Retention time in seconds.
- SCANS: Scan number(s).
- SEARCH: Search type (e.g., "MIS" for MS/MS Ion Search).
- SEQ: Amino acid sequence qualifier (known partial sequence of the peptide).
- TAG: Sequence tag.
- TAXONOMY: Taxonomy restriction.
- TITLE: Spectrum title (free text).
- TOL: Peptide mass tolerance value.
- TOLU: Peptide mass tolerance units (e.g., "Da" or "ppm").
- USER00 to USER12: User-defined custom fields.
- USEREMAIL: User's email address.
- USERNAME: User's name.
- Two direct download links for sample .MGF files:
- http://mendel.bii.a-star.edu.sg/mass-spectrometry/MSCleaner-2.0/examples/bsa_run1_merge.mgf
- https://fiehnlab.ucdavis.edu/downloads/projects/LipidBlast/mgf-files/demo758Dacentroid.mgf
- Below is a standalone HTML page with inline JavaScript that can be pasted into a Ghost blog post (or used independently). It provides a drag-and-drop area for a .MGF file and prints all extracted properties to the screen (global properties and per-spectrum properties; peak lists and comments are ignored).
Drag and drop .MGF file here
- Below is a Python class for handling .MGF files. It can read/decode a file, print all properties to console (global and per-spectrum), and write a new file with the properties (peaks are ignored since the focus is properties).
class MGFHandler:
    """Read, print and re-write the key=value properties of an .MGF file.

    Properties found before the first ``BEGIN IONS`` line (or between
    spectrum blocks) are stored in ``global_props``; properties found between
    ``BEGIN IONS`` and ``END IONS`` are stored in one dict per spectrum in
    ``spectra``. Peak lists and comment lines are skipped, so ``write``
    produces a properties-only file.
    """

    # A line starting with one of these characters is treated as a comment.
    _COMMENT_PREFIXES = ('#', ';', '!', '/')

    def __init__(self):
        self.global_props = {}
        self.spectra = []

    @classmethod
    def _is_property_line(cls, line):
        """Return True when ``line`` (already stripped) is a KEY=value line."""
        if '=' not in line or line.startswith(cls._COMMENT_PREFIXES):
            return False
        # A first token made only of digits and dots marks a peak line.
        first_token = line.split()[0]
        return not first_token.replace('.', '').isdigit()

    def read(self, filename):
        """Parse ``filename`` and add its properties to this handler.

        Results accumulate: calling ``read`` again appends further spectra
        and overwrites global properties that share a key. A spectrum block
        that is never closed by ``END IONS`` is discarded.
        """
        current_spectrum = None
        with open(filename, 'r') as f:
            # Iterate the file lazily instead of loading every line up front.
            for raw_line in f:
                line = raw_line.strip()
                if line == 'BEGIN IONS':
                    current_spectrum = {}
                elif line == 'END IONS':
                    if current_spectrum is not None:
                        self.spectra.append(current_spectrum)
                        current_spectrum = None
                elif self._is_property_line(line):
                    # Split on the first '=' only; values may contain '='.
                    key, value = line.split('=', 1)
                    target = self.global_props if current_spectrum is None else current_spectrum
                    target[key.strip()] = value.strip()

    def print_properties(self):
        """Print the global properties, then each spectrum's properties."""
        print("Global properties:")
        for key, value in self.global_props.items():
            print(f"{key}: {value}")
        for idx, spec in enumerate(self.spectra, start=1):
            print(f"\nSpectrum {idx} properties:")
            for key, value in spec.items():
                print(f"{key}: {value}")

    def write(self, filename):
        """Write the stored properties to ``filename`` in MGF layout (no peaks)."""
        with open(filename, 'w') as f:
            for key, value in self.global_props.items():
                f.write(f"{key}={value}\n")
            for spec in self.spectra:
                f.write("BEGIN IONS\n")
                for key, value in spec.items():
                    f.write(f"{key}={value}\n")
                f.write("END IONS\n")
# Example usage:
# handler = MGFHandler()
# handler.read('example.mgf')
# handler.print_properties()
# handler.write('new.mgf')
- Below is a Java class for handling .MGF files. It can read/decode a file, print all properties to console (global and per-spectrum), and write a new file with the properties (peaks are ignored since the focus is properties).
import java.io.*;
import java.util.*;
public class MGFHandler {
private Map<String, String> globalProps = new HashMap<>();
private List<Map<String, String>> spectra = new ArrayList<>();
public void read(String filename) throws IOException {
try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
String line;
Map<String, String> currentSpectrum = null;
while ((line = br.readLine()) != null) {
line = line.trim();
if (line.equals("BEGIN IONS")) {
currentSpectrum = new HashMap<>();
} else if (line.equals("END IONS")) {
if (currentSpectrum != null) {
spectra.add(currentSpectrum);
currentSpectrum = null;
}
} else if (line.contains("=") && !line.startsWith("#") && !line.startsWith(";") && !line.startsWith("!") && !line.startsWith("/") && !isPeakLine(line)) {
String[] parts = line.split("=", 2);
String key = parts[0].trim();
String value = parts[1].trim();
if (currentSpectrum == null) {
globalProps.put(key, value);
} else {
currentSpectrum.put(key, value);
}
}
}
}
}
private boolean isPeakLine(String line) {
String[] parts = line.split("\\s+");
if (parts.length >= 2) {
try {
Double.parseDouble(parts[0]);
Double.parseDouble(parts[1]);
return true;
} catch (NumberFormatException e) {
return false;
}
}
return false;
}
public void printProperties() {
System.out.println("Global properties:");
globalProps.forEach((key, value) -> System.out.println(key + ": " + value));
for (int i = 0; i < spectra.size(); i++) {
System.out.println("\nSpectrum " + (i + 1) + " properties:");
spectra.get(i).forEach((key, value) -> System.out.println(key + ": " + value));
}
}
public void write(String filename) throws IOException {
try (PrintWriter pw = new PrintWriter(new FileWriter(filename))) {
globalProps.forEach((key, value) -> pw.println(key + "=" + value));
for (Map<String, String> spec : spectra) {
pw.println("BEGIN IONS");
spec.forEach((key, value) -> pw.println(key + "=" + value));
pw.println("END IONS");
}
}
}
// Example usage:
// public static void main(String[] args) throws IOException {
// MGFHandler handler = new MGFHandler();
// handler.read("example.mgf");
// handler.printProperties();
// handler.write("new.mgf");
// }
}
- Below is a JavaScript class for handling .MGF files (Node.js compatible). It can read/decode a file, print all properties to console (global and per-spectrum), and write a new file with the properties (peaks are ignored since the focus is properties). Requires 'fs' module.
const fs = require('fs');
/**
 * Reads, prints and re-writes the KEY=value properties of an .MGF file.
 *
 * Properties found outside any BEGIN IONS / END IONS block are stored in
 * `globalProps`; properties inside a block are stored in one object per
 * spectrum in `spectra`. Peak lists and comment lines are skipped, so
 * `write` produces a properties-only file.
 */
class MGFHandler {
  constructor() {
    this.globalProps = {};
    this.spectra = [];
  }

  /**
   * Parses `filename` synchronously and adds its properties to this handler.
   * Results accumulate across calls. A spectrum block that is never closed
   * by END IONS is discarded.
   * @param {string} filename path of the .MGF file to read
   */
  read(filename) {
    const text = fs.readFileSync(filename, 'utf8');
    let currentSpectrum = null;
    for (const rawLine of text.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line === 'BEGIN IONS') {
        currentSpectrum = {};
        continue;
      }
      if (line === 'END IONS') {
        if (currentSpectrum) {
          this.spectra.push(currentSpectrum);
          currentSpectrum = null;
        }
        continue;
      }
      const eq = line.indexOf('=');
      if (eq === -1 || this.isCommentLine(line) || this.isPeakLine(line)) {
        continue;
      }
      // Cut at the FIRST '=' only. String.prototype.split with a limit
      // discards the remainder, which would truncate values such as
      // "TITLE=File: a.raw, scan=12".
      const key = line.slice(0, eq).trim();
      const value = line.slice(eq + 1).trim();
      const target = currentSpectrum || this.globalProps;
      target[key] = value;
    }
  }

  /**
   * @param {string} line trimmed input line
   * @returns {boolean} true when the line starts with a comment character
   */
  isCommentLine(line) {
    return ['#', ';', '!', '/'].some(prefix => line.startsWith(prefix));
  }

  /**
   * @param {string} line trimmed input line
   * @returns {boolean} true when the first two whitespace-separated tokens
   *   both start with something parseFloat accepts as a number
   */
  isPeakLine(line) {
    const parts = line.split(/\s+/);
    if (parts.length >= 2) {
      return !isNaN(parseFloat(parts[0])) && !isNaN(parseFloat(parts[1]));
    }
    return false;
  }

  /** Logs the global properties followed by each spectrum's properties. */
  printProperties() {
    console.log('Global properties:');
    for (const [key, value] of Object.entries(this.globalProps)) {
      console.log(`${key}: ${value}`);
    }
    this.spectra.forEach((spec, index) => {
      console.log(`\nSpectrum ${index + 1} properties:`);
      for (const [key, value] of Object.entries(spec)) {
        console.log(`${key}: ${value}`);
      }
    });
  }

  /**
   * Writes the stored properties to `filename` in MGF layout (no peak lists).
   * @param {string} filename path of the file to create or overwrite
   */
  write(filename) {
    const out = [];
    for (const [key, value] of Object.entries(this.globalProps)) {
      out.push(`${key}=${value}\n`);
    }
    for (const spec of this.spectra) {
      out.push('BEGIN IONS\n');
      for (const [key, value] of Object.entries(spec)) {
        out.push(`${key}=${value}\n`);
      }
      out.push('END IONS\n');
    }
    fs.writeFileSync(filename, out.join(''));
  }
}
// Example usage:
// const handler = new MGFHandler();
// handler.read('example.mgf');
// handler.printProperties();
// handler.write('new.mgf');
- Below is a C++ class for handling .MGF files. It can read/decode a file, print all properties to console (global and per-spectrum), and write a new file with the properties (peaks are ignored since the focus is properties). Compile with a C++ compiler (e.g., g++).
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <cctype>
/// Returns a copy of `str` without leading or trailing spaces, tabs, carriage
/// returns or newlines. Stripping '\r' lets lines from CRLF files compare equal
/// to the "BEGIN IONS" / "END IONS" markers.
std::string trim(const std::string& str) {
    constexpr const char* kWhitespace = " \t\r\n";
    const auto first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) return "";
    const auto last = str.find_last_not_of(kWhitespace);
    return str.substr(first, last - first + 1);
}

/// Reads, prints and re-writes the KEY=value properties of an .MGF file.
///
/// Properties found outside any BEGIN IONS / END IONS block are stored as
/// global properties; properties inside a block are stored per spectrum.
/// Peak lists and comment lines are skipped, so write() produces a
/// properties-only file. Properties are kept sorted by key (std::map).
class MGFHandler {
private:
    using PropertyMap = std::map<std::string, std::string>;

    PropertyMap globalProps;
    std::vector<PropertyMap> spectra;

    /// True when `token` is built only from characters that can appear in a
    /// decimal number and contains at least one digit.
    static bool looksNumeric(const std::string& token) {
        return !token.empty()
            && token.find_first_not_of("0123456789.+-eE") == std::string::npos
            && token.find_first_of("0123456789") != std::string::npos;
    }

    /// True when the line starts with one of the comment characters.
    static bool isCommentLine(const std::string& line) {
        return !line.empty()
            && (line[0] == '#' || line[0] == ';' || line[0] == '!' || line[0] == '/');
    }

    /// True when the first two tokens of `line` (separated by spaces or tabs)
    /// both look like numbers, i.e. the line is an "m/z intensity" peak entry.
    bool isPeakLine(const std::string& line) const {
        const std::string text = trim(line);
        const auto gap = text.find_first_of(" \t");
        if (gap == std::string::npos) return false;
        // `text` is trimmed, so a non-blank character always follows the gap.
        const auto secondStart = text.find_first_not_of(" \t", gap);
        const auto secondEnd = text.find_first_of(" \t", secondStart);
        const std::string mz = text.substr(0, gap);
        const std::string intensity = (secondEnd == std::string::npos)
            ? text.substr(secondStart)
            : text.substr(secondStart, secondEnd - secondStart);
        return looksNumeric(mz) && looksNumeric(intensity);
    }

public:
    /// Parses `filename` and adds its properties to this handler.
    ///
    /// Results accumulate across calls. A spectrum block that is never closed
    /// by END IONS is discarded, and an END IONS without a matching BEGIN IONS
    /// is ignored. If the file cannot be opened a message is written to
    /// std::cerr and the handler is left unchanged.
    void read(const std::string& filename) {
        std::ifstream ifs(filename);
        if (!ifs) {
            std::cerr << "MGFHandler: cannot open '" << filename << "' for reading\n";
            return;
        }
        std::string line;
        PropertyMap currentSpectrum;
        // Explicit state flag: an empty map cannot tell "outside any block"
        // apart from "inside a block that has no properties yet".
        bool inSpectrum = false;
        while (std::getline(ifs, line)) {
            line = trim(line);
            if (line.empty()) continue;
            if (line == "BEGIN IONS") {
                currentSpectrum.clear();
                inSpectrum = true;
                continue;
            }
            if (line == "END IONS") {
                if (inSpectrum) {
                    // Hand the collected map over without copying it.
                    spectra.emplace_back();
                    spectra.back().swap(currentSpectrum);
                    inSpectrum = false;
                }
                continue;
            }
            const auto eq = line.find('=');
            if (eq == std::string::npos || isCommentLine(line) || isPeakLine(line)) continue;
            // Split on the first '=' only; values may themselves contain '='.
            const std::string key = trim(line.substr(0, eq));
            const std::string value = trim(line.substr(eq + 1));
            PropertyMap& target = inSpectrum ? currentSpectrum : globalProps;
            target[key] = value;
        }
    }

    /// Prints the global properties followed by each spectrum's properties.
    void printProperties() const {
        std::cout << "Global properties:" << '\n';
        for (const auto& p : globalProps) {
            std::cout << p.first << ": " << p.second << '\n';
        }
        for (std::size_t i = 0; i < spectra.size(); ++i) {
            std::cout << "\nSpectrum " << (i + 1) << " properties:" << '\n';
            for (const auto& p : spectra[i]) {
                std::cout << p.first << ": " << p.second << '\n';
            }
        }
        std::cout.flush();
    }

    /// Writes the stored properties to `filename` in MGF layout (no peak
    /// lists). If the file cannot be opened a message is written to std::cerr
    /// and nothing is written.
    void write(const std::string& filename) const {
        std::ofstream ofs(filename);
        if (!ofs) {
            std::cerr << "MGFHandler: cannot open '" << filename << "' for writing\n";
            return;
        }
        for (const auto& p : globalProps) {
            ofs << p.first << '=' << p.second << '\n';
        }
        for (const auto& spec : spectra) {
            ofs << "BEGIN IONS" << '\n';
            for (const auto& p : spec) {
                ofs << p.first << '=' << p.second << '\n';
            }
            ofs << "END IONS" << '\n';
        }
    }
};
// Example usage:
// int main() {
// MGFHandler handler;
// handler.read("example.mgf");
// handler.printProperties();
// handler.write("new.mgf");
// return 0;
// }