Task 566: .PEG File Format

Task 566: .PEG File Format

The .PEG file format refers to text-based files used to define Parsing Expression Grammars (PEGs), a recognition-based formalism for describing formal languages. The specifications are derived from Bryan Ford's foundational paper on PEGs (2004), which provides a self-describing grammar, and standardized descriptions in resources such as Wikipedia. PEG files contain a set of rules in a concrete syntax, typically with one rule per line or block, defining nonterminals and their corresponding parsing expressions. The format is plain text, with no binary header or footer, and supports comments, spacing, and lexical elements for readability.

The properties intrinsic to the .PEG file format include the following structural and syntactic elements, as defined in the self-describing PEG grammar:

  • Grammar: The overall structure, consisting of spacing, one or more definitions, and an end-of-file marker.
  • Definition: A rule composed of an identifier, a left arrow, and an expression.
  • Expression: A choice-separated list of sequences.
  • Sequence: A series of prefixes.
  • Prefix: An optional and- or not-predicate followed by a suffix.
  • Suffix: A primary expression optionally followed by a quantifier (question, star, or plus).
  • Primary: An identifier (not followed by left arrow), grouped expression, literal, class, or dot.
  • Identifier: Starting with an ident-start character, followed by zero or more ident-cont characters.
  • IdentStart: Alphabetic characters or underscore.
  • IdentCont: Alphabetic, numeric, or underscore.
  • Literal: A quoted string (single or double quotes) with optional escape sequences.
  • Class: A bracketed list of characters or ranges.
  • Range: A character range using a hyphen.
  • Char: A single character, potentially escaped.
  • LEFTARROW: The rule assignment symbol (e.g., "<-").
  • QUESTION: The optional quantifier ("?").
  • STAR: The zero-or-more quantifier ("*").
  • PLUS: The one-or-more quantifier ("+").
  • AND: The and-predicate symbol ("&").
  • NOT: The not-predicate symbol ("!").
  • OPEN: Opening parenthesis for grouping ("(").
  • CLOSE: Closing parenthesis for grouping (")").
  • DOT: The any-character matcher (".").
  • Spacing: Whitespace, including spaces, end-of-lines, and comments.
  • Comment: A line comment that starts with "#" and runs to the end of the line.
  • Space: A space, a tab, or an end-of-line.
  • EndOfLine: Carriage return/line feed, line feed, or carriage return.
  • EndOfFile: End of input, ensured by a not-predicate on any character.

These elements collectively define the format's structure, ensuring deterministic parsing without ambiguity.

Two direct download links for .PEG files:

The following is an embedded HTML/JavaScript snippet suitable for a Ghost blog post. It enables drag-and-drop of a .PEG file, parses it to extract rules (assuming standard rule format of "Identifier <- Expression"), and displays the properties (rule names and expressions) on the screen.

Drag and drop a .PEG file here
  1. The following Python class handles opening, decoding (parsing), reading, writing, and printing properties from a .PEG file.
import re

class PEGHandler:
    """Load, display and re-emit the rules of a .PEG grammar file.

    Only single-line definitions of the form ``Identifier <- Expression``
    are recognised. Lines that are blank, start with ``#`` or ``//``, or do
    not match that shape are ignored.
    """

    def __init__(self, filepath):
        """Remember the grammar path; no file access happens here."""
        self.filepath = filepath
        # Rule name -> expression text, in first-definition order.
        self.rules = {}

    def read_and_decode(self):
        """Parse the file at ``self.filepath`` into ``self.rules``.

        A later definition of an already-seen name replaces its expression.
        """
        rule_pattern = re.compile(r'^(\w+)\s*<-\s*(.*)$')
        with open(self.filepath, 'r', encoding='utf-8') as source:
            text = source.read()
        for raw_line in text.split('\n'):
            candidate = raw_line.strip()
            if not candidate:
                continue
            if candidate.startswith(('#', '//')):
                # Whole-line comment.
                continue
            found = rule_pattern.match(candidate)
            if found is None:
                continue
            self.rules[found.group(1)] = found.group(2)

    def print_properties(self):
        """Print every stored rule, or a notice when there are none."""
        if self.rules:
            print("Extracted Properties (Rules):")
            separator = "-" * 40
            for name in self.rules:
                print(f"Identifier: {name}")
                print(f"Expression: {self.rules[name]}")
                print(separator)
        else:
            print("No properties found.")

    def write(self, new_filepath=None):
        """Write the rules, one per line, to ``new_filepath``.

        Falls back to ``self.filepath`` when ``new_filepath`` is falsy.
        """
        target = new_filepath if new_filepath else self.filepath
        with open(target, 'w', encoding='utf-8') as sink:
            sink.writelines(
                f"{name} <- {body}\n" for name, body in self.rules.items()
            )

# Example usage:
# handler = PEGHandler('example.peg')
# handler.read_and_decode()
# handler.print_properties()
# handler.write('new.peg')
  1. The following Java class handles opening, decoding (parsing), reading, writing, and printing properties from a .PEG file.
import java.io.*;
import java.util.*;
import java.util.regex.*;

public class PEGHandler {
    private String filepath;
    private Map<String, String> rules = new LinkedHashMap<>();

    public PEGHandler(String filepath) {
        this.filepath = filepath;
    }

    public void readAndDecode() throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(filepath))) {
            String line;
            Pattern pattern = Pattern.compile("^(\\w+)\\s*<-\\s*(.*)$");
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (!line.isEmpty() && !line.startsWith("#") && !line.startsWith("//")) {
                    Matcher matcher = pattern.matcher(line);
                    if (matcher.matches()) {
                        String identifier = matcher.group(1);
                        String expression = matcher.group(2);
                        rules.put(identifier, expression);
                    }
                }
            }
        }
    }

    public void printProperties() {
        if (rules.isEmpty()) {
            System.out.println("No properties found.");
            return;
        }
        System.out.println("Extracted Properties (Rules):");
        for (Map.Entry<String, String> entry : rules.entrySet()) {
            System.out.println("Identifier: " + entry.getKey());
            System.out.println("Expression: " + entry.getValue());
            System.out.println("----------------------------------------");
        }
    }

    public void write(String newFilepath) throws IOException {
        String outputPath = (newFilepath != null) ? newFilepath : filepath;
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputPath))) {
            for (Map.Entry<String, String> entry : rules.entrySet()) {
                writer.write(entry.getKey() + " <- " + entry.getValue() + "\n");
            }
        }
    }

    // Example usage:
    // public static void main(String[] args) throws IOException {
    //     PEGHandler handler = new PEGHandler("example.peg");
    //     handler.readAndDecode();
    //     handler.printProperties();
    //     handler.write("new.peg");
    // }
}
  1. The following JavaScript class handles opening, decoding (parsing), reading, writing, and printing properties from a .PEG file (using Node.js for file I/O).
const fs = require('fs');

/**
 * Loads, prints and rewrites the rules of a .PEG grammar file.
 *
 * Only single-line definitions of the form `Identifier <- Expression` are
 * recognised. Lines that are blank, start with `#` or `//`, or do not match
 * that shape are ignored.
 */
class PEGHandler {
  /**
   * @param {string} filepath Path of the grammar file; it is not read until
   *   readAndDecode() is called.
   */
  constructor(filepath) {
    this.filepath = filepath;
    // Null-prototype dictionary: with a plain {} a rule named `__proto__`
    // would hit the inherited setter and be silently dropped.
    this.rules = Object.create(null);
  }

  /**
   * Parses the grammar file into `this.rules` (rule name -> expression text).
   * A later definition of an already-seen name replaces its expression.
   */
  readAndDecode() {
    const content = fs.readFileSync(this.filepath, 'utf8');
    for (const rawLine of content.split('\n')) {
      const line = rawLine.trim();
      if (!line || line.startsWith('#') || line.startsWith('//')) {
        continue;
      }
      const match = line.match(/^(\w+)\s*<-\s*(.*)$/);
      if (match) {
        const [, identifier, expression] = match;
        this.rules[identifier] = expression;
      }
    }
  }

  /** Logs every stored rule to the console, or a notice when there are none. */
  printProperties() {
    if (Object.keys(this.rules).length === 0) {
      console.log('No properties found.');
      return;
    }
    console.log('Extracted Properties (Rules):');
    for (const [identifier, expression] of Object.entries(this.rules)) {
      console.log(`Identifier: ${identifier}`);
      console.log(`Expression: ${expression}`);
      console.log('----------------------------------------');
    }
  }

  /**
   * Writes the rules, one per line, as `name <- expression`.
   *
   * @param {string} [newFilepath] Destination path; defaults to the path the
   *   handler was constructed with.
   */
  write(newFilepath = this.filepath) {
    let output = '';
    for (const [identifier, expression] of Object.entries(this.rules)) {
      output += `${identifier} <- ${expression}\n`;
    }
    fs.writeFileSync(newFilepath, output, 'utf8');
  }
}

// Example usage:
// const handler = new PEGHandler('example.peg');
// handler.readAndDecode();
// handler.printProperties();
// handler.write('new.peg');
  1. The following C++ class handles opening, decoding (parsing), reading, writing, and printing properties from a .PEG file.
#include <fstream>
#include <iostream>
#include <map>
#include <regex>
#include <string>
#include <vector>

/// Loads, prints and rewrites the rules of a .PEG grammar file.
///
/// Only single-line definitions of the form `Identifier <- Expression` are
/// recognised. Lines that are blank, start with `#` or `//`, or do not match
/// that shape are ignored.
class PEGHandler {
private:
    std::string filepath;
    /// Rule name -> expression text.
    std::map<std::string, std::string> rules;
    /// Rule names in first-definition order. std::map iterates in key order,
    /// which would move the grammar's first (start) rule when rewriting.
    std::vector<std::string> order;

public:
    /// @param fp Path of the grammar file; it is not opened until
    ///           readAndDecode() is called.
    PEGHandler(const std::string& fp) : filepath(fp) {}

    /// Parses the grammar file into the in-memory rule table.
    ///
    /// A later definition of an already-seen name replaces its expression but
    /// keeps its original position. If the file cannot be opened, a message is
    /// written to std::cerr and the table is left unchanged.
    void readAndDecode() {
        std::ifstream file(filepath);
        if (!file.is_open()) {
            std::cerr << "Failed to open file." << '\n';
            return;
        }
        const std::regex pattern(R"(^(\w+)\s*<-\s*(.*)$)");
        // Includes '\r' so CRLF files parse: std::getline leaves the '\r' on
        // the line and the regex '.' does not match it.
        const char* const whitespace = " \t\r\n\v\f";
        std::string line;
        while (std::getline(file, line)) {
            const std::string::size_type first = line.find_first_not_of(whitespace);
            if (first == std::string::npos) {
                continue;  // blank line
            }
            const std::string::size_type last = line.find_last_not_of(whitespace);
            line = line.substr(first, last - first + 1);
            if (line[0] == '#' || line.compare(0, 2, "//") == 0) {
                continue;  // whole-line comment
            }
            std::smatch match;
            if (!std::regex_match(line, match, pattern)) {
                continue;
            }
            const std::string name = match[1].str();
            const std::string expression = match[2].str();
            const auto existing = rules.find(name);
            if (existing == rules.end()) {
                rules.emplace(name, expression);
                order.push_back(name);
            } else {
                existing->second = expression;
            }
        }
    }

    /// Prints every stored rule to std::cout in definition order, or a notice
    /// when there are none.
    void printProperties() const {
        if (order.empty()) {
            std::cout << "No properties found." << std::endl;
            return;
        }
        std::cout << "Extracted Properties (Rules):" << '\n';
        for (const auto& name : order) {
            std::cout << "Identifier: " << name << '\n'
                      << "Expression: " << rules.at(name) << '\n'
                      << "----------------------------------------" << '\n';
        }
        std::cout << std::flush;
    }

    /// Writes the rules in definition order, one per line, as
    /// `name <- expression`.
    ///
    /// @param newFilepath Destination path; when empty, the file this handler
    ///                    was constructed with is overwritten.
    /// On failure to open or write, a message is written to std::cerr.
    void write(const std::string& newFilepath = "") const {
        const std::string& outputPath = newFilepath.empty() ? filepath : newFilepath;
        std::ofstream file(outputPath);
        if (!file.is_open()) {
            std::cerr << "Failed to write file." << '\n';
            return;
        }
        for (const auto& name : order) {
            file << name << " <- " << rules.at(name) << "\n";
        }
        file.flush();
        if (!file) {
            std::cerr << "Failed to write file." << '\n';
        }
    }
};

// Example usage:
// int main() {
//     PEGHandler handler("example.peg");
//     handler.readAndDecode();
//     handler.printProperties();
//     handler.write("new.peg");
//     return 0;
// }