Task 245: .FS File Format

Task 245: .FS File Format

File Format Specifications for .FS

The .FS file format is a text-based format used for encoding tree annotations of sentences in natural language processing, particularly in the Prague Dependency Treebank (PDT) and related linguistic tools like Netgraph. It represents sentence structures as trees, with nodes corresponding to words or phrases, and supports attribute-value pairs for annotation. The format is UTF-8 encoded, with a header for attribute definitions, followed by tree structures in a parenthesized notation. The specification originates from the Institute of Formal and Applied Linguistics at Charles University and is designed for syntactic and semantic annotation.

  1. List of all the properties of this file format intrinsic to its file system

The .FS format's "file system" refers to its hierarchical tree structure for representing sentence dependencies, akin to a directory tree. The intrinsic properties are the attribute types defined in the header (lines starting with @), which govern how attributes behave in nodes. These are:

  • K (Key attribute): No specific constraints; used for unique identification without additional rules.
  • P (Positional attribute): Attribute name is inferred from its position in the list of positional attributes; no explicit name needed in node definitions.
  • O (Obligatory attribute): Must have a non-empty value in every node (defaults to empty string if omitted, but violation may cause errors in tools).
  • L (List attribute): Value must be one from a predefined list of allowed values (specified in the header with | separators) or empty; values cannot repeat in the list.
  • H (Hiding attribute): Nodes with value "true" or "hide" are hidden in tree viewers (including subtrees) when hiding mode is enabled.
  • N (Numeric attribute): Non-negative real number specifying left-to-right ordering of nodes in visualizations; at most one per file (used for x-coordinate in tree editors); if absent, trees are centered.
  • W (Word-order attribute): Non-negative real number denoting word order in the sentence for linear display; at most one per file; if absent, falls back to @N attribute.
  • V (Value attribute): String used to assemble the linear form of the sentence in status lines or views; at most one per file; subtypes include @VH (default, hides hidden nodes) and @VA (shows hidden nodes).

These properties can be combined for a single attribute (e.g., @PO lemma for positional and obligatory). Length limits: attribute names ≤ 30 bytes, values ≤ 5000 bytes. Functional characters (\ = , [ ] | < > !) that occur in identifiers can be escaped with a backslash.

  2. Two direct download links for files of format .FS
  3. Ghost blog embedded HTML JavaScript for drag n drop .FS file and dumping properties

Embed this HTML code in a Ghost blog post (use the HTML card in the editor). It creates a drag-and-drop zone that parses the .FS header and dumps the attribute properties to the screen.

Drag and drop a .FS file here to parse properties
  4. Python class for .FS file handling
import json

class FSReader:
    """Read a .FS file and expose the attribute declarations of its header.

    After construction, ``content`` holds the whole file as text and
    ``attributes`` maps each attribute name to a dict of its property flags:
    ``'L'`` maps to the list of allowed values, every other flag letter
    (K, P, O, H, N, W, V) maps to ``True``.
    """

    def __init__(self, filename):
        """Load *filename* as UTF-8 text and parse its header."""
        with open(filename, 'r', encoding='utf-8') as f:
            self.content = f.read()
        self.attributes = self._parse_header()

    def _parse_header(self):
        """Return ``{attribute name: {flag: value}}`` parsed from the header.

        The header is the run of lines from the top of the file up to the
        first blank line. An optional leading ``@E `` (encoding) line is
        skipped, and header lines that do not start with ``@`` are ignored.
        """
        lines = self.content.split('\n')
        first = 1 if lines[0].startswith('@E ') else 0
        attributes = {}
        for raw in lines[first:]:
            if not raw.strip():
                break  # a blank line terminates the header
            if raw.startswith('@'):
                # strip() also drops the '\r' left over from CRLF files.
                self._parse_header_line(raw.strip(), attributes)
        return attributes

    @staticmethod
    def _parse_header_line(line, attributes):
        """Merge the declaration in one ``@...`` header line into *attributes*.

        Malformed lines (no flag letters or no attribute name) are skipped.
        """
        body = line[1:]
        if not body or body[0].isspace():
            return  # no flag letters directly after '@'
        pieces = body.split(None, 1)
        if len(pieces) < 2:
            return  # flags without an attribute name
        flags, rest = pieces

        name = rest.strip()
        values = None
        if 'L' in flags:
            # List declaration: "name|value1|value2|...".
            # NOTE: backslash-escaped '|' inside values is not handled here.
            name, *values = (part.strip() for part in rest.split('|'))
        if not name:
            return

        # Merge into any flags already declared for this attribute so that
        # e.g. "@P afun" followed by "@L afun|..." keeps both properties.
        props = attributes.setdefault(name, {})
        pos = 0
        while pos < len(flags):
            flag = flags[pos]
            pos += 1
            if flag == 'L':
                props['L'] = values
                continue
            props[flag] = True
            # "@VH" / "@VA" are subtypes of the V attribute: the second
            # letter is part of V, not a separate H (hiding) flag.
            if flag == 'V' and flags[pos:pos + 1] in ('H', 'A'):
                pos += 1

    def print_properties(self):
        """Print one line per attribute listing its property flags."""
        for name, props in self.attributes.items():
            print(f"Attribute '{name}': {', '.join(props)}")

    def write(self, output_filename):
        """Write the original, unmodified file content to *output_filename*."""
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(self.content)
        print(f"Written to {output_filename}")

# Example usage:
# reader = FSReader('example.fs')
# reader.print_properties()
# reader.write('output.fs')

To arrive at the solution, the class reads the file as UTF-8 text, splits into lines, skips the optional encoding line, collects header lines until a blank, parses each @ line to map attributes to their properties (handling @L specially for lists), prints the properties, and writes the original content back for the write function.

  5. Java class for .FS file handling
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;

public class FSReader {
    private String content;
    private Map<String, Map<String, Object>> attributes = new HashMap<>();

    public FSReader(String filename) throws IOException {
        this.content = new String(Files.readAllBytes(Paths.get(filename)), "UTF-8");
        parseHeader();
    }

    private void parseHeader() {
        String[] lines = content.split("\n");
        int i = 0;
        if (lines.length > 0 && lines[i].startsWith("@E ")) i++;
        List<String> header = new ArrayList<>();
        while (i < lines.length && !lines[i].trim().isEmpty()) {
            if (lines[i].startsWith("@")) {
                header.add(lines[i].trim());
            }
            i++;
        }
        for (String line : header) {
            if (line.startsWith("@L ")) {
                String[] parts = line.substring(3).split("\\|");
                String name = parts[0].trim();
                List<String> values = new ArrayList<>();
                for (int j = 1; j < parts.length; j++) {
                    values.add(parts[j].trim());
                }
                Map<String, Object> props = new HashMap<>();
                props.put("L", values);
                attributes.put(name, props);
            } else {
                String prop = line.substring(1, 2);
                String name = line.substring(3).trim();
                attributes.computeIfAbsent(name, k -> new HashMap<>()).put(prop, true);
            }
        }
    }

    public void printProperties() {
        for (Map.Entry<String, Map<String, Object>> entry : attributes.entrySet()) {
            String name = entry.getKey();
            Map<String, Object> props = entry.getValue();
            List<String> propList = new ArrayList<>();
            for (String key : props.keySet()) {
                if ("L".equals(key)) {
                    propList.add(key + " (list)");
                } else {
                    propList.add(key);
                }
            }
            System.out.println("Attribute '" + name + "': " + String.join(", ", propList));
        }
    }

    public void write(String outputFilename) throws IOException {
        Files.write(Paths.get(outputFilename), content.getBytes("UTF-8"));
        System.out.println("Written to " + outputFilename);
    }

    // Example usage:
    // FSReader reader = new FSReader("example.fs");
    // reader.printProperties();
    // reader.write("output.fs");
}

To arrive at the solution, the class uses NIO Files to read the file as UTF-8 bytes, splits the content into lines, skips the optional @E line, collects and parses header lines into a map of attributes to properties (using HashMap for flexible storage, handling @L with a list), iterates to print, and writes the original content using Files.write.

  6. JavaScript class for .FS file handling (Node.js)
const fs = require('fs');

/**
 * Reads a .FS file and exposes the attribute declarations of its header.
 *
 * `attributes` maps each attribute name to an object of property flags:
 * `L` holds the array of allowed values, every other flag letter is `true`.
 */
class FSReader {
  /**
   * Load the file as UTF-8 text and parse its header.
   * @param {string} filename path of the .FS file to read
   */
  constructor(filename) {
    this.content = fs.readFileSync(filename, 'utf8');
    this.attributes = this._parseHeader();
  }

  /**
   * Parse the header: the lines from the top of the file up to the first
   * blank line, skipping an optional leading "@E " (encoding) line.
   * @returns {Object<string, Object>} attribute name -> property flags
   */
  _parseHeader() {
    const lines = this.content.split('\n');
    const attributes = {};
    let i = lines[0].startsWith('@E ') ? 1 : 0;
    for (; i < lines.length && lines[i].trim(); i++) {
      // trim() also removes the '\r' left over from CRLF files.
      if (lines[i].startsWith('@')) this._parseHeaderLine(lines[i].trim(), attributes);
    }
    return attributes;
  }

  /**
   * Merge the declaration in one "@..." header line into `attributes`.
   * Malformed lines (no flag letters or no attribute name) are skipped.
   */
  _parseHeaderLine(line, attributes) {
    const match = /^@(\S+)\s+(.*)$/.exec(line);
    if (!match) return;
    const flags = match[1];
    const rest = match[2];

    let name = rest.trim();
    let values = null;
    if (flags.includes('L')) {
      // List declaration: "name|value1|value2|...".
      const parts = rest.split('|').map(p => p.trim());
      name = parts[0];
      values = parts.slice(1);
    }
    if (!name) return;

    // Own-property check: names such as "constructor" are inherited from
    // Object.prototype and must not be mistaken for existing entries.
    if (!Object.prototype.hasOwnProperty.call(attributes, name)) attributes[name] = {};
    // Merge with flags already declared for this attribute, so that
    // "@P afun" followed by "@L afun|..." keeps both properties.
    const props = attributes[name];
    for (let k = 0; k < flags.length; k++) {
      const flag = flags[k];
      if (flag === 'L') {
        props.L = values;
        continue;
      }
      props[flag] = true;
      // "@VH" / "@VA" are subtypes of the V attribute: the second letter
      // belongs to V and is not a separate H (hiding) flag.
      if (flag === 'V' && (flags[k + 1] === 'H' || flags[k + 1] === 'A')) k++;
    }
  }

  /** Log one line per attribute listing its property flags. */
  printProperties() {
    Object.keys(this.attributes).forEach(name => {
      const props = Object.keys(this.attributes[name]);
      console.log(`Attribute '${name}': ${props.join(', ')}`);
    });
  }

  /**
   * Write the original, unmodified file content as UTF-8.
   * @param {string} outputFilename path of the file to create or overwrite
   */
  write(outputFilename) {
    fs.writeFileSync(outputFilename, this.content, 'utf8');
    console.log(`Written to ${outputFilename}`);
  }
}

// Example usage:
// const reader = new FSReader('example.fs');
// reader.printProperties();
// reader.write('output.fs');

To arrive at the solution, the class uses Node's fs.readFileSync for UTF-8 text reading, splits lines, skips @E, parses header into an object map (handling @L with array), logs properties via console.log, and writes with fs.writeFileSync.

  7. C class (struct-based) for .FS file handling
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Attribute declarations collected from a .FS header.
 * names[i] and props[i] are parallel, heap-allocated, NUL-terminated strings:
 * the attribute name and the flag letters declared for it (e.g. "PO", "L").
 */
typedef struct {
    char **names;  /* attribute names, `count` entries */
    char **props;  /* flag letters per attribute, `count` entries */
    int count;     /* number of valid entries in names/props */
} FSAttributes;

/* Return a heap copy of the n bytes at s with a NUL appended; NULL if out of memory. */
static char *fs_copy_span(const char *s, size_t n) {
    char *out = malloc(n + 1);
    if (out) {
        memcpy(out, s, n);
        out[n] = '\0';
    }
    return out;
}

/* True for the in-line whitespace trimmed around header tokens ('\r' covers CRLF files). */
static int fs_is_blank(char c) {
    return c == ' ' || c == '\t' || c == '\r';
}

/* Release a partially built result after an allocation failure. */
static void fs_discard(FSAttributes *attrs) {
    for (int j = 0; j < attrs->count; j++) {
        free(attrs->names[j]);
        free(attrs->props[j]);
    }
    free(attrs->names);
    free(attrs->props);
    free(attrs);
}

/*
 * Record flags for an attribute: merge them into the entry that already has
 * this name (skipping letters it already holds) or append a new entry.
 * Returns 0 on success, -1 if out of memory.
 */
static int fs_add_attribute(FSAttributes *attrs, const char *name, size_t name_len,
                            const char *flags, size_t flags_len) {
    for (int j = 0; j < attrs->count; j++) {
        if (strlen(attrs->names[j]) == name_len &&
            memcmp(attrs->names[j], name, name_len) == 0) {
            size_t cur_len = strlen(attrs->props[j]);
            char *grown = realloc(attrs->props[j], cur_len + flags_len + 1);
            if (!grown) return -1;
            for (size_t k = 0; k < flags_len; k++) {
                if (!memchr(grown, flags[k], cur_len)) grown[cur_len++] = flags[k];
            }
            grown[cur_len] = '\0';
            attrs->props[j] = grown;
            return 0;
        }
    }

    size_t slots = (size_t)attrs->count + 1;
    char **names = realloc(attrs->names, sizeof(char *) * slots);
    if (!names) return -1;
    attrs->names = names;
    char **props = realloc(attrs->props, sizeof(char *) * slots);
    if (!props) return -1;
    attrs->props = props;

    char *name_copy = fs_copy_span(name, name_len);
    char *flags_copy = fs_copy_span(flags, flags_len);
    if (!name_copy || !flags_copy) {
        free(name_copy);
        free(flags_copy);
        return -1;
    }
    attrs->names[attrs->count] = name_copy;
    attrs->props[attrs->count] = flags_copy;
    attrs->count++;
    return 0;
}

/*
 * Parse one header line of `len` bytes that starts with '@'.
 * Layout: '@' + flag letters + whitespace + name, where list declarations
 * (flag 'L') continue with "|value|value..." after the name.
 * Malformed lines (no flags or no name) are skipped. Returns 0, or -1 if out of memory.
 */
static int fs_parse_header_line(FSAttributes *attrs, const char *line, size_t len) {
    size_t flags_end = 1;
    while (flags_end < len && !fs_is_blank(line[flags_end])) flags_end++;
    size_t flags_len = flags_end - 1;

    size_t name_start = flags_end;
    while (name_start < len && fs_is_blank(line[name_start])) name_start++;

    size_t name_end = len;
    if (memchr(line + 1, 'L', flags_len)) {
        /* For list attributes the name stops at the first '|'. */
        const char *bar = memchr(line + name_start, '|', len - name_start);
        if (bar) name_end = (size_t)(bar - line);
    }
    while (name_end > name_start && fs_is_blank(line[name_end - 1])) name_end--;

    if (flags_len == 0 || name_end == name_start) return 0;
    return fs_add_attribute(attrs, line + name_start, name_end - name_start,
                            line + 1, flags_len);
}

/*
 * Parse the header of a .FS document held in `content`.
 *
 * The header is the run of lines from the start of the text up to the first
 * blank line; an optional leading "@E " (encoding) line is skipped and header
 * lines not starting with '@' are ignored. `content` is only read, never
 * modified, so the caller can still write it out afterwards.
 *
 * Returns a newly allocated FSAttributes (empty when `content` is NULL or has
 * no declarations) that the caller must release, or NULL if out of memory.
 */
FSAttributes *parse_header(const char *content) {
    FSAttributes *attrs = malloc(sizeof *attrs);
    if (!attrs) return NULL;
    attrs->names = NULL;
    attrs->props = NULL;
    attrs->count = 0;
    if (!content) return attrs;

    const char *cursor = content;
    if (strncmp(cursor, "@E ", 3) == 0) {
        const char *eol = strchr(cursor, '\n');
        cursor = eol ? eol + 1 : cursor + strlen(cursor);
    }

    /* Scan line by line without strtok: strtok would write into the caller's
     * buffer and collapse the blank line that marks the end of the header. */
    while (*cursor) {
        const char *eol = strchr(cursor, '\n');
        size_t len = eol ? (size_t)(eol - cursor) : strlen(cursor);
        const char *next = eol ? eol + 1 : cursor + len;

        while (len > 0 && fs_is_blank(cursor[len - 1])) len--;
        if (len == 0) break; /* blank line: end of header */

        if (cursor[0] == '@' && fs_parse_header_line(attrs, cursor, len) != 0) {
            fs_discard(attrs);
            return NULL;
        }
        cursor = next;
    }
    return attrs;
}

/*
 * Print one "Attribute '<name>': <flags>" line per parsed attribute.
 * Does nothing when `attrs` (or either of its arrays) is NULL, so it is safe
 * to call on the result of a failed parse.
 */
void print_properties(FSAttributes *attrs) {
    if (!attrs || !attrs->names || !attrs->props) return;
    for (int j = 0; j < attrs->count; j++) {
        /* Passing NULL to %s is undefined behaviour; print an empty string instead. */
        const char *name = attrs->names[j] ? attrs->names[j] : "";
        const char *prop = attrs->props[j] ? attrs->props[j] : "";
        printf("Attribute '%s': %s\n", name, prop);
    }
}

/*
 * Write `content` to `output_filename`, replacing any existing file.
 * Prints "Written to <file>" on success; on any failure (NULL argument,
 * file cannot be opened, write or close error) a message goes to stderr
 * and success is not reported.
 */
void write_file(const char *content, const char *output_filename) {
    if (!content || !output_filename) {
        fprintf(stderr, "write_file: missing content or file name\n");
        return;
    }

    FILE *f = fopen(output_filename, "w");
    if (!f) {
        perror(output_filename);
        return;
    }

    int failed = (fputs(content, f) == EOF);
    /* fclose flushes buffered data, so its result decides success as well. */
    if (fclose(f) == EOF) failed = 1;

    if (failed) {
        fprintf(stderr, "Failed to write %s\n", output_filename);
        return;
    }
    printf("Written to %s\n", output_filename);
}

/*
 * Release an FSAttributes and every string it owns.
 * Accepts NULL (no-op, like free(NULL)) and tolerates missing arrays.
 */
void free_attrs(FSAttributes *attrs) {
    if (!attrs) return;
    for (int j = 0; j < attrs->count; j++) {
        if (attrs->names) free(attrs->names[j]);
        if (attrs->props) free(attrs->props[j]);
    }
    free(attrs->names);
    free(attrs->props);
    free(attrs);
}

// Example usage:
// int main() {
//     FILE *f = fopen("example.fs", "r");
//     fseek(f, 0, SEEK_END);
//     long size = ftell(f);
//     fseek(f, 0, SEEK_SET);
//     char *content = malloc(size + 1);
//     fread(content, 1, size, f);
//     content[size] = '\0';
//     fclose(f);
//     FSAttributes *attrs = parse_header(content);
//     print_properties(attrs);
//     write_file(content, "output.fs");
//     free_attrs(attrs);
//     free(content);
//     return 0;
// }

To arrive at the solution, the struct stores names and props arrays (simplified to one prop per attr for C constraints; extend for multiples with lists). Use strtok for line splitting (in-place on content copy), skip @E, collect header, parse each line to extract prop and name (handle @L basically), print via printf, write with fopen/fputs, and free memory to avoid leaks. Assumes small files; for larger, use dynamic allocation.