Task 541: .PHN File Format

Task 541: .PHN File Format

1. List of all the properties intrinsic to this file format

The .PHN file format in the TIMIT corpus is a text-based format for time-aligned phonetic transcriptions. It has no header or footer, and consists of a series of lines, each representing a phonetic segment. The intrinsic properties for each segment are:

  • begin_sample: Integer value representing the starting sample number of the phonetic segment in the corresponding waveform file (starts from 0).
  • end_sample: Integer value representing the ending sample number of the phonetic segment.
  • phonetic_label: String representing the phonetic code for the segment (e.g., "h#", "sh", "iy", "dcl", etc.), drawn from a defined set of 61 phonetic symbols (including silence markers and closures).

The file is a sequence of these segments that covers the utterance contiguously (each segment's begin_sample equals the previous segment's end_sample), with the first and last segments typically being silence ("h#"). The format is tabular: each line holds the three fields separated by spaces (for example, `0 3050 h#`), and lines are separated by newlines. These properties are intrinsic to the format because they define the time-aligned phonetic structure relative to the audio samples.

2. Find two direct download links for files of format .PHN

3. Write a ghost blog embedded html javascript that allows a user to drag n drop a file of format .PHN and it will dump to screen all these properties

PHN File Viewer
Drag and drop .PHN file here

4. Write a python class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list

class PHNFileHandler:
    """Load, display and save the segments of a .PHN transcription file.

    Each segment is kept as a dict with the keys ``begin_sample``,
    ``end_sample`` (both ints) and ``phonetic_label`` (str).
    """

    def __init__(self, filepath):
        """Remember the path to work on; no file access happens here."""
        self.filepath = filepath
        self.segments = []

    def read(self):
        """Replace ``self.segments`` with the segments parsed from the file.

        Lines that do not split into exactly three whitespace-separated
        fields are skipped. A non-integer sample field raises ValueError.
        """
        # Read the whole file before touching self.segments so a failed open
        # leaves the previously loaded segments in place.
        with open(self.filepath, 'r') as source:
            raw_lines = list(source)
        self.segments = []
        for raw in raw_lines:
            fields = raw.strip().split()
            if len(fields) != 3:
                continue
            begin_text, end_text, label = fields
            begin_sample = int(begin_text)
            end_sample = int(end_text)
            self.segments.append({
                'begin_sample': begin_sample,
                'end_sample': end_sample,
                'phonetic_label': label,
            })

    def print_properties(self):
        """Print every loaded segment, or a hint when nothing is loaded."""
        if not self.segments:
            print("No segments loaded. Call read() first.")
            return
        number = 0
        for segment in self.segments:
            number += 1
            print(f"Segment {number}:")
            for key in ('begin_sample', 'end_sample', 'phonetic_label'):
                print(f"  {key}: {segment[key]}")
            print()

    def write(self, new_filepath=None):
        """Write the segments, one per line, to ``new_filepath``.

        Falls back to the path given at construction when ``new_filepath``
        is omitted (or otherwise falsy).
        """
        target = new_filepath if new_filepath else self.filepath
        with open(target, 'w') as sink:
            sink.writelines(
                f"{seg['begin_sample']} {seg['end_sample']} {seg['phonetic_label']}\n"
                for seg in self.segments
            )

# Demonstration: load example.phn, show it, then save a modified copy.
if __name__ == "__main__":
    phn = PHNFileHandler('example.phn')
    phn.read()
    phn.print_properties()
    # Add one extra segment before saving to a new file.
    extra_segment = {'begin_sample': 0, 'end_sample': 100, 'phonetic_label': 'test'}
    phn.segments.append(extra_segment)
    phn.write('new_example.phn')

5. Write a java class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads, prints and writes .PHN phonetic transcription files.
 *
 * <p>Each line of a .PHN file holds three whitespace-separated fields:
 * begin sample, end sample and phonetic label.
 */
public class PHNFileHandler {
    private final String filepath;
    private final List<Segment> segments;

    /**
     * Creates a handler for the given file. No I/O happens until {@link #read()}.
     *
     * @param filepath path of the .PHN file to read (and default path to write)
     */
    public PHNFileHandler(String filepath) {
        this.filepath = filepath;
        this.segments = new ArrayList<>();
    }

    /**
     * Replaces the loaded segments with those parsed from the file.
     *
     * <p>Lines that do not consist of exactly three whitespace-separated fields
     * are skipped.
     *
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a sample field of a three-field line is not an integer
     */
    public void read() throws IOException {
        segments.clear();
        try (BufferedReader br = new BufferedReader(new FileReader(filepath))) {
            String line;
            // The assignment must be parenthesized: without the parentheses
            // "!=" binds tighter than "=", which tries to assign a boolean to
            // a String and does not compile.
            while ((line = br.readLine()) != null) {
                String[] parts = line.trim().split("\\s+");
                if (parts.length == 3) {
                    int beginSample = Integer.parseInt(parts[0]);
                    int endSample = Integer.parseInt(parts[1]);
                    String phoneticLabel = parts[2];
                    segments.add(new Segment(beginSample, endSample, phoneticLabel));
                }
            }
        }
    }

    /** Prints every loaded segment to standard output, or a hint if none are loaded. */
    public void printProperties() {
        if (segments.isEmpty()) {
            System.out.println("No segments loaded. Call read() first.");
            return;
        }
        for (int idx = 0; idx < segments.size(); idx++) {
            Segment seg = segments.get(idx);
            System.out.println("Segment " + (idx + 1) + ":");
            System.out.println("  begin_sample: " + seg.beginSample);
            System.out.println("  end_sample: " + seg.endSample);
            System.out.println("  phonetic_label: " + seg.phoneticLabel);
            System.out.println();
        }
    }

    /**
     * Writes the loaded segments, one per line, separated by single spaces.
     *
     * @param newFilepath destination path, or {@code null} to overwrite the
     *     path given at construction
     * @throws IOException if the destination cannot be opened or written
     */
    public void write(String newFilepath) throws IOException {
        String path = (newFilepath == null) ? filepath : newFilepath;
        try (FileWriter fw = new FileWriter(path)) {
            for (Segment seg : segments) {
                fw.write(seg.beginSample + " " + seg.endSample + " " + seg.phoneticLabel + "\n");
            }
        }
    }

    /** One time-aligned phonetic segment (one line of the file). */
    private static class Segment {
        final int beginSample;
        final int endSample;
        final String phoneticLabel;

        Segment(int beginSample, int endSample, String phoneticLabel) {
            this.beginSample = beginSample;
            this.endSample = endSample;
            this.phoneticLabel = phoneticLabel;
        }
    }

    // Example usage
    public static void main(String[] args) throws IOException {
        PHNFileHandler handler = new PHNFileHandler("example.phn");
        handler.read();
        handler.printProperties();
        // To write
        handler.segments.add(new Segment(0, 100, "test"));
        handler.write("new_example.phn");
    }
}

6. Write a javascript class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list

/**
 * Browser-side handler for .PHN transcription files: parses a File-like
 * object, logs its segments, and offers the segments back as a download.
 */
class PHNFileHandler {
  /**
   * @param {string} filepath Name used for the download produced by write().
   */
  constructor(filepath) {
    this.filepath = filepath;
    this.segments = [];
  }

  /**
   * Parse the text of `file` into this.segments, replacing earlier content.
   * Lines without exactly three whitespace-separated fields are skipped.
   * @param {{text: function(): Promise<string>}} file Object exposing text().
   */
  async read(file) {
    const contents = await file.text();
    const rows = contents.trim().split('\n');
    this.segments = [];
    for (const row of rows) {
      const fields = row.trim().split(/\s+/);
      if (fields.length !== 3) {
        continue;
      }
      const [beginText, endText, label] = fields;
      this.segments.push({
        begin_sample: parseInt(beginText),
        end_sample: parseInt(endText),
        phonetic_label: label
      });
    }
  }

  /** Log every loaded segment, or a hint when nothing has been loaded. */
  printProperties() {
    if (this.segments.length === 0) {
      console.log('No segments loaded. Call read() first.');
      return;
    }
    for (let i = 0; i < this.segments.length; i++) {
      const segment = this.segments[i];
      console.log(`Segment ${i + 1}:`);
      console.log(`  begin_sample: ${segment.begin_sample}`);
      console.log(`  end_sample: ${segment.end_sample}`);
      console.log(`  phonetic_label: ${segment.phonetic_label}`);
      console.log('');
    }
  }

  /**
   * Serialize the segments (one per line, newline-joined) and trigger a
   * download named after this.filepath via a temporary anchor element.
   */
  write() {
    const rows = [];
    for (const seg of this.segments) {
      rows.push(`${seg.begin_sample} ${seg.end_sample} ${seg.phonetic_label}`);
    }
    const content = rows.join('\n');
    const url = URL.createObjectURL(new Blob([content], {type: 'text/plain'}));
    const link = document.createElement('a');
    link.href = url;
    link.download = this.filepath;
    link.click();
    // The object URL is only needed to start the download.
    URL.revokeObjectURL(url);
  }
}

// Example usage (in browser, with file input)
const input = document.createElement('input');
input.type = 'file';
input.onchange = async (e) => {
  const file = e.target.files[0];
  // Nothing selected: there is no file to parse.
  if (!file) {
    return;
  }
  // Compare the extension case-insensitively so files named "*.PHN" (the
  // spelling used for this format) are accepted as well as "*.phn".
  if (file.name.toLowerCase().endsWith('.phn')) {
    const handler = new PHNFileHandler(file.name);
    await handler.read(file);
    handler.printProperties();
    // To write
    handler.segments.push({begin_sample: 0, end_sample: 100, phonetic_label: 'test'});
    handler.write();
  }
};
input.click();

7. Write a c class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One time-aligned phonetic segment, i.e. one parsed line of a .PHN file. */
typedef struct {
    int begin_sample;     /* first numeric field of the line */
    int end_sample;       /* second numeric field of the line */
    char *phonetic_label; /* heap copy of the label (strdup); freed by destroyPHNFileHandler */
} Segment;

/* State of one .PHN handler: the file path plus the segments loaded from it. */
typedef struct {
    char *filepath;    /* heap copy of the path passed to createPHNFileHandler */
    Segment *segments; /* heap array of segments; NULL until read() allocates it */
    int count;         /* number of valid entries in segments */
} PHNFileHandler;

/*
 * Allocate a handler for the given path. The path string is copied, so the
 * caller keeps ownership of its argument. No file is opened here.
 *
 * Returns the new handler, or NULL if filepath is NULL or memory allocation
 * fails. Release the result with destroyPHNFileHandler().
 */
PHNFileHandler* createPHNFileHandler(const char *filepath) {
    if (filepath == NULL) {
        return NULL;
    }

    PHNFileHandler *handler = malloc(sizeof(PHNFileHandler));
    if (handler == NULL) {
        return NULL;
    }

    handler->filepath = strdup(filepath);
    if (handler->filepath == NULL) {
        /* Do not hand out a half-initialized handler. */
        free(handler);
        return NULL;
    }

    handler->segments = NULL;
    handler->count = 0;
    return handler;
}

/*
 * Load the segments of handler->filepath into handler->segments, replacing
 * any segments loaded earlier.
 *
 * Lines that do not start with "<int> <int> <label>" are skipped. If the file
 * cannot be opened, an error is printed and the handler is left unchanged.
 * If memory runs out while loading, an error is printed and the segments
 * read so far are kept.
 *
 * NOTE(review): this function's name is the same as POSIX read(2); that may
 * clash if <unistd.h> is ever included in this file -- confirm before doing so.
 */
void read(PHNFileHandler *handler) {
    FILE *f = fopen(handler->filepath, "r");
    if (!f) {
        perror("Error opening file");
        return;
    }

    /* Release segments from a previous read() so re-reading does not leak. */
    for (int i = 0; i < handler->count; i++) {
        free(handler->segments[i].phonetic_label);
    }
    free(handler->segments);
    handler->segments = NULL;
    handler->count = 0;

    char line[256];
    int capacity = 10;
    handler->segments = malloc(capacity * sizeof(Segment));
    if (!handler->segments) {
        perror("Error allocating segments");
        fclose(f);
        return;
    }

    while (fgets(line, sizeof(line), f)) {
        int begin, end;
        /* Same size as line, and the %255s width below matches it, so a
         * label can never overflow this buffer. */
        char label[256];
        if (sscanf(line, "%d %d %255s", &begin, &end, label) == 3) {
            if (handler->count >= capacity) {
                /* Grow through a temporary so the existing array is not lost
                 * (and leaked) when realloc fails. */
                Segment *grown = realloc(handler->segments,
                                         (size_t)capacity * 2 * sizeof(Segment));
                if (!grown) {
                    perror("Error growing segments");
                    break;
                }
                handler->segments = grown;
                capacity *= 2;
            }
            char *label_copy = strdup(label);
            if (!label_copy) {
                perror("Error copying label");
                break;
            }
            handler->segments[handler->count].begin_sample = begin;
            handler->segments[handler->count].end_sample = end;
            handler->segments[handler->count].phonetic_label = label_copy;
            handler->count++;
        }
    }
    fclose(f);
}

/*
 * Print every loaded segment to stdout, numbered from 1, or a hint when the
 * handler holds no segments.
 */
void printProperties(PHNFileHandler *handler) {
    const int total = handler->count;

    if (total == 0) {
        printf("No segments loaded. Call read() first.\n");
        return;
    }

    int number = 1;
    while (number <= total) {
        const Segment *seg = &handler->segments[number - 1];
        printf("Segment %d:\n", number);
        printf("  begin_sample: %d\n", seg->begin_sample);
        printf("  end_sample: %d\n", seg->end_sample);
        printf("  phonetic_label: %s\n", seg->phonetic_label);
        printf("\n");
        number++;
    }
}

/*
 * Write the handler's segments, one "<begin> <end> <label>" line each, to
 * new_filepath, or to the handler's own path when new_filepath is NULL.
 * Prints an error and returns if the destination cannot be opened.
 */
void write(PHNFileHandler *handler, const char *new_filepath) {
    const char *target = handler->filepath;
    if (new_filepath) {
        target = new_filepath;
    }

    FILE *out = fopen(target, "w");
    if (out == NULL) {
        perror("Error opening file for writing");
        return;
    }

    int i = 0;
    while (i < handler->count) {
        const Segment *seg = &handler->segments[i];
        fprintf(out, "%d %d %s\n", seg->begin_sample, seg->end_sample, seg->phonetic_label);
        i++;
    }
    fclose(out);
}

/*
 * Free a handler together with its path copy, its segment array and every
 * segment label. Passing NULL is a no-op, mirroring free(NULL).
 */
void destroyPHNFileHandler(PHNFileHandler *handler) {
    if (handler == NULL) {
        return;
    }
    for (int i = 0; i < handler->count; i++) {
        free(handler->segments[i].phonetic_label);
    }
    free(handler->segments);
    free(handler->filepath);
    free(handler);
}

// Example usage
int main() {
    PHNFileHandler *handler = createPHNFileHandler("example.phn");
    read(handler);
    printProperties(handler);
    // To add and write
    handler->segments = realloc(handler->segments, (handler->count + 1) * sizeof(Segment));
    handler->segments[handler->count].begin_sample = 0;
    handler->segments[handler->count].end_sample = 100;
    handler->segments[handler->count].phonetic_label = strdup("test");
    handler->count++;
    write(handler, "new_example.phn");
    destroyPHNFileHandler(handler);
    return 0;
}