Task 541: .PHN File Format
Task 541: .PHN File Format
1. List of all the properties intrinsic to this file format
The .PHN file format in the TIMIT corpus is a text-based format for time-aligned phonetic transcriptions. It has no header or footer, and consists of a series of lines, each representing a phonetic segment. The intrinsic properties for each segment are:
- begin_sample: Integer value representing the starting sample number of the phonetic segment in the corresponding waveform file (starts from 0).
- end_sample: Integer value representing the ending sample number of the phonetic segment.
- phonetic_label: String representing the phonetic code for the segment (e.g., "h#", "sh", "iy", "dcl", etc.), drawn from a defined set of 61 phonetic symbols (including silence markers and closures).
The file is a sequence of these segments that together cover the utterance contiguously (each segment's begin_sample equals the previous segment's end_sample), with the first and last segments typically being silence ("h#"). The format is tabular: the three columns are separated by spaces, and lines are separated by newlines. These properties are intrinsic to the format because they define the time-aligned phonetic structure relative to the audio samples.
2. Find two direct download links for files of format .PHN
- https://www.kaggle.com/datasets/nltkdata/timitcorpus (Kaggle dataset containing TIMIT .PHN files; download the zip for access to multiple .PHN files)
- https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech (Kaggle dataset with the full TIMIT corpus including .PHN files; download the zip for access)
3. Write an embedded HTML/JavaScript snippet for a Ghost blog that lets a user drag and drop a .PHN file and dumps all of these properties to the screen
4. Write a python class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list
class PHNFileHandler:
    """Reader/writer for TIMIT-style .PHN phonetic transcription files.

    Every usable line of a .PHN file carries three whitespace-separated
    fields: ``begin_sample end_sample phonetic_label``.
    """

    def __init__(self, filepath):
        """Remember the path to operate on; the file is not touched here.

        Args:
            filepath: Path of the .PHN file used by read() and, by default,
                by write().
        """
        self.filepath = filepath
        # Each entry is a dict with the keys 'begin_sample', 'end_sample'
        # and 'phonetic_label'.
        self.segments = []

    def read(self):
        """Parse ``self.filepath`` and replace ``self.segments`` with the result.

        Lines that do not split into exactly three fields (blank lines
        included) are skipped. ``self.segments`` is only replaced once the
        whole file has been parsed successfully.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a three-field line has a non-integer sample field.
        """
        segments = []
        with open(self.filepath, 'r') as f:
            # Iterate the file object directly rather than materialising
            # every line with readlines().
            for lineno, line in enumerate(f, start=1):
                parts = line.split()
                if len(parts) != 3:
                    continue
                try:
                    begin_sample = int(parts[0])
                    end_sample = int(parts[1])
                except ValueError as exc:
                    raise ValueError(
                        f"{self.filepath}:{lineno}: sample positions must be "
                        f"integers: {line.strip()!r}"
                    ) from exc
                segments.append({
                    'begin_sample': begin_sample,
                    'end_sample': end_sample,
                    'phonetic_label': parts[2]
                })
        self.segments = segments

    def print_properties(self):
        """Print every loaded segment to stdout, numbered from 1."""
        if not self.segments:
            print("No segments loaded. Call read() first.")
            return
        for idx, seg in enumerate(self.segments, start=1):
            print(f"Segment {idx}:")
            print(f" begin_sample: {seg['begin_sample']}")
            print(f" end_sample: {seg['end_sample']}")
            print(f" phonetic_label: {seg['phonetic_label']}")
            print()

    def write(self, new_filepath=None):
        """Write the segments out, one ``begin end label`` line per segment.

        Args:
            new_filepath: Destination path. When omitted (or otherwise
                falsy) the file at ``self.filepath`` is overwritten.
        """
        filepath = new_filepath or self.filepath
        with open(filepath, 'w') as f:
            for seg in self.segments:
                f.write(f"{seg['begin_sample']} {seg['end_sample']} {seg['phonetic_label']}\n")
# Example usage: load example.phn, dump it, then save a copy with one extra
# segment appended.
if __name__ == "__main__":
    handler = PHNFileHandler('example.phn')
    handler.read()
    handler.print_properties()
    # To write: segments are plain dicts, so new ones can be appended directly.
    handler.segments.append({'begin_sample': 0, 'end_sample': 100, 'phonetic_label': 'test'})
    handler.write('new_example.phn')
5. Write a java class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads, prints and writes TIMIT-style .PHN phonetic transcription files.
 *
 * <p>Every usable line of a .PHN file carries three whitespace-separated fields:
 * {@code begin_sample end_sample phonetic_label}.
 */
public class PHNFileHandler {
    private final String filepath;
    private final List<Segment> segments;

    /**
     * Creates a handler bound to a file path. The file is not opened here.
     *
     * @param filepath path used by {@link #read()} and, by default, by {@link #write(String)}
     */
    public PHNFileHandler(String filepath) {
        this.filepath = filepath;
        this.segments = new ArrayList<>();
    }

    /**
     * Discards any previously loaded segments and parses the file again.
     *
     * <p>Lines that do not split into exactly three fields (blank lines included) are skipped.
     *
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a three-field line has a non-integer sample field
     */
    public void read() throws IOException {
        segments.clear();
        try (BufferedReader br = new BufferedReader(new FileReader(filepath))) {
            String line;
            // The assignment must be parenthesized: `!=` binds tighter than `=`, so
            // without the parentheses a boolean would be assigned to `line`.
            while ((line = br.readLine()) != null) {
                String[] parts = line.trim().split("\\s+");
                if (parts.length != 3) {
                    continue;
                }
                int beginSample = Integer.parseInt(parts[0]);
                int endSample = Integer.parseInt(parts[1]);
                segments.add(new Segment(beginSample, endSample, parts[2]));
            }
        }
    }

    /** Prints every loaded segment to standard output, numbered from 1. */
    public void printProperties() {
        if (segments.isEmpty()) {
            System.out.println("No segments loaded. Call read() first.");
            return;
        }
        for (int idx = 0; idx < segments.size(); idx++) {
            Segment seg = segments.get(idx);
            System.out.println("Segment " + (idx + 1) + ":");
            System.out.println(" begin_sample: " + seg.beginSample);
            System.out.println(" end_sample: " + seg.endSample);
            System.out.println(" phonetic_label: " + seg.phoneticLabel);
            System.out.println();
        }
    }

    /**
     * Writes the segments out, one {@code begin end label} line per segment.
     *
     * @param newFilepath destination path, or {@code null} to overwrite the handler's own file
     * @throws IOException if the destination cannot be opened or written
     */
    public void write(String newFilepath) throws IOException {
        String path = (newFilepath == null) ? filepath : newFilepath;
        try (FileWriter fw = new FileWriter(path)) {
            for (Segment seg : segments) {
                fw.write(seg.beginSample + " " + seg.endSample + " " + seg.phoneticLabel + "\n");
            }
        }
    }

    /** One time-aligned phonetic segment, i.e. one line of a .PHN file. */
    private static class Segment {
        final int beginSample;
        final int endSample;
        final String phoneticLabel;

        Segment(int beginSample, int endSample, String phoneticLabel) {
            this.beginSample = beginSample;
            this.endSample = endSample;
            this.phoneticLabel = phoneticLabel;
        }
    }

    // Example usage
    public static void main(String[] args) throws IOException {
        PHNFileHandler handler = new PHNFileHandler("example.phn");
        handler.read();
        handler.printProperties();
        // To write
        handler.segments.add(new Segment(0, 100, "test"));
        handler.write("new_example.phn");
    }
}
6. Write a javascript class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list
/**
 * Browser-side reader/writer for TIMIT-style .PHN phonetic transcriptions.
 * Every usable line holds `begin_sample end_sample phonetic_label`.
 */
class PHNFileHandler {
  /**
   * @param {string} filepath Name offered as the download filename by write().
   */
  constructor(filepath) {
    this.filepath = filepath;
    this.segments = [];
  }

  /**
   * Parses a .PHN file and replaces `this.segments` with its contents.
   * Lines that do not have exactly three fields, or whose sample fields are
   * not integers, are skipped.
   *
   * @param {{text: function(): Promise<string>}} file A File/Blob-like object
   *   exposing an async text() method.
   * @returns {Promise<void>}
   */
  async read(file) {
    const text = await file.text();
    const segments = [];
    for (const line of text.split('\n')) {
      // trim() also removes the '\r' left behind by CRLF line endings.
      const parts = line.trim().split(/\s+/);
      if (parts.length !== 3) {
        continue;
      }
      // Always pass the radix so the fields are read as decimal.
      const begin = parseInt(parts[0], 10);
      const end = parseInt(parts[1], 10);
      // Never store NaN sample positions.
      if (Number.isNaN(begin) || Number.isNaN(end)) {
        continue;
      }
      segments.push({
        begin_sample: begin,
        end_sample: end,
        phonetic_label: parts[2]
      });
    }
    this.segments = segments;
  }

  /** Logs every loaded segment to the console, numbered from 1. */
  printProperties() {
    if (this.segments.length === 0) {
      console.log('No segments loaded. Call read() first.');
      return;
    }
    this.segments.forEach((seg, idx) => {
      console.log(`Segment ${idx + 1}:`);
      console.log(` begin_sample: ${seg.begin_sample}`);
      console.log(` end_sample: ${seg.end_sample}`);
      console.log(` phonetic_label: ${seg.phonetic_label}`);
      console.log('');
    });
  }

  /**
   * Renders the segments as .PHN text: one newline-terminated
   * `begin end label` line per segment (the empty string when there are none).
   *
   * @returns {string}
   */
  serialize() {
    return this.segments
      .map(seg => `${seg.begin_sample} ${seg.end_sample} ${seg.phonetic_label}\n`)
      .join('');
  }

  /**
   * Offers the serialized segments as a browser download named
   * `this.filepath`. Requires a DOM (`document`, `Blob`, `URL`).
   */
  write() {
    const blob = new Blob([this.serialize()], {type: 'text/plain'});
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = this.filepath;
    a.click();
    URL.revokeObjectURL(url);
  }
}
// Example usage (in browser, with file input)
const input = document.createElement('input');
input.type = 'file';
input.onchange = async (e) => {
  const file = e.target.files[0];
  // Nothing was selected: there is no file to process.
  if (!file) {
    return;
  }
  // The format is referred to as .PHN, so accept the extension in any
  // letter case instead of lower-case only.
  if (file.name.toLowerCase().endsWith('.phn')) {
    const handler = new PHNFileHandler(file.name);
    await handler.read(file);
    handler.printProperties();
    // To write
    handler.segments.push({begin_sample: 0, end_sample: 100, phonetic_label: 'test'});
    handler.write();
  }
};
input.click();
7. Write a c class that can open any file of format .PHN and decode read and write and print to console all the properties from the above list
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One time-aligned phonetic segment, i.e. one parsed line of a .PHN file. */
typedef struct {
/* Sample number at which the segment begins. */
int begin_sample;
/* Sample number at which the segment ends. */
int end_sample;
/* Phonetic code of the segment; read() stores a strdup()'d copy here and
 * destroyPHNFileHandler() frees it. */
char *phonetic_label;
} Segment;
/* State of a .PHN reader/writer: the bound file path plus the segments
 * loaded from it. */
typedef struct {
/* strdup()'d copy of the path given to createPHNFileHandler(). */
char *filepath;
/* Heap array of loaded segments; NULL until read() allocates it. */
Segment *segments;
/* Number of entries of `segments` that are in use. */
int count;
} PHNFileHandler;
/*
 * Allocates a handler bound to `filepath`. The file itself is not opened.
 *
 * The path is copied, so the caller keeps ownership of `filepath`.
 * Returns NULL if `filepath` is NULL or if memory cannot be allocated;
 * otherwise the result must be released with destroyPHNFileHandler().
 */
PHNFileHandler* createPHNFileHandler(const char *filepath) {
    if (!filepath) {
        return NULL;
    }
    PHNFileHandler *handler = malloc(sizeof *handler);
    if (!handler) {
        return NULL;
    }
    handler->filepath = strdup(filepath);
    if (!handler->filepath) {
        /* Do not hand out a half-built handler. */
        free(handler);
        return NULL;
    }
    handler->segments = NULL;
    handler->count = 0;
    return handler;
}
/*
 * Loads every segment of handler->filepath into handler->segments,
 * replacing (and freeing) whatever a previous call loaded.
 *
 * Lines that do not parse as "<int> <int> <label>" are skipped. If the file
 * cannot be opened the handler is left untouched. If memory runs out
 * part-way through, an error is reported and the segments read so far are
 * kept.
 *
 * NOTE(review): the name `read` is also used by the POSIX read(2) function;
 * confirm this does not clash on the target platform.
 */
void read(PHNFileHandler *handler) {
    FILE *f = fopen(handler->filepath, "r");
    if (!f) {
        perror("Error opening file");
        return;
    }

    /* Release the result of any earlier read() so repeated calls do not leak. */
    for (int i = 0; i < handler->count; i++) {
        free(handler->segments[i].phonetic_label);
    }
    free(handler->segments);
    handler->segments = NULL;
    handler->count = 0;

    int capacity = 10;
    handler->segments = malloc(capacity * sizeof(Segment));
    if (!handler->segments) {
        perror("Error allocating segments");
        fclose(f);
        return;
    }

    char line[256];
    /* A token taken from `line` can never be longer than 255 characters, so
     * a buffer of the same size with a matching %255s width cannot overflow. */
    char label[sizeof line];
    while (fgets(line, sizeof(line), f)) {
        int begin, end;
        if (sscanf(line, "%d %d %255s", &begin, &end, label) != 3) {
            continue;
        }
        if (handler->count >= capacity) {
            /* Keep the old pointer until realloc is known to have succeeded. */
            Segment *grown = realloc(handler->segments,
                                     (size_t)capacity * 2 * sizeof(Segment));
            if (!grown) {
                perror("Error growing segments");
                break;
            }
            handler->segments = grown;
            capacity *= 2;
        }
        char *copy = strdup(label);
        if (!copy) {
            perror("Error copying label");
            break;
        }
        handler->segments[handler->count].begin_sample = begin;
        handler->segments[handler->count].end_sample = end;
        handler->segments[handler->count].phonetic_label = copy;
        handler->count++;
    }
    fclose(f);
}
/*
 * Prints every loaded segment to stdout, numbered from 1, or a hint to call
 * read() first when the handler holds no segments.
 */
void printProperties(PHNFileHandler *handler) {
    const int total = handler->count;

    if (total != 0) {
        int number = 1;
        /* Walk the array by pointer while `number` tracks the 1-based label. */
        for (const Segment *seg = handler->segments; number <= total; ++seg, ++number) {
            printf("Segment %d:\n", number);
            printf(" begin_sample: %d\n", seg->begin_sample);
            printf(" end_sample: %d\n", seg->end_sample);
            printf(" phonetic_label: %s\n", seg->phonetic_label);
            printf("\n");
        }
        return;
    }

    printf("No segments loaded. Call read() first.\n");
}
/*
 * Writes the loaded segments as "<begin> <end> <label>" lines.
 *
 * `new_filepath` is the destination; pass NULL to overwrite
 * handler->filepath. Failures to open, write or close the file are reported
 * with perror().
 *
 * NOTE(review): the name `write` is also used by the POSIX write(2)
 * function; confirm this does not clash on the target platform.
 */
void write(PHNFileHandler *handler, const char *new_filepath) {
    const char *path = new_filepath ? new_filepath : handler->filepath;
    FILE *f = fopen(path, "w");
    if (!f) {
        perror("Error opening file for writing");
        return;
    }
    for (int i = 0; i < handler->count; i++) {
        if (fprintf(f, "%d %d %s\n", handler->segments[i].begin_sample, handler->segments[i].end_sample, handler->segments[i].phonetic_label) < 0) {
            perror("Error writing file");
            break;
        }
    }
    /* Buffered output is flushed on close, so a late write error surfaces here. */
    if (fclose(f) != 0) {
        perror("Error closing file");
    }
}
/*
 * Releases a handler together with everything it owns: each segment's label,
 * the segment array and the copied file path. Passing NULL is a no-op, as
 * with free().
 */
void destroyPHNFileHandler(PHNFileHandler *handler) {
    if (!handler) {
        return;
    }
    for (int i = 0; i < handler->count; i++) {
        free(handler->segments[i].phonetic_label);
    }
    free(handler->segments);
    free(handler->filepath);
    free(handler);
}
// Example usage
int main() {
PHNFileHandler *handler = createPHNFileHandler("example.phn");
read(handler);
printProperties(handler);
// To add and write
handler->segments = realloc(handler->segments, (handler->count + 1) * sizeof(Segment));
handler->segments[handler->count].begin_sample = 0;
handler->segments[handler->count].end_sample = 100;
handler->segments[handler->count].phonetic_label = strdup("test");
handler->count++;
write(handler, "new_example.phn");
destroyPHNFileHandler(handler);
return 0;
}