Task 684: .SRT File Format
Task 684: .SRT File Format
1. List of Properties of the .SRT File Format Intrinsic to Its File System
The .SRT (SubRip Subtitle) file format is a plain text-based format with no binary elements, magic numbers, or fixed headers/footers. It relies on text structure for parsing, and its "intrinsic properties" refer to the core structural components that define the format itself, as opposed to general file system metadata (e.g., size or creation date, which are not format-specific). Based on the format specifications, the key properties are:
- Plain text encoding: Typically UTF-8, but no strict standard; the file is human-readable ASCII-compatible text.
- Line endings: Usually CRLF (\r\n) or LF (\n), with no enforcement.
- No header or footer: The file starts directly with the first subtitle block and ends after the last.
- Subtitle blocks: The file is composed of repeating blocks, each separated by one or more blank lines.
- Subtitle number: An integer (starting from 1) on its own line, indicating the sequence.
- Timecode: A line in the format `HH:MM:SS,mmm --> HH:MM:SS,mmm`, where `HH` is hours (00-23), `MM` is minutes (00-59), `SS` is seconds (00-59), and `mmm` is milliseconds (000-999).
- Subtitle text: One or more lines of text following the timecode, until a blank line.
- File extension: .srt (case-insensitive, but conventionally lowercase).
These properties make .SRT simple and platform-agnostic, with no dependencies on specific file system features beyond text handling.
2. Two Direct Download Links for .SRT Files
Here are two direct download links to sample .SRT files:
- https://hitokageproduction.com/files/subsSamples/srt.srt
- https://gist.githubusercontent.com/matibzurovski/d690d5c14acbaa399e7f0829f9d6888e/raw
These are plain text .SRT files containing example subtitles.
3. Ghost Blog Embedded HTML JavaScript for Drag-and-Drop .SRT File Dump
Below is a self-contained HTML snippet with embedded JavaScript that can be embedded into a Ghost blog post (or any HTML page). It creates a drag-and-drop area where users can drop a .SRT file. The script reads the file, parses it based on the format properties, and dumps all properties (e.g., subtitle numbers, timecodes, text, and overall file info) to the screen in a readable format.
4. Python Class for .SRT Handling
import re
import os
class SRTHandler:
    """Read, inspect and write SubRip (.srt) subtitle files.

    All state lives in ``self.properties``:

    * ``encoding`` - text encoding used for both reading and writing.
    * ``line_endings`` - ``'\\r\\n'`` or ``'\\n'``; detected by ``read`` and
      reused by ``write``.
    * ``has_header_footer`` - always ``False``.
    * ``subtitles`` - list of dicts
      ``{'number': int, 'start': str, 'end': str, 'text': list[str]}``.
    """

    # A cue number is a line made only of digits.
    _NUMBER_RE = re.compile(r'^\d+$')
    # "HH:MM:SS,mmm --> HH:MM:SS,mmm"; both timestamps are captured.
    _TIMECODE_RE = re.compile(
        r'^(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})$')

    def __init__(self):
        self.properties = {
            'encoding': 'utf-8',
            'line_endings': '\n',
            'has_header_footer': False,
            'subtitles': []  # list of dicts: {'number': int, 'start': str, 'end': str, 'text': list[str]}
        }

    def read(self, filepath):
        """Parse ``filepath`` and replace the currently stored subtitles.

        Args:
            filepath: Path to an existing file whose name ends in ``.srt``
                (case-insensitive).

        Raises:
            ValueError: If the path does not exist or lacks the ``.srt``
                extension.
        """
        if not os.path.exists(filepath) or not filepath.lower().endswith('.srt'):
            raise ValueError("Invalid .SRT file path.")

        # newline='' disables universal-newline translation; without it every
        # '\r\n' is turned into '\n' before the detection below can see it.
        with open(filepath, 'r', encoding=self.properties['encoding'], newline='') as f:
            content = f.read()

        # A leading byte-order mark would make the first cue number fail the
        # digits-only match and silently drop the first subtitle.
        if content.startswith('\ufeff'):
            content = content[1:]

        self.properties['line_endings'] = '\r\n' if '\r\n' in content else '\n'

        # Start from a clean list so a second read() does not append duplicates.
        subtitles = []
        self.properties['subtitles'] = subtitles

        current_sub = None
        state = 'number'
        for line in content.splitlines():
            line = line.strip()

            if line == '':
                # A blank line terminates the cue in progress, if any.
                if current_sub:
                    subtitles.append(current_sub)
                current_sub = None
                state = 'number'
                continue

            if state == 'number':
                # Non-numeric lines in this state are ignored.
                if self._NUMBER_RE.match(line):
                    current_sub = {'number': int(line), 'start': '', 'end': '', 'text': []}
                    state = 'timecode'
            elif state == 'timecode':
                # Lines that are not a valid timecode are skipped; the parser
                # keeps waiting for one until the next blank line.
                match = self._TIMECODE_RE.match(line)
                if match:
                    current_sub['start'] = match.group(1)
                    current_sub['end'] = match.group(2)
                    state = 'text'
            elif state == 'text':
                current_sub['text'].append(line)

        # The last cue is not necessarily followed by a blank line.
        if current_sub:
            subtitles.append(current_sub)

    def print_properties(self):
        """Print the stored properties and every subtitle block to stdout."""
        print("SRT File Properties:")
        print(f"Encoding: {self.properties['encoding']}")
        print(f"Line Endings: {repr(self.properties['line_endings'])}")
        print(f"Has Header/Footer: {self.properties['has_header_footer']}")
        print(f"Number of Subtitles: {len(self.properties['subtitles'])}")
        print("Subtitle Blocks:")
        for sub in self.properties['subtitles']:
            print(f"Number: {sub['number']}")
            print(f"Start: {sub['start']}")
            print(f"End: {sub['end']}")
            print("Text:")
            for text_line in sub['text']:
                print(text_line)
            print("---")

    def write(self, filepath):
        """Serialize the stored subtitles to ``filepath``.

        Each cue is written as number, timecode, text lines and one blank
        line, all terminated with ``properties['line_endings']``.
        """
        eol = self.properties['line_endings']
        # newline='' writes the chosen line ending verbatim instead of letting
        # the platform translate '\n' again (which would yield '\r\r\n').
        with open(filepath, 'w', encoding=self.properties['encoding'], newline='') as f:
            for sub in self.properties['subtitles']:
                f.write(f"{sub['number']}{eol}")
                f.write(f"{sub['start']} --> {sub['end']}{eol}")
                for text_line in sub['text']:
                    f.write(f"{text_line}{eol}")
                f.write(eol)
# Example usage:
# handler = SRTHandler()
# handler.read('example.srt')
# handler.print_properties()
# handler.write('output.srt')
5. Java Class for .SRT Handling
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class SRTHandler {
private Map<String, Object> properties = new HashMap<>();
public SRTHandler() {
properties.put("encoding", "UTF-8");
properties.put("line_endings", "\n");
properties.put("has_header_footer", false);
properties.put("subtitles", new ArrayList<Map<String, Object>>());
}
public void read(String filepath) throws IOException {
if (!filepath.toLowerCase().endsWith(".srt")) {
throw new IllegalArgumentException("Invalid .SRT file path.");
}
StringBuilder content = new StringBuilder();
try (BufferedReader reader = new BufferedReader(new FileReader(filepath))) {
String line;
while ((line = reader.readLine()) != null) {
content.append(line).append("\n");
}
}
String fullContent = content.toString();
properties.put("line_endings", fullContent.contains("\r\n") ? "\r\n" : "\n");
String[] lines = fullContent.split("\r?\n|\r");
Map<String, Object> currentSub = null;
String state = "number";
for (String line : lines) {
line = line.trim();
if (line.isEmpty()) {
if (currentSub != null) {
((List<Map<String, Object>>) properties.get("subtitles")).add(currentSub);
}
currentSub = null;
state = "number";
continue;
}
if (state.equals("number")) {
if (line.matches("\\d+")) {
currentSub = new HashMap<>();
currentSub.put("number", Integer.parseInt(line));
currentSub.put("text", new ArrayList<String>());
state = "timecode";
}
} else if (state.equals("timecode")) {
Matcher match = Pattern.compile("^(\\d{2}:\\d{2}:\\d{2},\\d{3}) --> (\\d{2}:\\d{2}:\\d{2},\\d{3})$").matcher(line);
if (match.matches()) {
currentSub.put("start", match.group(1));
currentSub.put("end", match.group(2));
state = "text";
}
} else if (state.equals("text")) {
((List<String>) currentSub.get("text")).add(line);
}
}
if (currentSub != null) {
((List<Map<String, Object>>) properties.get("subtitles")).add(currentSub);
}
}
public void printProperties() {
System.out.println("SRT File Properties:");
System.out.println("Encoding: " + properties.get("encoding"));
System.out.println("Line Endings: " + properties.get("line_endings").toString().replace("\n", "\\n").replace("\r", "\\r"));
System.out.println("Has Header/Footer: " + properties.get("has_header_footer"));
System.out.println("Number of Subtitles: " + ((List<?>) properties.get("subtitles")).size());
System.out.println("Subtitle Blocks:");
for (Map<String, Object> sub : (List<Map<String, Object>>) properties.get("subtitles")) {
System.out.println("Number: " + sub.get("number"));
System.out.println("Start: " + sub.get("start"));
System.out.println("End: " + sub.get("end"));
System.out.println("Text:");
for (String textLine : (List<String>) sub.get("text")) {
System.out.println(textLine);
}
System.out.println("---");
}
}
public void write(String filepath) throws IOException {
String lineEnd = (String) properties.get("line_endings");
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filepath))) {
for (Map<String, Object> sub : (List<Map<String, Object>>) properties.get("subtitles")) {
writer.write(sub.get("number") + lineEnd);
writer.write(sub.get("start") + " --> " + sub.get("end") + lineEnd);
for (String textLine : (List<String>) sub.get("text")) {
writer.write(textLine + lineEnd);
}
writer.write(lineEnd);
}
}
}
// Example usage:
// public static void main(String[] args) throws IOException {
// SRTHandler handler = new SRTHandler();
// handler.read("example.srt");
// handler.printProperties();
// handler.write("output.srt");
// }
}
6. JavaScript Class for .SRT Handling
/**
 * Reads, prints and writes SubRip (.srt) subtitle files (Node.js only).
 *
 * State lives in `this.properties`:
 *  - encoding: encoding passed to the fs read/write calls
 *  - lineEndings: '\r\n' or '\n', detected by read() and reused by write()
 *  - hasHeaderFooter: always false
 *  - subtitles: array of {number: number, start: string, end: string, text: string[]}
 */
class SRTHandler {
  constructor() {
    this.properties = {
      encoding: 'UTF-8',
      lineEndings: '\n',
      hasHeaderFooter: false,
      subtitles: [] // array of objects: {number: number, start: string, end: string, text: array<string>}
    };
  }

  // Note: file access here relies on Node.js's `fs` module, which browsers do not provide.
  // In a browser, read the file with FileReader instead, as in part 3.

  /**
   * Parses `filepath` and replaces the currently stored subtitles.
   * @param {string} filepath Path ending in ".srt" (case-insensitive).
   * @throws {Error} If the extension is not ".srt", or if fs cannot read the file.
   */
  read(filepath) {
    const fs = require('fs');
    if (!filepath.toLowerCase().endsWith('.srt')) {
      throw new Error('Invalid .SRT file path.');
    }

    let content = fs.readFileSync(filepath, this.properties.encoding);
    // Node does not strip a UTF-8 byte-order mark; left in place it makes the
    // first cue number fail the digits-only test and drops the first subtitle.
    if (content.charCodeAt(0) === 0xfeff) {
      content = content.slice(1);
    }

    this.properties.lineEndings = content.includes('\r\n') ? '\r\n' : '\n';

    // Start from a clean list so a second read() does not append duplicates.
    const parsed = [];
    this.properties.subtitles = parsed;

    const timecodePattern = /^(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})$/;
    let currentSub = null;
    let state = 'number';

    for (const rawLine of content.split(/\r?\n/)) {
      const line = rawLine.trim();

      if (line === '') {
        // A blank line terminates the cue in progress, if any.
        if (currentSub) parsed.push(currentSub);
        currentSub = null;
        state = 'number';
        continue;
      }

      if (state === 'number') {
        // Non-numeric lines in this state are ignored.
        if (/^\d+$/.test(line)) {
          currentSub = { number: parseInt(line, 10), start: '', end: '', text: [] };
          state = 'timecode';
        }
      } else if (state === 'timecode') {
        const match = line.match(timecodePattern);
        if (match) {
          currentSub.start = match[1];
          currentSub.end = match[2];
          state = 'text';
        }
      } else if (state === 'text') {
        currentSub.text.push(line);
      }
    }

    // The last cue is not necessarily followed by a blank line.
    if (currentSub) parsed.push(currentSub);
  }

  /** Logs the stored properties and every subtitle block to the console. */
  printProperties() {
    console.log('SRT File Properties:');
    console.log(`Encoding: ${this.properties.encoding}`);
    console.log(`Line Endings: ${this.properties.lineEndings.replace(/\r/g, '\\r').replace(/\n/g, '\\n')}`);
    console.log(`Has Header/Footer: ${this.properties.hasHeaderFooter}`);
    console.log(`Number of Subtitles: ${this.properties.subtitles.length}`);
    console.log('Subtitle Blocks:');
    this.properties.subtitles.forEach((sub) => {
      console.log(`Number: ${sub.number}`);
      console.log(`Start: ${sub.start}`);
      console.log(`End: ${sub.end}`);
      console.log('Text:');
      sub.text.forEach((textLine) => console.log(textLine));
      console.log('---');
    });
  }

  /**
   * Serializes the stored subtitles to `filepath`, terminating every line
   * with `properties.lineEndings`.
   * @param {string} filepath Destination path.
   */
  write(filepath) {
    const fs = require('fs');
    const eol = this.properties.lineEndings;
    let output = '';
    for (const sub of this.properties.subtitles) {
      output += `${sub.number}${eol}`;
      output += `${sub.start} --> ${sub.end}${eol}`;
      for (const textLine of sub.text) {
        output += `${textLine}${eol}`;
      }
      output += eol;
    }
    fs.writeFileSync(filepath, output, this.properties.encoding);
  }
}
// Example usage (Node.js):
// const handler = new SRTHandler();
// handler.read('example.srt');
// handler.printProperties();
// handler.write('output.srt');
7. C++ Class for .SRT Handling
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
/// A single subtitle cue parsed from an .SRT file.
struct Subtitle {
    int number{0};                  // Sequence number from the cue's first line.
    std::string start;              // Start timecode, "HH:MM:SS,mmm".
    std::string end;                // End timecode, "HH:MM:SS,mmm".
    std::vector<std::string> text;  // Text lines with surrounding whitespace removed.
};

/// Reads, prints and writes SubRip (.srt) subtitle files.
///
/// File-level settings are kept in a string map under the keys "encoding",
/// "line_endings" and "has_header_footer"; parsed cues are kept in a vector.
class SRTHandler {
private:
    /// Parser position inside a cue.
    enum class State { Number, Timecode, Text };

    std::map<std::string, std::string> properties;
    std::vector<Subtitle> subtitles;

    /// True when `filepath` ends in ".srt", compared case-insensitively.
    static bool hasSrtExtension(const std::string& filepath) {
        static const std::string kExtension = ".srt";
        if (filepath.size() < kExtension.size()) {
            return false;
        }
        // Compare from the back so only the suffix of the path is inspected.
        return std::equal(kExtension.rbegin(), kExtension.rend(), filepath.rbegin(),
                          [](char expected, char actual) {
                              return expected == static_cast<char>(std::tolower(
                                                     static_cast<unsigned char>(actual)));
                          });
    }

    /// Returns `s` without leading/trailing spaces, tabs and carriage returns.
    static std::string trim(const std::string& s) {
        static const char* const kBlank = " \t\r";
        const std::string::size_type first = s.find_first_not_of(kBlank);
        if (first == std::string::npos) {
            return std::string();
        }
        const std::string::size_type last = s.find_last_not_of(kBlank);
        return s.substr(first, last - first + 1);
    }

public:
    SRTHandler() {
        properties["encoding"] = "UTF-8";
        properties["line_endings"] = "\n";
        properties["has_header_footer"] = "false";
    }

    /// Parses `filepath` and replaces the currently stored subtitles.
    ///
    /// @param filepath Path ending in ".srt" (case-insensitive).
    /// @throws std::invalid_argument if the extension is not ".srt".
    /// @throws std::runtime_error if the file cannot be opened.
    /// @throws std::out_of_range if a cue number does not fit in an int
    ///         (propagated from std::stoi).
    void read(const std::string& filepath) {
        if (!hasSrtExtension(filepath)) {
            throw std::invalid_argument("Invalid .SRT file path.");
        }

        // Binary mode: a text-mode stream may translate "\r\n" to "\n" on some
        // platforms, which would defeat the line-ending detection below.
        std::ifstream file(filepath, std::ios::binary);
        if (!file.is_open()) {
            throw std::runtime_error("Could not open file.");
        }
        std::string content((std::istreambuf_iterator<char>(file)),
                            std::istreambuf_iterator<char>());
        file.close();

        // A UTF-8 byte-order mark would make the first cue number fail the
        // digits-only match and silently drop the first subtitle.
        if (content.compare(0, 3, "\xEF\xBB\xBF") == 0) {
            content.erase(0, 3);
        }

        properties["line_endings"] =
            (content.find("\r\n") != std::string::npos) ? "\r\n" : "\n";

        // Start from a clean list so a second read() does not append duplicates.
        subtitles.clear();

        // Built once: constructing std::regex is expensive.
        static const std::regex kNumberLine("^\\d+$");
        static const std::regex kTimecodeLine(
            "^(\\d{2}:\\d{2}:\\d{2},\\d{3}) --> (\\d{2}:\\d{2}:\\d{2},\\d{3})$");

        Subtitle currentSub;
        bool inSub = false;
        State state = State::Number;

        std::istringstream stream(content);
        std::string rawLine;
        while (std::getline(stream, rawLine)) {
            // trim() also removes the '\r' left behind by CRLF terminators.
            const std::string line = trim(rawLine);

            if (line.empty()) {
                // A blank line terminates the cue in progress, if any.
                if (inSub) {
                    subtitles.push_back(currentSub);
                    inSub = false;
                }
                state = State::Number;
                continue;
            }

            switch (state) {
                case State::Number:
                    // Non-numeric lines in this state are ignored.
                    if (std::regex_match(line, kNumberLine)) {
                        // Reset first so text and timecodes of the previous
                        // cue cannot leak into this one.
                        currentSub = Subtitle{};
                        currentSub.number = std::stoi(line);
                        state = State::Timecode;
                        inSub = true;
                    }
                    break;
                case State::Timecode: {
                    std::smatch match;
                    if (std::regex_match(line, match, kTimecodeLine)) {
                        currentSub.start = match[1].str();
                        currentSub.end = match[2].str();
                        state = State::Text;
                    }
                    break;
                }
                case State::Text:
                    currentSub.text.push_back(line);
                    break;
            }
        }

        // The last cue is not necessarily followed by a blank line.
        if (inSub) {
            subtitles.push_back(currentSub);
        }
    }

    /// Prints the stored properties and every subtitle block to std::cout.
    void printProperties() const {
        std::cout << "SRT File Properties:" << '\n';
        std::cout << "Encoding: " << properties.at("encoding") << '\n';
        std::cout << "Line Endings: "
                  << (properties.at("line_endings") == "\r\n" ? "\\r\\n" : "\\n") << '\n';
        std::cout << "Has Header/Footer: " << properties.at("has_header_footer") << '\n';
        std::cout << "Number of Subtitles: " << subtitles.size() << '\n';
        std::cout << "Subtitle Blocks:" << '\n';
        for (const auto& sub : subtitles) {
            std::cout << "Number: " << sub.number << '\n';
            std::cout << "Start: " << sub.start << '\n';
            std::cout << "End: " << sub.end << '\n';
            std::cout << "Text:" << '\n';
            for (const auto& textLine : sub.text) {
                std::cout << textLine << '\n';
            }
            std::cout << "---" << '\n';
        }
        std::cout.flush();
    }

    /// Writes the stored subtitles to `filepath`, terminating every line with
    /// the stored "line_endings" value.
    ///
    /// @throws std::runtime_error if the file cannot be opened for writing.
    void write(const std::string& filepath) const {
        // Binary mode: a text-mode stream may expand '\n' again on some
        // platforms, turning a stored "\r\n" into "\r\r\n".
        std::ofstream file(filepath, std::ios::binary);
        if (!file.is_open()) {
            throw std::runtime_error("Could not open file for writing.");
        }
        const std::string& lineEnd = properties.at("line_endings");
        for (const auto& sub : subtitles) {
            file << sub.number << lineEnd;
            file << sub.start << " --> " << sub.end << lineEnd;
            for (const auto& textLine : sub.text) {
                file << textLine << lineEnd;
            }
            file << lineEnd;
        }
        file.close();
    }
};
// Example usage:
// int main() {
// try {
// SRTHandler handler;
// handler.read("example.srt");
// handler.printProperties();
// handler.write("output.srt");
// } catch (const std::exception& e) {
// std::cerr << e.what() << std::endl;
// }
// return 0;
// }