Task 007: .4TH File Format
Task 007: .4TH File Format
.4TH File Format Specifications
The `.4TH` file extension is associated with the Forth programming language, a stack-based, extensible language known for its use of Reverse Polish Notation (RPN). Based on available information, `.4TH` files are plain text files containing source code written in Forth, similar to how `.c` files contain C source code. They are used to store program instructions that can be executed by a Forth interpreter, such as Gforth. The `.4TH` extension is one of several used for Forth source code, alongside `.fs`, `.fth`, `.frt`, and `.f`. There is no formal binary structure or metadata intrinsic to the file system beyond standard text file properties, as `.4TH` files are essentially text-based source code files.
1. Properties of the .4TH File Format Intrinsic to Its File System
Since .4TH
files are plain text files, their intrinsic properties are those of a typical text file in the file system, with no additional binary structure or metadata specific to the Forth language. The following properties are relevant:
- File Extension: `.4th` (case-insensitive, e.g., `.4TH` or `.4th`).
- File Type: Text file (plain text, typically ASCII or UTF-8 encoded).
- MIME Type: `application/octet-stream` (no specific MIME type for `.4TH`, but treated as generic text or application data).
- Content: Contains Forth source code, which consists of words (commands), stack-based operations, and RPN-based expressions.
- Encoding: Typically ASCII or UTF-8, as Forth source code is human-readable text.
- File Size: Variable, depending on the length of the source code.
- Line Endings: Platform-dependent (`\n` for Unix-like systems, `\r\n` for Windows).
- Creation/Modification Timestamps: Standard file system metadata (e.g., creation date, last modified date).
- Permissions: Standard file system permissions (e.g., read, write, execute for owner/group/others on Unix-like systems).
- Path/Location: File system path where the file is stored (e.g., `/home/user/code/example.4th`).
These properties are not unique to .4TH
files but are inherited from the text file format. The content-specific aspect (Forth source code) is not a file system property but rather a semantic characteristic processed by a Forth interpreter.
2. Python Class for Handling .4TH Files
Below is a Python class that opens, reads, writes, and prints the properties of a .4TH
file. Since .4TH
files are plain text, the class focuses on file system properties and displays the content as Forth source code.
import os
import stat
import time
class FourthFileHandler:
    """Read, write and describe a plain-text Forth source file (``.4th``).

    The handler keeps the file's text in ``content`` and mirrors a few
    file-system facts (size, timestamps, permission string, line-ending
    style) as plain attributes that ``print_properties`` reports.
    """

    def __init__(self, filepath):
        """Remember *filepath*; nothing is read from disk until asked."""
        self.filepath = filepath
        self.extension = ".4th"
        self.mime_type = "application/octet-stream"
        self.content = ""
        self.encoding = "utf-8"
        self.file_size = 0
        self.creation_time = 0
        self.modification_time = 0
        self.permissions = ""
        self.line_endings = "unknown"

    def validate_extension(self):
        """Check if the file has a .4th extension (case-insensitive)."""
        return self.filepath.lower().endswith(self.extension)

    def read_file(self):
        """Read the content of the .4TH file and detect its line endings.

        Raises:
            ValueError: the path does not end in ``.4th``.
            IOError: the file could not be opened or decoded; the original
                exception is chained as ``__cause__``.
        """
        if not self.validate_extension():
            raise ValueError("File must have a .4th extension")
        try:
            # newline="" switches off universal-newline translation.  Without
            # it every "\r\n" arrives as "\n" and a Windows file can never be
            # recognised as such.
            with open(self.filepath, "r", encoding=self.encoding,
                      newline="") as f:
                raw = f.read()
        except Exception as e:
            raise IOError(f"Error reading file: {e}") from e
        self.line_endings = "windows" if "\r\n" in raw else "unix"
        # Keep exposing newline-normalised text, exactly as a default
        # text-mode read would have produced.
        self.content = raw.replace("\r\n", "\n").replace("\r", "\n")

    def write_file(self, content):
        """Write *content* to the .4TH file, then refresh the properties.

        Raises:
            ValueError: the path does not end in ``.4th``.
            IOError: writing failed, or the properties could not be read
                back afterwards.
        """
        if not self.validate_extension():
            raise ValueError("File must have a .4th extension")
        try:
            with open(self.filepath, "w", encoding=self.encoding) as f:
                f.write(content)
        except Exception as e:
            raise IOError(f"Error writing file: {e}") from e
        self.content = content
        # Outside the try block: a failed property lookup is reported as
        # such instead of being re-labelled as a write error.
        self.update_properties()

    def update_properties(self):
        """Refresh size, timestamps and permission string from the file system.

        Raises:
            IOError: the file could not be stat'ed.
        """
        try:
            stats = os.stat(self.filepath)
        except Exception as e:
            raise IOError(f"Error retrieving file properties: {e}") from e
        self.file_size = stats.st_size
        try:
            # Real creation time where the platform exposes it.
            self.creation_time = stats.st_birthtime
        except AttributeError:
            # Fallback: on Unix st_ctime is the last metadata change, the
            # closest value available when no birth time is reported.
            self.creation_time = stats.st_ctime
        self.modification_time = stats.st_mtime
        self.permissions = stat.filemode(stats.st_mode)

    def print_properties(self):
        """Print all file properties followed by the file content."""
        print(f"File Path: {self.filepath}")
        print(f"File Extension: {self.extension}")
        print(f"MIME Type: {self.mime_type}")
        print(f"Encoding: {self.encoding}")
        print(f"File Size: {self.file_size} bytes")
        print(f"Creation Time: {time.ctime(self.creation_time)}")
        print(f"Modification Time: {time.ctime(self.modification_time)}")
        print(f"Permissions: {self.permissions}")
        print(f"Line Endings: {self.line_endings}")
        print(f"Content:\n{self.content}")
# Example usage
if __name__ == "__main__":
    # Example .4TH file with sample Forth code
    sample_content = ": factorial ( n -- n! ) dup 0= if drop 1 exit then 1 swap begin dup 1- dup 0= if drop exit then swap * repeat ;"
    handler = FourthFileHandler("example.4th")
    handler.write_file(sample_content)  # Write sample content
    handler.read_file()  # Read the file back and detect line endings
    handler.update_properties()  # Refresh size, timestamps and permissions
    handler.print_properties()  # Print properties
This class:
- Validates the `.4th` extension.
- Reads the file content (Forth source code).
- Writes new content to the file.
- Retrieves and prints file system properties (e.g., size, timestamps, permissions).
- Detects line endings (Unix `\n` or Windows `\r\n`).
- Handles errors for file operations.
3. Java Class for Handling .4TH Files
Below is a Java class that performs similar operations for .4TH
files.
import java.io.*;
import java.nio.file.*;
import java.nio.file.attribute.*;
import java.time.Instant;
public class FourthFileHandler {
private String filepath;
private final String extension = ".4th";
private final String mimeType = "application/octet-stream";
private String content = "";
private String encoding = "UTF-8";
private long fileSize = 0;
private long creationTime = 0;
private long modificationTime = 0;
private String permissions = "";
private String lineEndings = "unknown";
public FourthFileHandler(String filepath) {
this.filepath = filepath;
}
public boolean validateExtension() {
return filepath.toLowerCase().endsWith(extension);
}
public void readFile() throws IOException {
if (!validateExtension()) {
throw new IllegalArgumentException("File must have a .4th extension");
}
try {
content = new String(Files.readAllBytes(Paths.get(filepath)), encoding);
lineEndings = content.contains("\r\n") ? "windows" : "unix";
} catch (IOException e) {
throw new IOException("Error reading file: " + e.getMessage());
}
}
public void writeFile(String content) throws IOException {
if (!validateExtension()) {
throw new IllegalArgumentException("File must have a .4th extension");
}
try {
Files.write(Paths.get(filepath), content.getBytes(encoding));
this.content = content;
updateProperties();
} catch (IOException e) {
throw new IOException("Error writing file: " + e.getMessage());
}
}
public void updateProperties() throws IOException {
try {
Path path = Paths.get(filepath);
fileSize = Files.size(path);
BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class);
creationTime = attrs.creationTime().toMillis();
modificationTime = attrs.lastModifiedTime().toMillis();
// Simplified permissions (read/write/execute for owner)
permissions = (Files.isReadable(path) ? "r" : "-") +
(Files.isWritable(path) ? "w" : "-") +
(Files.isExecutable(path) ? "x" : "-");
} catch (IOException e) {
throw new IOException("Error retrieving file properties: " + e.getMessage());
}
}
public void printProperties() {
System.out.println("File Path: " + filepath);
System.out.println("File Extension: " + extension);
System.out.println("MIME Type: " + mimeType);
System.out.println("Encoding: " + encoding);
System.out.println("File Size: " + fileSize + " bytes");
System.out.println("Creation Time: " + Instant.ofEpochMilli(creationTime));
System.out.println("Modification Time: " + Instant.ofEpochMilli(modificationTime));
System.out.println("Permissions: " + permissions);
System.out.println("Line Endings: " + lineEndings);
System.out.println("Content:\n" + content);
}
public static void main(String[] args) {
try {
String sampleContent = ": factorial ( n -- n! ) dup 0= if drop 1 exit then 1 swap begin dup 1- dup 0= if drop exit then swap * repeat ;";
FourthFileHandler handler = new FourthFileHandler("example.4th");
handler.writeFile(sampleContent);
handler.readFile();
handler.updateProperties();
handler.printProperties();
} catch (IOException e) {
e.printStackTrace();
}
}
}
This class:
- Validates the `.4th` extension.
- Reads and writes `.4TH` files using Java NIO.
- Retrieves file system properties (size, timestamps, basic permissions).
- Detects line endings.
- Prints all properties and content.
4. JavaScript Class for Handling .4TH Files (Node.js)
Below is a JavaScript class for Node.js, as .4TH
file operations require file system access.
const fs = require('fs').promises;
const path = require('path');
/**
 * Reads, writes and describes a plain-text Forth source file (`.4th`).
 *
 * The file's text is cached in `content`; size, timestamps, the owner
 * permission string and the detected line-ending style are cached alongside
 * it and reported by `printProperties()`.
 */
class FourthFileHandler {
  /** @param {string} filepath Path of the file; nothing is read yet. */
  constructor(filepath) {
    this.filepath = filepath;
    this.extension = '.4th';
    this.mimeType = 'application/octet-stream';
    this.content = '';
    this.encoding = 'utf8';
    this.fileSize = 0;
    this.creationTime = 0; // milliseconds since the epoch
    this.modificationTime = 0; // milliseconds since the epoch
    this.permissions = '';
    this.lineEndings = 'unknown';
  }

  /** @returns {boolean} true when the path ends in `.4th`, ignoring case. */
  validateExtension() {
    return this.filepath.toLowerCase().endsWith(this.extension);
  }

  /**
   * Loads the file into memory and detects its line-ending style.
   * @throws {Error} wrong extension, or the read failed (original error in `cause`).
   */
  async readFile() {
    if (!this.validateExtension()) {
      throw new Error('File must have a .4th extension');
    }
    try {
      this.content = await fs.readFile(this.filepath, this.encoding);
    } catch (error) {
      // Keep the original error (code, errno, stack) reachable via `cause`.
      throw new Error(`Error reading file: ${error.message}`, { cause: error });
    }
    this.lineEndings = this.content.includes('\r\n') ? 'windows' : 'unix';
  }

  /**
   * Writes `content` to the file and refreshes the cached properties.
   * @param {string} content Text to store.
   * @throws {Error} wrong extension, write failure, or property lookup failure.
   */
  async writeFile(content) {
    if (!this.validateExtension()) {
      throw new Error('File must have a .4th extension');
    }
    try {
      await fs.writeFile(this.filepath, content, this.encoding);
    } catch (error) {
      throw new Error(`Error writing file: ${error.message}`, { cause: error });
    }
    this.content = content;
    // Outside the try block: a failed property lookup is reported as such
    // instead of being re-labelled as a write error.
    await this.updateProperties();
  }

  /**
   * Refreshes size, timestamps and permission string from the file system.
   * @throws {Error} the file could not be stat'ed.
   */
  async updateProperties() {
    let stats;
    try {
      stats = await fs.stat(this.filepath);
    } catch (error) {
      throw new Error(`Error retrieving file properties: ${error.message}`, { cause: error });
    }
    this.fileSize = stats.size;
    // birthtimeMs is the creation time; ctimeMs is the last status change
    // and moves on every chmod/rename.
    this.creationTime = stats.birthtimeMs;
    this.modificationTime = stats.mtimeMs;
    this.permissions = this.getPermissionsString(stats.mode);
  }

  /**
   * Formats the owner permission bits of `mode` as an `rwx`-style string.
   * @param {number} mode File mode bits.
   * @returns {string} three characters, `-` for each missing permission.
   */
  getPermissionsString(mode) {
    const perms = [];
    perms.push((mode & 0o400) ? 'r' : '-');
    perms.push((mode & 0o200) ? 'w' : '-');
    perms.push((mode & 0o100) ? 'x' : '-');
    return perms.join('');
  }

  /** Prints every cached property followed by the file content. */
  printProperties() {
    console.log(`File Path: ${this.filepath}`);
    console.log(`File Extension: ${this.extension}`);
    console.log(`MIME Type: ${this.mimeType}`);
    console.log(`Encoding: ${this.encoding}`);
    console.log(`File Size: ${this.fileSize} bytes`);
    console.log(`Creation Time: ${new Date(this.creationTime)}`);
    console.log(`Modification Time: ${new Date(this.modificationTime)}`);
    console.log(`Permissions: ${this.permissions}`);
    console.log(`Line Endings: ${this.lineEndings}`);
    console.log(`Content:\n${this.content}`);
  }
}
// Example usage: write a sample Forth definition, read it back, print it.
(async () => {
  const sampleContent = ': factorial ( n -- n! ) dup 0= if drop 1 exit then 1 swap begin dup 1- dup 0= if drop exit then swap * repeat ;';
  const handler = new FourthFileHandler('example.4th');
  await handler.writeFile(sampleContent);
  await handler.readFile();
  await handler.updateProperties();
  handler.printProperties();
})().catch((error) => {
  // Any failure above ends up here; only the message is reported.
  console.error(error.message);
});
This class:
- Uses the Node.js `fs` module for file operations.
- Validates the `.4th` extension.
- Reads and writes file content.
- Retrieves file system properties (size, timestamps, permissions).
- Detects line endings.
- Prints properties and content.
5. C Program for Handling .4TH Files
C does not have a direct equivalent to a "class," so the implementation uses a struct and functions. This program assumes a POSIX-compliant system for file system operations.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <time.h>
/* Capacity of the in-memory content buffer, terminating NUL included.
 * Parenthesised so the macro expands safely inside larger expressions
 * (e.g. x / MAX_CONTENT_SIZE). */
#define MAX_CONTENT_SIZE (1024 * 1024) // 1MB max content size

/* State kept for one .4th source file: its text plus the file-system
 * properties that print_properties() reports. */
typedef struct {
    char* filepath;            /* heap copy of the path (strdup in create_handler) */
    char* extension;           /* string literal ".4th"; never freed */
    char* mime_type;           /* string literal; never freed */
    char* content;             /* heap buffer of MAX_CONTENT_SIZE bytes, NUL-terminated */
    char* encoding;            /* string literal "UTF-8"; never freed */
    long file_size;            /* st_size from stat() */
    time_t creation_time;      /* st_ctime from stat() */
    time_t modification_time;  /* st_mtime from stat() */
    char* permissions;         /* heap buffer of 10 bytes holding an "rwx"-style string */
    char* line_endings;        /* string literal: "unknown", "windows" or "unix" */
} FourthFileHandler;
/* Allocates a handler for `filepath` with an empty content buffer and
 * default properties.
 *
 * Returns NULL when `filepath` is NULL or any allocation fails; nothing is
 * leaked in that case. Release the result with free_handler(). */
FourthFileHandler* create_handler(const char* filepath) {
    if (!filepath) {
        return NULL;
    }
    FourthFileHandler* handler = (FourthFileHandler*)malloc(sizeof(FourthFileHandler));
    if (!handler) {
        return NULL;
    }
    handler->filepath = strdup(filepath);
    handler->content = (char*)malloc(MAX_CONTENT_SIZE);
    handler->permissions = (char*)malloc(10);
    if (!handler->filepath || !handler->content || !handler->permissions) {
        /* free(NULL) is a no-op, so a partial allocation unwinds cleanly. */
        free(handler->filepath);
        free(handler->content);
        free(handler->permissions);
        free(handler);
        return NULL;
    }
    handler->extension = ".4th";
    handler->mime_type = "application/octet-stream";
    handler->content[0] = '\0';
    handler->encoding = "UTF-8";
    handler->file_size = 0;
    handler->creation_time = 0;
    handler->modification_time = 0;
    handler->permissions[0] = '\0';
    handler->line_endings = "unknown";
    return handler;
}
/* Releases a handler and the three heap buffers it owns (filepath, content,
 * permissions). The remaining string members point at literals and are not
 * freed. Passing NULL is allowed and does nothing. */
void free_handler(FourthFileHandler* handler) {
    if (!handler) {
        return;
    }
    free(handler->filepath);
    free(handler->content);
    free(handler->permissions);
    free(handler);
}
/* Returns 1 when the text after the last '.' in the path equals the
 * handler's extension (case-insensitive), 0 otherwise, including for a NULL
 * handler or path. strcasecmp() is declared in <strings.h>. */
int validate_extension(FourthFileHandler* handler) {
    if (!handler || !handler->filepath) {
        return 0;
    }
    const char* ext = strrchr(handler->filepath, '.');
    return ext != NULL && strcasecmp(ext, handler->extension) == 0;
}
/* Loads the file into handler->content (NUL-terminated) and records whether
 * it contains "\r\n".
 *
 * Returns 1 on success. Returns 0 after printing a message to stderr when
 * the extension is wrong, the file cannot be opened, sized or read, or it
 * does not fit in the content buffer. */
int read_file(FourthFileHandler* handler) {
    if (!validate_extension(handler)) {
        fprintf(stderr, "File must have a .4th extension\n");
        return 0;
    }
    FILE* file = fopen(handler->filepath, "r");
    if (!file) {
        fprintf(stderr, "Error reading file: %s\n", strerror(errno));
        return 0;
    }
    /* ftell() returns -1 on failure; unchecked, that value would turn into
     * an enormous byte count once handed to fread(). */
    long size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error reading file: %s\n", strerror(errno));
        fclose(file);
        return 0;
    }
    if (size >= MAX_CONTENT_SIZE) {
        fprintf(stderr, "File too large\n");
        fclose(file);
        return 0;
    }
    size_t read = fread(handler->content, 1, (size_t)size, file);
    /* A short count alone is not an error (text mode may translate line
     * endings); only a stream error is. */
    if (ferror(file)) {
        fprintf(stderr, "Error reading file: %s\n", strerror(errno));
        fclose(file);
        return 0;
    }
    handler->content[read] = '\0';
    handler->line_endings = strstr(handler->content, "\r\n") ? "windows" : "unix";
    fclose(file);
    return 1;
}
/* Writes `content` to the handler's file and mirrors it into
 * handler->content.
 *
 * Returns 1 on success. Returns 0 after printing a message to stderr when
 * the extension is wrong, `content` is NULL or does not fit in the content
 * buffer, or the file cannot be opened or written. */
int write_file(FourthFileHandler* handler, const char* content) {
    if (!validate_extension(handler)) {
        fprintf(stderr, "File must have a .4th extension\n");
        return 0;
    }
    if (!content) {
        fprintf(stderr, "Error writing file: no content\n");
        return 0;
    }
    /* Validate before opening: "w" truncates the file, and the in-memory
     * buffer only has room for MAX_CONTENT_SIZE bytes including the NUL. */
    size_t length = strlen(content);
    if (length >= MAX_CONTENT_SIZE) {
        fprintf(stderr, "Content too large\n");
        return 0;
    }
    FILE* file = fopen(handler->filepath, "w");
    if (!file) {
        fprintf(stderr, "Error writing file: %s\n", strerror(errno));
        return 0;
    }
    int failed = fputs(content, file) == EOF;
    /* fclose() flushes buffered data, so it can be the call that reports a
     * full disk. */
    if (fclose(file) != 0) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "Error writing file: %s\n", strerror(errno));
        return 0;
    }
    /* memmove tolerates `content` pointing into handler->content. */
    memmove(handler->content, content, length + 1);
    return 1;
}
/* Refreshes size, timestamps and the owner "rwx" string from stat().
 * Returns 1 on success; on failure prints a message to stderr and returns 0.
 * NOTE(review): st_ctime is stored as creation_time; POSIX defines it as the
 * last status change -- confirm that is the intended value. */
int update_properties(FourthFileHandler* handler) {
    struct stat info;
    if (stat(handler->filepath, &info) != 0) {
        fprintf(stderr, "Error retrieving file properties: %s\n", strerror(errno));
        return 0;
    }

    handler->file_size = info.st_size;
    handler->creation_time = info.st_ctime;
    handler->modification_time = info.st_mtime;

    /* Owner bits only, written as three flag characters plus the NUL. */
    char* flags = handler->permissions;
    flags[0] = (info.st_mode & S_IRUSR) ? 'r' : '-';
    flags[1] = (info.st_mode & S_IWUSR) ? 'w' : '-';
    flags[2] = (info.st_mode & S_IXUSR) ? 'x' : '-';
    flags[3] = '\0';
    return 1;
}
/* Prints every cached property of the handler, then the file content, to
 * stdout. */
void print_properties(FourthFileHandler* handler) {
    const FourthFileHandler* h = handler;

    printf("File Path: %s\n"
           "File Extension: %s\n"
           "MIME Type: %s\n"
           "Encoding: %s\n"
           "File Size: %ld bytes\n",
           h->filepath, h->extension, h->mime_type, h->encoding, h->file_size);

    /* ctime() reuses one static buffer, so each timestamp is printed before
     * the next is formatted; its result already ends in '\n'. */
    printf("Creation Time: %s", ctime(&h->creation_time));
    printf("Modification Time: %s", ctime(&h->modification_time));

    printf("Permissions: %s\n"
           "Line Endings: %s\n"
           "Content:\n%s\n",
           h->permissions, h->line_endings, h->content);
}
/* Demo: writes a sample Forth definition to example.4th, reads it back,
 * refreshes the properties and prints them. Exits with EXIT_FAILURE when the
 * handler cannot be created or any step fails. */
int main(void) {
    const char* sample_content = ": factorial ( n -- n! ) dup 0= if drop 1 exit then 1 swap begin dup 1- dup 0= if drop exit then swap * repeat ;";
    FourthFileHandler* handler = create_handler("example.4th");
    if (!handler) {
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }
    int ok = write_file(handler, sample_content) &&
             read_file(handler) &&
             update_properties(handler);
    if (ok) {
        print_properties(handler);
    }
    free_handler(handler);
    return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
This program:
- Uses a `struct` to mimic a class.
- Validates the `.4th` extension.
- Reads and writes file content.
- Retrieves file system properties using POSIX `stat`.
- Detects line endings.
- Prints properties and content.
- Includes memory management to prevent leaks.
Notes
- Forth Interpretation: None of the classes interpret or execute the Forth code, as this requires a Forth interpreter like Gforth. They only handle file I/O and properties.
- Error Handling: Each implementation includes basic error handling for file operations.
- Permissions: Permissions are simplified (owner read/write/execute) for portability. More detailed permissions could be added for specific platforms.
- Line Endings: Detection is basic (checking for `\r\n` vs. `\n`). More robust detection could be implemented if needed.
- Portability: The C code assumes a POSIX system. For Windows, additional headers (e.g., `<windows.h>`) would be needed for file properties.
These implementations cover the requirements for reading, writing, and displaying .4TH
file properties across Python, Java, JavaScript, and C, based on the text-based nature of the .4TH
file format.
File Format Specifications for .4TH
The .4TH file format is the Hcode eXecutable (HX) binary format used by the 4tH Forth compiler. It stores compiled Forth programs in a portable, architecture-independent manner, including a header, various segments for code, strings, variables, and character data, and a checksum for integrity. Numbers in the header and code segment are encoded in a compact, variable-length format with a type byte and little-endian data bytes. The file is little-endian overall, with optional segments for "sleeping" virtual machines (hibernated states). The structure is: header (fixed + variable), String Segment, Integer Segment (optional), Code Segment, Character Segment (optional), followed by a 1-byte checksum.
List of all the properties of this file format intrinsic to its file system:
- Tiny number length (1 byte, typically 1)
- Short number length (1 byte, typically 2)
- Long number length (1 byte, typically 4)
- CELL_MAX (4 bytes, little-endian signed 32-bit integer, largest positive cell value, e.g., 2147483647 for 32-bit systems)
- Version (2 bytes, little-endian unsigned 16-bit integer, e.g., 0x364 for version 3.64)
- Application byte (1 byte, typically 0)
- Code segment size (variable-length encoded number, count of elements/numbers in the code segment)
- String segment size (variable-length encoded number, byte count of the string segment)
- Variable area offset (variable-length encoded number, >0 for hibernated VMs, indicates runtime offset)
- Variable area size (variable-length encoded number, cell count of the integer/variable segment)
- Character area size (variable-length encoded number, byte count of the character segment)
- String segment data (byte array of constant strings)
- Integer segment data (array of signed 32-bit integers, for stacks, variables, etc.; byte length = variable area size * long number length)
- Code segment data (array of decoded numbers representing tokens and arguments)
- Character segment data (byte array for writable character data, e.g., buffers)
Python class:
import struct
class FourthFile:
    """In-memory model of the binary image layout described above.

    Layout handled by this class: a 10-byte fixed header, five encoded size
    fields, the string segment, the integer segment (``var_size`` cells of
    ``long_len`` bytes each), the code segment (``code_size`` encoded
    numbers), the character segment and a final XOR checksum byte.

    An encoded number is one type byte, optionally followed by a
    little-endian magnitude.  Type-byte bits: 0x01 sign, 0x02 tiny width,
    0x04 short width, 0x08 "zero" (with the sign bit: ``-(cell_max + 1)``),
    0x10 "one" (with the sign bit: ``-1``); no width bit means long width.
    """

    def __init__(self):
        """Create an empty image with every size and length set to zero."""
        self.tiny_len = 0
        self.short_len = 0
        self.long_len = 0
        self.cell_max = 0
        self.version = 0
        self.app_byte = 0
        self.code_size = 0
        self.string_size = 0
        self.var_offset = 0
        self.var_size = 0
        self.char_size = 0
        self.string_data = b''
        self.integer_data = []  # list of ints
        self.code_data = []  # list of ints
        self.char_data = b''

    @staticmethod
    def _xor_checksum(payload):
        """Return the XOR of every byte in *payload*."""
        checksum = 0
        for byte in payload:
            checksum ^= byte
        return checksum

    def _read_number(self, data, pos):
        """Decode the number starting at ``data[pos]``.

        Returns:
            ``(value, next_pos)`` where *next_pos* is the index just past
            the encoded number.

        Raises:
            ValueError: the number runs past the end of *data*.
        """
        if pos >= len(data):
            raise ValueError("Truncated file: missing number")
        type_byte = data[pos]
        pos += 1
        negative = bool(type_byte & 0x01)
        if type_byte & 0x08:  # HCZERO
            return (-(self.cell_max + 1) if negative else 0), pos
        if type_byte & 0x10:  # HCONE
            return (-1 if negative else 1), pos
        if type_byte & 0x02:  # HCBYTE
            size = self.tiny_len
        elif type_byte & 0x04:  # HCSHRT
            size = self.short_len
        else:
            size = self.long_len
        end = pos + size
        # A short slice would silently decode to a smaller number.
        if end > len(data):
            raise ValueError("Truncated file: incomplete number")
        magnitude = int.from_bytes(data[pos:end], 'little', signed=False)
        return (-magnitude if negative else magnitude), end

    def open_and_decode(self, filename):
        """Load *filename* and populate every attribute from it.

        Raises:
            ValueError: the file is too short, the checksum does not match,
                a size field is negative, or the segments do not add up to
                the file length.
            OSError: the file could not be read.
        """
        with open(filename, 'rb') as f:
            data = f.read()
        if len(data) < 11:
            raise ValueError("Invalid file")
        if self._xor_checksum(data[:-1]) != data[-1]:
            raise ValueError("Checksum mismatch")
        payload_end = len(data) - 1  # the last byte is the checksum

        # Parse header
        self.tiny_len = data[0]
        self.short_len = data[1]
        self.long_len = data[2]
        self.cell_max = struct.unpack_from('<i', data, 3)[0]
        self.version = struct.unpack_from('<H', data, 7)[0]
        self.app_byte = data[9]
        pos = 10
        self.code_size, pos = self._read_number(data, pos)
        self.string_size, pos = self._read_number(data, pos)
        self.var_offset, pos = self._read_number(data, pos)
        self.var_size, pos = self._read_number(data, pos)
        self.char_size, pos = self._read_number(data, pos)
        if min(self.code_size, self.string_size,
               self.var_size, self.char_size) < 0:
            raise ValueError("Negative segment size")

        # String segment
        self.string_data = data[pos:pos + self.string_size]
        pos += self.string_size

        # Integer segment: var_size signed cells of long_len bytes each, so
        # the element width matches the amount the position advances by.
        cell_bytes = self.long_len
        if self.var_size > 0 and cell_bytes == 0:
            raise ValueError("Invalid long number length")
        int_size_bytes = self.var_size * cell_bytes
        if pos + int_size_bytes > payload_end:
            raise ValueError("File size mismatch")
        self.integer_data = [
            int.from_bytes(data[start:start + cell_bytes], 'little',
                           signed=True)
            for start in range(pos, pos + int_size_bytes, cell_bytes or 1)
        ]
        pos += int_size_bytes

        # Code segment (decode numbers)
        self.code_data = []
        for _ in range(self.code_size):
            value, pos = self._read_number(data, pos)
            self.code_data.append(value)

        # Character segment
        self.char_data = data[pos:pos + self.char_size]
        pos += self.char_size
        if pos != payload_end:
            raise ValueError("File size mismatch")

    def write(self, filename):
        """Serialise the image to *filename*, checksum included.

        Raises:
            ValueError: a size field disagrees with the length of its
                segment data (the file would not be decodable).
            OverflowError: a value does not fit the configured width.
            OSError: the file could not be written.
        """
        if (self.code_size != len(self.code_data)
                or self.string_size != len(self.string_data)
                or self.var_size != len(self.integer_data)
                or self.char_size != len(self.char_data)):
            raise ValueError("Segment sizes do not match segment data")

        data = bytearray()
        data.append(self.tiny_len)
        data.append(self.short_len)
        data.append(self.long_len)
        data.extend(struct.pack('<i', self.cell_max))
        data.extend(struct.pack('<H', self.version))
        data.append(self.app_byte)
        # Append encoded numbers for sizes
        for val in (self.code_size, self.string_size, self.var_offset,
                    self.var_size, self.char_size):
            data.extend(self._encode_number(val))
        # Append segments
        data.extend(self.string_data)
        # Integer cells use long_len bytes, mirroring open_and_decode.
        for cell in self.integer_data:
            data.extend(cell.to_bytes(self.long_len, 'little', signed=True))
        # Code
        for val in self.code_data:
            data.extend(self._encode_number(val))
        # Character
        data.extend(self.char_data)
        # Checksum
        data.append(self._xor_checksum(data))
        with open(filename, 'wb') as f:
            f.write(data)

    def _encode_number(self, value):
        """Return the shortest encoding of *value* as a ``bytearray``.

        Raises:
            OverflowError: the magnitude does not fit in ``long_len`` bytes.
        """
        enc = bytearray()
        sign = 1 if value < 0 else 0
        abs_val = abs(value)
        if value == 0 or value == -(self.cell_max + 1):
            enc.append(0x08 | sign)
        elif value == 1 or value == -1:
            enc.append(0x10 | sign)
        else:
            if abs_val < (1 << (8 * self.tiny_len)):
                size = self.tiny_len
                type_byte = 0x02 | sign
            elif abs_val < (1 << (8 * self.short_len)):
                size = self.short_len
                type_byte = 0x04 | sign
            else:
                size = self.long_len
                type_byte = sign
            enc.append(type_byte)
            enc.extend(abs_val.to_bytes(size, 'little'))
        return enc
- Java class:
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
public class FourthFile {
private byte tinyLen;
private byte shortLen;
private byte longLen;
private int cellMax;
private short version;
private byte appByte;
private int codeSize;
private int stringSize;
private int varOffset;
private int varSize;
private int charSize;
private byte[] stringData;
private List<Integer> integerData = new ArrayList<>();
private List<Integer> codeData = new ArrayList<>();
private byte[] charData;
private int readNumber(ByteBuffer bb) {
byte typeByte = bb.get();
if ((typeByte & 0x08) != 0) { // HCZERO
return ((typeByte & 0x01) != 0) ? - (cellMax + 1) : 0;
} else if ((typeByte & 0x10) != 0) { // HCONE
return ((typeByte & 0x01) != 0) ? -1 : 1;
} else {
int size;
if ((typeByte & 0x02) != 0) {
size = tinyLen;
} else if ((typeByte & 0x04) != 0) {
size = shortLen;
} else {
size = longLen;
}
int val = 0;
for (int i = 0; i < size; i++) {
val |= (bb.get() & 0xFF) << (i * 8);
}
return ((typeByte & 0x01) != 0) ? -val : val;
}
}
public void openAndDecode(String filename) throws IOException {
byte[] data = Files.readAllBytes(Paths.get(filename));
ByteBuffer bb = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
// Check checksum
int xor = 0;
for (int i = 0; i < data.length - 1; i++) {
xor ^= data[i] & 0xFF;
}
if (xor != (data[data.length - 1] & 0xFF)) {
throw new IOException("Checksum mismatch");
}
// Header
tinyLen = bb.get();
shortLen = bb.get();
longLen = bb.get();
cellMax = bb.getInt();
version = bb.getShort();
appByte = bb.get();
codeSize = readNumber(bb);
stringSize = readNumber(bb);
varOffset = readNumber(bb);
varSize = readNumber(bb);
charSize = readNumber(bb);
// String
stringData = new byte[stringSize];
bb.get(stringData);
// Integer
for (int i = 0; i < varSize; i++) {
integerData.add(bb.getInt());
}
// Code
for (int i = 0; i < codeSize; i++) {
codeData.add(readNumber(bb));
}
// Character
charData = new byte[charSize];
bb.get(charData);
if (bb.position() != data.length - 1) {
throw new IOException("File size mismatch");
}
}
private byte[] encodeNumber(int value) {
ByteBuffer enc = ByteBuffer.allocate(5).order(ByteOrder.LITTLE_ENDIAN);
byte sign = (byte) (value < 0 ? 1 : 0);
int absVal = Math.abs(value);
if (value == 0 || value == - (cellMax + 1)) {
byte typeByte = (byte) (0x08 | sign);
enc.put(typeByte);
} else if (value == 1 || value == -1) {
byte typeByte = (byte) (0x10 | sign);
enc.put(typeByte);
} else {
byte typeByte = sign;
int size;
if (absVal < (1 << (8 * tinyLen))) {
size = tinyLen;
typeByte |= 0x02;
} else if (absVal < (1 << (8 * shortLen))) {
size = shortLen;
typeByte |= 0x04;
} else {
size = longLen;
}
enc.put(typeByte);
for (int i = 0; i < size; i++) {
enc.put((byte) (absVal >> (i * 8)));
}
}
byte[] result = new byte[enc.position()];
enc.flip();
enc.get(result);
return result;
}
public void write(String filename) throws IOException {
ByteBuffer bb = ByteBuffer.allocate(1024 * 1024).order(ByteOrder.LITTLE_ENDIAN); // Oversize buffer
bb.put(tinyLen);
bb.put(shortLen);
bb.put(longLen);
bb.putInt(cellMax);
bb.putShort(version);
bb.put(appByte);
bb.put(encodeNumber(codeSize));
bb.put(encodeNumber(stringSize));
bb.put(encodeNumber(varOffset));
bb.put(encodeNumber(varSize));
bb.put(encodeNumber(charSize));
bb.put(stringData);
for (int i : integerData) {
bb.putInt(i);
}
for (int val : codeData) {
bb.put(encodeNumber(val));
}
bb.put(charData);
byte[] content = new byte[bb.position()];
bb.flip();
bb.get(content);
// Checksum
int xor = 0;
for (byte b : content) {
xor ^= b & 0xFF;
}
byte[] finalData = new byte[content.length + 1];
System.arraycopy(content, 0, finalData, 0, content.length);
finalData[content.length] = (byte) xor;
Files.write(Paths.get(filename), finalData);
}
}
- JavaScript class (for Node.js):
const fs = require('fs');
/**
 * In-memory model of the binary image layout described above: a 10-byte
 * fixed header, five encoded size fields, the string, integer, code and
 * character segments, and a trailing XOR checksum byte.
 *
 * An encoded number is one type byte optionally followed by a little-endian
 * magnitude. Type-byte bits: 0x01 sign, 0x02 tiny width, 0x04 short width,
 * 0x08 "zero", 0x10 "one"; no width bit means long width.
 */
class FourthFile {
  constructor() {
    this.tinyLen = 0;
    this.shortLen = 0;
    this.longLen = 0;
    this.cellMax = 0;
    this.version = 0;
    this.appByte = 0;
    this.codeSize = 0;
    this.stringSize = 0;
    this.varOffset = 0;
    this.varSize = 0;
    this.charSize = 0;
    this.stringData = Buffer.alloc(0);
    this.integerData = [];
    this.codeData = [];
    this.charData = Buffer.alloc(0);
  }

  /**
   * Decodes the number starting at `data[pos]`.
   * @param {Buffer} data Whole file contents.
   * @param {number} pos Index of the type byte.
   * @returns {{value: number, newPos: number}} value and index just past it.
   * @throws {RangeError} the number runs past the end of `data`.
   */
  _readNumber(data, pos) {
    const typeByte = data.readUInt8(pos);
    pos++;
    const negative = (typeByte & 0x01) !== 0;
    if (typeByte & 0x08) { // HCZERO
      return { value: negative ? -(this.cellMax + 1) : 0, newPos: pos };
    }
    if (typeByte & 0x10) { // HCONE
      return { value: negative ? -1 : 1, newPos: pos };
    }
    let size;
    if (typeByte & 0x02) size = this.tinyLen;
    else if (typeByte & 0x04) size = this.shortLen;
    else size = this.longLen;
    // Arithmetic instead of `|` and `<<`: bitwise operators work on signed
    // 32-bit values, so magnitudes of 2^31 and above would turn negative.
    let magnitude = 0;
    for (let i = 0; i < size; i++) {
      magnitude += data.readUInt8(pos + i) * Math.pow(2, 8 * i);
    }
    return { value: negative ? -magnitude : magnitude, newPos: pos + size };
  }

  /**
   * Loads `filename` and replaces every field with its contents.
   * @throws {Error} too short, checksum mismatch, or size mismatch.
   */
  openAndDecode(filename) {
    const data = fs.readFileSync(filename);
    // 10 header bytes plus the checksum byte is the bare minimum.
    if (data.length < 11) throw new Error('Invalid file');
    let xor = 0;
    for (let i = 0; i < data.length - 1; i++) {
      xor ^= data[i];
    }
    if (xor !== data[data.length - 1]) throw new Error('Checksum mismatch');
    this.tinyLen = data[0];
    this.shortLen = data[1];
    this.longLen = data[2];
    this.cellMax = data.readInt32LE(3);
    this.version = data.readUInt16LE(7);
    this.appByte = data[9];
    let pos = 10;
    ({ value: this.codeSize, newPos: pos } = this._readNumber(data, pos));
    ({ value: this.stringSize, newPos: pos } = this._readNumber(data, pos));
    ({ value: this.varOffset, newPos: pos } = this._readNumber(data, pos));
    ({ value: this.varSize, newPos: pos } = this._readNumber(data, pos));
    ({ value: this.charSize, newPos: pos } = this._readNumber(data, pos));
    this.stringData = data.subarray(pos, pos + this.stringSize);
    pos += this.stringSize;
    this.integerData = [];
    for (let i = 0; i < this.varSize; i++) {
      this.integerData.push(data.readInt32LE(pos));
      pos += 4;
    }
    this.codeData = [];
    for (let i = 0; i < this.codeSize; i++) {
      const res = this._readNumber(data, pos);
      this.codeData.push(res.value);
      pos = res.newPos;
    }
    this.charData = data.subarray(pos, pos + this.charSize);
    pos += this.charSize;
    if (pos !== data.length - 1) throw new Error('File size mismatch');
  }

  /**
   * Returns the shortest encoding of `value`.
   * @param {number} value Integer to encode.
   * @returns {Buffer} type byte followed by the magnitude bytes, if any.
   */
  _encodeNumber(value) {
    const sign = value < 0 ? 1 : 0;
    const absVal = Math.abs(value);
    if (value === 0 || value === -(this.cellMax + 1)) {
      return Buffer.from([0x08 | sign]);
    }
    if (value === 1 || value === -1) {
      return Buffer.from([0x10 | sign]);
    }
    let typeByte = sign;
    let size;
    // Math.pow instead of `1 << n`: shift counts wrap at 32, so `1 << 32`
    // evaluates to 1.
    if (absVal < Math.pow(2, 8 * this.tinyLen)) {
      size = this.tinyLen;
      typeByte |= 0x02;
    } else if (absVal < Math.pow(2, 8 * this.shortLen)) {
      size = this.shortLen;
      typeByte |= 0x04;
    } else {
      size = this.longLen;
    }
    // Sized to fit, so widths above four bytes do not overflow the buffer.
    const enc = Buffer.alloc(1 + size);
    enc.writeUInt8(typeByte, 0);
    for (let i = 0; i < size; i++) {
      enc.writeUInt8(Math.floor(absVal / Math.pow(2, 8 * i)) % 256, 1 + i);
    }
    return enc;
  }

  /**
   * Serialises the image to `filename`, checksum included.
   * @throws {TypeError|RangeError} `integerData` has fewer than `varSize` valid entries.
   */
  write(filename) {
    // Collect the pieces and concatenate once; re-concatenating after every
    // piece copies the whole image each time.
    const header = Buffer.alloc(10);
    header.writeUInt8(this.tinyLen, 0);
    header.writeUInt8(this.shortLen, 1);
    header.writeUInt8(this.longLen, 2);
    header.writeInt32LE(this.cellMax, 3);
    header.writeUInt16LE(this.version, 7);
    header.writeUInt8(this.appByte, 9);
    const parts = [header];
    for (const size of [this.codeSize, this.stringSize, this.varOffset, this.varSize, this.charSize]) {
      parts.push(this._encodeNumber(size));
    }
    parts.push(this.stringData);
    const intBuf = Buffer.alloc(this.varSize * 4);
    for (let i = 0; i < this.varSize; i++) {
      intBuf.writeInt32LE(this.integerData[i], i * 4);
    }
    parts.push(intBuf);
    for (const val of this.codeData) {
      parts.push(this._encodeNumber(val));
    }
    parts.push(this.charData);
    const body = Buffer.concat(parts);
    let xor = 0;
    for (const b of body) {
      xor ^= b;
    }
    fs.writeFileSync(filename, Buffer.concat([body, Buffer.from([xor])]));
  }
}
- C class (implemented as a struct with associated functions, since C does not have classes; assuming C99 or later):
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
/* In-memory form of one decoded image: header fields, the five size fields
 * that follow the header, and the four segment buffers.
 * Bytes per tiny / short / long encoded number come first (header bytes 0-2). */
typedef struct {
uint8_t tiny_len;
uint8_t short_len;
uint8_t long_len;
/* Header bytes 3-6; a "zero" number with the sign bit set decodes to
 * -(cell_max + 1). */
int32_t cell_max;
/* Header bytes 7-8. */
uint16_t version;
/* Header byte 9. */
uint8_t app_byte;
/* Size fields: element count of code_data, byte count of string_data,
 * variable area offset, cell count of integer_data, byte count of char_data. */
int32_t code_size;
int32_t string_size;
int32_t var_offset;
int32_t var_size;
int32_t char_size;
/* Heap buffers allocated while decoding and released by fourthfile_free();
 * NULL until a file has been decoded. */
uint8_t *string_data;
int32_t *integer_data;
int32_t *code_data;
uint8_t *char_data;
} FourthFile;
/* Allocates a zero-initialised FourthFile (all sizes 0, all segment pointers
 * NULL). Returns NULL when memory is exhausted. Release with
 * fourthfile_free(). */
FourthFile* fourthfile_new() {
    /* calloc zeroes the block and, unlike malloc + memset, cannot end up
     * writing through a NULL pointer when the allocation fails. */
    return calloc(1, sizeof(FourthFile));
}
/* Releases a FourthFile and its four segment buffers. Passing NULL is
 * allowed and does nothing. */
void fourthfile_free(FourthFile* f) {
    if (!f) {
        return;
    }
    /* free(NULL) is a no-op, so segments that were never filled are fine. */
    free(f->string_data);
    free(f->integer_data);
    free(f->code_data);
    free(f->char_data);
    free(f);
}
/* Result of decoding one number: the value and the number of bytes it
 * occupied, type byte included. */
typedef struct {
    int32_t value;
    size_t length;
} ReadNumberResult;

/* Decodes the number whose type byte is at data[pos].
 *
 * Type-byte bits: 0x01 sign, 0x02 tiny width, 0x04 short width, 0x08 "zero"
 * (with the sign bit: -(cell_max + 1)), 0x10 "one" (with the sign bit: -1);
 * no width bit means long width.
 *
 * The function has no way to know where `data` ends: the caller must make
 * sure the type byte and the magnitude bytes it announces are in bounds. */
ReadNumberResult read_number(const uint8_t* data, size_t pos, FourthFile* f) {
    ReadNumberResult res = {0, 1};  /* the type byte is always consumed */
    uint8_t type_byte = data[pos];
    int negative = (type_byte & 0x01) != 0;
    pos++;
    if (type_byte & 0x08) { // HCZERO
        res.value = negative ? (int32_t)(-(f->cell_max + 1LL)) : 0;
    } else if (type_byte & 0x10) { // HCONE
        res.value = negative ? -1 : 1;
    } else {
        uint8_t size;
        if (type_byte & 0x02) size = f->tiny_len;
        else if (type_byte & 0x04) size = f->short_len;
        else size = f->long_len;
        uint32_t val = 0;
        for (uint8_t i = 0; i < size; i++) {
            /* Only the low four bytes fit in the 32-bit result; shifting a
             * 32-bit value by 32 or more is undefined behaviour. */
            if (i < 4) {
                val |= ((uint32_t)data[pos + i]) << (i * 8);
            }
        }
        /* Negate in unsigned arithmetic: -(int32_t)val overflows for
         * 0x80000000. */
        res.value = (int32_t)(negative ? (0u - val) : val);
        res.length += size;
    }
    return res;
}
/* Reads a little-endian 32-bit value regardless of host byte order. */
static uint32_t ff_load_le32(const uint8_t* p) {
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Decodes one number at *pos after checking that all of it lies before
 * `limit`. Returns 1 and advances *pos on success, 0 when it is truncated. */
static int ff_read_checked(const uint8_t* data, size_t limit, size_t* pos,
                           FourthFile* f, int32_t* out) {
    if (*pos >= limit) {
        return 0;
    }
    uint8_t type_byte = data[*pos];
    size_t needed = 1;
    if (!(type_byte & (0x08 | 0x10))) {
        if (type_byte & 0x02) needed += f->tiny_len;
        else if (type_byte & 0x04) needed += f->short_len;
        else needed += f->long_len;
    }
    if (needed > limit - *pos) {
        return 0;
    }
    ReadNumberResult res = read_number(data, *pos, f);
    *out = res.value;
    *pos += res.length;
    return 1;
}

/* Loads `filename` into `f`, replacing any previously decoded segments.
 *
 * `f` must come from fourthfile_new() (or be zero-initialised): segment
 * pointers already stored in it are freed before being replaced.
 *
 * Returns 0 on success, -1 when the file cannot be opened or read, -2 on a
 * checksum mismatch, -3 when the file is too short or its segment sizes do
 * not add up to the file length, -4 when memory is exhausted. */
int fourthfile_open_and_decode(FourthFile* f, const char* filename) {
    int rc = -3;
    uint8_t* data = NULL;
    size_t size = 0;
    size_t limit = 0;
    size_t pos = 10;
    uint8_t checksum = 0;
    long file_len;
    FILE* file;

    if (!f || !filename) return -1;
    file = fopen(filename, "rb");
    if (!file) return -1;
    if (fseek(file, 0, SEEK_END) != 0 || (file_len = ftell(file)) < 0 ||
        fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        return -1;
    }
    size = (size_t)file_len;
    /* 10 header bytes plus the checksum byte is the bare minimum; this also
     * keeps `size - 1` from wrapping around on an empty file. */
    if (size < 11) {
        fclose(file);
        return -3;
    }
    data = malloc(size);
    if (!data) {
        fclose(file);
        return -4;
    }
    if (fread(data, 1, size, file) != size) {
        fclose(file);
        rc = -1;
        goto done;
    }
    fclose(file);

    // Checksum
    limit = size - 1;  /* everything before the checksum byte */
    for (size_t i = 0; i < limit; i++) {
        checksum ^= data[i];
    }
    if (checksum != data[limit]) {
        rc = -2;
        goto done;
    }

    // Header
    f->tiny_len = data[0];
    f->short_len = data[1];
    f->long_len = data[2];
    f->cell_max = (int32_t)ff_load_le32(data + 3);
    f->version = (uint16_t)(data[7] | (data[8] << 8));
    f->app_byte = data[9];
    if (!ff_read_checked(data, limit, &pos, f, &f->code_size) ||
        !ff_read_checked(data, limit, &pos, f, &f->string_size) ||
        !ff_read_checked(data, limit, &pos, f, &f->var_offset) ||
        !ff_read_checked(data, limit, &pos, f, &f->var_size) ||
        !ff_read_checked(data, limit, &pos, f, &f->char_size)) {
        goto done;
    }
    if (f->code_size < 0 || f->string_size < 0 || f->var_size < 0 || f->char_size < 0) {
        goto done;
    }

    /* Drop segments left over from an earlier decode instead of leaking them. */
    free(f->string_data);  f->string_data = NULL;
    free(f->integer_data); f->integer_data = NULL;
    free(f->code_data);    f->code_data = NULL;
    free(f->char_data);    f->char_data = NULL;

    // String
    if ((size_t)f->string_size > limit - pos) goto done;
    /* Allocate at least one byte so an empty segment is still non-NULL. */
    f->string_data = malloc(f->string_size ? (size_t)f->string_size : 1);
    if (!f->string_data) { rc = -4; goto done; }
    memcpy(f->string_data, data + pos, (size_t)f->string_size);
    pos += (size_t)f->string_size;

    // Integer
    if ((size_t)f->var_size > (limit - pos) / 4) goto done;
    f->integer_data = malloc(f->var_size ? (size_t)f->var_size * sizeof(int32_t) : 1);
    if (!f->integer_data) { rc = -4; goto done; }
    for (int32_t i = 0; i < f->var_size; i++) {
        f->integer_data[i] = (int32_t)ff_load_le32(data + pos);
        pos += 4;
    }

    // Code
    /* Every encoded number takes at least one byte, which bounds the count
     * before anything is allocated. */
    if ((size_t)f->code_size > limit - pos) goto done;
    f->code_data = malloc(f->code_size ? (size_t)f->code_size * sizeof(int32_t) : 1);
    if (!f->code_data) { rc = -4; goto done; }
    for (int32_t i = 0; i < f->code_size; i++) {
        if (!ff_read_checked(data, limit, &pos, f, &f->code_data[i])) goto done;
    }

    // Character
    if ((size_t)f->char_size > limit - pos) goto done;
    f->char_data = malloc(f->char_size ? (size_t)f->char_size : 1);
    if (!f->char_data) { rc = -4; goto done; }
    memcpy(f->char_data, data + pos, (size_t)f->char_size);
    pos += (size_t)f->char_size;

    rc = (pos == limit) ? 0 : -3;

done:
    free(data);
    return rc;
}
/* Returns non-zero when `magnitude` fits in `byte_count` bytes. Widths of
 * four or more always fit a 32-bit magnitude; testing them with
 * `1U << (8 * byte_count)` would shift by 32 or more, which is undefined. */
static int ff_fits_in_bytes(uint32_t magnitude, uint8_t byte_count) {
    return byte_count >= 4 || magnitude < (1U << (8 * byte_count));
}

/* Writes the shortest encoding of `value` into `enc` and returns the number
 * of bytes written (type byte included).
 *
 * `enc` must have room for 1 + max(tiny_len, short_len, long_len) bytes;
 * five bytes are enough only while no width exceeds four. */
size_t encode_number(int32_t value, uint8_t* enc, FourthFile* f) {
    uint8_t sign = value < 0 ? 1 : 0;
    /* Magnitude via unsigned arithmetic: labs() overflows for INT32_MIN
     * where long is 32 bits wide. */
    uint32_t abs_val = sign ? (0u - (uint32_t)value) : (uint32_t)value;
    size_t ptr = 0;
    if (value == 0 || value == - (f->cell_max + 1LL)) {
        enc[ptr++] = 0x08 | sign;
    } else if (value == 1 || value == -1) {
        enc[ptr++] = 0x10 | sign;
    } else {
        uint8_t type_byte = sign;
        uint8_t size;
        if (ff_fits_in_bytes(abs_val, f->tiny_len)) {
            size = f->tiny_len;
            type_byte |= 0x02;
        } else if (ff_fits_in_bytes(abs_val, f->short_len)) {
            size = f->short_len;
            type_byte |= 0x04;
        } else {
            size = f->long_len;
        }
        enc[ptr++] = type_byte;
        for (uint8_t i = 0; i < size; i++) {
            /* Bytes above the fourth are zero padding for wide formats. */
            enc[ptr++] = (i < 4) ? (uint8_t)((abs_val >> (i * 8)) & 0xFF) : 0;
        }
    }
    return ptr;
}
/* Stores a 32-bit value little-endian regardless of host byte order. */
static void ff_store_le32(uint8_t* dst, uint32_t value) {
    dst[0] = (uint8_t)(value & 0xFF);
    dst[1] = (uint8_t)((value >> 8) & 0xFF);
    dst[2] = (uint8_t)((value >> 16) & 0xFF);
    dst[3] = (uint8_t)((value >> 24) & 0xFF);
}

/* Serialises `f` to `filename`, checksum included.
 *
 * Returns 0 on success and -1 on any failure: NULL arguments, a negative
 * size field, a non-empty segment whose buffer is NULL, memory exhaustion,
 * or a file that cannot be opened or completely written. */
int fourthfile_write(FourthFile* f, const char* filename) {
    if (!f || !filename) return -1;
    if (f->code_size < 0 || f->string_size < 0 || f->var_size < 0 || f->char_size < 0) {
        return -1;
    }
    if ((f->string_size > 0 && !f->string_data) ||
        (f->var_size > 0 && !f->integer_data) ||
        (f->code_size > 0 && !f->code_data) ||
        (f->char_size > 0 && !f->char_data)) {
        return -1;
    }

    /* Worst-case size of one encoded number: type byte plus widest payload. */
    size_t widest = f->tiny_len;
    if (f->short_len > widest) widest = f->short_len;
    if (f->long_len > widest) widest = f->long_len;
    size_t enc_max = 1 + widest;

    /* Size the buffer from the actual contents rather than assuming the
     * image fits in a fixed 1 MiB block. */
    size_t capacity = 10 + 5 * enc_max
                    + (size_t)f->string_size
                    + (size_t)f->var_size * 4
                    + (size_t)f->code_size * enc_max
                    + (size_t)f->char_size
                    + 1;
    uint8_t* data = malloc(capacity);
    if (!data) return -1;

    size_t pos = 0;
    data[pos++] = f->tiny_len;
    data[pos++] = f->short_len;
    data[pos++] = f->long_len;
    ff_store_le32(data + pos, (uint32_t)f->cell_max); pos += 4;
    data[pos++] = (uint8_t)(f->version & 0xFF);
    data[pos++] = (uint8_t)(f->version >> 8);
    data[pos++] = f->app_byte;

    /* Large enough for any width a uint8_t length field can describe. */
    uint8_t enc[1 + UINT8_MAX];
    const int32_t sizes[5] = {
        f->code_size, f->string_size, f->var_offset, f->var_size, f->char_size
    };
    size_t len;
    for (size_t i = 0; i < 5; i++) {
        len = encode_number(sizes[i], enc, f);
        memcpy(data + pos, enc, len); pos += len;
    }

    if (f->string_size > 0) {
        memcpy(data + pos, f->string_data, (size_t)f->string_size);
        pos += (size_t)f->string_size;
    }
    for (int32_t i = 0; i < f->var_size; i++) {
        ff_store_le32(data + pos, (uint32_t)f->integer_data[i]); pos += 4;
    }
    for (int32_t i = 0; i < f->code_size; i++) {
        len = encode_number(f->code_data[i], enc, f);
        memcpy(data + pos, enc, len); pos += len;
    }
    if (f->char_size > 0) {
        memcpy(data + pos, f->char_data, (size_t)f->char_size);
        pos += (size_t)f->char_size;
    }

    uint8_t checksum = 0;
    for (size_t i = 0; i < pos; i++) {
        checksum ^= data[i];
    }
    data[pos++] = checksum;

    FILE* file = fopen(filename, "wb");
    if (!file) {
        free(data);
        return -1;
    }
    int rc = 0;
    if (fwrite(data, 1, pos, file) != pos) rc = -1;
    /* fclose() flushes buffered data and can be the call that fails. */
    if (fclose(file) != 0) rc = -1;
    free(data);
    return rc;
}