Task 609: .RDS File Format
Task 609: .RDS File Format
.RDS File Format Specifications
The .RDS file format is a binary (or optionally ASCII) serialization format used by the R programming language to store a single R object (e.g., a data frame, vector, or list) along with its metadata, such as classes, attributes, and types. It is created via saveRDS() and read via readRDS(). The format is based on R's internal serialization mechanism, which is recursive and handles complex object structures. Files may be compressed (gzip by default; bzip2 and xz are also supported) or uncompressed. The core structure is a header followed by the serialized object data. Serialization supports three modes: ASCII (human-readable), XDR binary (big-endian, the default), and native binary (the writer's host byte order, which is little-endian on most current platforms; rarely used). Serialization versions are 2 (the default through R 3.5.x) and 3 (readable since R 3.5.0 and the default since R 3.6.0); version 3 adds native-encoding information for better character handling.
1. List of All Properties Intrinsic to This File Format
These properties are derived from the file's header and are always present (with native encoding only in version 3+). They describe the serialization metadata without including the object data itself:
- Format: The serialization encoding scheme ('A' for ASCII, 'X' for XDR/big-endian binary, 'B' for native binary in the writer's host byte order, which is little-endian on most current platforms).
- Version: The RDS serialization format version (integer, typically 2 or 3).
- Writer R Version: The version of R that created the file, encoded as a packed integer and decodable to major.minor.patch (e.g., 4.1.2).
- Minimum Reader R Version: The minimum R version required to read the file, encoded similarly to the writer version.
- Native Encoding: The character encoding used for strings in the file (e.g., "UTF-8" or "latin1"), present only if version >= 3.
(Note: Compression is not a header property but can be detected from the file's magic bytes, e.g., 0x1F 0x8B for gzip.)
2. Two Direct Download Links for .RDS Files
- https://zenodo.org/records/10959197/files/iris.rds?download=1 (Sample dataset: iris.rds)
- https://zenodo.org/records/10959197/files/mtcars.rds?download=1 (Sample dataset: mtcars.rds)
3. Ghost Blog Embedded HTML/JavaScript for Drag-and-Drop .RDS Property Dump
This is a self-contained HTML snippet with embedded JavaScript that can be embedded in a Ghost blog post. It allows users to drag and drop an .RDS file (compressed or uncompressed). It uses the pako library (loaded from CDN) for decompression if needed, parses the header, and displays the properties on screen. It handles binary (XDR/native) and ASCII formats.
4. Python Class for .RDS Handling
This Python class can open an .RDS file (compressed or uncompressed), decode the header, print the properties to console, and write a simple .RDS file (e.g., serializing a minimal object like an integer vector, mimicking R's format for demonstration).
import gzip
import struct
import os
class RDSHandler:
    """Inspect the serialization header of an R ``.rds`` file and write a tiny sample file.

    After a successful :meth:`read_and_decode` the instance attributes hold:

    * ``format`` -- ``'A'`` (ASCII), ``'X'`` (XDR, big-endian) or ``'B'`` (native binary).
    * ``version`` -- serialization format version (int, typically 2 or 3).
    * ``writer_version`` -- R version that wrote the file, as ``'major.minor.patch'``.
    * ``min_reader_version`` -- minimum R version able to read it, same form.
    * ``native_encoding`` -- encoding name stored in a version >= 3 header, else ``'N/A'``.

    The attributes are only updated when the whole header parsed successfully,
    so a failed read never leaves a half-filled or stale mixture behind.
    """

    # Number of (decompressed) bytes inspected. The header is only a few dozen
    # bytes, so there is no need to load or inflate the whole object payload.
    _HEADER_PREFIX_BYTES = 4096

    def __init__(self, filepath):
        """Remember ``filepath``; nothing is read until :meth:`read_and_decode`."""
        self.filepath = filepath
        self.format = None
        self.version = None
        self.writer_version = None
        self.min_reader_version = None
        self.native_encoding = None

    def _decode_version(self, packed):
        """Unpack ``major << 16 | minor << 8 | patch`` into ``'major.minor.patch'``."""
        major = packed >> 16
        minor = (packed >> 8) & 0xFF
        patch = packed & 0xFF
        return f"{major}.{minor}.{patch}"

    def _read_header_prefix(self):
        """Return the first bytes of the (decompressed) serialization stream.

        The container is detected from its magic bytes: gzip, bzip2 and xz are
        the compression types ``saveRDS`` can produce; anything else is read as
        an uncompressed stream.
        """
        with open(self.filepath, 'rb') as raw:
            magic = raw.read(6)
        if magic[:2] == b'\x1f\x8b':
            opener = gzip.open
        elif magic[:3] == b'BZh':
            import bz2  # local import: only needed for bzip2-compressed files
            opener = bz2.open
        elif magic == b'\xfd7zXZ\x00':
            import lzma  # local import: only needed for xz-compressed files
            opener = lzma.open
        else:
            opener = open
        with opener(self.filepath, 'rb') as stream:
            return stream.read(self._HEADER_PREFIX_BYTES)

    def read_and_decode(self):
        """Parse the header of ``self.filepath`` and populate the attributes.

        Raises:
            ValueError: if the data is not a recognisable RDS stream or the
                header is truncated or malformed.
            OSError: if the file cannot be opened or decompressed.
        """
        data = self._read_header_prefix()
        # Every stream starts with a format letter followed by a newline.
        if len(data) < 2 or data[1] != 0x0A:
            raise ValueError("Invalid RDS format")
        fmt = chr(data[0])
        if fmt not in ('A', 'X', 'B'):
            raise ValueError("Invalid RDS format")

        native_encoding = 'N/A'
        if fmt == 'A':
            # ASCII mode stores one header field per line.
            fields = [line.strip() for line in data[2:].split(b'\n')]
            fields = [line for line in fields if line][:5]
            if len(fields) < 3:
                raise ValueError("Truncated RDS header")
            version = int(fields[0].decode('ascii'))
            writer_packed = int(fields[1].decode('ascii'))
            min_packed = int(fields[2].decode('ascii'))
            if version >= 3:
                if len(fields) < 5:
                    raise ValueError("Truncated RDS header")
                enc_len = int(fields[3].decode('ascii'))
                if enc_len < 0 or enc_len > len(fields[4]):
                    raise ValueError("Invalid native encoding length in RDS header")
                native_encoding = fields[4][:enc_len].decode('utf-8')
        else:
            # 'X' is big-endian; 'B' is decoded as little-endian, which matches
            # the byte order of most hosts that write native-binary files.
            endian = '>' if fmt == 'X' else '<'
            offset = 2
            if len(data) < offset + 12:
                raise ValueError("Truncated RDS header")
            version, writer_packed, min_packed = struct.unpack_from(f'{endian}iii', data, offset)
            offset += 12
            if version >= 3:
                if len(data) < offset + 4:
                    raise ValueError("Truncated RDS header")
                enc_len, = struct.unpack_from(f'{endian}i', data, offset)
                offset += 4
                if enc_len < 0 or offset + enc_len > len(data):
                    raise ValueError("Invalid native encoding length in RDS header")
                native_encoding = data[offset:offset + enc_len].decode('utf-8')

        self.format = fmt
        self.version = version
        self.writer_version = self._decode_version(writer_packed)
        self.min_reader_version = self._decode_version(min_packed)
        self.native_encoding = native_encoding

    def print_properties(self):
        """Print the decoded header properties, one per line, to stdout."""
        print(f"Format: {self.format}")
        print(f"Version: {self.version}")
        print(f"Writer R Version: {self.writer_version}")
        print(f"Minimum Reader R Version: {self.min_reader_version}")
        print(f"Native Encoding: {self.native_encoding or 'N/A'}")

    def write_simple_rds(self, output_path, compress=True):
        """Write a minimal version-3 XDR file holding the integer vector [1, 2].

        Args:
            output_path: destination file path.
            compress: gzip the stream when true (the default), else write it raw.
        """
        enc = 'UTF-8'.encode('utf-8')
        header = b'X\n'                      # XDR binary
        header += struct.pack('>i', 3)       # serialization version
        header += struct.pack('>i', 262402)  # writer: R 4.1.2 packed
        header += struct.pack('>i', 197888)  # minimum reader: R 3.5.0 packed
        header += struct.pack('>i', len(enc))
        header += enc
        # Object: flags word 13 (INTSXP with no flag bits set), length 2, values 1 and 2.
        object_data = struct.pack('>ii', 13, 2)
        object_data += struct.pack('>ii', 1, 2)
        data = header + object_data
        if compress:
            data = gzip.compress(data)
        with open(output_path, 'wb') as f:
            f.write(data)
# Example usage:
# handler = RDSHandler('path/to/file.rds')
# handler.read_and_decode()
# handler.print_properties()
# handler.write_simple_rds('output.rds')
5. Java Class for .RDS Handling
This Java class can open an .RDS file, decode the header, print properties to console, and write a simple .RDS file (minimal integer vector).
import java.io.*;
import java.nio.*;
import java.util.zip.GZIPInputStream;
/**
 * Reads the serialization header of an R {@code .rds} file (gzip-compressed or
 * uncompressed) and can write a minimal sample file.
 *
 * <p>Decoded properties: format letter ('A' ASCII, 'X' XDR/big-endian,
 * 'B' native binary, decoded as little-endian), serialization version, writer
 * R version, minimum reader R version and, for version 3 and later, the native
 * encoding name ("N/A" otherwise).
 */
public class RDSHandler {
    /** Decompressed bytes inspected; the header itself is only a few dozen bytes. */
    private static final int HEADER_PREFIX_BYTES = 4096;
    private static final String INVALID_FORMAT = "Invalid RDS format";
    private static final String TRUNCATED_HEADER = "Truncated RDS header";

    private String filepath;
    private char format;
    private int version;
    private String writerVersion;
    private String minReaderVersion;
    private String nativeEncoding;

    public RDSHandler(String filepath) {
        this.filepath = filepath;
    }

    /** Unpacks {@code major << 16 | minor << 8 | patch} into "major.minor.patch". */
    private String decodeVersion(int packed) {
        int major = packed >> 16;
        int minor = (packed >> 8) & 0xFF;
        int patch = packed & 0xFF;
        return major + "." + minor + "." + patch;
    }

    /** Reads at most {@code limit} bytes from {@code in}, stopping early at end of stream. */
    private static byte[] readPrefix(InputStream in, int limit) throws IOException {
        byte[] buffer = new byte[limit];
        int filled = 0;
        while (filled < limit) {
            int n = in.read(buffer, filled, limit - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return java.util.Arrays.copyOf(buffer, filled);
    }

    /**
     * Parses the header of the file given to the constructor and stores the
     * decoded properties. Fields are only updated when the whole header parsed.
     *
     * @throws IOException if the file cannot be read, is not an RDS stream, or
     *                     its header is truncated or malformed
     */
    public void readAndDecode() throws IOException {
        byte[] data;
        try (InputStream raw = new BufferedInputStream(new FileInputStream(filepath))) {
            // Peek at the gzip magic without consuming it.
            raw.mark(2);
            int b0 = raw.read();
            int b1 = raw.read();
            raw.reset();
            boolean gzipped = b0 == 0x1F && b1 == 0x8B;
            try (InputStream in = gzipped ? new GZIPInputStream(raw) : raw) {
                data = readPrefix(in, HEADER_PREFIX_BYTES);
            }
        }

        // Every stream starts with a format letter followed by a newline.
        if (data.length < 2 || data[1] != 0x0A) {
            throw new IOException(INVALID_FORMAT);
        }
        char fmt = (char) (data[0] & 0xFF);
        if (fmt != 'A' && fmt != 'X' && fmt != 'B') {
            throw new IOException(INVALID_FORMAT);
        }

        int parsedVersion;
        int writerPacked;
        int minPacked;
        String encoding = "N/A";

        if (fmt == 'A') {
            // ASCII mode stores one header field per line.
            String text = new String(data, 2, data.length - 2, java.nio.charset.StandardCharsets.UTF_8);
            String[] lines = text.split("\n");
            if (lines.length < 3) {
                throw new IOException(TRUNCATED_HEADER);
            }
            try {
                parsedVersion = Integer.parseInt(lines[0].trim());
                writerPacked = Integer.parseInt(lines[1].trim());
                minPacked = Integer.parseInt(lines[2].trim());
                if (parsedVersion >= 3) {
                    if (lines.length < 5) {
                        throw new IOException(TRUNCATED_HEADER);
                    }
                    int encLen = Integer.parseInt(lines[3].trim());
                    if (encLen < 0 || encLen > lines[4].length()) {
                        throw new IOException(INVALID_FORMAT);
                    }
                    encoding = lines[4].substring(0, encLen);
                }
            } catch (NumberFormatException e) {
                throw new IOException(INVALID_FORMAT, e);
            }
        } else {
            ByteOrder order = fmt == 'X' ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
            ByteBuffer bb = ByteBuffer.wrap(data, 2, data.length - 2).order(order);
            if (bb.remaining() < 12) {
                throw new IOException(TRUNCATED_HEADER);
            }
            parsedVersion = bb.getInt();
            writerPacked = bb.getInt();
            minPacked = bb.getInt();
            if (parsedVersion >= 3) {
                if (bb.remaining() < 4) {
                    throw new IOException(TRUNCATED_HEADER);
                }
                int encLen = bb.getInt();
                // Guards against negative or oversized lengths from corrupt files.
                if (encLen < 0 || encLen > bb.remaining()) {
                    throw new IOException(INVALID_FORMAT);
                }
                byte[] encBytes = new byte[encLen];
                bb.get(encBytes);
                encoding = new String(encBytes, java.nio.charset.StandardCharsets.UTF_8);
            }
        }

        format = fmt;
        version = parsedVersion;
        writerVersion = decodeVersion(writerPacked);
        minReaderVersion = decodeVersion(minPacked);
        nativeEncoding = encoding;
    }

    /** Prints the decoded header properties, one per line, to standard output. */
    public void printProperties() {
        System.out.println("Format: " + format);
        System.out.println("Version: " + version);
        System.out.println("Writer R Version: " + writerVersion);
        System.out.println("Minimum Reader R Version: " + minReaderVersion);
        System.out.println("Native Encoding: " + nativeEncoding);
    }

    /**
     * Writes a minimal version-3 XDR stream holding the integer vector [1, 2].
     *
     * @param outputPath destination file
     * @param compress   gzip the stream when true, otherwise write it raw
     * @throws IOException if the file cannot be written
     */
    public void writeSimpleRds(String outputPath, boolean compress) throws IOException {
        byte[] enc = "UTF-8".getBytes(java.nio.charset.StandardCharsets.UTF_8);
        ByteBuffer bb = ByteBuffer.allocate(64).order(ByteOrder.BIG_ENDIAN);
        bb.put((byte) 'X');
        bb.put((byte) '\n');
        bb.putInt(3);       // serialization version
        bb.putInt(262402);  // writer: R 4.1.2
        bb.putInt(197888);  // minimum reader: R 3.5.0
        bb.putInt(enc.length);
        bb.put(enc);
        // Object: flags word 13 (INTSXP, no flag bits), length 2, values 1 and 2.
        bb.putInt(13);
        bb.putInt(2);
        bb.putInt(1);
        bb.putInt(2);
        byte[] data = java.util.Arrays.copyOf(bb.array(), bb.position());

        if (compress) {
            try (ByteArrayOutputStream compressedBaos = new ByteArrayOutputStream();
                 java.util.zip.GZIPOutputStream gzos = new java.util.zip.GZIPOutputStream(compressedBaos)) {
                gzos.write(data);
                // finish() writes the gzip trailer so the bytes are complete before copying.
                gzos.finish();
                data = compressedBaos.toByteArray();
            }
        }
        try (FileOutputStream fos = new FileOutputStream(outputPath)) {
            fos.write(data);
        }
    }
    // Example usage:
    // public static void main(String[] args) throws IOException {
    // RDSHandler handler = new RDSHandler("path/to/file.rds");
    // handler.readAndDecode();
    // handler.printProperties();
    // handler.writeSimpleRds("output.rds", true);
    // }
}
6. JavaScript Class for .RDS Handling
This JavaScript class (for Node.js) can open an .RDS file, decode the header, print properties to console, and write a simple .RDS file. Requires fs and zlib.
const fs = require('fs');
const zlib = require('zlib');
/**
 * Reads the serialization header of an R .rds file (gzip-compressed or
 * uncompressed) and can write a minimal sample file.
 *
 * After readAndDecode() the instance exposes:
 *   format           'A' (ASCII), 'X' (XDR, big-endian) or 'B' (native binary,
 *                    decoded as little-endian)
 *   version          serialization format version (number)
 *   writerVersion    R version that wrote the file, "major.minor.patch"
 *   minReaderVersion minimum R version able to read it, same form
 *   nativeEncoding   encoding name from a version >= 3 header, else 'N/A'
 */
class RDSHandler {
  constructor(filepath) {
    this.filepath = filepath;
    this.format = null;
    this.version = null;
    this.writerVersion = null;
    this.minReaderVersion = null;
    this.nativeEncoding = null;
  }

  /** Unpacks (major << 16 | minor << 8 | patch) into "major.minor.patch". */
  _decodeVersion(packed) {
    const major = packed >> 16;
    const minor = (packed >> 8) & 0xFF;
    const patch = packed & 0xFF;
    return `${major}.${minor}.${patch}`;
  }

  /**
   * Parses the header of this.filepath and stores the decoded properties.
   * Properties are only updated when the whole header parsed successfully.
   * @throws {Error} if the file is not an RDS stream or its header is
   *   truncated or malformed (file-system and zlib errors propagate as-is).
   */
  readAndDecode() {
    let data = fs.readFileSync(this.filepath);
    // gzip magic bytes
    if (data.length >= 2 && data[0] === 0x1F && data[1] === 0x8B) {
      data = zlib.gunzipSync(data);
    }
    // Every stream starts with a format letter followed by a newline.
    if (data.length < 2 || data[1] !== 0x0A) {
      throw new Error('Invalid RDS format');
    }
    const format = String.fromCharCode(data[0]);
    if (format !== 'A' && format !== 'X' && format !== 'B') {
      throw new Error('Invalid RDS format');
    }

    let version;
    let writerPacked;
    let minPacked;
    let nativeEncoding = 'N/A';

    if (format === 'A') {
      // ASCII mode stores one header field per line; the header sits well
      // inside the first few KiB, so only that prefix is decoded.
      const text = data.subarray(2, 2 + 4096).toString('utf8');
      const lines = text.split('\n').filter(l => l.trim());
      if (lines.length < 3) {
        throw new Error('Truncated RDS header');
      }
      version = parseInt(lines[0], 10);
      writerPacked = parseInt(lines[1], 10);
      minPacked = parseInt(lines[2], 10);
      if ([version, writerPacked, minPacked].some(Number.isNaN)) {
        throw new Error('Invalid RDS format');
      }
      if (version >= 3) {
        if (lines.length < 5) {
          throw new Error('Truncated RDS header');
        }
        const encLen = parseInt(lines[3], 10);
        if (Number.isNaN(encLen) || encLen < 0 || encLen > lines[4].length) {
          throw new Error('Invalid RDS format');
        }
        nativeEncoding = lines[4].substring(0, encLen);
      }
    } else {
      // Explicit BE/LE readers avoid the DataView pitfall where the boolean
      // argument means "littleEndian", not "bigEndian".
      const bigEndian = format === 'X';
      let offset = 2;
      const readInt32 = () => {
        if (offset + 4 > data.length) {
          throw new Error('Truncated RDS header');
        }
        const value = bigEndian ? data.readInt32BE(offset) : data.readInt32LE(offset);
        offset += 4;
        return value;
      };
      version = readInt32();
      writerPacked = readInt32();
      minPacked = readInt32();
      if (version >= 3) {
        const encLen = readInt32();
        if (encLen < 0 || offset + encLen > data.length) {
          throw new Error('Invalid RDS format');
        }
        nativeEncoding = data.toString('utf8', offset, offset + encLen);
      }
    }

    this.format = format;
    this.version = version;
    this.writerVersion = this._decodeVersion(writerPacked);
    this.minReaderVersion = this._decodeVersion(minPacked);
    this.nativeEncoding = nativeEncoding;
  }

  /** Prints the decoded header properties, one per line, to the console. */
  printProperties() {
    console.log(`Format: ${this.format}`);
    console.log(`Version: ${this.version}`);
    console.log(`Writer R Version: ${this.writerVersion}`);
    console.log(`Minimum Reader R Version: ${this.minReaderVersion}`);
    console.log(`Native Encoding: ${this.nativeEncoding}`);
  }

  /**
   * Writes a minimal version-3 XDR stream holding the integer vector [1, 2].
   * @param {string} outputPath destination file
   * @param {boolean} [compress=true] gzip the stream when true
   */
  writeSimpleRds(outputPath, compress = true) {
    const int32be = (value) => {
      const bytes = Buffer.alloc(4);
      bytes.writeInt32BE(value, 0);
      return bytes;
    };
    const enc = Buffer.from('UTF-8', 'utf8');
    let data = Buffer.concat([
      Buffer.from([0x58, 0x0A]), // 'X' + newline: XDR binary
      int32be(3),                // serialization version
      int32be(262402),           // writer: R 4.1.2
      int32be(197888),           // minimum reader: R 3.5.0
      int32be(enc.length),
      enc,
      int32be(13),               // flags word: INTSXP, no flag bits
      int32be(2),                // vector length
      int32be(1),
      int32be(2),
    ]);
    if (compress) {
      data = zlib.gzipSync(data);
    }
    fs.writeFileSync(outputPath, data);
  }
}
// Example usage:
// const handler = new RDSHandler('path/to/file.rds');
// handler.readAndDecode();
// handler.printProperties();
// handler.writeSimpleRds('output.rds');
7. C Class (Using C++) for .RDS Handling
This C++ class can open an .RDS file, decode the header, print properties to console, and write a simple .RDS file. Uses <zlib.h> for compression (link with -lz).
#include <endian.h> // For byte swap if needed
#include <zlib.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
class RDSHandler {
private:
std::string filepath;
char format;
int32_t version;
std::string writerVersion;
std::string minReaderVersion;
std::string nativeEncoding;
std::string decodeVersion(int32_t packed) {
int major = packed >> 16;
int minor = (packed >> 8) & 0xFF;
int patch = packed & 0xFF;
return std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(patch);
}
public:
RDSHandler(const std::string& fp) : filepath(fp), format(0), version(0) {}
void readAndDecode() {
std::ifstream file(filepath, std::ios::binary);
if (!file) {
throw std::runtime_error("Cannot open file");
}
file.seekg(0, std::ios::end);
size_t size = file.tellg();
file.seekg(0);
std::vector<uint8_t> data(size);
file.read(reinterpret_cast<char*>(data.data()), size);
// Check gzip
bool isGzip = (data[0] == 0x1F && data[1] == 0x8B);
if (isGzip) {
z_stream stream;
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
stream.opaque = Z_NULL;
stream.avail_in = size;
stream.next_in = data.data();
if (inflateInit2(&stream, 16 + MAX_WBITS) != Z_OK) { // gzip mode
throw std::runtime_error("inflateInit failed");
}
std::vector<uint8_t> decompressed(1024 * 1024); // Assume max size
stream.avail_out = decompressed.size();
stream.next_out = decompressed.data();
int ret = inflate(&stream, Z_NO_FLUSH);
if (ret != Z_STREAM_END) {
inflateEnd(&stream);
throw std::runtime_error("Decompression failed");
}
size = decompressed.size() - stream.avail_out;
data.assign(decompressed.begin(), decompressed.begin() + size);
inflateEnd(&stream);
}
size_t offset = 0;
format = static_cast<char>(data[offset++]);
if (data[offset++] != 0x0A) {
throw std::runtime_error("Invalid RDS format");
}
bool isAscii = format == 'A';
bool isBigEndian = format == 'X';
if (isAscii) {
std::string text(reinterpret_cast<char*>(data.data() + offset), size - offset);
std::vector<std::string> lines;
size_t pos = 0;
while ((pos = text.find('\n')) != std::string::npos) {
std::string line = text.substr(0, pos);
if (!line.empty()) lines.push_back(line);
text.erase(0, pos + 1);
}
if (!text.empty()) lines.push_back(text);
version = std::stoi(lines[0]);
int32_t writerPacked = std::stoi(lines[1]);
int32_t minPacked = std::stoi(lines[2]);
writerVersion = decodeVersion(writerPacked);
minReaderVersion = decodeVersion(minPacked);
if (version >= 3) {
int encLen = std::stoi(lines[3]);
nativeEncoding = lines[4].substr(0, encLen);
} else {
nativeEncoding = "N/A";
}
} else {
auto readInt32 = [&](size_t& off) -> int32_t {
int32_t val;
std::memcpy(&val, data.data() + off, 4);
off += 4;
if (!isBigEndian) {
val = le32toh(val); // Assume host is little, convert if needed
} else {
val = be32toh(val);
}
return val;
};
version = readInt32(offset);
int32_t writerPacked = readInt32(offset);
int32_t minPacked = readInt32(offset);
writerVersion = decodeVersion(writerPacked);
minReaderVersion = decodeVersion(minPacked);
if (version >= 3) {
int32_t encLen = readInt32(offset);
nativeEncoding.assign(reinterpret_cast<char*>(data.data() + offset), encLen);
offset += encLen;
} else {
nativeEncoding = "N/A";
}
}
}
void printProperties() {
std::cout << "Format: " << format << std::endl;
std::cout << "Version: " << version << std::endl;
std::cout << "Writer R Version: " << writerVersion << std::endl;
std::cout << "Minimum Reader R Version: " << minReaderVersion << std::endl;
std::cout << "Native Encoding: " << nativeEncoding << std::endl;
}
void writeSimpleRds(const std::string& outputPath, bool compress = true) {
std::vector<uint8_t> header;
header.push_back('X');
header.push_back(0x0A);
auto addInt32 = [&](int32_t val) {
uint32_t beVal = htobe32(val); // Big-endian
header.insert(header.end(), reinterpret_cast<uint8_t*>(&beVal), reinterpret_cast<uint8_t*>(&beVal) + 4);
};
addInt32(3); // version
addInt32(262402); // R 4.1.2
addInt32(197888); // Min 3.5.0
std::string enc = "UTF-8";
addInt32(static_cast<int32_t>(enc.length()));
header.insert(header.end(), enc.begin(), enc.end());
// Simple INTSXP
addInt32(13);
addInt32(2); // length
addInt32(1);
addInt32(2);
std::vector<uint8_t> data = header;
if (compress) {
uLongf destLen = compressBound(data.size());
std::vector<uint8_t> compressed(destLen);
if (compress2(compressed.data(), &destLen, data.data(), data.size(), Z_DEFAULT_COMPRESSION) != Z_OK) {
throw std::runtime_error("Compression failed");
}
data.assign(compressed.begin(), compressed.begin() + destLen);
}
std::ofstream out(outputPath, std::ios::binary);
out.write(reinterpret_cast<char*>(data.data()), data.size());
}
};
// Example usage:
// int main() {
// try {
// RDSHandler handler("path/to/file.rds");
// handler.readAndDecode();
// handler.printProperties();
// handler.writeSimpleRds("output.rds");
// } catch (const std::exception& e) {
// std::cerr << e.what() << std::endl;
// }
// return 0;
// }