Task 217: .FB2 File Format
Task 217: .FB2 File Format
1. List of All Properties Intrinsic to the .FB2 File Format
The .FB2 (FictionBook 2.0) format is an open, XML-based e-book standard primarily for fictional literature, originating in Russia. It emphasizes logical document structure over presentation, using well-formed XML that conforms to a specific schema. Files are plain-text XML with a `.fb2` extension (often zipped for distribution, but the format itself is uncompressed XML). The format supports Unicode, footnotes, images (via XLink), tables, and semantic elements like poems or epigraphs, but no DRM. Key intrinsic properties include:
Base Structure and Encoding:
- XML 1.0 compliant (W3C standard).
- UTF-8 encoding (recommended/required for consistency).
- No compression intrinsic to the format (though often zipped externally).
- Namespaces mandatory: primary `xmlns="http://www.gribuser.ru/xml/fictionbook/2.0"`; XLink `xmlns:xlink="http://www.w3.org/1999/xlink"` for hyperlinks/images.
Root Element (`<FictionBook>`):
- Single root element.
- Attributes: Namespaces (as above); no other intrinsic attributes.
Child Elements of Root:
- `<stylesheet>`: Optional; contains CSS or other styles. Attributes: `type` (MIME type, e.g., `text/css`).
- `<description>`: Required; metadata container. No attributes.
  - `<title-info>`: Book metadata. No attributes.
    - `<genre>`: One or more; string content. Attributes: `match` (percentage match, e.g., "80").
    - `<author>`: One or more; person data. No attributes.
      - `<first-name>`, `<middle-name>`, `<last-name>`: Text content.
      - `<home-page>`, `<email>`: Optional text elements.
    - `<book-title>`: Text content.
    - `<annotation>`: Optional; rich text (HTML-like subsets: `<p>`, `<strong>`, etc.).
    - `<keywords>`: Optional comma-separated text.
    - `<date>`: Optional text. Attributes: `value` (ISO 8601 date).
    - `<src-lang>`, `<lang>`: Optional language codes (ISO 639-1).
    - `<translator>`: Optional; mirrors `<author>` structure.
  - `<document-info>`: Creation/editing metadata. No attributes.
    - `<author>`, `<program-used>`, `<date>`, `<src-url>`, `<src-ocr>`: Text content (mirrors above where applicable).
    - `<version>`: Text (e.g., semantic version).
    - `<history>`: Optional; change log as `<p>` elements.
  - `<publish-info>`: Publishing data. No attributes.
    - `<book-name>`, `<publisher>`, `<city>`, `<year>`, `<isbn>`: Text content.
  - `<custom-info>`: Optional extensions. Attributes: `info-type` (string).
- `<body>`: Required (at least one); content container. Attributes: `id` (optional unique ID).
  - `<title>`, `<epigraph>`: Optional; contain `<p>` or `<empty-line>`.
  - `<section>`: One or more; hierarchical. Attributes: `id` (optional).
    - Sub-elements: `<title>`, `<epigraph>`, `<image>`, `<annotation>` (optional).
    - Either subsections (`<section>`) or text blocks (no mixing in one section).
    - Text blocks: `<p>`, `<v>` (verse), `<subtitle>` (text); `<empty-line>` (spacing).
    - Semantic containers: `<poem>` (with `<stanza>`, `<v>`), `<cite>`, `<epigraph>`.
    - Hyperlinks: `<a>` within text. Attributes: `xlink:type="simple"`, `xlink:href` (URI/XPointer, e.g., `#footnote-id`), `type` (e.g., "note").
    - Images: `<image>` within sections. Attributes: `xlink:href` (e.g., `#binary-id`), `xlink:type="simple"`, `type="image"`.
- `<binary>`: Optional; embedded resources (e.g., images). Attributes: `id` (unique), `content-type` (MIME, e.g., `image/jpeg`); content: Base64-encoded data.
These properties define the format's logical hierarchy, ensuring portability across readers. The schema is defined in XSD files (e.g., FictionBook2.xsd), enforcing validation.
2. Two Direct Download Links for .FB2 Files
- https://filesamples.com/samples/ebook/fb2/Around%20the%20World%20in%2028%20Languages.fb2
- https://filesamples.com/samples/ebook/fb2/famouspaintings.fb2
3. Ghost Blog Embedded HTML JavaScript
This is a self-contained HTML snippet with embedded JavaScript for drag-and-drop .FB2 file handling. It can be pasted into a Ghost blog post (use the HTML card). It parses the XML using `DOMParser`, extracts key properties, and dumps them to a `<pre>` element on the page.
Drag and drop a .FB2 file here to view its properties.
4. Python Class
This class uses `xml.etree.ElementTree` to parse .FB2 files. It reads the file, extracts the properties and prints them to the console, and supports writing (it serializes the parsed tree back to an .FB2 file).
import xml.etree.ElementTree as ET
class FB2Parser:
    """Read, summarize and re-serialize a FictionBook 2.0 (.fb2) document.

    The file is parsed once in ``__init__``; ``print_properties`` prints a
    summary of the metadata, body and binaries to stdout, and ``write``
    serializes the parsed tree to a new file.

    Attributes:
        tree: The parsed ``ElementTree``.
        root: The document's root element.
        ns: Prefix-to-URI map used for namespace-qualified lookups.
    """

    def __init__(self, file_path):
        """Parse the FB2 document at *file_path*.

        Raises:
            OSError: If the file cannot be read.
            xml.etree.ElementTree.ParseError: If it is not well-formed XML.
        """
        self.tree = ET.parse(file_path)
        self.root = self.tree.getroot()
        self.ns = {
            'fb2': 'http://www.gribuser.ru/xml/fictionbook/2.0',
            'xlink': 'http://www.w3.org/1999/xlink',
        }

    def _find(self, parent, path):
        """Return the first match for *path*, resolved with ``self.ns``."""
        return parent.find(path, self.ns)

    def _text(self, parent, path, default='N/A'):
        """Return the stripped text of the first *path* match, or *default*.

        ``itertext`` is used so inline markup inside the element (for
        example ``<strong>`` within a ``<p>``) does not truncate the result.
        """
        node = parent.find(path, self.ns)
        if node is None:
            return default
        return ''.join(node.itertext()).strip() or default

    def print_properties(self):
        """Print a summary of the document's properties to stdout."""
        print("FB2 Properties:")
        # ElementTree folds the namespace into the tag as '{uri}local';
        # xmlns declarations are never exposed as attributes.
        tag = self.root.tag
        namespace = tag[1:].partition('}')[0] if tag.startswith('{') else 'N/A'
        print(f"Namespace: {namespace}")

        # Description
        desc = self._find(self.root, 'fb2:description')
        if desc is not None:
            title_info = self._find(desc, 'fb2:title-info')
            if title_info is not None:
                # An empty <genre/> has text None; skip it instead of crashing.
                genres = [
                    (g.text or '').strip()
                    for g in title_info.findall('fb2:genre', self.ns)
                ]
                print(f"Genres: {', '.join(g for g in genres if g)}")
                authors = []
                for author in title_info.findall('fb2:author', self.ns):
                    first = self._text(author, 'fb2:first-name', '')
                    last = self._text(author, 'fb2:last-name', '')
                    authors.append(f"{first} {last}".strip())
                print(f"Authors: {', '.join(authors)}")
                print(f"Title: {self._text(title_info, 'fb2:book-title')}")
                print(f"Language: {self._text(title_info, 'fb2:lang')}")
                date = self._find(title_info, 'fb2:date')
                date_value = date.get('value') if date is not None else None
                print(
                    f"Date: {self._text(title_info, 'fb2:date')} "
                    f"(value: {date_value or 'N/A'})"
                )
            doc_info = self._find(desc, 'fb2:document-info')
            if doc_info is not None:
                print(f"Version: {self._text(doc_info, 'fb2:version')}")
            pub_info = self._find(desc, 'fb2:publish-info')
            if pub_info is not None:
                print(f"Publisher: {self._text(pub_info, 'fb2:publisher')}")
                print(f"ISBN: {self._text(pub_info, 'fb2:isbn')}")

        # Body (only the first <body> is summarized)
        body = self._find(self.root, 'fb2:body')
        if body is not None:
            print(f"Body ID: {body.get('id') or 'N/A'}")
            sections = body.findall('.//fb2:section', self.ns)
            print(f"Number of Sections: {len(sections)}")
            first_title = self._text(body, './/fb2:section/fb2:title/fb2:p')
            print(f"First Section Title: {first_title}")

        # Binaries
        binaries = self.root.findall('fb2:binary', self.ns)
        print(f"Number of Binaries: {len(binaries)}")
        if binaries:
            first = binaries[0]
            print(
                f"First Binary ID: {first.get('id')}, "
                f"Type: {first.get('content-type')}"
            )

    def write(self, output_path):
        """Serialize the parsed tree to *output_path* as UTF-8 XML.

        The FB2 namespace is registered as the default namespace first so
        the output uses plain ``<FictionBook xmlns=...>`` tags instead of
        ElementTree's generated ``ns0:`` prefixes. Registration is global to
        the ``xml.etree.ElementTree`` module.
        """
        ET.register_namespace('', self.ns['fb2'])
        ET.register_namespace('xlink', self.ns['xlink'])
        self.tree.write(output_path, encoding='utf-8', xml_declaration=True)
# Usage
# parser = FB2Parser('example.fb2')
# parser.print_properties()
# parser.write('output.fb2')
5. Java Class
This class uses `javax.xml.parsers.DocumentBuilder` for parsing. It reads the file, prints properties to the console, and supports writing via `Transformer`.
import org.w3c.dom.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.File;
/**
 * Reads a FictionBook 2.0 (.fb2) document into a namespace-aware DOM,
 * prints a summary of its properties, and can serialize it back to a file.
 */
public class FB2Parser {
    private Document doc;
    private String ns = "http://www.gribuser.ru/xml/fictionbook/2.0";

    /**
     * Parses the FB2 file at {@code filePath}.
     *
     * @param filePath path of the .fb2 file to read
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    public FB2Parser(String filePath) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        this.doc = builder.parse(new File(filePath));
    }

    /** Prints a summary of the metadata, first body and binaries to stdout. */
    public void printProperties() {
        System.out.println("FB2 Properties:");
        Element root = doc.getDocumentElement();
        // The resolved namespace URI also works when the root uses a prefix.
        String rootNs = root.getNamespaceURI();
        System.out.println("Namespace: " + (rootNs != null ? rootNs : "N/A"));

        // Description
        Element desc = getElement(root, "description");
        if (desc != null) {
            Element titleInfo = getElement(desc, "title-info");
            if (titleInfo != null) {
                printTitleInfo(titleInfo);
            }
            // document-info and publish-info are siblings of title-info, so
            // they are reported even when title-info is missing.
            Element docInfo = getElement(desc, "document-info");
            if (docInfo != null) {
                System.out.println("Version: " + getText(docInfo, "version"));
            }
            Element pubInfo = getElement(desc, "publish-info");
            if (pubInfo != null) {
                System.out.println("Publisher: " + getText(pubInfo, "publisher"));
                System.out.println("ISBN: " + getText(pubInfo, "isbn"));
            }
        }

        // Body (only the first <body> is summarized)
        Element body = getElement(root, "body");
        if (body != null) {
            String bodyId = body.getAttribute("id");
            System.out.println("Body ID: " + (bodyId.isEmpty() ? "N/A" : bodyId));
            NodeList sections = body.getElementsByTagNameNS(ns, "section");
            System.out.println("Number of Sections: " + sections.getLength());
            // Every step may be absent; fall back to "N/A" instead of throwing.
            Element firstSection = sections.getLength() > 0 ? (Element) sections.item(0) : null;
            Element title = firstSection != null ? getElement(firstSection, "title") : null;
            String firstTitleText = title != null ? getText(title, "p") : "N/A";
            System.out.println("First Section Title: " + firstTitleText);
        }

        // Binaries
        NodeList binaries = doc.getElementsByTagNameNS(ns, "binary");
        System.out.println("Number of Binaries: " + binaries.getLength());
        if (binaries.getLength() > 0) {
            Element first = (Element) binaries.item(0);
            System.out.println("First Binary ID: " + first.getAttribute("id")
                    + ", Type: " + first.getAttribute("content-type"));
        }
    }

    /** Prints genres, authors, title, language and date from title-info. */
    private void printTitleInfo(Element titleInfo) {
        StringBuilder genres = new StringBuilder();
        NodeList genreNodes = titleInfo.getElementsByTagNameNS(ns, "genre");
        for (int i = 0; i < genreNodes.getLength(); i++) {
            appendItem(genres, genreNodes.item(i).getTextContent().trim());
        }
        System.out.println("Genres: " + genres);

        StringBuilder authors = new StringBuilder();
        NodeList authorNodes = titleInfo.getElementsByTagNameNS(ns, "author");
        for (int i = 0; i < authorNodes.getLength(); i++) {
            Element author = (Element) authorNodes.item(i);
            // Missing name parts contribute nothing rather than "N/A".
            String first = getText(author, "first-name", "");
            String last = getText(author, "last-name", "");
            appendItem(authors, (first + " " + last).trim());
        }
        System.out.println("Authors: " + authors);

        System.out.println("Title: " + getText(titleInfo, "book-title"));
        System.out.println("Language: " + getText(titleInfo, "lang"));
        Element date = getElement(titleInfo, "date");
        String dateValue = date != null ? date.getAttribute("value") : "";
        System.out.println("Date: " + getText(titleInfo, "date")
                + " (value: " + (dateValue.isEmpty() ? "N/A" : dateValue) + ")");
    }

    /** Appends a non-empty item to a comma-separated list. */
    private static void appendItem(StringBuilder list, String item) {
        if (item.isEmpty()) {
            return;
        }
        if (list.length() > 0) {
            list.append(", ");
        }
        list.append(item);
    }

    /** Returns the trimmed text of the first matching descendant, or "N/A". */
    private String getText(Element parent, String tag) {
        return getText(parent, tag, "N/A");
    }

    /** Returns the trimmed text of the first matching descendant, or {@code fallback}. */
    private String getText(Element parent, String tag, String fallback) {
        Element el = getElement(parent, tag);
        if (el == null) {
            return fallback;
        }
        String text = el.getTextContent().trim();
        return text.isEmpty() ? fallback : text;
    }

    /** Returns the first descendant of {@code parent} named {@code tag} in the FB2 namespace, or null. */
    private Element getElement(Element parent, String tag) {
        NodeList nodes = parent.getElementsByTagNameNS(ns, tag);
        return nodes.getLength() > 0 ? (Element) nodes.item(0) : null;
    }

    /**
     * Serializes the parsed document to {@code outputPath}.
     *
     * @param outputPath destination file
     * @throws Exception if the transformer cannot be created or the write fails
     */
    public void write(String outputPath) throws Exception {
        TransformerFactory factory = TransformerFactory.newInstance();
        Transformer transformer = factory.newTransformer();
        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(new File(outputPath));
        transformer.transform(source, result);
    }

    // Usage
    // FB2Parser parser = new FB2Parser("example.fb2");
    // parser.printProperties();
    // parser.write("output.fb2");
}
6. JavaScript Class
This Node.js-compatible class uses `xml2js` (assumed to be installed via npm; for the browser, adapt it to use `DOMParser`). It reads the file with `fs`, parses it, prints the properties to the console, and writes the document back out.
const fs = require('fs');
const xml2js = require('xml2js');
/**
 * Reads a FictionBook 2.0 (.fb2) file, prints a summary of its properties,
 * and can write the parsed document back out through xml2js.
 */
class FB2Parser {
  /**
   * @param {string} filePath Path of the .fb2 file; it is read synchronously as UTF-8.
   */
  constructor(filePath) {
    this.xml = fs.readFileSync(filePath, 'utf8');
    this.parser = new xml2js.Parser({ explicitArray: false });
    // Parsed document, filled in lazily by parse() and shared with write().
    this.result = null;
  }

  /** Wraps a value in an array; null/undefined become an empty array. */
  static toArray(value) {
    if (value === undefined || value === null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  }

  /**
   * Returns the text of a parsed node. With explicitArray disabled, xml2js
   * yields a plain string for text-only elements and an object with the text
   * under `_` (attributes under `$`) once an element has attributes.
   */
  static textOf(node) {
    if (node === undefined || node === null) {
      return '';
    }
    if (typeof node === 'object') {
      return typeof node._ === 'string' ? node._.trim() : '';
    }
    return String(node).trim();
  }

  /** Returns a node's attribute map, or an empty object when it has none. */
  static attrsOf(node) {
    return (node && typeof node === 'object' && node.$) || {};
  }

  /** Counts <section> elements at every nesting depth below `node`. */
  static countSections(node) {
    return FB2Parser.toArray(node.section).reduce(
      (total, section) =>
        total + 1 + (section && typeof section === 'object' ? FB2Parser.countSections(section) : 0),
      0
    );
  }

  /**
   * Parses the XML once and caches the result.
   * @returns {Promise<object>} The xml2js document object.
   */
  async parse() {
    if (!this.result) {
      this.result = await this.parser.parseStringPromise(this.xml);
    }
    return this.result;
  }

  /** Prints a summary of the metadata, first body and binaries to the console. */
  async printProperties() {
    const { toArray, textOf, attrsOf } = FB2Parser;
    const result = await this.parse();
    const root = result && result.FictionBook;
    console.log('FB2 Properties:');
    if (!root || typeof root !== 'object') {
      console.log('Namespace: Not found');
      return;
    }
    console.log(`Namespace: ${attrsOf(root).xmlns || 'Not found'}`);

    // Description
    const description = root.description;
    if (description && typeof description === 'object') {
      const titleInfo = description['title-info'];
      if (titleInfo && typeof titleInfo === 'object') {
        // A <genre match="..."> parses to an object, so go through textOf.
        const genres = toArray(titleInfo.genre).map(textOf).filter(Boolean);
        console.log(`Genres: ${genres.join(', ')}`);
        const authors = toArray(titleInfo.author).map(author => {
          const first = author ? textOf(author['first-name']) : '';
          const last = author ? textOf(author['last-name']) : '';
          return `${first} ${last}`.trim();
        });
        console.log(`Authors: ${authors.join(', ')}`);
        console.log(`Title: ${textOf(titleInfo['book-title']) || 'N/A'}`);
        console.log(`Language: ${textOf(titleInfo.lang) || 'N/A'}`);
        const date = titleInfo.date;
        console.log(`Date: ${textOf(date) || 'N/A'} (value: ${attrsOf(date).value || 'N/A'})`);
      }
      const docInfo = description['document-info'];
      if (docInfo && typeof docInfo === 'object') {
        console.log(`Version: ${textOf(docInfo.version) || 'N/A'}`);
      }
      const pubInfo = description['publish-info'];
      if (pubInfo && typeof pubInfo === 'object') {
        console.log(`Publisher: ${textOf(pubInfo.publisher) || 'N/A'}`);
        console.log(`ISBN: ${textOf(pubInfo.isbn) || 'N/A'}`);
      }
    }

    // Body: a book may hold several <body> elements; summarize the first.
    const body = toArray(root.body)[0];
    if (body !== undefined) {
      console.log(`Body ID: ${attrsOf(body).id || 'N/A'}`);
      const sections = body && typeof body === 'object' ? toArray(body.section) : [];
      const sectionCount = body && typeof body === 'object' ? FB2Parser.countSections(body) : 0;
      console.log(`Number of Sections: ${sectionCount}`);
      const firstSection = sections[0];
      const title = firstSection && typeof firstSection === 'object' ? firstSection.title : null;
      const firstParagraph = title && typeof title === 'object' ? toArray(title.p)[0] : null;
      console.log(`First Section Title: ${textOf(firstParagraph) || 'N/A'}`);
    }

    // Binaries
    const binaries = toArray(root.binary);
    console.log(`Number of Binaries: ${binaries.length}`);
    if (binaries.length > 0) {
      const firstAttrs = attrsOf(binaries[0]);
      console.log(`First Binary ID: ${firstAttrs.id}, Type: ${firstAttrs['content-type']}`);
    }
  }

  /**
   * Rebuilds XML from the parsed document and writes it to `outputPath`.
   * The document is parsed here if printProperties() has not run yet.
   * NOTE(review): xml2js groups same-named children, so the original order
   * of interleaved elements may not survive the round trip - confirm before
   * relying on this for body content.
   * @param {string} outputPath Destination file.
   */
  async write(outputPath) {
    const result = await this.parse();
    const builder = new xml2js.Builder();
    const xml = builder.buildObject(result);
    fs.writeFileSync(outputPath, xml);
  }
}
// Usage (async)
// const parser = new FB2Parser('example.fb2');
// await parser.printProperties();
// await parser.write('output.fb2');
7. C Class
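This uses libxml2 (compile with `gcc -o fb2 fb2.c $(xml2-config --cflags --libs)`; a plain `-lxml2` is not enough when the headers live under `/usr/include/libxml2`). It parses XML, prints properties to stdout, and supports writing via `xmlSaveFormatFileEnc`.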
#include <stdio.h>
#include <stdlib.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
/* State for one parsed FB2 document, created by fb2_parser_new() and
 * released by fb2_parser_free(). */
typedef struct {
/* Document tree returned by xmlReadFile(); freed with xmlFreeDoc(). */
xmlDocPtr doc;
/* Namespace handle looked up on the root element in fb2_parser_new(). */
xmlNsPtr ns;
} FB2Parser;
FB2Parser* fb2_parser_new(const char* file_path) {
FB2Parser* parser = malloc(sizeof(FB2Parser));
parser->doc = xmlReadFile(file_path, NULL, 0);
if (parser->doc == NULL) {
fprintf(stderr, "Error parsing file\n");
free(parser);
return NULL;
}
parser->ns = xmlSearchNs(parser->doc, xmlDocGetRootElement(parser->doc), (xmlChar*)"http://www.gribuser.ru/xml/fictionbook/2.0");
return parser;
}
void fb2_print_properties(FB2Parser* parser) {
xmlNodePtr root = xmlDocGetRootElement(parser->doc);
printf("FB2 Properties:\n");
xmlChar* xmlns = xmlGetNsProp(root, (xmlChar*)"xmlns", NULL);
printf("Namespace: %s\n", xmlns ? (char*)xmlns : "Not found");
xmlFree(xmlns);
// Description
xmlNodePtr desc = xmlNsLookupNode(root->children, parser->ns, "description");
if (desc) {
xmlNodePtr title_info = xmlNsLookupNode(desc->children, parser->ns, "title-info");
if (title_info) {
// Genres
xmlChar* genres = NULL;
xmlNodePtr genre = xmlNsLookupNode(title_info->children, parser->ns, "genre");
while (genre) {
if (genres) genres = xmlStrcat(genres, (xmlChar*)", ");
genres = xmlStrcat(genres, genre->children ? genre->children->content : (xmlChar*)"");
genre = genre->next;
if (genre) genre = xmlNsLookupNode(genre, parser->ns, "genre");
}
printf("Genres: %s\n", genres ? (char*)genres : "");
xmlFree(genres);
// Authors
xmlChar* authors = NULL;
xmlNodePtr author = xmlNsLookupNode(title_info->children, parser->ns, "author");
while (author) {
xmlNodePtr first = xmlNsLookupNode(author->children, parser->ns, "first-name");
xmlNodePtr last = xmlNsLookupNode(author->children, parser->ns, "last-name");
xmlChar* auth_str = xmlStrdup((xmlChar*)"");
if (first && first->children) auth_str = xmlStrcat(auth_str, first->children->content);
auth_str = xmlStrcat(auth_str, (xmlChar*)" ");
if (last && last->children) auth_str = xmlStrcat(auth_str, last->children->content);
if (authors) authors = xmlStrcat(authors, (xmlChar*)", ");
authors = xmlStrcat(authors, auth_str);
xmlFree(auth_str);
author = author->next;
if (author) author = xmlNsLookupNode(author, parser->ns, "author");
}
printf("Authors: %s\n", authors ? (char*)authors : "");
xmlFree(authors);
// Title, Lang, Date
xmlNodePtr title = xmlNsLookupNode(title_info->children, parser->ns, "book-title");
printf("Title: %s\n", title && title->children ? (char*)title->children->content : "N/A");
xmlNodePtr lang = xmlNsLookupNode(title_info->children, parser->ns, "lang");
printf("Language: %s\n", lang && lang->children ? (char*)lang->children->content : "N/A");
xmlNodePtr date = xmlNsLookupNode(title_info->children, parser->ns, "date");
xmlChar* date_val = xmlGetProp(date, (xmlChar*)"value");
printf("Date: %s (value: %s)\n", date && date->children ? (char*)date->children->content : "N/A",
date_val ? (char*)date_val : "N/A");
if (date_val) xmlFree(date_val);
}
// Document-info version
xmlNodePtr doc_info = xmlNsLookupNode(desc->children, parser->ns, "document-info");
if (doc_info) {
xmlNodePtr version = xmlNsLookupNode(doc_info->children, parser->ns, "version");
printf("Version: %s\n", version && version->children ? (char*)version->children->content : "N/A");
}
// Publish-info
xmlNodePtr pub_info = xmlNsLookupNode(desc->children, parser->ns, "publish-info");
if (pub_info) {
xmlNodePtr publisher = xmlNsLookupNode(pub_info->children, parser->ns, "publisher");
printf("Publisher: %s\n", publisher && publisher->children ? (char*)publisher->children->content : "N/A");
xmlNodePtr isbn = xmlNsLookupNode(pub_info->children, parser->ns, "isbn");
printf("ISBN: %s\n", isbn && isbn->children ? (char*)isbn->children->content : "N/A");
}
}
// Body
xmlNodePtr body = xmlNsLookupNode(root->children, parser->ns, "body");
if (body) {
xmlChar* body_id = xmlGetProp(body, (xmlChar*)"id");
printf("Body ID: %s\n", body_id ? (char*)body_id : "N/A");
if (body_id) xmlFree(body_id);
int sections = 0;
xmlNodePtr section = xmlNsLookupNode(body->children, parser->ns, "section");
while (section) {
sections++;
section = section->next;
if (section) section = xmlNsLookupNode(section, parser->ns, "section");
}
printf("Number of Sections: %d\n", sections);
// First section title (simplified)
xmlNodePtr first_section = xmlNsLookupNode(body->children, parser->ns, "section");
if (first_section) {
xmlNodePtr title = xmlNsLookupNode(first_section->children, parser->ns, "title");
if (title) {
xmlNodePtr p = xmlNsLookupNode(title->children, parser->ns, "p");
printf("First Section Title: %s\n", p && p->children ? (char*)p->children->content : "N/A");
}
}
}
// Binaries
int binaries = 0;
xmlNodePtr binary = xmlNsLookupNode(root->children, parser->ns, "binary");
while (binary) {
binaries++;
binary = binary->next;
if (binary) binary = xmlNsLookupNode(binary, parser->ns, "binary");
}
printf("Number of Binaries: %d\n", binaries);
if (binaries > 0) {
binary = xmlNsLookupNode(root->children, parser->ns, "binary"); // First
xmlChar* id = xmlGetProp(binary, (xmlChar*)"id");
xmlChar* content_type = xmlGetProp(binary, (xmlChar*)"content-type");
printf("First Binary ID: %s, Type: %s\n", id ? (char*)id : "N/A", content_type ? (char*)content_type : "N/A");
if (id) xmlFree(id);
if (content_type) xmlFree(content_type);
}
}
/*
 * Serializes the parsed document to output_path as formatted UTF-8 XML.
 * Failures are reported on stderr because the function returns nothing.
 */
void fb2_write(FB2Parser* parser, const char* output_path) {
    if (parser == NULL || parser->doc == NULL || output_path == NULL) {
        fprintf(stderr, "Error writing file: nothing to write\n");
        return;
    }
    /* xmlSaveFormatFileEnc() returns the byte count written, or -1 on error. */
    if (xmlSaveFormatFileEnc(output_path, parser->doc, "UTF-8", 1) < 0) {
        fprintf(stderr, "Error writing file\n");
    }
}
/* Releases the document and the parser itself; a NULL parser is ignored, like free(). */
void fb2_parser_free(FB2Parser* parser) {
    if (parser == NULL) return;
    xmlFreeDoc(parser->doc);
    free(parser);
}
/*
 * Command-line entry point: prints the properties of the FB2 file named by
 * the first argument. Returns 0 on success and 1 on a usage or parse error.
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file.fb2>\n", argc > 0 ? argv[0] : "fb2");
        return 1;
    }
    FB2Parser* parser = fb2_parser_new(argv[1]);
    if (parser == NULL) {
        /* fb2_parser_new() has already reported the error. */
        xmlCleanupParser();
        return 1;
    }
    fb2_print_properties(parser);
    // fb2_write(parser, "output.fb2");
    fb2_parser_free(parser);
    /* Release libxml2's global state once all documents are freed. */
    xmlCleanupParser();
    return 0;
}