Task 818: .WOS File Format
1. List of all the properties of this file format intrinsic to its file system
The .WOS file format refers to the Web of Science (WoS) bibliographic record format, which is a structured text format used for exporting citation and publication data. It is line-oriented, with two-letter uppercase tags followed by their values, and each record ends with "ER". The file starts with "FN ISI Export Format" and "VR 1.0", uses UTF-8 encoding with BOM, and ends with "EF". The properties are the standardized tags that represent metadata fields. Here is the complete list of properties (tags) and their descriptions:
- PT: Publication Type (e.g., J for journal, C for conference).
- AU: Authors (short/abbreviated form).
- AF: Authors (full name form).
- TI: Title of the publication.
- SO: Journal/Series title (uppercase normalized).
- LA: Language.
- DT: Document Type.
- DE: Author Keywords.
- ID: Keywords Plus (derived by Clarivate from cited titles).
- AB: Abstract (multi-line text preserved).
- C1: Author Affiliations.
- RP: Reprint Address.
- EM: Email Addresses.
- RI: Researcher IDs.
- OI: ORCID Identifiers.
- FU: Funding Agencies.
- FX: Funding Text.
- CR: Cited References (parsed into citation objects).
- NR: Cited References Count (length of CR).
- TC: Times Cited in Web of Science.
- Z9: Total Times Cited (including non-WoS sources).
- PU: Publisher.
- PI: Publisher City.
- PA: Publisher Address.
- SN: ISSN/ISBN.
- BN: Book ISBN.
- J9: Journal Abbreviation (29-character).
- JI: Journal ISO Abbreviation.
- PD: Publication Date (month/day).
- PY: Publication Year (integer).
- VL: Volume.
- IS: Issue.
- SI: Special Issue.
- PN: Part Number.
- SU: Supplement.
- MA: Meeting Abstract.
- BP: Beginning Page.
- EP: Ending Page.
- AR: Article Number.
- DI: DOI.
- D2: Book DOI.
- PG: Page Count.
- WC: Web of Science Categories.
- SC: Research Areas.
- GA: Document Delivery Number.
- PM: PubMed ID.
- UT: Unique WoS Accession Number (prefixed with WOS:).
- OA: Open Access Designation.
- HP: Highly Cited Paper Status.
- HC: Hot Paper Status.
- DA: Date Generated.
- ER: End of Record (marks the end of each entry).
- EF: End of File (marks the end of the entire file).
Additional conference-related properties:
- CT: Conference Title.
- CY: Conference Date.
- CL: Conference Location.
- HO: Conference Host.
- SP: Conference Sponsors.
These properties are intrinsic to the format's structure, as they define the tagged fields stored in the file. Multi-line values are supported for fields like AB and CR, and lists are used for repeatable fields like AU or CR.
2. Two direct download links for files of format .WOS
Note: WoS files are typically exported as .txt but follow the .WOS format structure. Here are two direct download links for sample files in this format:
- https://docs.google.com/uc?id=0B0HnDMi5NBF8UnF5a2ZReVBJNW8&export=download (Single record sample WoS file)
- https://data.mendeley.com/public-files/datasets/9rw3vkcfy4/files/fd6b04f8-27fb-4c6e-a6a9-017d4f3f5fc3/file_downloaded (Zip archive containing multiple WoS format .txt files from a dataset of 46,985 documents)
3. Ghost blog embedded html javascript that allows a user to drag n drop a file of format .WOS and it will dump to screen all these properties
4. Python class that can open any file of format .WOS and decode read and write and print to console all the properties from the above list
import codecs
class WOSFile:
    """Reader/writer for Web of Science (WoS) tagged export files.

    Every record is stored as a dict that maps a two-character field tag
    (for example ``AU`` or ``C1``) to the list of value lines found for that
    tag, in the order the tags appear in the file. Parsed records accumulate
    in ``self.records``.
    """

    # Tags that frame the whole export; they are not fields of any record.
    _HEADER_TAGS = ('FN', 'VR')
    # Continuation lines of a multi-line field are indented by three spaces.
    _CONTINUATION_PREFIX = '   '

    def __init__(self, filepath=None):
        """Create an empty container and optionally load ``filepath``."""
        self.records = []
        if filepath:
            self.read(filepath)

    @staticmethod
    def _is_tag_line(line):
        """Return True when ``line`` (right-stripped) starts a new field.

        A tag is an uppercase ASCII letter followed by an uppercase letter or
        a digit, then either a space or the end of the line. The check is done
        on explicit character ranges because ``str.isupper`` also accepts
        strings such as ``" A"``, which would turn an indented continuation
        line into a bogus tag.
        """
        if len(line) < 2:
            return False
        first_ok = 'A' <= line[0] <= 'Z'
        second_ok = 'A' <= line[1] <= 'Z' or '0' <= line[1] <= '9'
        return first_ok and second_ok and (len(line) == 2 or line[2] == ' ')

    def read(self, filepath):
        """Parse ``filepath`` and append its records to ``self.records``.

        The file is decoded as UTF-8 with an optional BOM. Raises whatever
        ``codecs.open`` raises when the file cannot be opened or decoded.
        """
        with codecs.open(filepath, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        current_record = {}
        current_tag = None
        for raw_line in content.split('\n'):
            # Only strip the right side: the leading indent is what
            # distinguishes a continuation line from a tag line.
            line = raw_line.rstrip()
            if line in ('ER', 'EF'):
                # EF also flushes so a file missing its last ER is not lost.
                if current_record:
                    self.records.append(current_record)
                current_record = {}
                current_tag = None
            elif self._is_tag_line(line):
                tag = line[:2]
                if tag in self._HEADER_TAGS:
                    # File header, re-created by write(); keep it out of
                    # the first record.
                    current_tag = None
                    continue
                current_tag = tag
                values = current_record.setdefault(tag, [])
                value = line[3:].strip()
                if value:
                    values.append(value)
            elif current_tag and line:
                current_record[current_tag].append(line.strip())
        if current_record:
            self.records.append(current_record)

    def write(self, filepath):
        """Write all records to ``filepath`` as UTF-8 with BOM.

        Fields with no values are written as a bare tag line instead of
        raising ``IndexError``.
        """
        with codecs.open(filepath, 'w', encoding='utf-8-sig') as f:
            f.write('FN ISI Export Format\nVR 1.0\n')
            for record in self.records:
                for tag, values in record.items():
                    if not values:
                        f.write(f'{tag}\n')
                        continue
                    f.write(f'{tag} {values[0]}\n')
                    for value in values[1:]:
                        f.write(f'{self._CONTINUATION_PREFIX}{value}\n')
                f.write('ER\n')
            f.write('EF\n')

    def print_properties(self):
        """Print every record's tags and space-joined values to stdout."""
        for idx, record in enumerate(self.records, 1):
            print(f'Record {idx}:')
            for tag, values in record.items():
                print(f'{tag}: {" ".join(values)}')
            print()
# Example usage:
# wos = WOSFile('sample.wos')
# wos.print_properties()
# wos.write('output.wos')
5. Java class that can open any file of format .WOS and decode read and write and print to console all the properties from the above list
import java.io.*;
import java.util.*;
/**
 * Reader/writer for Web of Science (WoS) tagged export files.
 *
 * Each record is a map from a two-character field tag (for example
 * {@code AU} or {@code C1}) to the list of value lines of that field.
 * Field order within a record follows the order in the file.
 */
public class WOSFile {
    /** Indentation used for continuation lines of a multi-line field. */
    private static final String CONTINUATION_PREFIX = "   ";

    private List<Map<String, List<String>>> records = new ArrayList<>();

    /**
     * Creates an instance and loads the given file.
     *
     * @param filepath path of the file to parse
     * @throws IOException if the file cannot be read
     */
    public WOSFile(String filepath) throws IOException {
        read(filepath);
    }

    /** Creates an empty instance with no records. */
    public WOSFile() {}

    /**
     * Tells whether a right-trimmed line starts a new field: an uppercase
     * ASCII letter, then an uppercase letter or digit, then a space or the
     * end of the line. Digits must be accepted in the second position,
     * otherwise tags such as C1, Z9, J9 and D2 are missed.
     */
    private static boolean isTagLine(String line) {
        if (line.length() < 2) {
            return false;
        }
        char first = line.charAt(0);
        char second = line.charAt(1);
        boolean firstOk = first >= 'A' && first <= 'Z';
        boolean secondOk = (second >= 'A' && second <= 'Z') || (second >= '0' && second <= '9');
        return firstOk && secondOk && (line.length() == 2 || line.charAt(2) == ' ');
    }

    /** Removes trailing whitespace while keeping the leading indent intact. */
    private static String stripTrailing(String line) {
        int end = line.length();
        while (end > 0 && Character.isWhitespace(line.charAt(end - 1))) {
            end--;
        }
        return line.substring(0, end);
    }

    /**
     * Parses the file and appends its records to this instance.
     *
     * @param filepath path of a UTF-8 file, with or without BOM
     * @throws IOException if the file cannot be read
     */
    public void read(String filepath) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filepath), "UTF-8"))) {
            Map<String, List<String>> currentRecord = new LinkedHashMap<>();
            String currentTag = null;
            boolean firstLine = true;
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // The UTF-8 decoder passes the BOM through as U+FEFF.
                    if (rawLine.startsWith("\uFEFF")) {
                        rawLine = rawLine.substring(1);
                    }
                }
                // Trim only the right side: the leading indent is what marks
                // a continuation line, so it must survive until tag detection.
                String line = stripTrailing(rawLine);
                if (line.equals("ER") || line.equals("EF")) {
                    // EF also flushes so a file missing its last ER is kept.
                    if (!currentRecord.isEmpty()) {
                        records.add(currentRecord);
                    }
                    currentRecord = new LinkedHashMap<>();
                    currentTag = null;
                } else if (isTagLine(line)) {
                    String tag = line.substring(0, 2);
                    if (tag.equals("FN") || tag.equals("VR")) {
                        // File header, re-created by write(); not a record field.
                        currentTag = null;
                        continue;
                    }
                    currentTag = tag;
                    List<String> values = currentRecord.computeIfAbsent(currentTag, k -> new ArrayList<>());
                    String value = line.length() > 3 ? line.substring(3).trim() : "";
                    if (!value.isEmpty()) {
                        values.add(value);
                    }
                } else if (currentTag != null && !line.isEmpty()) {
                    currentRecord.get(currentTag).add(line.trim());
                }
            }
            if (!currentRecord.isEmpty()) {
                records.add(currentRecord);
            }
        }
    }

    /**
     * Writes all records as UTF-8 with BOM.
     *
     * @param filepath destination path; an existing file is overwritten
     * @throws IOException if the file cannot be written
     */
    public void write(String filepath) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filepath), "UTF-8"))) {
            writer.write("\uFEFF");
            writer.write("FN ISI Export Format\nVR 1.0\n");
            for (Map<String, List<String>> record : records) {
                for (Map.Entry<String, List<String>> entry : record.entrySet()) {
                    String tag = entry.getKey();
                    List<String> values = entry.getValue();
                    if (values.isEmpty()) {
                        // A field without values is a bare tag line.
                        writer.write(tag + "\n");
                        continue;
                    }
                    writer.write(tag + " " + values.get(0) + "\n");
                    for (int i = 1; i < values.size(); i++) {
                        writer.write(CONTINUATION_PREFIX + values.get(i) + "\n");
                    }
                }
                writer.write("ER\n");
            }
            writer.write("EF\n");
        }
    }

    /** Prints every record's tags and space-joined values to standard output. */
    public void printProperties() {
        for (int i = 0; i < records.size(); i++) {
            System.out.println("Record " + (i + 1) + ":");
            Map<String, List<String>> record = records.get(i);
            for (Map.Entry<String, List<String>> entry : record.entrySet()) {
                System.out.println(entry.getKey() + ": " + String.join(" ", entry.getValue()));
            }
            System.out.println();
        }
    }

    // Example usage:
    // public static void main(String[] args) throws IOException {
    // WOSFile wos = new WOSFile("sample.wos");
    // wos.printProperties();
    // wos.write("output.wos");
    // }
}
6. Javascript class that can open any file of format .WOS and decode read and write and print to console all the properties from the above list
const fs = require('fs'); // For Node.js environment
/**
 * Reader/writer for Web of Science (WoS) tagged export files.
 *
 * `records` is an array of plain objects; each maps a two-character field
 * tag (for example `AU` or `C1`) to the array of value lines of that field.
 */
class WOSFile {
  /**
   * @param {?string} filepath Optional file to load immediately.
   */
  constructor(filepath = null) {
    this.records = [];
    if (filepath) {
      this.read(filepath);
    }
  }

  /**
   * Tells whether a right-trimmed line starts a new field: an uppercase
   * letter, then an uppercase letter or digit, then a space or end of line.
   * @param {string} line
   * @returns {boolean}
   */
  static isTagLine(line) {
    return /^[A-Z][A-Z0-9]( |$)/.test(line);
  }

  /**
   * Parses the file synchronously and appends its records to `records`.
   * @param {string} filepath Path of a UTF-8 file, with or without BOM.
   */
  read(filepath) {
    const content = fs.readFileSync(filepath, 'utf8').replace(/^\uFEFF/, '');
    let currentRecord = {};
    let currentTag = null;
    for (const rawLine of content.split('\n')) {
      // Trim only the right side (this also drops a CR): the leading indent
      // marks a continuation line and must survive until tag detection,
      // otherwise "   WU J, 2010" would be read as a field tagged "WU".
      const line = rawLine.replace(/\s+$/, '');
      if (line === 'ER' || line === 'EF') {
        // EF also flushes so a file missing its last ER is not lost.
        if (Object.keys(currentRecord).length > 0) {
          this.records.push(currentRecord);
        }
        currentRecord = {};
        currentTag = null;
      } else if (WOSFile.isTagLine(line)) {
        const tag = line.substring(0, 2);
        if (tag === 'FN' || tag === 'VR') {
          // File header, re-created by write(); not a record field.
          currentTag = null;
          continue;
        }
        currentTag = tag;
        if (!currentRecord[currentTag]) currentRecord[currentTag] = [];
        const value = line.substring(3).trim();
        if (value) currentRecord[currentTag].push(value);
      } else if (currentTag && line) {
        currentRecord[currentTag].push(line.trim());
      }
    }
    if (Object.keys(currentRecord).length > 0) {
      this.records.push(currentRecord);
    }
  }

  /**
   * Writes all records as UTF-8 with BOM.
   * @param {string} filepath Destination path; an existing file is overwritten.
   */
  write(filepath) {
    const continuationPrefix = '   ';
    let output = 'FN ISI Export Format\nVR 1.0\n';
    for (const record of this.records) {
      for (const tag in record) {
        const values = record[tag];
        if (values.length === 0) {
          // A field without values is a bare tag line, never "undefined".
          output += `${tag}\n`;
          continue;
        }
        output += `${tag} ${values[0]}\n`;
        for (let i = 1; i < values.length; i++) {
          output += `${continuationPrefix}${values[i]}\n`;
        }
      }
      output += 'ER\n';
    }
    output += 'EF\n';
    fs.writeFileSync(filepath, '\uFEFF' + output, 'utf8');
  }

  /** Prints every record's tags and space-joined values to the console. */
  printProperties() {
    this.records.forEach((record, index) => {
      console.log(`Record ${index + 1}:`);
      for (const tag in record) {
        console.log(`${tag}: ${record[tag].join(' ')}`);
      }
      console.log('');
    });
  }
}
// Example usage:
// const wos = new WOSFile('sample.wos');
// wos.printProperties();
// wos.write('output.wos');
7. C class that can open any file of format .WOS and decode read and write and print to console all the properties from the above list
Note: C has no classes, so this is implemented in C++ as a class that uses the C++ standard library.
#include <iostream>
#include <fstream>
#include <vector>
#include <map>
#include <string>
#include <codecvt>
#include <locale>
/// Reader/writer for Web of Science (WoS) tagged export files.
///
/// Text is kept as raw UTF-8 bytes in std::string from read to write, so
/// non-ASCII characters pass through unchanged. Fields of a record are kept
/// in the order they appear in the file.
class WOSFile {
private:
    /// One field: its two-character tag and the value lines that belong to it.
    using Field = std::pair<std::string, std::vector<std::string>>;
    /// A record is an ordered list of fields (a sorted map would reorder them).
    using Record = std::vector<Field>;

    std::vector<Record> records;

    /// True when a right-trimmed line starts a new field: an uppercase ASCII
    /// letter, then an uppercase letter or digit, then a space or end of line.
    /// Digits must be accepted so that C1, Z9, J9 and D2 are recognised.
    static bool isTagLine(const std::string& line) {
        if (line.size() < 2) {
            return false;
        }
        const bool firstOk = line[0] >= 'A' && line[0] <= 'Z';
        const bool secondOk = (line[1] >= 'A' && line[1] <= 'Z') ||
                              (line[1] >= '0' && line[1] <= '9');
        return firstOk && secondOk && (line.size() == 2 || line[2] == ' ');
    }

    /// Returns `text` without leading and trailing spaces or tabs.
    static std::string trim(const std::string& text) {
        const std::string::size_type first = text.find_first_not_of(" \t");
        if (first == std::string::npos) {
            return std::string();
        }
        const std::string::size_type last = text.find_last_not_of(" \t");
        return text.substr(first, last - first + 1);
    }

    /// Returns the value list for `tag` in `record`, appending an empty field
    /// at the end of the record when the tag has not been seen yet.
    static std::vector<std::string>& fieldValues(Record& record, const std::string& tag) {
        for (auto& field : record) {
            if (field.first == tag) {
                return field.second;
            }
        }
        record.emplace_back(tag, std::vector<std::string>());
        return record.back().second;
    }

public:
    /// Creates an instance; loads `filepath` when it is not empty.
    WOSFile(const std::string& filepath = "") {
        if (!filepath.empty()) {
            read(filepath);
        }
    }

    /// Parses `filepath` (UTF-8, with or without BOM) and appends its records.
    /// Prints a message to std::cerr and returns when the file cannot be opened.
    void read(const std::string& filepath) {
        std::ifstream file(filepath, std::ios::binary);
        if (!file) {
            std::cerr << "Error opening file" << std::endl;
            return;
        }
        Record currentRecord;
        std::string currentTag;
        std::string line;
        bool firstLine = true;
        while (std::getline(file, line)) {
            if (firstLine) {
                firstLine = false;
                // Drop the three-byte UTF-8 BOM so it cannot hide the first tag.
                if (line.compare(0, 3, "\xEF\xBB\xBF") == 0) {
                    line.erase(0, 3);
                }
            }
            // Right-trim only; the leading indent marks continuation lines.
            line.erase(line.find_last_not_of(" \t\r\n") + 1);
            if (line == "ER" || line == "EF") {
                // EF also flushes so a file missing its last ER is not lost.
                if (!currentRecord.empty()) {
                    records.push_back(currentRecord);
                }
                currentRecord.clear();
                currentTag.clear();
            } else if (isTagLine(line)) {
                const std::string tag = line.substr(0, 2);
                if (tag == "FN" || tag == "VR") {
                    // File header, re-created by write(); not a record field.
                    currentTag.clear();
                    continue;
                }
                currentTag = tag;
                std::vector<std::string>& values = fieldValues(currentRecord, currentTag);
                const std::string value = line.size() > 3 ? trim(line.substr(3)) : std::string();
                if (!value.empty()) {
                    values.push_back(value);
                }
            } else if (!currentTag.empty() && !line.empty()) {
                // Store the value without its indent; write() adds it back.
                fieldValues(currentRecord, currentTag).push_back(trim(line));
            }
        }
        if (!currentRecord.empty()) {
            records.push_back(currentRecord);
        }
    }

    /// Writes all records to `filepath` as UTF-8 with BOM.
    /// Prints a message to std::cerr and returns when the file cannot be opened.
    void write(const std::string& filepath) const {
        std::ofstream file(filepath, std::ios::binary);
        if (!file) {
            std::cerr << "Error opening file" << std::endl;
            return;
        }
        file << "\xEF\xBB\xBF"; // UTF-8 BOM
        file << "FN ISI Export Format\nVR 1.0\n";
        for (const auto& record : records) {
            for (const auto& field : record) {
                const std::vector<std::string>& values = field.second;
                if (values.empty()) {
                    // A field without values is a bare tag line.
                    file << field.first << "\n";
                    continue;
                }
                file << field.first << " " << values[0] << "\n";
                for (size_t i = 1; i < values.size(); ++i) {
                    // Continuation lines are indented by three spaces.
                    file << "   " << values[i] << "\n";
                }
            }
            file << "ER\n";
        }
        file << "EF\n";
    }

    /// Prints every record's tags and values to standard output.
    void printProperties() const {
        for (size_t i = 0; i < records.size(); ++i) {
            std::cout << "Record " << (i + 1) << ":" << std::endl;
            for (const auto& field : records[i]) {
                std::cout << field.first << ": ";
                for (const auto& val : field.second) {
                    std::cout << val << " ";
                }
                std::cout << std::endl;
            }
            std::cout << std::endl;
        }
    }
};
// Example usage:
// int main() {
// WOSFile wos("sample.wos");
// wos.printProperties();
// wos.write("output.wos");
// return 0;
// }