Task 661: .SHAR File Format

Task 661: .SHAR File Format

1. Properties of the .SHAR File Format Intrinsic to Its Structure

The .SHAR (shell archive) format is a text-based, self-extracting archive consisting of a Bourne shell script that recreates archived files upon execution. It does not constitute a traditional file system but embeds file system-like metadata and contents within shell commands. Based on the GNU sharutils specification (the de facto standard implementation), the intrinsic properties are as follows:

  • Self-extracting executable: The archive is a valid shell script beginning with #!/bin/sh, executable via /bin/sh to extract files without additional tools.
  • Text-based encoding: All contents are ASCII text, with binary files uuencoded (or compressed then uuencoded) to ensure safe transmission.
  • File names and paths: Full or basename-only paths for archived files and directories, preserved via echo commands and directory creation (mkdir).
  • File permissions (modes): Octal modes (e.g., 644, 755) restored via chmod commands for each file.
  • Modification timestamps: File timestamps restored via touch -am -t commands, preserving access and modification times (directories excluded).
  • File contents: Direct inclusion for text files; uuencoding for binaries; optional gzip or compress followed by uuencoding; content lines may be prefixed with 'X', which sed strips again on extraction, so that the lines are not altered or misinterpreted in transit.
  • Section delimiters: Files delimited by unique strings (default SHAR_EOF for content, SHAR_MD5_EOF for MD5) in heredoc (<<'DELIM') constructs.
  • Integrity checks (character count): Post-extraction length verification via wc -c commands.
  • Integrity checks (MD5 checksum): MD5 digests (per RFC 1321) are generated by default, but can be disabled, and are used for content validation via md5sum.
  • Directory structure: Recursive recreation of input directories via scanning and mkdir -p commands.
  • Overwrite protection: Default checks for existing files before extraction, with options for forcing or querying.
  • Archive metadata: Optional headers including archive name, submitter email, and contents list.
  • Split support: Archives may be segmented into numbered parts (e.g., archive.01) with size limits, requiring sequential extraction.
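  • Exit handling: Ends with exit 0, so the shell stops at the end of the archive and does not execute any text that follows it (for example, further archives concatenated into the same file).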

These properties ensure portability across Unix-like systems but vary slightly by implementation (e.g., GNU vs. POSIX shar).

3. Ghost Blog Embedded HTML JavaScript for Drag-and-Drop .SHAR Property Dump

The following is a self-contained HTML snippet embeddable in a Ghost blog post (e.g., via the HTML card). It creates a drag-and-drop zone that reads a dropped .SHAR file as text, parses its structure (assuming GNU shar variant with SHAR_EOF delimiter and 'X' prefixes), extracts properties, and displays them in a formatted output area.

Drag and drop a .SHAR file here to analyze its properties.

4. Python Class for .SHAR Handling

The following Python class opens a .SHAR file, parses its properties (using a line-based scanner for GNU shar structure), prints them to the console, and includes a basic write method to create a simple .SHAR from a list of text files (limited to text mode without encoding for conciseness).

import os
import re
import shlex
import time

class SharArchive:
    """Read and write simple, text-only shell archives (.shar).

    The reader is a line-based scanner. An entry starts at an
    ``echo x - NAME (TYPE)`` banner (quoted or unquoted), its body is the
    heredoc fed to a ``sed`` command, and its ``chmod`` / ``touch`` /
    ``md5sum`` lines may appear either between the banner and the ``sed``
    line or after the heredoc, up to the next banner.
    """

    _DEFAULT_DELIM = 'SHAR_EOF'
    # "NAME (tag)" -> NAME, tag
    _TAG_RE = re.compile(r'^(.*\S)\s+\((\w+)\)$')
    _CHMOD_RE = re.compile(r'\bchmod\s+([0-7]+)\b')
    _TOUCH_RE = re.compile(r'\btouch -am -t (\S+)')
    _SED_RE = re.compile(r'\bsed\b')
    # Heredoc operator with an optionally quoted/escaped delimiter word.
    _HEREDOC_RE = re.compile(r"<<-?\s*\\?['\"]?(\w+)")
    _MD5_RE = re.compile(r'\b([a-f0-9]{32})\b')

    def __init__(self, filename):
        """Load *filename* into memory as a list of lines (newlines kept)."""
        self.filename = filename
        with open(filename, 'r') as f:
            self.lines = f.readlines()

    @classmethod
    def _parse_header(cls, line):
        """Return ``(name, type)`` if *line* is an entry banner, else None."""
        text = line.strip()
        if not text.startswith('echo'):
            return None
        try:
            # Shell-style splitting undoes the quoting applied by write().
            words = shlex.split(text)
            quoted_ok = True
        except ValueError:
            # Unbalanced quotes: fall back to plain whitespace splitting.
            words = text.split()
            quoted_ok = False
        if not words or words[0] != 'echo':
            return None
        banner = ' '.join(words[1:])
        if not banner.startswith('x - '):
            return None
        payload = banner[len('x - '):].strip()
        if not quoted_ok:
            payload = payload.strip(" '\"")

        file_type = 'binary'
        tagged = cls._TAG_RE.match(payload)
        if tagged:
            # Only an explicit "(text)" tag marks the entry as text; matching
            # the word anywhere would misclassify names such as "context.bin".
            payload = tagged.group(1)
            if tagged.group(2) == 'text':
                file_type = 'text'
        return (payload or 'unknown'), file_type

    @classmethod
    def _scan_attrs(cls, lines, index, attrs):
        """Update *attrs* from any chmod/touch/md5sum found at ``lines[index]``."""
        line = lines[index]
        hit = cls._CHMOD_RE.search(line)
        if hit:
            attrs['permissions'] = hit.group(1)
        hit = cls._TOUCH_RE.search(line)
        if hit:
            attrs['timestamp'] = hit.group(1)
        if attrs['md5'] is None and 'md5sum' in line.lower():
            # The digest is either on the md5sum line or on the line after it.
            for candidate in lines[index:index + 2]:
                hit = cls._MD5_RE.search(candidate)
                if hit:
                    attrs['md5'] = hit.group(1)
                    break

    def parse(self):
        """Scan the loaded lines and return ``{'files': [entry, ...]}``.

        Each entry is a dict with the keys ``name``, ``type``,
        ``permissions`` (default ``'644'``), ``timestamp`` (the ``touch -t``
        argument or None), ``length`` (characters of extracted content,
        newlines included) and ``md5`` (hex digest or None).
        """
        lines = self.lines
        total = len(lines)
        entries = []
        i = 0
        while i < total:
            header = self._parse_header(lines[i])
            if header is None:
                i += 1
                continue
            name, file_type = header
            attrs = {'permissions': '644', 'timestamp': None, 'md5': None}

            # Preamble: everything between the banner and the sed line.
            j = i + 1
            while (j < total
                   and not self._SED_RE.search(lines[j])
                   and self._parse_header(lines[j]) is None):
                self._scan_attrs(lines, j, attrs)
                j += 1

            length = 0
            k = j
            if j < total and self._SED_RE.search(lines[j]):
                heredoc = self._HEREDOC_RE.search(lines[j])
                delim = heredoc.group(1) if heredoc else self._DEFAULT_DELIM
                k = j + 1
                while k < total and lines[k].rstrip('\r\n') != delim:
                    raw = lines[k]
                    # sed 's/^X//' removes one *leading* X only.
                    length += len(raw) - 1 if raw.startswith('X') else len(raw)
                    k += 1
                k += 1  # step over the delimiter line

                # Trailer: attributes emitted after the body, bounded by the
                # next banner so a later entry's values are never picked up.
                while k < total and self._parse_header(lines[k]) is None:
                    self._scan_attrs(lines, k, attrs)
                    k += 1

            entries.append({
                'name': name,
                'type': file_type,
                'permissions': attrs['permissions'],
                'timestamp': attrs['timestamp'],
                'length': length,
                'md5': attrs['md5'],
            })
            i = k
        return {'files': entries}

    def print_properties(self):
        """Parse the archive and print one summary block per entry."""
        props = self.parse()
        print("Archive Properties:")
        print(f"Number of files: {len(props['files'])}")
        for f in props['files']:
            print(f"File: {f['name']}")
            print(f"  Type: {f['type']}")
            print(f"  Permissions: {f['permissions']}")
            print(f"  Timestamp: {f['timestamp'] or 'N/A'}")
            print(f"  Length: {f['length']}")
            print(f"  MD5: {f['md5'] or 'N/A'}")
            print()

    @staticmethod
    def write(files_list, output_filename):
        """Write a text-only shell archive containing *files_list*.

        Paths that are not regular files are skipped. Files are stored under
        their base name, every content line is prefixed with ``X``, and a
        final newline is added when the source lacks one so the heredoc
        delimiter always sits on its own line. No binary encoding is done.
        """
        # newline='\n' keeps the script LF-terminated on every platform.
        with open(output_filename, 'w', newline='\n') as out:
            out.write("#!/bin/sh\n")
            out.write("# Simple SHAR archive.\n\n")
            for file_path in files_list:
                if not os.path.isfile(file_path):
                    continue
                filename = os.path.basename(file_path)
                target = shlex.quote(filename)
                info = os.stat(file_path)
                perm = format(info.st_mode & 0o777, '03o')
                mtime = time.strftime('%Y%m%d%H%M.%S',
                                      time.localtime(info.st_mtime))
                # newline='\n' splits on LF only, so other separator
                # characters inside a line are preserved verbatim.
                with open(file_path, 'r', newline='\n') as src:
                    body = src.readlines()

                banner = shlex.quote(f"x - {filename} (text)")
                out.write(f"echo {banner}\n")
                out.write(f"sed 's/^X//' <<'SHAR_EOF' >{target}\n")
                for line in body:
                    out.write('X' + line)
                if body and not body[-1].endswith('\n'):
                    out.write('\n')
                out.write("SHAR_EOF\n")
                # Mode and times are restored after extraction; running them
                # before sed would act on a file that does not exist yet.
                out.write(f"chmod {perm} {target}\n")
                out.write(f"touch -am -t {mtime} {target}\n")
                failed = shlex.quote(f"restore of {filename} failed")
                out.write(f"test -r {target} || echo {failed}\n")
                out.write(f"wc -c <{target}\n")
                out.write("echo 'done'\n\n")
            out.write("exit 0\n")

# Example usage:
# archive = SharArchive('example.shar')
# archive.print_properties()
# SharArchive.write(['file1.txt', 'file2.txt'], 'new.shar')

5. Java Class for .SHAR Handling

The following Java class uses BufferedReader to open and parse a .SHAR file, printing properties to the console. The write method creates a basic .SHAR from text files (text mode only). Compile with javac SharArchive.java and run with java SharArchive <filename>.

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class SharArchive {
    private List<String> lines;
    private List<FileProperties> files = new ArrayList<>();

    public SharArchive(String filename) throws IOException {
        lines = new ArrayList<>();
        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = br.readLine()) != null) {
                lines.add(line);
            }
        }
    }

    public void parse() {
        int i = 0;
        while (i < lines.size()) {
            String line = lines.get(i).trim();
            if (line.startsWith("echo x - ")) {
                String[] parts = line.split(" ", 4);
                String filename = (parts.length > 3) ? parts[3].replaceAll("[\"']", "") : "unknown";
                String type = line.contains("text") ? "text" : "binary";

                // Find chmod
                String perm = "644";
                int j = i + 1;
                while (j < lines.size() && !lines.get(j).contains("sed")) {
                    Pattern p = Pattern.compile("chmod\\s+(\\d+)");
                    Matcher m = p.matcher(lines.get(j));
                    if (m.find()) perm = m.group(1);
                    j++;
                }

                // Find touch
                String timestamp = null;
                while (j < lines.size() && !lines.get(j).contains("sed")) {
                    Pattern p = Pattern.compile("touch -am -t (\\S+)");
                    Matcher m = p.matcher(lines.get(j));
                    if (m.find()) timestamp = m.group(1);
                    j++;
                }

                // Find content length (approximate)
                String delim = "SHAR_EOF";
                List<String> content = new ArrayList<>();
                int k = j + 1;
                while (k < lines.size()) {
                    if (lines.get(k).trim().equals(delim)) break;
                    content.add(lines.get(k).replaceFirst("^X", ""));
                    k++;
                }
                int length = content.stream().mapToInt(String::length).sum() + content.size() - 1; // Approx with newlines

                // Find MD5
                String md5 = null;
                int m = k + 1;
                while (m < lines.size() && !lines.get(m).contains("md5sum")) {
                    m++;
                }
                if (m + 1 < lines.size()) {
                    Pattern p = Pattern.compile("([a-f0-9]{32})");
                    Matcher matcher = p.matcher(lines.get(m + 1));
                    if (matcher.find()) md5 = matcher.group(1);
                }

                files.add(new FileProperties(filename, type, perm, timestamp, length, md5));
                i = k + 1;
            } else {
                i++;
            }
        }
    }

    public void printProperties() {
        parse();
        System.out.println("Archive Properties:");
        System.out.println("Number of files: " + files.size());
        for (FileProperties f : files) {
            System.out.println("File: " + f.name);
            System.out.println("  Type: " + f.type);
            System.out.println("  Permissions: " + f.permissions);
            System.out.println("  Timestamp: " + (f.timestamp != null ? f.timestamp : "N/A"));
            System.out.println("  Length: " + f.length);
            System.out.println("  MD5: " + (f.md5 != null ? f.md5 : "N/A"));
            System.out.println();
        }
    }

    public static void write(List<String> filePaths, String outputFilename) throws IOException {
        try (PrintWriter out = new PrintWriter(new FileWriter(outputFilename))) {
            out.println("#!/bin/sh");
            out.println("# Simple SHAR archive.");
            out.println();
            for (String filePath : filePaths) {
                Path path = Path.of(filePath);
                if (Files.exists(path)) {
                    String filename = path.getFileName().toString();
                    String perm = String.format("%03o", Files.getAttribute(path, "posix:permissions").hashCode() % 1000); // Approx
                    BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class);
                    FileTime mtime = attrs.lastModifiedTime();
                    String timestamp = mtime.toString().replaceAll("[-:T.]", "").substring(0, 12); // YYYYMMDDhhmm
                    List<String> content = Files.readAllLines(path);
                    out.println("echo 'x - " + filename + " (text)'");
                    out.println("chmod " + perm + " " + filename);
                    out.println("touch -am -t " + timestamp + " " + filename);
                    out.println("sed 's/^X//' <<'SHAR_EOF' >" + filename);
                    for (String ln : content) {
                        out.println("X" + ln);
                    }
                    out.println("SHAR_EOF");
                    out.println("test -r " + filename + " || echo 'restore of " + filename + " failed'");
                    out.println("wc -c <'" + filename + "'");
                    out.println("echo 'done'");
                    out.println();
                }
            }
            out.println("exit 0");
        }
    }

    private static class FileProperties {
        String name, type, permissions, timestamp, md5;
        int length;

        FileProperties(String name, String type, String permissions, String timestamp, int length, String md5) {
            this.name = name;
            this.type = type;
            this.permissions = permissions;
            this.timestamp = timestamp;
            this.length = length;
            this.md5 = md5;
        }
    }

    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.out.println("Usage: java SharArchive <sharfile>");
            return;
        }
        SharArchive archive = new SharArchive(args[0]);
        archive.printProperties();
        // Example write: write(List.of("file1.txt"), "new.shar");
    }
}

6. JavaScript Class for .SHAR Handling (Node.js)

The following Node.js class uses fs to read a .SHAR file, parse properties, and print to console. The write method creates a basic .SHAR from text files. Run with node shar.js <filename> (save as shar.js).

const fs = require('fs');
const path = require('path');

/**
 * Reader and writer for simple, text-only shell archives (.shar).
 *
 * The reader is a line-based scanner: an entry starts at an
 * `echo x - NAME (TYPE)` banner (quoted or unquoted), its body is the heredoc
 * fed to a `sed` command, and its chmod / touch / md5sum lines may appear
 * either before the sed line or after the heredoc, up to the next banner.
 */
class SharArchive {
  /**
   * Load the archive into memory as an array of lines (terminators removed).
   * @param {string} filename path of the .shar file
   */
  constructor(filename) {
    this.filename = filename;
    this.lines = fs.readFileSync(filename, 'utf8').split(/\r?\n/);
  }

  /**
   * Recognise an entry banner.
   * @param {string} line
   * @returns {{name: string, type: string} | null}
   */
  static _parseHeader(line) {
    const hit = /^echo\s+(['"]?)x - (.*?)\1\s*$/.exec(line.trim());
    if (!hit) return null;
    let name = hit[2].replace(/["']/g, '').trim();
    let type = 'binary';
    const tag = /^(.*\S)\s+\((\w+)\)$/.exec(name);
    if (tag) {
      // Only an explicit "(text)" tag marks the entry as text; matching the
      // word anywhere would misclassify names such as "context.bin".
      name = tag[1];
      if (tag[2] === 'text') type = 'text';
    }
    return { name: name || 'unknown', type };
  }

  /**
   * Scan the loaded lines.
   * @returns {{files: Array<{name: string, type: string, permissions: string,
   *   timestamp: (string|null), length: number, md5: (string|null)}>}}
   *   `length` counts the characters of the extracted content, newlines included.
   */
  parse() {
    const CHMOD = /\bchmod\s+([0-7]+)\b/;
    const TOUCH = /\btouch -am -t (\S+)/;
    const SED = /\bsed\b/;
    const HEREDOC = /<<-?\s*\\?['"]?(\w+)/;
    const MD5 = /\b([a-f0-9]{32})\b/;

    const lines = this.lines;
    const total = lines.length;
    const isHeader = (idx) => SharArchive._parseHeader(lines[idx]) !== null;

    // Update attrs from any chmod/touch/md5sum found on line idx.
    const scan = (idx, attrs) => {
      const line = lines[idx];
      let hit = CHMOD.exec(line);
      if (hit) attrs.permissions = hit[1];
      hit = TOUCH.exec(line);
      if (hit) attrs.timestamp = hit[1];
      if (attrs.md5 === null && line.toLowerCase().includes('md5sum')) {
        // The digest is either on the md5sum line or on the line after it.
        for (const candidate of lines.slice(idx, idx + 2)) {
          const digest = MD5.exec(candidate);
          if (digest) {
            attrs.md5 = digest[1];
            break;
          }
        }
      }
    };

    const files = [];
    let i = 0;
    while (i < total) {
      const header = SharArchive._parseHeader(lines[i]);
      if (header === null) {
        i++;
        continue;
      }
      const attrs = { permissions: '644', timestamp: null, md5: null };

      // Preamble: lines between the banner and the sed line.
      let j = i + 1;
      while (j < total && !SED.test(lines[j]) && !isHeader(j)) {
        scan(j, attrs);
        j++;
      }

      let length = 0;
      let k = j;
      if (j < total && SED.test(lines[j])) {
        const heredoc = HEREDOC.exec(lines[j]);
        const delim = heredoc ? heredoc[1] : 'SHAR_EOF';
        k = j + 1;
        while (k < total && lines[k] !== delim) {
          const raw = lines[k];
          // One leading X is stripped by sed; +1 for the removed newline.
          length += (raw.startsWith('X') ? raw.length - 1 : raw.length) + 1;
          k++;
        }
        k++; // step over the delimiter line

        // Trailer, bounded by the next banner.
        while (k < total && !isHeader(k)) {
          scan(k, attrs);
          k++;
        }
      }

      files.push({
        name: header.name,
        type: header.type,
        permissions: attrs.permissions,
        timestamp: attrs.timestamp,
        length,
        md5: attrs.md5
      });
      i = k;
    }
    return { files };
  }

  /** Parse the archive and print one summary block per entry. */
  printProperties() {
    let props = this.parse();
    console.log('Archive Properties:');
    console.log(`Number of files: ${props.files.length}`);
    props.files.forEach(f => {
      console.log(`File: ${f.name}`);
      console.log(`  Type: ${f.type}`);
      console.log(`  Permissions: ${f.permissions}`);
      console.log(`  Timestamp: ${f.timestamp || 'N/A'}`);
      console.log(`  Length: ${f.length}`);
      console.log(`  MD5: ${f.md5 || 'N/A'}`);
      console.log('');
    });
  }

  /**
   * Write a text-only shell archive containing `filesList`.
   *
   * Paths that are not regular files are skipped. Files are stored under
   * their base name and every content line is prefixed with `X`.
   * @param {string[]} filesList files to archive
   * @param {string} outputFilename archive to create
   */
  static write(filesList, outputFilename) {
    // Wrap s in single quotes so /bin/sh treats it as one literal word.
    const shq = (s) => "'" + s.replace(/'/g, "'\\''") + "'";
    const two = (n) => String(n).padStart(2, '0');

    const chunks = ['#!/bin/sh\n', '# Simple SHAR archive.\n\n'];
    filesList.forEach(filePath => {
      if (!fs.existsSync(filePath)) return;
      const stat = fs.statSync(filePath);
      if (!stat.isFile()) return;

      const filename = path.basename(filePath);
      const target = shq(filename);
      const perm = (stat.mode & 0o777).toString(8).padStart(3, '0');
      // touch -t interprets its argument as local time, so the stamp is
      // built from local-time getters rather than the UTC ISO string.
      const t = stat.mtime;
      const mtime = String(t.getFullYear()).padStart(4, '0') + two(t.getMonth() + 1) +
        two(t.getDate()) + two(t.getHours()) + two(t.getMinutes()) + '.' + two(t.getSeconds());

      const body = fs.readFileSync(filePath, 'utf8').split('\n');
      // A trailing newline yields one empty final element; it is not a line.
      if (body[body.length - 1] === '') body.pop();

      chunks.push(`echo ${shq(`x - ${filename} (text)`)}\n`);
      chunks.push(`sed 's/^X//' <<'SHAR_EOF' >${target}\n`);
      body.forEach(ln => chunks.push(`X${ln}\n`));
      chunks.push('SHAR_EOF\n');
      // Mode and times are restored after extraction; before sed runs the
      // file does not exist yet.
      chunks.push(`chmod ${perm} ${target}\n`);
      chunks.push(`touch -am -t ${mtime} ${target}\n`);
      chunks.push(`test -r ${target} || echo ${shq(`restore of ${filename} failed`)}\n`);
      chunks.push(`wc -c <${target}\n`);
      chunks.push("echo 'done'\n\n");
    });
    chunks.push('exit 0\n');
    fs.writeFileSync(outputFilename, chunks.join(''));
  }
}

// Example usage:
// const archive = new SharArchive('example.shar');
// archive.printProperties();
// SharArchive.write(['file1.txt'], 'new.shar');

// CLI entry point: when this file is run directly with a path argument,
// print the properties of that archive.
if (require.main === module) {
  const sharPath = process.argv.length > 2 ? process.argv[2] : undefined;
  if (sharPath !== undefined) {
    new SharArchive(sharPath).printProperties();
  }
}

7. C Implementation for .SHAR Handling

C lacks classes, so the following provides a struct-based API with functions to open, parse, read properties (print to stdout), and write a basic .SHAR. Compile with gcc -o shar shar.c and run ./shar <filename>. Limited to POSIX systems for file stats.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#include <regex.h>
#include <dirent.h> // Not used, but for completeness

#define MAX_LINES 100000
#define MAX_LINE_LEN 1024
#define MAX_FILES 1000

// Metadata recorded for one archived file; filled in by shar_parse().
typedef struct {
    char name[256];       // file name taken from the "echo x - ..." line
    char type[16];        // "text" or "binary"
    char permissions[4];  // three octal digits plus NUL; "644" if no chmod was found
    char timestamp[16];   // argument of "touch -am -t"; empty string if none was found
    int length;           // content size computed from the heredoc body lines
    char md5[33];         // 32 hex digits plus NUL; empty string if none was found
} FileProperties;

// In-memory archive: the raw lines loaded by shar_open() plus the entries
// extracted from them by shar_parse().
typedef struct {
    char **lines;                     // heap array of heap-allocated line strings (freed by shar_close)
    int line_count;                   // number of valid elements in lines
    FileProperties files[MAX_FILES];  // parsed entries; only the first file_count are valid
    int file_count;                   // number of valid elements in files
} SharArchive;

/**
 * Load a .shar file into a newly allocated SharArchive.
 *
 * At most MAX_LINES lines are kept; physical lines longer than
 * MAX_LINE_LEN - 1 characters are stored as several consecutive chunks.
 *
 * Returns the archive (release it with shar_close()), or NULL when the file
 * cannot be opened or memory runs out.
 */
SharArchive *shar_open(const char *filename) {
    if (!filename) return NULL;
    FILE *f = fopen(filename, "r");
    if (!f) return NULL;

    SharArchive *arch = malloc(sizeof(SharArchive));
    char **lines = malloc(MAX_LINES * sizeof(char *));
    if (!arch || !lines) {
        free(lines);
        free(arch);
        fclose(f);
        return NULL;
    }
    arch->lines = lines;
    arch->line_count = 0;
    arch->file_count = 0;

    char buf[MAX_LINE_LEN];
    // Check the capacity first so no line is read and then thrown away.
    while (arch->line_count < MAX_LINES && fgets(buf, sizeof(buf), f)) {
        char *copy = strdup(buf);
        if (!copy) {
            // A NULL entry would crash every later string scan; give up cleanly.
            while (arch->line_count > 0) {
                free(arch->lines[--arch->line_count]);
            }
            free(arch->lines);
            free(arch);
            fclose(f);
            return NULL;
        }
        arch->lines[arch->line_count++] = copy;
    }
    fclose(f);
    return arch;
}

void shar_parse(SharArchive *arch) {
    int i = 0;
    regex_t regex_chmod, regex_touch, regex_md5;
    regcomp(&regex_chmod, "chmod\\s+(\\d+)", REG_EXTENDED);
    regcomp(&regex_touch, "touch -am -t (\\S+)", REG_EXTENDED);
    regcomp(&regex_md5, "([a-f0-9]{32})", REG_EXTENDED);
    while (i < arch->line_count) {
        char *line = arch->lines[i];
        if (strncmp(line, "echo x - ", 9) == 0) {
            char *filename_start = strstr(line, " - ") + 3;
            char filename[256] = "unknown";
            sscanf(filename_start, "%255s", filename);
            char *quote = strchr(filename, '\'');
            if (quote) *quote = '\0';
            strcpy(arch->files[arch->file_count].name, filename);
            strcpy(arch->files[arch->file_count].type, strstr(line, "text") ? "text" : "binary");

            // Find chmod
            char perm[4] = "644";
            int j = i + 1;
            while (j < arch->line_count && strstr(arch->lines[j], "sed") == NULL) {
                regmatch_t matches[2];
                if (regexec(&regex_chmod, arch->lines[j], 2, matches, 0) == 0) {
                    strncpy(perm, arch->lines[j] + matches[1].rm_so, 3);
                    perm[3] = '\0';
                }
                j++;
            }
            strcpy(arch->files[arch->file_count].permissions, perm);

            // Find touch
            char ts[16] = {0};
            while (j < arch->line_count && strstr(arch->lines[j], "sed") == NULL) {
                regmatch_t matches[2];
                if (regexec(&regex_touch, arch->lines[j], 2, matches, 0) == 0) {
                    strncpy(ts, arch->lines[j] + matches[1].rm_so, 14);
                    ts[15] = '\0';
                }
                j++;
            }
            strcpy(arch->files[arch->file_count].timestamp, ts);

            // Content length (approx)
            const char *delim = "SHAR_EOF";
            int content_len = 0;
            int k = j + 1;
            while (k < arch->line_count) {
                if (strstr(arch->lines[k], delim) != NULL) break;
                if (arch->lines[k][0] == 'X') content_len += strlen(arch->lines[k]) - 1;
                else content_len += strlen(arch->lines[k]);
                k++;
            }
            arch->files[arch->file_count].length = content_len;

            // Find MD5
            char md5[33] = {0};
            int m = k + 1;
            while (m < arch->line_count && strstr(arch->lines[m], "md5sum") == NULL) m++;
            if (m + 1 < arch->line_count) {
                regmatch_t matches[2];
                if (regexec(&regex_md5, arch->lines[m + 1], 2, matches, 0) == 0) {
                    strncpy(md5, arch->lines[m + 1] + matches[1].rm_so, 32);
                }
            }
            strcpy(arch->files[arch->file_count].md5, md5);

            arch->file_count++;
            i = k + 1;
        } else {
            i++;
        }
    }
    regfree(&regex_chmod);
    regfree(&regex_touch);
    regfree(&regex_md5);
}

// Parses the archive, then writes a summary of every entry to stdout.
// Empty timestamp / MD5 strings are shown as "N/A".
void shar_print_properties(SharArchive *arch) {
    shar_parse(arch);

    puts("Archive Properties:");
    printf("Number of files: %d\n", arch->file_count);

    int idx = 0;
    while (idx < arch->file_count) {
        const FileProperties *entry = &arch->files[idx];
        const char *stamp = (entry->timestamp[0] != '\0') ? entry->timestamp : "N/A";
        const char *digest = (entry->md5[0] != '\0') ? entry->md5 : "N/A";

        printf("File: %s\n", entry->name);
        printf("  Type: %s\n", entry->type);
        printf("  Permissions: %s\n", entry->permissions);
        printf("  Timestamp: %s\n", stamp);
        printf("  Length: %d\n", entry->length);
        printf("  MD5: %s\n", digest);
        putchar('\n');

        idx++;
    }
}

// Writes s with every single quote replaced by '\'' so the result can sit
// inside a single-quoted shell word.
static void shar_put_escaped(FILE *out, const char *s) {
    for (; *s; s++) {
        if (*s == '\'') fputs("'\\''", out);
        else fputc(*s, out);
    }
}

// Writes s as one single-quoted shell word.
static void shar_put_quoted(FILE *out, const char *s) {
    fputc('\'', out);
    shar_put_escaped(out, s);
    fputc('\'', out);
}

/**
 * Write a text-only shell archive containing the given files.
 *
 * filepaths       array of num_files input paths
 * output_filename archive to create
 *
 * Inputs that are not readable regular files are skipped. Files are stored
 * under their base name, every content line is prefixed with 'X', and a final
 * newline is added when the source lacks one so the heredoc delimiter always
 * sits on its own line. Returns silently if the output cannot be created.
 */
void shar_write(const char **filepaths, int num_files, const char *output_filename) {
    if (!filepaths || !output_filename) return;
    FILE *out = fopen(output_filename, "w");
    if (!out) return;
    fprintf(out, "#!/bin/sh\n");
    fprintf(out, "# Simple SHAR archive.\n\n");
    for (int idx = 0; idx < num_files; idx++) {
        const char *file_path = filepaths[idx];
        struct stat st;
        if (!file_path || stat(file_path, &st) != 0 || !S_ISREG(st.st_mode)) continue;
        FILE *inf = fopen(file_path, "r");
        if (!inf) continue;

        // A path without '/' is already a base name.
        const char *slash = strrchr(file_path, '/');
        const char *filename = slash ? slash + 1 : file_path;

        char perm[8];
        snprintf(perm, sizeof(perm), "%03o", (unsigned)(st.st_mode & 0777));

        char timestamp[16] = "";
        struct tm *tm_info = localtime(&st.st_mtime);
        if (!tm_info || strftime(timestamp, sizeof(timestamp), "%Y%m%d%H%M.%S", tm_info) == 0) {
            timestamp[0] = '\0';
        }

        fputs("echo 'x - ", out);
        shar_put_escaped(out, filename);
        fputs(" (text)'\n", out);
        fputs("sed 's/^X//' <<'SHAR_EOF' >", out);
        shar_put_quoted(out, filename);
        fputc('\n', out);

        // fgets may return a long line in several chunks; the X prefix is
        // written only in front of a chunk that begins a line.
        char buf[1024];
        int line_start = 1;
        while (fgets(buf, sizeof(buf), inf)) {
            size_t len = strlen(buf);
            if (line_start) fputc('X', out);
            fputs(buf, out);
            line_start = (len > 0 && buf[len - 1] == '\n');
        }
        fclose(inf);
        if (!line_start) fputc('\n', out);
        fputs("SHAR_EOF\n", out);

        // Mode and times are restored after extraction; before sed runs the
        // file does not exist yet.
        fprintf(out, "chmod %s ", perm);
        shar_put_quoted(out, filename);
        fputc('\n', out);
        if (timestamp[0] != '\0') {
            fprintf(out, "touch -am -t %s ", timestamp);
            shar_put_quoted(out, filename);
            fputc('\n', out);
        }
        fputs("test -r ", out);
        shar_put_quoted(out, filename);
        fputs(" || echo 'restore of ", out);
        shar_put_escaped(out, filename);
        fputs(" failed'\n", out);
        fputs("wc -c <", out);
        shar_put_quoted(out, filename);
        fputc('\n', out);
        fputs("echo 'done'\n\n", out);
    }
    fprintf(out, "exit 0\n");
    fclose(out);
}

// Releases every stored line, the line table and the archive itself.
void shar_close(SharArchive *arch) {
    char **table = arch->lines;
    int remaining = arch->line_count;

    while (remaining > 0) {
        remaining--;
        free(table[remaining]);
    }
    free(table);
    free(arch);
}

// CLI entry point: prints the properties of the archive named by argv[1].
// Returns 0 on success, 1 on a usage error or when the archive cannot be loaded.
int main(int argc, char **argv) {
    if (argc < 2) {
        printf("Usage: %s <sharfile>\n", argv[0]);
        return 1;
    }
    SharArchive *arch = shar_open(argv[1]);
    if (!arch) {
        // Report the failure instead of exiting silently with status 0.
        fprintf(stderr, "%s: cannot open '%s'\n", argv[0], argv[1]);
        return 1;
    }
    shar_print_properties(arch);
    shar_close(arch);
    // Example write: const char *files[] = {"file1.txt"}; shar_write(files, 1, "new.shar");
    return 0;
}