mirror of
https://github.com/8051Enthusiast/regex2fat
synced 2025-01-30 17:24:45 +00:00
Initial commit
This commit is contained in:
commit
99fd700ba6
21
.gitignore
vendored
Normal file
21
.gitignore
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
/target
|
||||
|
||||
# Swap
|
||||
[._]*.s[a-v][a-z]
|
||||
!*.svg # comment out if you don't need vector files
|
||||
[._]*.sw[a-p]
|
||||
[._]s[a-rt-v][a-z]
|
||||
[._]ss[a-gi-z]
|
||||
[._]sw[a-p]
|
||||
|
||||
# Session
|
||||
Session.vim
|
||||
Sessionx.vim
|
||||
|
||||
# Temporary
|
||||
.netrwhist
|
||||
*~
|
||||
# Auto-generated tag files
|
||||
tags
|
||||
# Persistent undo
|
||||
[._]*.un~
|
136
Cargo.lock
generated
Normal file
136
Cargo.lock
generated
Normal file
@ -0,0 +1,136 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"strsim",
|
||||
"textwrap",
|
||||
"unicode-width",
|
||||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "725cf19794cf90aa94e65050cb4191ff5d8fa87a498383774c47b332e3af952e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
|
||||
|
||||
[[package]]
|
||||
name = "regex2fat"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"regex-automata",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
15
Cargo.toml
Normal file
15
Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
||||
[package]
|
||||
name = "regex2fat"
|
||||
description = "Turn your favourite regex into FAT32"
|
||||
version = "0.1.0"
|
||||
authors = ["8051Enthusiast <8051Enthusiast@protonmail.com>"]
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/8051Enthusiast/regex2fat"
|
||||
keywords = ["regex", "fat"]
|
||||
edition = "2018"
|
||||
license = "Unlicense"
|
||||
|
||||
|
||||
[dependencies]
|
||||
regex-automata = "0.1"
|
||||
clap = "2.33"
|
40
README.md
Normal file
40
README.md
Normal file
@ -0,0 +1,40 @@
|
||||
regex2fat
|
||||
=========
|
||||
|
||||
Did you ever want to match a regex, but all you had was a fat32 driver?
|
||||
Ever wanted to serialize your regex DFAs into one of the most widely supported formats used by over 3 billion devices?
|
||||
[Are directory loops your thing?](https://xkcd.com/981/)
|
||||
|
||||
Worry no more, with `regex2fat`, this has become easier than ever before!
|
||||
With just a little `regex2fat '[YOUR] F{4}VOUR{1,7}E (R[^E]G)*EX HERE.' /dev/whatever`, you will have a fat32 regex DFA of your favourite regex.
|
||||
For example, to see whether the string 'Y FFFFVOURRE EX HEREM' would match, just mount it and check if '/Y/SPACE/F/F/F/F/V/O/U/R/R/E/SPACE/E/X/SPACE/H/E/R/E/M/MATCH' exists.
|
||||
|
||||
To run it, you can [install cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) and then run `cargo install regex2fat` (or compile it directly from this repo).
|
||||
If you have the cargo bin directory in your path, you should be able to invoke it as described above.
|
||||
The file created will be a fat32 image, which can probably be mounted or put on a drive in some way, but most likely shouldn't.
|
||||
|
||||
## FAQ
|
||||
### Q: How does this work?
|
||||
A: Regular regexes (i.e. no backreferences and similar advanced features) can be turned into a so-called DFA (deterministic finite automaton).
|
||||
This is basically a bunch of arrows going between states, where an arrow is labeled with a letter so that a letter in a state causes the current state to go along the arrow to another state, with a subset of states being accepting.
|
||||
Yes, I'm bad at explaining, you're better off reading [the wikipedia article on DFAs](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) if you don't know what it is.
|
||||
|
||||
Because I'm lazy, I used [BurntSushi/regex-automata](https://github.com/BurntSushi/regex-automata) to get a DFA from a regex.
|
||||
|
||||
While Fat32 normally has a tree-like structure, each directory just references blocks anywhere on the file system, so the same block can be referenced from multiple directories.
|
||||
The directories also have no explicit field for parent directories, so one can leave `..` out.
|
||||
This allows for graph structures inside a file system, which a DFA basically is.
|
||||
|
||||
### Q: Should I use this <del>in production</del> anywhere?
|
||||
A: No, but I can't stop you.
|
||||
|
||||
### Q: Does this actually work?
|
||||
A: I've tried it on Windows 10 and Linux so far.
|
||||
It seems to work flawlessly on Windows as far as I've tested.
|
||||
|
||||
On Linux, the fat32 code claims a directory is invalid if there are two dentries with the same directory name and the same parent in a loop (or something like that), so some paths are forbidden.
|
||||
|
||||
Might be fun to try on some embedded devices.
|
||||
|
||||
### Q: NOOOOOOOOOOO!!! YOU CAN'T TURN A DFA INTO A FAT32 FILE SYSTEM!!!! YOU CAN'T JUST HAVE A DIRECTORY WITH MULTIPLE PARENTS!!! YOU ARE BREAKING THE ASSUMPTION OF LACK OF LOOPERINOS NOOOOOOOOO
|
||||
A: Haha OS-driven regex engine go brrrrr
|
185
src/fat32.rs
Normal file
185
src/fat32.rs
Normal file
@ -0,0 +1,185 @@
|
||||
use regex_automata::DFA;
|
||||
use std::collections::HashMap;
|
||||
/// Integer type used for cluster numbers and FAT entries; `generate_header`
/// uses its size as the size of one FAT entry.
pub type UFat = u32;
|
||||
/// Size in bytes of one block of the image. `BOOT_SECTOR` declares 512 bytes
/// per sector and one sector per cluster, so block, sector and cluster all
/// have this size.
pub const BLOCK_SIZE: usize = 512;
|
||||
/// Template for the first 90 bytes of the FAT32 boot sector: jump
/// instruction, OEM name, BIOS parameter block and extended boot record.
///
/// The two size fields at offsets 32 and 36 are zero here and are patched in
/// by `generate_header`, which also appends the zero padding and the
/// 0x55 0xAA signature that complete the 512-byte sector.
const BOOT_SECTOR: [u8; 90] = [
    /*  0 */ 0xeb, 0xfe, 0x90, // jump to self (placeholder)
    /*  3 */ 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x20, 0x20, // "regex   " as vendor name
    /* 11 */ 0x00, 0x02, // bytes per sector (512)
    /* 13 */ 0x01, // one sector per cluster, why not
    /* 14 */ 0x08, 0x00, // 8 reserved sectors
    /* 16 */ 0x01, // number of FATs: one (don't really need two)
    /* 17 */ 0x00, 0x00, // root entry count, zero for fat32
    /* 19 */ 0x00, 0x00, // 16-bit total sector count, zero for fat32
    /* 21 */ 0xF8, // pretend to be a non-removable device
    /* 22 */ 0x00, 0x00, // 16-bit FAT size, zero for fat32
    /* 24 */ 0x01, 0x00, // sectors per track: it is the year 2020, no one uses CHS
    /* 26 */ 0x01, 0x00, // heads: but the values are 1 to prevent divide by zero...
    /* 28 */ 0x00, 0x00, 0x00, 0x00, // hidden sectors: I don't ever want to boot from this
    /* 32 */ 0, 0, 0, 0, // total number of sectors, gets calculated later
    /* 36 */ 0, 0, 0, 0, // number of sectors for FAT, gets calculated later
    /* 40 */ 0x00, 0x00, // fat mirroring enabled
    /* 42 */ 0x00, 0x00, // version 0
    /* 44 */ 0x02, 0x00, 0x00, 0x00, // first cluster of root directory is 2
    /* 48 */ 0x01, 0x00, // FSINFO location
    /* 50 */ 0x06, 0x00, // backup in sector 6
    /* 52 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 12 zeros reserved
    /* 64 */ 0x80, // drive number: sure hope no one ever uses this on a floppy
    /* 65 */ 0x00, // reserved
    /* 66 */ 0x00, // extended boot signature: no volume label/serial present
    /* 67 */ 0x00, 0x00, 0x00, 0x00, // no serial
    /* 71 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no volume label
    // File system type. Upper case as the FAT specification prescribes; some
    // drivers compare these bytes literally to recognise a FAT32 volume.
    /* 82 */ 0x46, 0x41, 0x54, 0x33, 0x32, 0x20, 0x20, 0x20, // "FAT32   "
];
|
||||
/// Byte offset inside the boot sector of the 32-bit total sector count.
const BOOT_SECTOR_TOTAL_SEC_32: usize = 32;
/// Byte offset inside the boot sector of the 32-bit "sectors per FAT" field.
const BOOT_SECTOR_FAT_SZ_32: usize = 36;
|
||||
|
||||
/// First four bytes of the FSINFO sector (ASCII "RRaA"), written by
/// `generate_header` right after the boot sector.
const FSINFO_HEAD: [u8; 4] = [0x52, 0x52, 0x61, 0x41];
|
||||
|
||||
/// Last 28 bytes of the FSINFO sector; `generate_header` zero-fills the gap
/// between `FSINFO_HEAD` and this tail so the sector is `BLOCK_SIZE` bytes.
const FSINFO_TAIL: [u8; 28] = [
    0x72, 0x72, 0x41, 0x61, // required signature ("rrAa")
    0x00, 0x00, 0x00, 0x00, // free cluster count: ideally, we used all sectors (else we would just make the image smaller)
    0xff, 0xff, 0xff, 0xff, // don't know where the first free sector is, if there is none
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a bunch of zeros
    0x00, 0x00, 0x55, 0xaa, // classic IBM
];
|
||||
|
||||
/// FAT entry value 0x0FFFFFFF in little-endian byte order. `generate_fat`
/// writes it for the two leading entries, for the last cluster of every
/// state directory and for every padding cluster.
const FAT32_EOF: [u8; 4] = [0xff, 0xff, 0xff, 0x0f];
|
||||
|
||||
pub struct StatePosInfo {
|
||||
pub block: UFat, // position (in blocks) of state dir
|
||||
pub byte_sized: usize, // size (in bytes) of state dir
|
||||
}
|
||||
pub struct StateFatMap<D: DFA> {
|
||||
pub blocks: UFat,
|
||||
pub order_list: Vec<D::ID>,
|
||||
pub pos_hash: HashMap<D::ID, StatePosInfo>,
|
||||
}
|
||||
|
||||
|
||||
/// Writes `val` in little-endian byte order into `into`, starting at byte
/// offset `pos`.
///
/// Bytes that would fall outside of `into` are silently dropped; nothing is
/// written at all when `pos` lies beyond the end of the slice.
fn write_u32_into(into: &mut [u8], pos: usize, val: u32) {
    let bytes = val.to_le_bytes();
    if let Some(dest) = into.get_mut(pos..) {
        // copy only as many bytes as still fit into the destination
        let n = dest.len().min(bytes.len());
        dest[..n].copy_from_slice(&bytes[..n]);
    }
}
|
||||
|
||||
/// Writes `val` in little-endian byte order into `into`, starting at byte
/// offset `pos`.
///
/// Bytes that would fall outside of `into` are silently dropped; nothing is
/// written at all when `pos` lies beyond the end of the slice.
fn write_u16_into(into: &mut [u8], pos: usize, val: u16) {
    let bytes = val.to_le_bytes();
    if let Some(dest) = into.get_mut(pos..) {
        // copy only as many bytes as still fit into the destination
        let n = dest.len().min(bytes.len());
        dest[..n].copy_from_slice(&bytes[..n]);
    }
}
|
||||
|
||||
pub fn len_to_block(size: usize) -> UFat {
|
||||
(size/BLOCK_SIZE + if size % BLOCK_SIZE != 0 {1} else {0}) as UFat
|
||||
}
|
||||
|
||||
pub fn generate_header(n_state_sector: UFat) -> Vec<u8> {
|
||||
|
||||
// boot block
|
||||
let mut boot_and_fsinfo = BOOT_SECTOR.to_vec();
|
||||
let fatsize: u32 = len_to_block((2+n_state_sector as usize)*(std::mem::size_of::<UFat>()));
|
||||
write_u32_into(&mut boot_and_fsinfo, BOOT_SECTOR_FAT_SZ_32, fatsize);
|
||||
write_u32_into(&mut boot_and_fsinfo, BOOT_SECTOR_TOTAL_SEC_32, n_state_sector + 8 + fatsize);
|
||||
boot_and_fsinfo.extend_from_slice(&[0u8; BLOCK_SIZE - 2 - BOOT_SECTOR.len()]);
|
||||
boot_and_fsinfo.push(0x55);
|
||||
boot_and_fsinfo.push(0xaa);
|
||||
|
||||
// fsinfo
|
||||
boot_and_fsinfo.extend_from_slice(&FSINFO_HEAD);
|
||||
boot_and_fsinfo.extend_from_slice(&[0u8; BLOCK_SIZE - FSINFO_HEAD.len() - FSINFO_TAIL.len()]);
|
||||
boot_and_fsinfo.extend_from_slice(&FSINFO_TAIL);
|
||||
let mut volume = boot_and_fsinfo.clone();
|
||||
|
||||
volume.extend_from_slice(&[0u8; 4*BLOCK_SIZE]);
|
||||
|
||||
// backup copy in block 6 and 7
|
||||
volume.append(&mut boot_and_fsinfo);
|
||||
volume
|
||||
}
|
||||
|
||||
pub fn generate_fat<D: DFA>(state_blocks: &StateFatMap<D>, pad: UFat) -> Result<Vec<u8>, &'static str> {
|
||||
let mut fat = Vec::new();
|
||||
fat.extend_from_slice(&FAT32_EOF);
|
||||
fat.extend_from_slice(&FAT32_EOF);
|
||||
let mut current_cluster: UFat = 2;
|
||||
for state in &state_blocks.order_list {
|
||||
let pl = match state_blocks.pos_hash.get(&state) {
|
||||
Some(x) => x,
|
||||
None => return Err("Refernce to invalid state")
|
||||
};
|
||||
let size = len_to_block(pl.byte_sized);
|
||||
if size == 0 {
|
||||
return Err("Zero size state");
|
||||
}
|
||||
for i in 0..size {
|
||||
current_cluster += 1;
|
||||
if i == size - 1 {
|
||||
fat.extend_from_slice(&FAT32_EOF);
|
||||
}
|
||||
else {
|
||||
fat.extend_from_slice(¤t_cluster.to_le_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
for _ in 0..pad {
|
||||
fat.extend_from_slice(&[0xffu8, 0xff, 0xff, 0x0f]);
|
||||
}
|
||||
if fat.len() % BLOCK_SIZE != 0 {
|
||||
fat.extend(
|
||||
std::iter::repeat(0u8)
|
||||
.take(BLOCK_SIZE - fat.len() % BLOCK_SIZE)
|
||||
);
|
||||
}
|
||||
Ok(fat)
|
||||
}
|
||||
|
||||
/// Template for one 32-byte directory entry. `generate_dir_short` and
/// `generate_match` copy it and fill in the name, the attributes and the
/// target cluster.
const ENTRY_TEMPLATE: [u8; 32] = [
    /*  0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // to be filled in (8.3 name)
    /* 11 */ 0, // attributes (to be filled in)
    /* 12 */ 0x00, // reserved
    /* 13 */ 0x00, // creation time deciseconds (0)
    /* 14 */ 0x00, 0x00, // creation time
    /* 16 */ 0x00, 0x00, // creation date
    /* 18 */ 0x00, 0x00, // access date
    /* 20 */ 0, 0, // to be filled in (cluster high word)
    /* 22 */ 0x00, 0x00, // write time
    /* 24 */ 0x21, 0x00, // write date (1980-01-01)
    /* 26 */ 0, 0, // to be filled in (cluster low word)
    /* 28 */ 0, 0, 0, 0, // size for directory is zero
];
|
||||
|
||||
pub fn generate_dir_short(letter: u8, target: UFat) -> Vec<u8> {
|
||||
let name_8_3: [u8; 11] = if letter == b' ' {
|
||||
*b"SPACE "
|
||||
}
|
||||
else {
|
||||
// fat32 entries are padded with space
|
||||
[letter, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]
|
||||
};
|
||||
let mut dir_entry = ENTRY_TEMPLATE.to_vec();
|
||||
for (x, &y) in dir_entry.iter_mut().take(11).zip(name_8_3.iter()) {
|
||||
*x = y;
|
||||
}
|
||||
dir_entry[11] = 0x11; // read-only (defunct but I'll use it anyway), directory
|
||||
write_u16_into(&mut dir_entry, 20, (target >> 16) as u16);
|
||||
write_u16_into(&mut dir_entry, 26, (target & 0xffff) as u16);
|
||||
// directories have size of zero
|
||||
write_u32_into(&mut dir_entry, 28, 0);
|
||||
dir_entry
|
||||
}
|
||||
|
||||
pub fn generate_match(target: UFat) -> Vec<u8> {
|
||||
let name_8_3 = *b"MATCH ";
|
||||
let mut dir_entry = ENTRY_TEMPLATE.to_vec();
|
||||
for (x, &y) in dir_entry.iter_mut().take(11).zip(name_8_3.iter()) {
|
||||
*x = y;
|
||||
}
|
||||
dir_entry[11] = 0;
|
||||
write_u16_into(&mut dir_entry, 20, (target >> 16) as u16);
|
||||
write_u16_into(&mut dir_entry, 26, (target & 0xffff) as u16);
|
||||
// just make it a 0-length file, idc
|
||||
write_u32_into(&mut dir_entry, 28, 0);
|
||||
dir_entry
|
||||
}
|
163
src/main.rs
Normal file
163
src/main.rs
Normal file
@ -0,0 +1,163 @@
|
||||
mod fat32;
|
||||
use clap::{App, Arg};
|
||||
use fat32::{StateFatMap, StatePosInfo, UFat};
|
||||
use regex_automata::{dense, DFA};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::process::exit;
|
||||
|
||||
/// Printable ASCII characters that are filtered out of the list of bytes for
/// which directory entries are generated:
/// `"` `*` `+` `,` `.` `/` `:` `;` `<` `=` `>` `?` `[` `\` `]` `|`.
const FORBIDDEN_PRINT_ASCII: [u8; 16] = *b"\"*+,./:;<=>?[\\]|";
|
||||
|
||||
// precalculate the position of every dfa state inside the fat table so we can
|
||||
// later replace the referenced state numbers by the fat entry when writing
|
||||
// directories
|
||||
fn determine_state_positions<D: DFA>(
|
||||
dfa: &D,
|
||||
validlist: &[u8],
|
||||
) -> Result<StateFatMap<D>, &'static str> {
|
||||
let nomatch_len = validlist.len() * 32;
|
||||
let match_len = (validlist.len() + 1) * 32;
|
||||
// root directory starts at 2
|
||||
let mut current_block: UFat = 2;
|
||||
let mut current_index: usize = 0;
|
||||
// vector of visited states in order of visit
|
||||
let mut state_vec = Vec::new();
|
||||
// map state numbers to StatePosInfos
|
||||
let mut state_pos_hash = HashMap::new();
|
||||
// keep track of visited states
|
||||
let mut state_set = HashSet::new();
|
||||
state_vec.push(dfa.start_state());
|
||||
while let Some(¤t_state) = state_vec.get(current_index) {
|
||||
// queue all unvisited states from current state
|
||||
for &next_byte in validlist {
|
||||
let next_state = dfa.next_state(current_state, next_byte);
|
||||
if state_set.insert(next_state) {
|
||||
state_vec.push(next_state);
|
||||
}
|
||||
}
|
||||
// relevant for size of directory (but mostly not because it's constant
|
||||
// and they're both the same)
|
||||
let size = if dfa.is_match_state(current_state) {
|
||||
match_len
|
||||
} else {
|
||||
nomatch_len
|
||||
};
|
||||
state_pos_hash.insert(
|
||||
current_state,
|
||||
StatePosInfo {
|
||||
block: current_block,
|
||||
byte_sized: size,
|
||||
},
|
||||
);
|
||||
match current_block.checked_add(fat32::len_to_block(size)) {
|
||||
Some(val) => {
|
||||
current_block = val;
|
||||
}
|
||||
None => return Err("State machine exceeds Fate32 capacity!"),
|
||||
}
|
||||
current_index += 1;
|
||||
}
|
||||
Ok(StateFatMap {
|
||||
blocks: current_block - 2,
|
||||
order_list: state_vec,
|
||||
pos_hash: state_pos_hash,
|
||||
})
|
||||
}
|
||||
|
||||
fn regex_to_fat32<D: DFA, W: Write>(
|
||||
dfa: &D,
|
||||
validlist: &[u8],
|
||||
mut vol: W,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
let state_blocks = determine_state_positions(&dfa, &validlist)?;
|
||||
// pad until at least 65536 blocks, since otherwise ideologically
|
||||
// I would have to implement fat12/fat16
|
||||
// also keep at least one free block for match file (which is 0 bytes,
|
||||
// but I'm not sure if it needs to reference a valid block)
|
||||
let pad = 1isize.max(65536 - state_blocks.blocks as isize) as UFat;
|
||||
vol.write_all(&fat32::generate_header(state_blocks.blocks + pad))?;
|
||||
vol.write_all(&fat32::generate_fat(&state_blocks, pad)?)?;
|
||||
for &state in &state_blocks.order_list {
|
||||
let mut current_dir = Vec::<u8>::new();
|
||||
// generate directories for each possible character
|
||||
for &c in validlist {
|
||||
let next_state = dfa.next_state(state, c);
|
||||
// maps the state to the block where the state directory is
|
||||
let &state_block = &state_blocks.pos_hash[&next_state].block;
|
||||
current_dir.append(&mut fat32::generate_dir_short(c, state_block));
|
||||
}
|
||||
// if accepting state, put match file into dir
|
||||
if dfa.is_match_state(state) {
|
||||
current_dir.append(&mut fat32::generate_match(state_blocks.blocks + 2))
|
||||
}
|
||||
if current_dir.len() % fat32::BLOCK_SIZE == 0 {
|
||||
vol.write_all(¤t_dir)?;
|
||||
continue;
|
||||
}
|
||||
// fill up current block to multiple of BLOCK_SIZE
|
||||
current_dir.extend(
|
||||
std::iter::repeat(0u8).take(fat32::BLOCK_SIZE - current_dir.len() % fat32::BLOCK_SIZE),
|
||||
);
|
||||
vol.write_all(¤t_dir)?;
|
||||
}
|
||||
let emptyblock = &[0u8; fat32::BLOCK_SIZE];
|
||||
// make space for one more (match file)
|
||||
for _ in 0..pad {
|
||||
vol.write_all(emptyblock)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let matches = App::new("regex2fat")
|
||||
.version("0.1.0")
|
||||
.author("8051Enthusiast")
|
||||
.about("Convert regex DFAs to FAT32 file systems")
|
||||
.arg(
|
||||
Arg::with_name("anchor")
|
||||
.short("a")
|
||||
.long("anchor")
|
||||
.help("Anchor regex at beginning (off by default)"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("pattern")
|
||||
.required(true)
|
||||
.index(1)
|
||||
.help("The regex pattern to match"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("outfile")
|
||||
.required(true)
|
||||
.index(2)
|
||||
.help("The file to write the fat fs to"),
|
||||
)
|
||||
.get_matches();
|
||||
let pattern = matches.value_of("pattern").unwrap();
|
||||
let dfa = dense::Builder::new()
|
||||
// fat32 is case insensitive
|
||||
.case_insensitive(true)
|
||||
.anchored(matches.is_present("anchor"))
|
||||
.build(pattern)
|
||||
.unwrap_or_else(|err| {
|
||||
eprintln!("Could not compile regex '{}': {}", pattern, err);
|
||||
exit(1);
|
||||
});
|
||||
let validlist: Vec<u8> = (0x20..0x61)
|
||||
.chain(0x7b..0x7e)
|
||||
.filter(|c| !FORBIDDEN_PRINT_ASCII.contains(c))
|
||||
.collect();
|
||||
let outfile = matches.value_of("outfile").unwrap();
|
||||
let file = File::create(outfile).unwrap_or_else(|err| {
|
||||
eprintln!("Could not open file '{}': {}", outfile, err);
|
||||
exit(1);
|
||||
});
|
||||
regex_to_fat32(&dfa, &validlist, file).unwrap_or_else(|err| {
|
||||
eprintln!("Could not write DFA to '{}': {}", outfile, err);
|
||||
exit(1);
|
||||
});
|
||||
}
|
Loading…
Reference in New Issue
Block a user