Initial commit

This commit is contained in:
8051Enthusiast 2020-04-09 11:18:47 +02:00
commit 99fd700ba6
6 changed files with 560 additions and 0 deletions

21
.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
/target
# Swap
[._]*.s[a-v][a-z]
# comment out the next line if you don't need vector files
!*.svg
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
*~
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~

136
Cargo.lock generated Normal file
View File

@ -0,0 +1,136 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
[[package]]
name = "clap"
version = "2.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "hermit-abi"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "725cf19794cf90aa94e65050cb4191ff5d8fa87a498383774c47b332e3af952e"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0"
[[package]]
name = "regex-automata"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
dependencies = [
"byteorder",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
[[package]]
name = "regex2fat"
version = "0.1.0"
dependencies = [
"clap",
"regex-automata",
]
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "unicode-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
[[package]]
name = "vec_map"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
[[package]]
name = "winapi"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

15
Cargo.toml Normal file
View File

@ -0,0 +1,15 @@
[package]
name = "regex2fat"
description = "Turn your favourite regex into FAT32"
version = "0.1.0"
authors = ["8051Enthusiast <8051Enthusiast@protonmail.com>"]
readme = "README.md"
repository = "https://github.com/8051Enthusiast/regex2fat"
keywords = ["regex", "fat"]
edition = "2018"
license = "Unlicense"
[dependencies]
regex-automata = "0.1"
clap = "2.33"

40
README.md Normal file
View File

@ -0,0 +1,40 @@
regex2fat
=========
Did you ever want to match a regex, but all you had was a fat32 driver?
Ever wanted to serialize your regex DFAs into one of the most widely supported formats used by over 3 billion devices?
[Are directory loops your thing?](https://xkcd.com/981/)
Worry no more, with `regex2fat`, this has become easier than ever before!
With just a little `regex2fat '[YOUR] F{4}VOUR{1,7}E (R[^E]G)*EX HERE.' /dev/whatever`, you will have a fat32 regex DFA of your favourite regex.
For example, to see whether the string 'Y FFFFVOURRE EX HEREM' would match, just mount it and check if '/Y/SPACE/F/F/F/F/V/O/U/R/R/E/SPACE/E/X/SPACE/H/E/R/E/M/MATCH' exists.
To run it, you can [install cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html) and then run `cargo install regex2fat` (or compile it directly from this repo).
If you have the cargo bin directory in your path, you should be able to invoke it as described above.
The file created will be a fat32 image, which can probably be mounted or put on a drive in some way, but most likely shouldn't.
## FAQ
### Q: How does this work?
A: Regular regexes (i.e. no backreferences and similar advanced features) can be turned into a so-called DFA (deterministic finite automaton).
This is basically a bunch of arrows going between states, where an arrow is labeled with a letter so that a letter in a state causes the current state to go along the arrow to another state, with a subset of states being accepting.
Yes, I'm bad at explaining, you're better off reading [the wikipedia article on DFAs](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) if you don't know what it is.
Because I'm lazy, I used [BurntSushi/regex-automata](https://github.com/BurntSushi/regex-automata) to get a DFA from a regex.
While Fat32 normally has a tree-like structure, each directory just references blocks anywhere on the file system, so the same block can be referenced from multiple directories.
The directories also have no explicit field for parent directories, so one can leave `..` out.
This allows for graph structures inside a file system, which a DFA basically is.
### Q: Should I use this <del>in production</del> anywhere?
A: No, but I can't stop you.
### Q: Does this actually work?
A: I've tried it on Windows 10 and Linux so far.
It seems to work flawlessly on Windows as far as I've tested.
On Linux, the fat32 code claims a directory is invalid if there are two dentries with the same directory name and the same parent in a loop (or something like that), so some paths are forbidden.
Might be fun to try on some embedded devices.
### Q: NOOOOOOOOOOO!!! YOU CAN'T TURN A DFA INTO A FAT32 FILE SYSTEM!!!! YOU CAN'T JUST HAVE A DIRECTORY WITH MULTIPLE PARENTS!!! YOU ARE BREAKING THE ASSUMPTION OF LACK OF LOOPERINOS NOOOOOOOOO
A: Haha OS-driven regex engine go brrrrr

185
src/fat32.rs Normal file
View File

@ -0,0 +1,185 @@
use regex_automata::DFA;
use std::collections::HashMap;
/// Integer type used for FAT entries and for cluster/block numbers.
pub type UFat = u32;
/// Size of one block in bytes; directory sizes and the FAT are rounded up to
/// multiples of this value.
pub const BLOCK_SIZE: usize = 512;
/// Template for the first 90 bytes of the FAT32 boot sector: jump instruction,
/// OEM name, BIOS parameter block and extended boot record.
///
/// The total sector count (offset 32) and the FAT size (offset 36) are left
/// zero here and are patched in by `generate_header`. The number in front of
/// each row is the byte offset of the field inside the sector.
const BOOT_SECTOR: [u8; 90] = [
    /*  0 */ 0xeb, 0xfe, 0x90, // jump to self (placeholder)
    /*  3 */ 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x20, 0x20, // "regex   " as OEM/vendor name
    /* 11 */ 0x00, 0x02, // bytes per sector (512)
    /* 13 */ 0x01, // one sector per cluster, why not
    /* 14 */ 0x08, 0x00, // 8 reserved sectors
    /* 16 */ 0x01, // number of FATs: one copy (don't really need two)
    /* 17 */ 0x00, 0x00, // root entry count, zero for fat32
    /* 19 */ 0x00, 0x00, // 16-bit total sector count, zero for fat32
    /* 21 */ 0xF8, // media descriptor: pretend to be a non-removable device
    /* 22 */ 0x00, 0x00, // 16-bit FAT size, zero for fat32
    /* 24 */ 0x01, 0x00, // sectors per track: it is the year 2020, no one uses CHS
    /* 26 */ 0x01, 0x00, // number of heads: but the values are 1 to prevent divide by zero...
    /* 28 */ 0x00, 0x00, 0x00, 0x00, // hidden sectors: I don't ever want to boot from this
    /* 32 */ 0, 0, 0, 0, // total number of sectors, gets calculated later
    /* 36 */ 0, 0, 0, 0, // number of sectors for FAT, gets calculated later
    /* 40 */ 0x00, 0x00, // extended flags: fat mirroring enabled
    /* 42 */ 0x00, 0x00, // version 0
    /* 44 */ 0x02, 0x00, 0x00, 0x00, // first cluster of root directory is 2
    /* 48 */ 0x01, 0x00, // FSINFO location (sector 1)
    /* 50 */ 0x06, 0x00, // backup boot sector in sector 6
    /* 52 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 12 zeros reserved
    /* 64 */ 0x80, // drive number: sure hope no one ever uses this on a floppy
    /* 65 */ 0x00, // reserved
    /* 66 */ 0x00, // extended boot signature: no volume label/serial present
    /* 67 */ 0x00, 0x00, 0x00, 0x00, // no serial
    /* 71 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // no volume label
    // file system type: the specification asks for upper-case "FAT32" padded
    // with three spaces
    /* 82 */ 0x46, 0x41, 0x54, 0x33, 0x32, 0x20, 0x20, 0x20,
];
/// Byte offset inside the boot sector of the 32-bit total sector count.
const BOOT_SECTOR_TOTAL_SEC_32: usize = 32;
/// Byte offset inside the boot sector of the 32-bit FAT size (in sectors).
const BOOT_SECTOR_FAT_SZ_32: usize = 36;
/// First four bytes of the FSINFO sector (its leading signature).
const FSINFO_HEAD: [u8; 4] = [0x52, 0x52, 0x61, 0x41];
/// Last 28 bytes of the FSINFO sector; `generate_header` zero-fills the space
/// between `FSINFO_HEAD` and this tail.
const FSINFO_TAIL: [u8; 28] = [
0x72, 0x72, 0x41, 0x61, // required signature
0x00, 0x00, 0x00, 0x00, // free count: ideally, we used all sectors (else we would just make the image smaller)
0xff, 0xff, 0xff, 0xff, // next free: don't know where the first free sector is, if there is none
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // a bunch of zeros
0x00, 0x00, 0x55, 0xaa // classic IBM
];
/// FAT entry (little-endian 0x0fffffff) used to terminate a cluster chain.
const FAT32_EOF: [u8; 4] = [0xff, 0xff, 0xff, 0x0f];
/// Where the directory generated for one DFA state lives and how big it is.
pub struct StatePosInfo {
pub block: UFat, // position (in blocks) of state dir
pub byte_sized: usize, // size (in bytes) of state dir
}
/// Layout of all state directories of a DFA on the volume.
pub struct StateFatMap<D: DFA> {
// total number of blocks taken up by the state directories
pub blocks: UFat,
// states in the order in which their directories are laid out
pub order_list: Vec<D::ID>,
// maps a state to the position and size of its directory
pub pos_hash: HashMap<D::ID, StatePosInfo>,
}
/// Stores `val` as four little-endian bytes in `into`, starting at index `pos`.
///
/// Bytes that would fall past the end of `into` are silently dropped, so a
/// `pos` at or beyond the end leaves the slice untouched.
fn write_u32_into(into: &mut [u8], pos: usize, val: u32) {
    let le = val.to_le_bytes();
    // zipping with the four source bytes bounds the write to four positions
    into.iter_mut()
        .skip(pos)
        .zip(le.iter())
        .for_each(|(dst, &src)| *dst = src);
}
/// Stores `val` as two little-endian bytes in `into`, starting at index `pos`.
///
/// Bytes that would fall past the end of `into` are silently dropped.
fn write_u16_into(into: &mut [u8], pos: usize, val: u16) {
    let le = val.to_le_bytes();
    // zipping with the two source bytes bounds the write to two positions
    into.iter_mut()
        .skip(pos)
        .zip(le.iter())
        .for_each(|(dst, &src)| *dst = src);
}
/// Returns how many blocks of `BLOCK_SIZE` bytes are needed to hold `size`
/// bytes, rounding up; a size of zero needs zero blocks.
pub fn len_to_block(size: usize) -> UFat {
    let whole = size / BLOCK_SIZE;
    let blocks = match size % BLOCK_SIZE {
        0 => whole,
        // a partially filled block still occupies a full one
        _ => whole + 1,
    };
    blocks as UFat
}
/// Builds the eight reserved sectors at the start of the volume.
///
/// `n_state_sector` is the number of data blocks on the volume. The result is
/// the boot sector, the FSINFO sector, four zeroed sectors and then a backup
/// copy of the boot and FSINFO sectors (sectors 6 and 7).
pub fn generate_header(n_state_sector: UFat) -> Vec<u8> {
    // the FAT holds one entry per data block plus the two leading entries
    let fat_entries = 2 + n_state_sector as usize;
    let fat_sectors: u32 = len_to_block(fat_entries * std::mem::size_of::<UFat>());
    // data blocks + 8 reserved sectors + the FAT itself
    let total_sectors = n_state_sector + 8 + fat_sectors;

    // boot sector: patched template, zero fill, 0x55 0xaa signature at the end
    let mut boot = BOOT_SECTOR.to_vec();
    write_u32_into(&mut boot, BOOT_SECTOR_FAT_SZ_32, fat_sectors);
    write_u32_into(&mut boot, BOOT_SECTOR_TOTAL_SEC_32, total_sectors);
    boot.resize(BLOCK_SIZE - 2, 0);
    boot.extend_from_slice(&[0x55, 0xaa]);

    // fsinfo sector: head signature, zero fill, tail
    let mut fsinfo = FSINFO_HEAD.to_vec();
    fsinfo.resize(BLOCK_SIZE - FSINFO_TAIL.len(), 0);
    fsinfo.extend_from_slice(&FSINFO_TAIL);

    let mut volume = Vec::with_capacity(8 * BLOCK_SIZE);
    volume.extend_from_slice(&boot);
    volume.extend_from_slice(&fsinfo);
    // sectors 2 to 5 stay empty
    volume.resize(6 * BLOCK_SIZE, 0);
    // backup copy in block 6 and 7
    volume.extend_from_slice(&boot);
    volume.extend_from_slice(&fsinfo);
    volume
}
/// Builds the file allocation table for the state directories.
///
/// The table starts with the two leading entries, followed by one cluster
/// chain per state in `state_blocks.order_list` (chains are laid out back to
/// back starting at cluster 2), followed by `pad` single-cluster chains. The
/// result is zero-padded to a multiple of `BLOCK_SIZE`.
///
/// # Errors
///
/// Returns an error if a state in the order list has no entry in
/// `state_blocks.pos_hash`, or if a state directory has a size of zero bytes.
pub fn generate_fat<D: DFA>(state_blocks: &StateFatMap<D>, pad: UFat) -> Result<Vec<u8>, &'static str> {
    let mut fat = Vec::new();
    // entries 0 and 1 do not belong to any cluster
    fat.extend_from_slice(&FAT32_EOF);
    fat.extend_from_slice(&FAT32_EOF);
    let mut current_cluster: UFat = 2;
    for state in &state_blocks.order_list {
        let info = state_blocks
            .pos_hash
            .get(state)
            .ok_or("Reference to invalid state")?;
        let clusters = len_to_block(info.byte_sized);
        if clusters == 0 {
            return Err("Zero size state");
        }
        // every cluster but the last links to its successor
        for _ in 1..clusters {
            current_cluster += 1;
            fat.extend_from_slice(&current_cluster.to_le_bytes());
        }
        // the last cluster terminates the chain
        current_cluster += 1;
        fat.extend_from_slice(&FAT32_EOF);
    }
    // padding blocks are marked as used, each as its own chain
    for _ in 0..pad {
        fat.extend_from_slice(&FAT32_EOF);
    }
    // fill the last FAT block with zeros
    let rest = fat.len() % BLOCK_SIZE;
    if rest != 0 {
        fat.resize(fat.len() + (BLOCK_SIZE - rest), 0);
    }
    Ok(fat)
}
/// Template for one 32-byte directory entry. The name, the attribute byte and
/// the two halves of the start cluster are filled in by `generate_dir_short`
/// and `generate_match`; the number in front of each row is the byte offset.
const ENTRY_TEMPLATE: [u8; 32] = [
/* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // to be filled in (8.3 name)
/* 11 */ 0, // attributes (to be filled in)
/* 12 */ 0x00, // reserved
/* 13 */ 0x00, // creation time deciseconds (0)
/* 14 */ 0x00, 0x00, // creation time
/* 16 */ 0x00, 0x00, // creation date
/* 18 */ 0x00, 0x00, // access date
/* 20 */ 0, 0, // to be filled in (cluster high word)
/* 22 */ 0x00, 0x00, // write time
/* 24 */ 0x21, 0x00, // write date (1980-01-01)
/* 26 */ 0, 0, // to be filled in (cluster low word)
/* 28 */ 0, 0, 0, 0, // size for directory is zero
];
pub fn generate_dir_short(letter: u8, target: UFat) -> Vec<u8> {
let name_8_3: [u8; 11] = if letter == b' ' {
*b"SPACE "
}
else {
// fat32 entries are padded with space
[letter, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]
};
let mut dir_entry = ENTRY_TEMPLATE.to_vec();
for (x, &y) in dir_entry.iter_mut().take(11).zip(name_8_3.iter()) {
*x = y;
}
dir_entry[11] = 0x11; // read-only (defunct but I'll use it anyway), directory
write_u16_into(&mut dir_entry, 20, (target >> 16) as u16);
write_u16_into(&mut dir_entry, 26, (target & 0xffff) as u16);
// directories have size of zero
write_u32_into(&mut dir_entry, 28, 0);
dir_entry
}
/// Builds the 32-byte directory entry of the zero-length file `MATCH` that is
/// placed into the directory of every accepting state. `target` is the
/// cluster number stored in the entry.
///
/// The name is padded with spaces to the full eleven bytes of an 8.3 name.
pub fn generate_match(target: UFat) -> Vec<u8> {
    // build the space padding explicitly instead of relying on trailing
    // blanks inside a literal
    let mut name_8_3 = [b' '; 11];
    name_8_3[..5].copy_from_slice(b"MATCH");
    let mut dir_entry = ENTRY_TEMPLATE.to_vec();
    dir_entry[..11].copy_from_slice(&name_8_3);
    dir_entry[11] = 0; // no attributes: a plain file
    // the start cluster is split into a high and a low 16-bit half
    write_u16_into(&mut dir_entry, 20, (target >> 16) as u16);
    write_u16_into(&mut dir_entry, 26, (target & 0xffff) as u16);
    // just make it a 0-length file, idc
    write_u32_into(&mut dir_entry, 28, 0);
    dir_entry
}

163
src/main.rs Normal file
View File

@ -0,0 +1,163 @@
mod fat32;
use clap::{App, Arg};
use fat32::{StateFatMap, StatePosInfo, UFat};
use regex_automata::{dense, DFA};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::File;
use std::io::Write;
use std::process::exit;
/// Printable ASCII bytes that are never used as a directory name:
/// `" * + , . / : ; < = > ? [ \ ] |`. Each byte is listed exactly once.
const FORBIDDEN_PRINT_ASCII: [u8; 16] = [
    0x22, 0x2a, 0x2b, 0x2c, 0x2e, 0x2f, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x5b, 0x5c, 0x5d, 0x7c,
];
/// Precalculates the position of every DFA state inside the FAT so that the
/// state numbers can later be replaced by FAT entries when the directories
/// are written.
///
/// States are discovered breadth-first from the start state, following only
/// the bytes in `validlist`. Each state gets a directory of one 32-byte entry
/// per valid byte, plus one more entry if the state is a match state, and the
/// directories are placed back to back starting at block 2.
///
/// NOTE(review): the start state is not marked as visited before the walk, so
/// if any transition leads back to it, it is queued a second time and its map
/// entry is overwritten with the later position. The first copy stays at
/// block 2, so no directory entry refers to block 2 itself. Confirm whether
/// that is intended before changing it.
///
/// # Errors
///
/// Returns an error if the block numbers no longer fit into a `UFat`.
fn determine_state_positions<D: DFA>(
    dfa: &D,
    validlist: &[u8],
) -> Result<StateFatMap<D>, &'static str> {
    // directory sizes in bytes: one entry per valid byte, and one extra entry
    // for the match file in accepting states
    let nomatch_len = validlist.len() * 32;
    let match_len = nomatch_len + 32;
    // root directory starts at 2
    let mut current_block: UFat = 2;
    // vector of visited states in order of visit, doubles as the work queue
    let mut state_vec = vec![dfa.start_state()];
    // map state numbers to StatePosInfos
    let mut state_pos_hash = HashMap::new();
    // keep track of visited states
    let mut state_set = HashSet::new();
    let mut current_index: usize = 0;
    while let Some(&current_state) = state_vec.get(current_index) {
        // queue all unvisited states from current state
        for &next_byte in validlist {
            let next_state = dfa.next_state(current_state, next_byte);
            if state_set.insert(next_state) {
                state_vec.push(next_state);
            }
        }
        let size = if dfa.is_match_state(current_state) {
            match_len
        } else {
            nomatch_len
        };
        state_pos_hash.insert(
            current_state,
            StatePosInfo {
                block: current_block,
                byte_sized: size,
            },
        );
        // the next directory starts right behind this one
        current_block = current_block
            .checked_add(fat32::len_to_block(size))
            .ok_or("State machine exceeds FAT32 capacity!")?;
        current_index += 1;
    }
    Ok(StateFatMap {
        // block numbering started at 2
        blocks: current_block - 2,
        order_list: state_vec,
        pos_hash: state_pos_hash,
    })
}
/// Writes the complete FAT32 image for `dfa` to `vol`: header sectors, the
/// FAT, one directory per DFA state and finally the padding blocks.
///
/// Only transitions on the bytes in `validlist` become directory entries.
/// Output is buffered internally and flushed before returning, so a failed
/// write is reported instead of being lost when the writer is dropped.
///
/// # Errors
///
/// Returns an error if the state layout or the FAT cannot be computed, or if
/// writing to `vol` fails.
fn regex_to_fat32<D: DFA, W: Write>(
    dfa: &D,
    validlist: &[u8],
    vol: W,
) -> Result<(), Box<dyn Error>> {
    // many small block-sized writes follow, so collect them in a buffer
    let mut vol = std::io::BufWriter::new(vol);
    let state_blocks = determine_state_positions(dfa, validlist)?;
    // pad until at least 65536 blocks, since otherwise ideologically
    // I would have to implement fat12/fat16
    // also keep at least one free block for match file (which is 0 bytes,
    // but I'm not sure if it needs to reference a valid block)
    let min_blocks: UFat = 65536;
    let pad: UFat = min_blocks.saturating_sub(state_blocks.blocks).max(1);
    vol.write_all(&fat32::generate_header(state_blocks.blocks + pad))?;
    vol.write_all(&fat32::generate_fat(&state_blocks, pad)?)?;
    // the match file points at the first block behind the state directories
    let match_block = state_blocks.blocks + 2;
    // one buffer is reused for the directories of all states
    let mut current_dir = Vec::<u8>::new();
    for &state in &state_blocks.order_list {
        current_dir.clear();
        // generate directories for each possible character
        for &c in validlist {
            let next_state = dfa.next_state(state, c);
            // maps the state to the block where the state directory is
            let state_block = state_blocks.pos_hash[&next_state].block;
            current_dir.extend_from_slice(&fat32::generate_dir_short(c, state_block));
        }
        // if accepting state, put match file into dir
        if dfa.is_match_state(state) {
            current_dir.extend_from_slice(&fat32::generate_match(match_block));
        }
        // fill up current block to multiple of BLOCK_SIZE
        let rest = current_dir.len() % fat32::BLOCK_SIZE;
        if rest != 0 {
            current_dir.resize(current_dir.len() + (fat32::BLOCK_SIZE - rest), 0);
        }
        vol.write_all(&current_dir)?;
    }
    let emptyblock = [0u8; fat32::BLOCK_SIZE];
    // make space for one more (match file)
    for _ in 0..pad {
        vol.write_all(&emptyblock)?;
    }
    // dropping the buffer would swallow a failing flush
    vol.flush()?;
    Ok(())
}
/// Command-line entry point: parses the arguments, compiles the pattern into
/// a dense DFA and writes the resulting FAT32 image to the output file.
/// Prints a message to stderr and exits with status 1 if the pattern does not
/// compile, the file cannot be created or the image cannot be written.
fn main() {
    let anchor_arg = Arg::with_name("anchor")
        .short("a")
        .long("anchor")
        .help("Anchor regex at beginning (off by default)");
    let pattern_arg = Arg::with_name("pattern")
        .required(true)
        .index(1)
        .help("The regex pattern to match");
    let outfile_arg = Arg::with_name("outfile")
        .required(true)
        .index(2)
        .help("The file to write the fat fs to");
    let matches = App::new("regex2fat")
        .version("0.1.0")
        .author("8051Enthusiast")
        .about("Convert regex DFAs to FAT32 file systems")
        .arg(anchor_arg)
        .arg(pattern_arg)
        .arg(outfile_arg)
        .get_matches();

    // both positional arguments are declared as required above
    let pattern = matches.value_of("pattern").unwrap();
    let anchored = matches.is_present("anchor");
    let dfa = match dense::Builder::new()
        // fat32 is case insensitive
        .case_insensitive(true)
        .anchored(anchored)
        .build(pattern)
    {
        Ok(dfa) => dfa,
        Err(err) => {
            eprintln!("Could not compile regex '{}': {}", pattern, err);
            exit(1);
        }
    };

    // bytes 0x20..=0x60 and 0x7b..=0x7d, minus the forbidden ones
    let mut validlist: Vec<u8> = Vec::new();
    for c in (0x20..0x61).chain(0x7b..0x7e) {
        if !FORBIDDEN_PRINT_ASCII.contains(&c) {
            validlist.push(c);
        }
    }

    // the file is only created once the pattern is known to compile
    let outfile = matches.value_of("outfile").unwrap();
    let file = match File::create(outfile) {
        Ok(file) => file,
        Err(err) => {
            eprintln!("Could not open file '{}': {}", outfile, err);
            exit(1);
        }
    };
    if let Err(err) = regex_to_fat32(&dfa, &validlist, file) {
        eprintln!("Could not write DFA to '{}': {}", outfile, err);
        exit(1);
    }
}