use std::collections::{HashSet, VecDeque};
use std::io::Read as _;
use std::ops::Range;
use std::path::Path;

pub mod data_structures;
pub mod decode;
pub mod encode;
pub mod tree;

#[cfg(test)]
pub(crate) mod tests;

use crate::internals::data_structures::TinySet;
use decode::read_base64_from_bytes;
use encode::{write_48bit_to_bytes_be, write_base64_to_bytes, write_hex_to_string};
use tree::SearchTree;

/// When dumping to the filesystem, data partitions are size-limited to 256KiB.
///
/// There is no single, optimal size for these partitions, because it depends on
/// configuration values that we can't predict or control, such as the version
/// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
/// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
/// the search query is going to produce a large number of results or a small
/// number, the bandwidth delay product of the network...
///
/// 256KiB is chosen after benching the wordnet data set with the lev+trans
/// automata. Using smaller partitions results in too many roundtrips.
///
/// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
/// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
/// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
const FILE_SIZE_LIMIT: usize = 256 * 1024;

/// Size threshold, in bytes, above which a child's bundled subtree counts as "large".
///
/// `generate_node_with_bundled_children` sums the compressed lengths of every
/// node in a child's bundled subtree; when that sum exceeds this limit the child
/// becomes eligible for the hash-based and suffix-only unbundling heuristics.
const CHILD_IS_LARGE_LIMIT: usize = 64;

/// Generates the search tree in a JS-wrapped disk format.
///
/// # Parameters
///
/// - `tree`: This is the output of [`crate::internals::tree::encode_search_tree_ukkonen`].
/// - `child_path`: The name of the directory (which will be created) to drop child nodes.
/// - `root_write`: Buffer that receives the base64-encoded NODEDATA of the root node.
///
/// # Format
///
/// The root of the tree looks like this:
///
/// ```js
/// rr_('{"<COL>"{<COLDATA>,"I":"<NODEDATA>"},...}')
/// ```
///
/// This function writes the NODEDATA into `root_write`, but not the wrapper function or the
/// COLDATA part.
/// The COLDATA is returned by [`write_data_to_disk`].
/// The function wrapper needs to be generated by the caller.
///
/// Child nodes in the search tree have `rn_("<NODEDATA>")`, and go in `child_path/HASH.js`.
///
/// See `HACKING.md` for a description of the format in more detail.
///
/// # Errors
///
/// Returns any I/O error raised while creating `child_path` or while reading or writing a
/// child file. Returns [`std::io::ErrorKind::InvalidData`] when an already existing child
/// file is not wrapped in `rn_("...")` or when its payload cannot be base64-decoded.
pub fn write_tree_to_disk(
    tree: &SearchTree,
    child_path: impl AsRef<Path>,
    root_write: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
    let child_path = child_path.as_ref();
    std::fs::create_dir_all(child_path)?;
    let root = write_tree(
        tree,
        // Writer: wrap the node bytes as `rn_("<base64>")` and store them as `<filename>.js`.
        &mut |filename, data| {
            let full_path = child_path.join(format!("{filename}.js"));
            // Base64 grows the payload by 4/3; the extra 7 bytes hold the `rn_("` and `")` wrapper.
            let mut output = Vec::with_capacity((data.len() * 8 / 6) + 7);
            output.extend_from_slice(br##"rn_(""##);
            crate::internals::encode::write_base64_to_bytes(data, &mut output);
            output.extend_from_slice(br##"")"##);
            std::fs::write(full_path, output)
        },
        // Reader: load a previously written child file and undo the wrapping.
        // A missing file is reported as `Ok(None)` rather than as an error.
        &mut |filename| {
            let full_path = child_path.join(format!("{filename}.js"));
            let raw_input = match std::fs::read(&full_path) {
                Ok(raw_input) => raw_input,
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
                Err(e) => return Err(e),
            };
            let Some(payload) = raw_input
                .strip_prefix(br##"rn_(""##)
                .and_then(|input| input.strip_suffix(br##"")"##))
            else {
                // Say which file is broken and what was expected instead of an empty message.
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!(
                        "{}: existing node file is not wrapped in `rn_(\"...\")`",
                        full_path.display(),
                    ),
                ));
            };
            let mut input = Vec::with_capacity(payload.len() * 3 / 4);
            crate::internals::decode::read_base64_from_bytes(payload, &mut input)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
            Ok(Some(input))
        },
    )?;
    crate::internals::encode::write_base64_to_bytes(&root, root_write);
    Ok(())
}

/// Serializes `tree` with the default node hasher.
///
/// This is a thin wrapper around [`write_tree_with_custom_hasher`] that hashes
/// node bytes with `siphash_of_bytes(data, 0, 0)`.
///
/// - `file_writer` receives a file name and the bytes of a node file to store.
/// - `file_reader` receives a file name and returns the stored bytes, or `None`
///   when no such file exists.
///
/// Returns the bytes of the root node, or the first error produced by either callback.
pub fn write_tree(
    tree: &SearchTree,
    file_writer: &mut dyn FnMut(&str, &[u8]) -> Result<(), std::io::Error>,
    file_reader: &mut dyn FnMut(&str) -> Result<Option<Vec<u8>>, std::io::Error>,
) -> Result<Vec<u8>, std::io::Error> {
    // Both SipHash keys are fixed at zero.
    let mut default_hasher = |bytes: &[u8]| siphash_of_bytes(bytes, 0, 0);
    write_tree_with_custom_hasher(tree, file_writer, file_reader, &mut default_hasher)
}

/// Character table for the short alphabitmap: the 24 lowercase ASCII letters
/// `a`-`z` with `q` and `v` left out.
///
/// NOTE(review): the code that consumes this table is not in view here; presumably
/// each position corresponds to one bit of a 24-bit bitmap - confirm.
const SHORT_ALPHABITMAP_CHARS: &[u8; 24] = b"abcdefghijklmnoprstuwxyz";
/// Character table for the long alphabitmap: the digits `1`-`6` followed by all
/// 26 lowercase ASCII letters, 32 entries in total.
///
/// NOTE(review): presumably each position corresponds to one bit of a 32-bit
/// bitmap - confirm against the consumer.
const LONG_ALPHABITMAP_CHARS: &[u8; 32] = b"123456abcdefghijklmnopqrstuvwxyz";

/// Selects one of the two alphabitmap flavours.
///
/// NOTE(review): presumably `Short` pairs with `SHORT_ALPHABITMAP_CHARS` and
/// `Long` with `LONG_ALPHABITMAP_CHARS`; the code using this enum is not in
/// view here - confirm.
enum AlphaBitmap {
    /// The short flavour.
    Short,
    /// The long flavour.
    Long,
}

pub fn write_tree_with_custom_hasher(
    tree: &SearchTree,
    file_writer: &mut dyn FnMut(&str, &[u8]) -> Result<(), std::io::Error>,
    file_reader: &mut dyn FnMut(&str) -> Result<Option<Vec<u8>>, std::io::Error>,
    hasher: &mut dyn FnMut(&[u8]) -> u64,
) -> Result<Vec<u8>, std::io::Error> {
    /// Decoded view of the header bytes at the start of a serialized node.
    ///
    /// Built by `NodeHeader::parse`. For nodes that carry the inline-neighbors
    /// flag every compression-related field is reported as `false` / `0`.
    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
    struct NodeHeader {
        /// Bit `0x01` of the first tag byte.
        is_pure_suffixes_only_node: bool,
        /// Bit `0x02` of the first tag byte.
        is_stack_compressed: bool,
        /// Bit `0x04` of the first tag byte: a second tag byte follows.
        is_long_compressed: bool,
        /// The upper nibble of the first tag byte is `0xF` and the tag is not long.
        is_all_compressed: bool,
        /// Bit `0x08` of the first tag byte, on a node that is not pure-suffixes-only.
        is_dictionary_compressed: bool,
        /// The upper nibble of the first tag byte is non-zero, or the tag is long.
        is_compressed: bool,
        /// Bit `0x40` of the data-length byte.
        is_inline_neighbors: bool,
        /// Bit `0x80` of the data-length byte; also `true` when a pure-suffixes-only
        /// node has tag bit `0x08` set and therefore has no data-length byte at all.
        is_no_leaves: bool,
        /// The tag (including the second byte of a long tag) shifted right by four bits.
        compression_bitmap: u32,
        /// Offset, relative to the parsed slice, of the byte after the data-length byte.
        data_start: usize,
        /// Low six bits of the data-length byte.
        data_len: u8,
        /// Number of children that might have a prefix, as decoded from the header.
        mhp_len: usize,
        /// Number of suffix-only children, as decoded from the header.
        so_len: usize,
        /// Marker taken from the top bits of the child-count byte(s): `0xc0`, `0x80` or `0x00`.
        alphabitmap: u8,
    }
    impl NodeHeader {
        /// Decodes the node header found at the start of `outbuf`.
        ///
        /// Returns the header plus the offset of the first byte after the header
        /// fields that were read (tag byte(s), data-length byte, inline data when
        /// present and, for nodes without inline neighbors, the child-count byte(s)).
        ///
        /// NOTE(review): for inline-neighbor nodes the branch count is taken verbatim
        /// from the second tag byte, while the writer in
        /// `generate_node_with_bundled_children` stores `branch_values.len() - 1`
        /// there; confirm that users of `mhp_len` / `so_len` expect that.
        ///
        /// # Panics
        ///
        /// Panics when `outbuf` ends before the header does.
        fn parse(outbuf: &[u8]) -> (NodeHeader, usize) {
            let tag_byte = outbuf[0];
            let has_bit3 = tag_byte & 0x08 != 0;
            let upper_nibble = tag_byte & 0xF0;
            let is_pure_suffixes_only_node = tag_byte & 0x01 != 0;
            let is_stack_compressed = tag_byte & 0x02 != 0;
            let is_long_compressed = tag_byte & 0x04 != 0;
            let is_dictionary_compressed = has_bit3 && !is_pure_suffixes_only_node;
            let is_all_compressed = upper_nibble == 0xF0 && !is_long_compressed;
            let is_compressed = upper_nibble != 0x00 || is_long_compressed;

            // `cursor` always points at the next unread header byte.
            let mut cursor = 1;
            let mut compression_tag = u32::from(tag_byte);
            if is_long_compressed {
                compression_tag |= u32::from(outbuf[cursor]) << 8;
                cursor += 1;
            }

            let (data_start, data_len, is_inline_neighbors, is_no_leaves) =
                if is_pure_suffixes_only_node && has_bit3 {
                    // No data-length byte at all: no data, no leaves.
                    (cursor, 0u8, false, true)
                } else {
                    let len_byte = outbuf[cursor];
                    cursor += 1;
                    let data_start = cursor;
                    let data_len = len_byte & 0x3F;
                    // Data bytes are only stored inline for nodes that are neither
                    // pure-suffixes-only nor dictionary compressed.
                    if !(is_pure_suffixes_only_node || is_dictionary_compressed) {
                        cursor += usize::from(data_len);
                    }
                    (data_start, data_len, len_byte & 0x40 != 0, len_byte & 0x80 != 0)
                };

            if is_inline_neighbors {
                let branches_len = if is_long_compressed {
                    (compression_tag >> 8) as usize
                } else {
                    0
                };
                let (mhp_len, so_len) = if is_pure_suffixes_only_node {
                    (0, branches_len)
                } else {
                    (branches_len, 0)
                };
                let header = NodeHeader {
                    is_pure_suffixes_only_node,
                    is_stack_compressed: false,
                    is_long_compressed: false,
                    is_all_compressed: false,
                    is_dictionary_compressed,
                    is_compressed: false,
                    is_inline_neighbors: true,
                    is_no_leaves,
                    compression_bitmap: 0,
                    data_start,
                    data_len,
                    mhp_len,
                    so_len,
                    alphabitmap: 0,
                };
                return (header, cursor);
            }

            let (mhp_len, so_len, alphabitmap) = if is_pure_suffixes_only_node {
                // A single count byte; its top bits double as the alphabitmap marker.
                let count = outbuf[cursor];
                cursor += 1;
                match count {
                    0xc0..=0xff => (0, usize::from(count & 0x3f), 0xc0),
                    0x80..=0xbf => (0, usize::from(count & 0x7f), 0x80),
                    _ => (0, usize::from(count), 0x00),
                }
            } else {
                // Two count bytes; the marker only applies when both bytes carry it.
                let counts = (outbuf[cursor], outbuf[cursor + 1]);
                cursor += 2;
                match counts {
                    (0xff, 0xff) => (256, 0, 0x00),
                    (mhp @ 0xc0..=0xff, so @ 0xc0..=0xff) => {
                        (usize::from(mhp & 0x3f), usize::from(so & 0x3f), 0xc0)
                    }
                    (mhp @ 0x80..=0xff, so @ 0x80..=0xff) => {
                        (usize::from(mhp & 0x7f), usize::from(so & 0x7f), 0x80)
                    }
                    (mhp, so) => (usize::from(mhp), usize::from(so), 0x00),
                }
            };

            let header = NodeHeader {
                is_pure_suffixes_only_node,
                is_stack_compressed,
                is_long_compressed,
                is_all_compressed,
                is_dictionary_compressed,
                is_compressed,
                is_inline_neighbors: false,
                is_no_leaves,
                compression_bitmap: compression_tag >> 4,
                data_start,
                data_len,
                mhp_len,
                so_len,
                alphabitmap,
            };
            (header, cursor)
        }
        /// Number of children this header marks as compressed: none for an
        /// uncompressed node, every child for an all-compressed node, and otherwise
        /// the number of bits set in `compression_bitmap`.
        fn count_compressed_children(self) -> usize {
            match (self.is_compressed, self.is_all_compressed) {
                (false, _) => 0,
                (true, true) => self.mhp_len + self.so_len,
                (true, false) => self.compression_bitmap.count_ones() as usize,
            }
        }
    }
    /// Flattens a chunk of bundled nodes into the bytes of a single file.
    ///
    /// Each item of `subtree` names a node by its byte ranges inside `outbuf`; only
    /// the second ("compressed") range is read. The flattened nodes are appended to
    /// `outbuf` and the appended tail is returned.
    ///
    /// Dictionary compression happens on the way: when a node that is not a pure
    /// suffix node carries non-empty data equal to the data of one of the most
    /// recent `0x3F` such nodes, its data bytes are replaced by a back-reference.
    /// The tag byte gets bit `0x08` set and the length byte is replaced by the
    /// index of the matching entry, keeping the original `0x80` / `0x40` flag bits.
    fn subtree_to_bytes(
        outbuf: &mut Vec<u8>,
        subtree: impl Iterator<Item = (u64, (Range<usize>, Range<usize>))>,
    ) -> &[u8] {
        let flattened_start = outbuf.len();
        // Data ranges of recently seen nodes, most recent first.
        let mut recent_data: VecDeque<Range<usize>> = VecDeque::new();
        for (_hash, (_canonical, node_bytes)) in subtree {
            let head = node_bytes.start;
            let tag = outbuf[head];
            // Pure suffix nodes are copied through untouched.
            if tag & 0x01 != 0 {
                outbuf.extend_from_within(node_bytes);
                continue;
            }
            // A long tag occupies two bytes, pushing the length byte one further.
            let has_long_tag = tag & 0x04 != 0;
            let len_at = head + if has_long_tag { 2 } else { 1 };
            let len_byte = outbuf[len_at];
            let data_len = usize::from(len_byte & 0x3F);
            // Nodes without data have nothing to compress.
            if data_len == 0 {
                outbuf.extend_from_within(node_bytes);
                continue;
            }
            let data = len_at + 1..len_at + 1 + data_len;
            let backref = recent_data
                .iter()
                .position(|earlier| outbuf[earlier.clone()] == outbuf[data.clone()]);
            match backref {
                Some(backref) => {
                    outbuf.push(tag | 0x08); // flag node as dictionary compressed
                    if has_long_tag {
                        outbuf.push(outbuf[head + 1]);
                    }
                    // Keep the no-leaves (0x80) and inline-neighbors (0x40) bits.
                    outbuf.push(u8::try_from(backref).unwrap() | (len_byte & 0xC0));
                    // Everything after the data bytes is copied unchanged.
                    outbuf.extend_from_within(data.end..node_bytes.end);
                }
                None => outbuf.extend_from_within(node_bytes),
            }
            recent_data.push_front(data);
            if recent_data.len() > 0x3F {
                recent_data.pop_back();
            }
        }
        &outbuf[flattened_start..]
    }
    /// Returns a node's file, and writes any child nodes that couldn't be bundled through the `file_writer`.
    ///
    /// This function recursively builds all the children. If all the children have already been built, use `generate_node`.
    fn generate_node_with_bundled_children(
        tree: &SearchTree,
        node: usize,
        disambig_level: usize,
        parent_is_suffixes_only: bool,
        file_reader: &mut dyn FnMut(&str) -> Result<Option<Vec<u8>>, std::io::Error>,
        file_writer: &mut dyn FnMut(&str, &[u8]) -> Result<(), std::io::Error>,
        file_existence_cache: &mut HashSet<u64>,
        bundled_subtree_hashes: &mut Vec<u64>,
        bundled_subtree_offsets: &mut Vec<(Range<usize>, Range<usize>)>,
        outbuf: &mut Vec<u8>,
        hasher: &mut dyn FnMut(&[u8]) -> u64,
    ) -> Result<(), std::io::Error> {
        let mhp_len = tree.nodes[node].branch_values.iter().copied().filter(|child_node| !tree.nodes[*child_node].suffixes_only).count();
        let so_len = tree.nodes[node].branch_values.iter().copied().filter(|child_node| tree.nodes[*child_node].suffixes_only).count();
        let local_subtree_start = bundled_subtree_hashes.len();
        assert_eq!(local_subtree_start, bundled_subtree_offsets.len());
        if // special representation for nodes that...
            // haven't hit a hash collision (because this format has no redundant space for it)
            disambig_level == 0 &&
            // ...have at most 16 leaves (that's all that fits in the field, given the extra byte for no-leaves that lets us ignore zero)
            tree.nodes[node].leaves.len() <= 0x10 &&
            // ...do not require data to be chopped up (the format could support it, but the code doesn't)
            tree.nodes[node].data.len() <= 0x3F &&
            // ...have only one kind of direct leaf
            (tree.nodes[node].suffixes_only || (
                tree.nodes[node].leaves.leaves_suffix.is_empty() &&
                tree.nodes[node].branch_values.iter().copied().all(|child_node| !tree.nodes[child_node].suffixes_only)
            )) &&
            // ...has only direct branches, no sub-branches, and all fulfill the leaf repr reqs
            tree.nodes[node].branch_values.iter().copied().all(|child_node| {
                let node = &tree.nodes[child_node];
                node.branch_keys.is_empty() &&
                    node.leaves.len() <= 0x10 &&
                    node.data.len() <= 0xF &&
                    (node.suffixes_only || node.leaves.leaves_suffix.is_empty())
            }) &&
            // ...and, most importantly, all transitives leaves have the same 16bit prefix
            let Some(leaf_value_upper) = tree.nodes[node].leaves.iter().next()
                .or_else(|| tree.nodes[node].branch_values.iter().copied().find_map(|child_node| tree.nodes[child_node].leaves.iter().next()))
                .map(|upper| upper >> 16) &&
            tree.nodes[node].leaves.iter().all(|leaf| (leaf >> 16) == leaf_value_upper) &&
            tree.nodes[node].branch_values.iter().copied().all(|child_node| tree.nodes[child_node].leaves.iter().all(|leaf| (leaf >> 16) == leaf_value_upper))
        {
            let node = &tree.nodes[node];
            let data = &tree.data[node.data.clone()];
            let new_data_start = outbuf.len();
            let no_leaves_flag = if node.leaves.is_empty() { 0x80 } else { 0x00 };
            let inline_neighbors_flag = 0x40;
            outbuf.push(
                if node.suffixes_only { 0x01 } else { 0 } |
                if node.branch_values.is_empty() { 0 } else { 0x04 } |
                if node.leaves.is_empty() { 0 } else { u8::try_from(node.leaves.len() - 1).unwrap() << 4 }
            );
            if !node.branch_values.is_empty() {
                outbuf.push(u8::try_from(node.branch_values.len() - 1).unwrap());
            }
            outbuf.push(u8::try_from(data.len()).expect("data len must be less than 64") | no_leaves_flag | inline_neighbors_flag);
            if !node.suffixes_only {
                outbuf.extend(data);
            }
            outbuf.extend_from_slice(&[
                u8::try_from(leaf_value_upper & 0xFF).unwrap(),
                u8::try_from((leaf_value_upper >> 8) & 0xFF).unwrap(),
            ][..]);
            for child_node in node.branch_values.iter().copied() {
                outbuf.push(u8::try_from(
                    tree.nodes[child_node].data.len() |
                    ((tree.nodes[child_node].leaves.len() - 1) << 4)
                ).unwrap());
                if !node.suffixes_only {
                    outbuf.extend_from_slice(&tree.data[tree.nodes[child_node].data.clone()]);
                }
                for leaf in tree.nodes[child_node].leaves.iter() {
                    outbuf.extend_from_slice(&[
                        u8::try_from(leaf & 0xFF).unwrap(),
                        u8::try_from((leaf >> 8) & 0xFF).unwrap(),
                    ][..]);
                }
            }
            outbuf.extend_from_slice(&node.branch_keys[..]);
            for leaf in node.leaves.iter() {
                outbuf.extend_from_slice(&[
                    u8::try_from(leaf & 0xFF).unwrap(),
                    u8::try_from((leaf >> 8) & 0xFF).unwrap(),
                ][..]);
            }
            let nodeid = hasher(&outbuf[new_data_start..outbuf.len()]) & 0x0000_7FFF_FFFF_FFFF;
            bundled_subtree_hashes.push(nodeid);
            bundled_subtree_offsets.push((
                new_data_start..outbuf.len(),
                new_data_start..outbuf.len(),
            ));
            return Ok(());
        }
        // The set of characters, one for each child, that might follow this node
        let mut might_have_prefix_child_keys: Vec<u8> = Vec::with_capacity(mhp_len);
        let mut suffix_only_child_keys: Vec<u8> = Vec::with_capacity(so_len);
        // The hash or inlined data of each child node
        let mut might_have_prefix_child_nodeids: Vec<u64> = Vec::with_capacity(mhp_len);
        let mut suffix_only_child_nodeids: Vec<u64> = Vec::with_capacity(so_len);
        // The filename to write a child to (this buffer is reused for efficiency).
        let mut filename = String::new();
        for (i, child_node) in
            tree.nodes[node].branch_values.iter().copied().enumerate().filter(|(_, child_node)| tree.nodes[*child_node].suffixes_only).rev()
            .chain(
                tree.nodes[node].branch_values.iter().copied().enumerate().filter(|(_, child_node)| !tree.nodes[*child_node].suffixes_only).rev()
            )
        {
            let child_node_suffixes_only = tree.nodes[child_node].suffixes_only;
            let current_node_suffixes_only = tree.nodes[node].suffixes_only;
            let (child_keys, child_nodeids) = if !child_node_suffixes_only || mhp_len >= 256 || so_len >= 256 {
                // if there are 256 child nodes, we overflow the one-byte length marker
                // to work around this, both bytes are set to 0xff, and we can't distinguish suffix-only
                // data from prefix-containing data
                (
                    &mut might_have_prefix_child_keys,
                    &mut might_have_prefix_child_nodeids,
                )
            } else {
                (&mut suffix_only_child_keys, &mut suffix_only_child_nodeids)
            };
            child_keys.push(tree.nodes[node].branch_keys[i]);
            let mut disambig_level_child = 0;
            let child_nodeid = 'nodeid: loop {
                let child_data = &tree.data[tree.nodes[child_node].data.clone()][..];
                let child_leaves = &tree.nodes[child_node].leaves;
                if tree.nodes[child_node].branch_keys.is_empty() {
                    // Inlining:
                    // The first bit of a 48 bit node ID is a tag. If this tag is zero, then
                    // the node ID is a hash. If it's a 1, then its leaf ID is packed into the
                    // lower 32 bits, and the remaining data contains a byte (if any) and a tag
                    // marking it as a whole or a suffix node.
                    let (tag, id) = match (
                        &child_leaves.leaves_whole[..],
                        &child_leaves.leaves_suffix[..],
                        child_data,
                    ) {
                        // prefix nodes need to store the actual data
                        // ==========================================
                        // one ID, zero byte
                        (&[id], &[], &[]) => (0x0000_c000_0000_0000u64, u64::from(id)), // 1100
                        // one ID, one byte
                        (&[id], &[], &[c]) => {
                            (0x0000_e000_0000_0000u64 | ((c as u64) << 36), u64::from(id)) // 1110
                        }
                        // two ID, zero byte (two ID encoding stores up to 28 bits of id1,
                        // and id2 is a 16 bit offset)
                        (&[id1, id2], &[], &[]) if id1 <= 0xfffffff && id2 - id1 <= 0xffff => (
                            0x0000_d000_0000_0000u64, // 1101
                            (u64::from(id1) << 16) | u64::from(id2 - id1),
                        ),
                        // two ID, one byte (two ID encoding stores up to 20 bits of id1,
                        // and id2 is a 16 bit offset)
                        (&[id1, id2], &[], &[c]) if id1 <= 0xfffff && id2 - id1 <= 0xffff => (
                            0x0000_f000_0000_0000u64 | ((c as u64) << 36), // 1111
                            (u64::from(id1) << 16) | u64::from(id2 - id1),
                        ),
                        // suffix nodes need to store only a count
                        // =======================================
                        (&[], &[id], data) if data.len() <= 0xff => {
                            (0x0000_a000_0000_0000u64 | ((data.len() as u64) << 36), u64::from(id)) // 1010
                        }
                        (&[], &[id1, id2], data) if data.len() <= 0xff && id1 <= 0xfffff && id2 - id1 <= 0xffff => (
                            0x0000_b000_0000_0000u64 | ((data.len() as u64) << 36), // 1011
                            (u64::from(id1) << 16) | u64::from(id2 - id1),
                        ),
                        (&[], &[id1, id2], &[]) if id1 <= 0xfffffff && id2 - id1 <= 0xffff => (
                            0x0000_9000_0000_0000u64, // 1001
                            (u64::from(id1) << 16) | u64::from(id2 - id1),
                        ),
                        (&[], &[id1, id2, id3], &[]) if id1 <= 0xfffff && id2 - id1 <= 0xfff && id3 - id2 <= 0xfff => (
                            0x0000_8000_0000_0000u64, // 1000
                            (u64::from(id1) << 24) | (u64::from(id2 - id1) << 12) | u64::from(id3 - id2),
                        ),
                        // not inlineable
                        _ => (0, 0),
                    };
                    if tag != 0 {
                        let inlined_data = tag | id;
                        break 'nodeid inlined_data;
                    }
                }
                let new_subtree_start = bundled_subtree_hashes.len();
                let new_outbuf_start = outbuf.len();
                stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
                    generate_node_with_bundled_children(
                        tree,
                        child_node,
                        disambig_level_child,
                        current_node_suffixes_only,
                        file_reader,
                        file_writer,
                        file_existence_cache,
                        bundled_subtree_hashes,
                        bundled_subtree_offsets,
                        outbuf,
                        hasher,
                    )
                })?;
                let root_child_nodeid = *bundled_subtree_hashes.last().unwrap();
                let child_is_large = bundled_subtree_offsets[new_subtree_start..]
                    .iter()
                    .map(|(_, compressed)| compressed.len())
                    .sum::<usize>()
                    > CHILD_IS_LARGE_LIMIT;
                if
                // The unbundling heuristics (these are the cases where a node is never bundled):
                (
                        // 1. I don't bundle large nodes that has seven one in the MSB of the hash.
                        //    The first bit is always masked zero, as a tag, so the
                        //    remaining zeroes work out to a 1/2**15 pseudo-random chance of unbundling,
                        //    in a context where every node will always make the same choice,
                        //    which improves reuse over doing it based on a size cutoff alone.
                        (root_child_nodeid >= 0x0000_7FFF_0000_0000 && child_is_large)
                        // 2. Suffix-only subtrees are always unbundled from prefix-bearing nodes.
                        //    This reduces the amount of data that levenshtein matching, which ignores suffix-only subtrees,
                        //    winds up pulling down.
                        || (child_node_suffixes_only != current_node_suffixes_only && child_is_large)
                        // 3. I don't bundle data when there's a hash collision (implementation complexity).
                        || disambig_level_child > 0
                        // 4. I limit bundling to 256KiB, as mentioned above.
                        || bundled_subtree_offsets[local_subtree_start..]
                            .iter()
                            .map(|(_, compressed)| compressed.len())
                            .sum::<usize>()
                            > FILE_SIZE_LIMIT
                        // 5. I don't bundle if a file with the same hash already exists.
                        || file_existence_cache.contains(&root_child_nodeid)
                    )
                    // 6. I always bundle a node that has already *been* bundled.
                    // The unbundling heuristics:
                    && !bundled_subtree_hashes[local_subtree_start..new_subtree_start]
                        .iter()
                        .any(|h| {
                            *h == root_child_nodeid
                        })
                {
                    filename.clear();
                    filename.reserve(12);
                    write_hex_to_string((root_child_nodeid >> 40) as u8, &mut filename);
                    write_hex_to_string((root_child_nodeid >> 32) as u8, &mut filename);
                    write_hex_to_string((root_child_nodeid >> 24) as u8, &mut filename);
                    write_hex_to_string((root_child_nodeid >> 16) as u8, &mut filename);
                    write_hex_to_string((root_child_nodeid >> 8) as u8, &mut filename);
                    write_hex_to_string(root_child_nodeid as u8, &mut filename);
                    let flattened_child_data = subtree_to_bytes(
                        outbuf,
                        bundled_subtree_hashes.drain(new_subtree_start..).zip(bundled_subtree_offsets.drain(new_subtree_start..)),
                    );
                    match file_existence_cache
                        .contains(&root_child_nodeid)
                        .then(|| file_reader(&filename))
                        .unwrap_or(Ok(None))?
                    {
                        Some(other_content) => {
                            if other_content[..] == flattened_child_data[..] {
                                outbuf.truncate(new_outbuf_start);
                                break 'nodeid root_child_nodeid;
                            }
                            disambig_level_child += 1;
                            outbuf.truncate(new_outbuf_start);
                            continue 'nodeid;
                        }
                        None => {
                            file_writer(&filename, &flattened_child_data)?;
                            file_existence_cache.insert(root_child_nodeid);
                            outbuf.truncate(new_outbuf_start);
                            break 'nodeid root_child_nodeid;
                        }
                    }
                } else {
                    let mut needs_deleted = TinySet::<usize>::default();
                    let children_hashes = &bundled_subtree_hashes[new_subtree_start..];
                    let children_offsets = &bundled_subtree_offsets[new_subtree_start..];
                    // Check if any other nodes inside this file have the same ID.
                    // We don't need to check for conflicts between files (because the loader doesn't use a big global cache) and we already checked for conflicts
                    // between a file and an internal node.
                    {
                        let mut children_hashes_iter = children_hashes.iter().cloned().enumerate().rev();
                        while let Some((mut child_node_idx, mut child_nodeid)) = children_hashes_iter.next() {
                            // Yes, I've tried a hashtable. It's slower.
                            // It's probably the per-file size limit preventing this set from ever being truly big.
                            match bundled_subtree_hashes[local_subtree_start..new_subtree_start]
                                .iter()
                                .position(|h| *h == child_nodeid)
                            {
                                Some(mut i) => {
                                    i += local_subtree_start;
                                    // if a duplicate node is found, and this node has stack compression,
                                    // then the structure of every node above it must also be identical,
                                    // so we can just scan backwards instead of having to do the full check
                                    let mut stack_depth = 1;
                                    while stack_depth > 0 && child_nodeid == bundled_subtree_hashes[i] {
                                        let (child_canonical, child_compressed) = children_offsets[child_node_idx].clone();
                                        let canonical = &bundled_subtree_offsets[i].0;
                                        let compressed = &bundled_subtree_offsets[i].1;
                                        if outbuf[canonical.clone()] != outbuf[child_canonical.clone()] {
                                            disambig_level_child += 1;
                                            bundled_subtree_hashes.truncate(new_subtree_start);
                                            bundled_subtree_offsets.truncate(new_subtree_start);
                                            outbuf.truncate(new_outbuf_start);
                                            continue 'nodeid;
                                        }
                                        needs_deleted.push(child_node_idx);
                                        stack_depth -= 1;
                                        let (child_compressed_node, _) = NodeHeader::parse(&outbuf[child_compressed.start..]);
                                        let (compressed_node, _) = NodeHeader::parse(&outbuf[compressed.start..]);
                                        if (child_compressed_node.is_compressed && !child_compressed_node.is_stack_compressed)
                                            || (compressed_node.is_compressed && !compressed_node.is_stack_compressed)
                                            || (child_compressed_node.compression_bitmap != compressed_node.compression_bitmap)
                                        {
                                            // if these nodes have different structure,
                                            // or if one of them uses backref compression,
                                            // then we need to handle potentially different orderings
                                            break;
                                        }
                                        stack_depth += child_compressed_node.count_compressed_children();
                                        if stack_depth != 0 {
                                            (child_node_idx, child_nodeid) = children_hashes_iter.next().unwrap();
                                            i -= 1;
                                        }
                                    }
                                }
                                None => {}
                            }
                        }
                        for &idx in needs_deleted[..].iter().rev() {
                            bundled_subtree_hashes.remove(new_subtree_start + idx);
                            bundled_subtree_offsets.remove(new_subtree_start + idx);
                        }
                        if let Some(idx) = needs_deleted.first().copied() {
                            let len = bundled_subtree_hashes.len();
                            for child_node_idx in new_subtree_start + idx..len {
                                compress_node_branches(
                                    &mut bundled_subtree_hashes[local_subtree_start..child_node_idx + 1],
                                    &mut bundled_subtree_offsets[local_subtree_start..child_node_idx + 1],
                                    outbuf,
                                );
                            }
                        }
                    }
                    break 'nodeid root_child_nodeid;
                }
            };
            child_nodeids.push(child_nodeid);
        }
        // To support stack compression, we want the branch list to be in reverse
        // order of the serialized nodes.
        might_have_prefix_child_keys.reverse();
        suffix_only_child_keys.reverse();
        might_have_prefix_child_nodeids.reverse();
        suffix_only_child_nodeids.reverse();
        // If there was a hash conflict, add dummy children to break it up
        if disambig_level != 0 {
            let dummy_node = generate_node(
                std::iter::empty(),
                std::iter::empty(),
                std::iter::empty(),
                std::iter::empty(),
                std::iter::empty(),
                &[],
                &[],
                outbuf,
                hasher,
            );
            let dummy_node_id = dummy_node.0;
            bundled_subtree_hashes.push(dummy_node_id);
            bundled_subtree_offsets.push((dummy_node.1, dummy_node.2));
            let mut c = 0;
            for _ in 0..disambig_level {
                while might_have_prefix_child_keys.contains(&c)
                    || suffix_only_child_keys.contains(&c)
                {
                    c = c.checked_add(1).unwrap();
                }
                let mut ci = suffix_only_child_keys.len();
                suffix_only_child_keys.push(c);
                suffix_only_child_nodeids.push(dummy_node_id);
                while ci > 0 && suffix_only_child_keys[ci - 1] > c {
                    suffix_only_child_keys.swap(ci, ci - 1);
                    suffix_only_child_nodeids.swap(ci, ci - 1);
                    ci -= 1;
                }
            }
        }
        // Now that every child has been computed, do this node
        let mut data = tree.nodes[node].data.clone();
        let mut leaves_whole = &tree.nodes[node].leaves.leaves_whole;
        let mut leaves_suffix = &tree.nodes[node].leaves.leaves_suffix;
        let empty = TinySet::default();
        let is_suffix_only_node = might_have_prefix_child_nodeids.is_empty() && leaves_whole.is_empty();
        while
            // three situations where we cut a node in its data field:
            // 1. the amount of data in the node exceeds 63 (0x3F), which is the limit
            data.len() > 0x3F ||
            // 2. the node's children are larger than the data limit, and it's a
            // suffix-only node, which means cutting off a suffix would likely
            // allow combining with another node
            // 3. there is a 1/128 chance that suffix nodes that are children of suffix nodes
            // are also going to be split (based on a hash, so that the same subtree gets split).
            (data.len() >= 1 && is_suffix_only_node &&
                bundled_subtree_offsets[local_subtree_start..]
                .iter()
                .map(|(_, compressed)| compressed.len())
                .sum::<usize>()
                >= if parent_is_suffixes_only && bundled_subtree_hashes.iter().fold(0, |a, b| a ^ *b) < 0x0000_7F00_0000_0000 { FILE_SIZE_LIMIT } else { CHILD_IS_LARGE_LIMIT }
            )
        {
            let parent_data = data.start..if data.len() > 0x00_3F { data.end - 0x00_3F } else { data.end - 1 };
            let c = tree.data[parent_data.end];
            let child_data = parent_data.end + 1..data.end;
            data = parent_data;
            let child_node = stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
                generate_node(
                    might_have_prefix_child_nodeids.iter().copied(),
                    might_have_prefix_child_keys.iter().copied(),
                    suffix_only_child_nodeids.iter().copied(),
                    suffix_only_child_keys.iter().copied(),
                    tree.data[child_data].iter().copied(),
                    &leaves_whole,
                    &leaves_suffix,
                    outbuf,
                    hasher,
                )
            });
            let child_node_id = child_node.0;
            might_have_prefix_child_nodeids.clear();
            might_have_prefix_child_keys.clear();
            suffix_only_child_nodeids.clear();
            suffix_only_child_keys.clear();
            if is_suffix_only_node {
                suffix_only_child_nodeids.push(child_node_id);
                suffix_only_child_keys.push(c);
            } else {
                might_have_prefix_child_nodeids.push(child_node_id);
                might_have_prefix_child_keys.push(c);
            }
            leaves_whole = &empty;
            leaves_suffix = &empty;
            bundled_subtree_hashes.push(child_node.0);
            bundled_subtree_offsets.push((child_node.1, child_node.2));
            compress_node_branches(
                &mut bundled_subtree_hashes[local_subtree_start..],
                &mut bundled_subtree_offsets[local_subtree_start..],
                outbuf,
            );
            if is_suffix_only_node {
                filename.clear();
                filename.reserve(12);
                write_hex_to_string((child_node_id >> 40) as u8, &mut filename);
                write_hex_to_string((child_node_id >> 32) as u8, &mut filename);
                write_hex_to_string((child_node_id >> 24) as u8, &mut filename);
                write_hex_to_string((child_node_id >> 16) as u8, &mut filename);
                write_hex_to_string((child_node_id >> 8) as u8, &mut filename);
                write_hex_to_string(child_node_id as u8, &mut filename);
                let outbuf_mark = outbuf.len();
                let flattened_child_data = subtree_to_bytes(
                    outbuf,
                    bundled_subtree_hashes[local_subtree_start..].iter().copied().zip(bundled_subtree_offsets[local_subtree_start..].iter().cloned()),
                );
                match file_existence_cache
                    .contains(&child_node_id)
                    .then(|| file_reader(&filename))
                    .unwrap_or(Ok(None))?
                {
                    Some(other_content) => {
                        if other_content[..] == flattened_child_data[..] {
                            bundled_subtree_hashes.truncate(local_subtree_start);
                            bundled_subtree_offsets.truncate(local_subtree_start);
                        }
                        outbuf.truncate(outbuf_mark);
                    }
                    None => {
                        file_writer(&filename, &flattened_child_data)?;
                        file_existence_cache.insert(child_node_id);
                        bundled_subtree_hashes.truncate(local_subtree_start);
                        bundled_subtree_offsets.truncate(local_subtree_start);
                        outbuf.truncate(outbuf_mark);
                    }
                }
            }
        }
        let node = generate_node(
            might_have_prefix_child_nodeids.iter().copied(),
            might_have_prefix_child_keys.iter().copied(),
            suffix_only_child_nodeids.iter().copied(),
            suffix_only_child_keys.iter().copied(),
            tree.data[data].iter().copied(),
            &leaves_whole,
            &leaves_suffix,
            outbuf,
            hasher,
        );
        let node_id = node.0;
        bundled_subtree_hashes.push(node_id);
        bundled_subtree_offsets.push((node.1, node.2));
        compress_node_branches(
            &mut bundled_subtree_hashes[local_subtree_start..],
            &mut bundled_subtree_offsets[local_subtree_start..],
            outbuf,
        );
        Ok(())
    }

    // Re-encode the branch pointers of the newest bundled node in compressed form.
    //
    // The node in question is the last entry of `bundled_subtree_hashes` and
    // `bundled_subtree_offsets`. Its canonical bytes stay where they are; a new
    // encoding is appended to `outbuf`, and the second range of its offsets entry
    // is updated to point at whichever encoding ends up being used (the canonical
    // one if nothing can be compressed).
    //
    // A branch is compressible when the child's node ID is found among the (up to)
    // 256 entries that precede this node in the bundle. There are two ways to
    // write a compressed branch:
    // - stack compressed: no pointer bytes are written; the pointer is implied by
    //   the most recent not-yet-consumed nodes, which is why the decoder's stack
    //   is simulated below before choosing this form
    // - backref compressed: a one byte backreference, counted backwards from the
    //   entry immediately before this node
    //
    // Header byte as written by this function:
    // - 0x01: pure suffixes-only node
    // - 0x02: stack compressed
    // - 0x04: long tag, a second header byte carries eight more tag bits
    // - 0x08: pure suffixes-only node without data or leaves (no length byte follows)
    // - high nibble: one tag bit per branch, least significant first, set for the
    //   branches that are compressed. When every branch is compressed the whole
    //   nibble is set instead and no per-branch bits are recorded, so that case
    //   works for any number of branches.
    //
    // A partially compressible node with more than 12 branches does not fit in
    // the tag and is left canonical. Dictionary compression is applied later;
    // nodes inspected here are asserted not to have it yet.
    fn compress_node_branches(
        bundled_subtree_hashes: &mut [u64],
        bundled_subtree_offsets: &mut [(Range<usize>, Range<usize>)],
        outbuf: &mut Vec<u8>,
    ) {
        let (canonical, existing_compressed) = bundled_subtree_offsets.last().unwrap().clone();
        let (
            NodeHeader {
                is_pure_suffixes_only_node,
                is_inline_neighbors,
                is_no_leaves,
                mhp_len,
                so_len,
                data_start,
                data_len,
                alphabitmap,
                ..
            },
            header_len,
        ) = NodeHeader::parse(&outbuf[canonical.clone()]);
        let children_len = mhp_len + so_len;
        // Inline branches cannot be compressed, and a childless node has nothing to compress.
        if is_inline_neighbors || children_len == 0 {
            return;
        }

        // Step 1: resolve every child pointer to either a backref (`Ok`) or the
        // literal node ID (`Err`). The might-have-prefix pointers come first in
        // the canonical bytes, followed directly by the suffix-only pointers.
        let mut cursor = canonical.start + header_len;
        let backrefs: Vec<Result<usize, u64>> = (0..children_len)
            .map(|_| {
                let nodeid =
                    crate::internals::decode::read_48bit_from_bytes_be(&outbuf[cursor..]);
                cursor += 6;
                if nodeid & 0x0000_8000_0000_0000u64 != 0 {
                    // inline nodes never appear in the tree
                    return Err(nodeid);
                }
                bundled_subtree_hashes
                    .iter()
                    .rev()
                    .skip(1)
                    .take(256)
                    .position(|&candidate| candidate == nodeid)
                    .ok_or(nodeid)
            })
            .collect();

        let compressed_children_len = backrefs.iter().filter(|found| found.is_ok()).count();
        if compressed_children_len == 0 {
            // None of the children are in range. Cannot compress node.
            bundled_subtree_offsets.last_mut().unwrap().1 = canonical;
            return;
        }
        let all_children_are_compressed = compressed_children_len == children_len;
        let is_long_compressed = !all_children_are_compressed && children_len > 4;
        if is_long_compressed && children_len > 12 {
            // Limited number of bits in the tag.
            bundled_subtree_offsets.last_mut().unwrap().1 = canonical;
            return;
        }

        // Step 2: decide whether the pointers can be left implicit. That needs the
        // backrefs to be in ascending order, and needs them to be exactly what the
        // decoder would pop from its stack at this point.
        let is_stack_compressible = backrefs.iter().filter_map(|found| found.ok()).is_sorted() && {
            let newest = bundled_subtree_hashes.len() - 2;
            // the backrefs are sorted, so the last one reaches furthest back
            let deepest = backrefs.iter().rev().find_map(|found| found.ok()).unwrap();
            let start_at = newest - deepest;
            assert_eq!(bundled_subtree_hashes.len(), bundled_subtree_offsets.len());
            // (backref of the node, whether something already consumed it)
            let mut stack: Vec<(usize, bool)> = Vec::new();
            for entry_idx in start_at..=newest {
                let sibling_start = bundled_subtree_offsets[entry_idx].1.start;
                let (sibling, sibling_header_len) = NodeHeader::parse(&outbuf[sibling_start..]);
                assert!(!sibling.is_dictionary_compressed, "dictionary compression should not happen yet");
                let branch_count = sibling.mhp_len + sibling.so_len;
                if sibling.is_stack_compressed {
                    let mut slot = stack.len();
                    'hit_end: for branchnum in 0..branch_count {
                        if slot == 0 {
                            // anything below the simulated window is invisible to us;
                            // the ops are only replayed to maintain the "used" flags
                            break 'hit_end;
                        }
                        if sibling.is_all_compressed || (sibling.compression_bitmap & (1 << branchnum)) != 0 {
                            slot -= 1;
                            // skip consumed stack slots
                            while stack[slot].1 {
                                if slot == 0 {
                                    break 'hit_end;
                                }
                                slot -= 1;
                            }
                            stack[slot].1 = true;
                        }
                    }
                } else if sibling.is_compressed {
                    let mut read_at = sibling_start + sibling_header_len;
                    for branchnum in 0..branch_count {
                        if sibling.is_all_compressed || (sibling.compression_bitmap & (1 << branchnum)) != 0 {
                            let backref = outbuf[read_at] as usize;
                            if backref < stack.len() {
                                let backref_slot = stack.len() - 1 - backref;
                                stack[backref_slot].1 = true;
                            }
                            read_at += 1;
                        } else {
                            read_at += 6;
                        }
                    }
                }
                // the decoder pushes every node it decodes, consumed or not
                stack.push((newest - entry_idx, false));
            }
            // Every compressed branch must match the top unconsumed stack entry, in order.
            backrefs.iter().filter_map(|found| found.ok()).all(|wanted| {
                while stack.last().is_some_and(|slot| slot.1) {
                    stack.pop();
                }
                stack.pop() == Some((wanted, false))
            })
        };

        // Step 3: write the compressed encoding.
        let new_data_start = outbuf.len();
        let is_bare = is_pure_suffixes_only_node && data_len == 0 && is_no_leaves;
        let mut header_byte = 0x00;
        if all_children_are_compressed {
            header_byte |= 0xF0;
        }
        if is_long_compressed {
            header_byte |= 0x04;
        }
        if is_stack_compressible {
            header_byte |= 0x02;
        }
        if is_pure_suffixes_only_node {
            header_byte |= 0x01;
        }
        if is_bare {
            header_byte |= 0x08;
        }
        outbuf.push(header_byte);
        if is_long_compressed {
            // placeholder for the upper tag bits, filled in once they are known
            outbuf.push(0);
        }
        if !is_bare {
            let no_leaves_flag = if is_no_leaves { 0x80 } else { 0 };
            outbuf.push(u8::try_from(data_len).expect("compressed node can only have data < 256") | no_leaves_flag);
            if !is_pure_suffixes_only_node {
                let data_begin = canonical.start + data_start;
                outbuf.extend_from_within(data_begin..data_begin + usize::from(data_len));
            }
        }
        assert!(children_len <= 256);
        if is_pure_suffixes_only_node {
            outbuf.push(alphabitmap | u8::try_from(so_len).unwrap());
        } else if mhp_len == 256 || so_len == 256 {
            // special path for the unlikely case that a node has every child possible
            // if this happens every time, it means we can't compress
            //
            // since suffix_only keys and might_have_prefix keys are disjoint,
            // only one of the two lists can be this long
            outbuf.extend_from_slice(&[0xff, 0xff]);
        } else {
            outbuf.push(alphabitmap | u8::try_from(mhp_len).unwrap());
            outbuf.push(alphabitmap | u8::try_from(so_len).unwrap());
        }
        let mut tag_flag: i32 = 0;
        for (tag_idx, found) in backrefs.iter().enumerate() {
            match *found {
                Ok(offset) => {
                    if !is_stack_compressible {
                        outbuf.push(u8::try_from(offset).expect("search loop limited to 256 steps"));
                    }
                    if !all_children_are_compressed {
                        tag_flag |= 1 << tag_idx;
                    }
                }
                Err(nodeid) => {
                    write_48bit_to_bytes_be(nodeid, outbuf);
                }
            }
        }
        outbuf[new_data_start] |= u8::try_from(tag_flag & 0x0F).unwrap() << 4;
        if is_long_compressed {
            outbuf[new_data_start + 1] |= u8::try_from((tag_flag >> 4) & 0xFF).unwrap();
        }
        // copy child keys and leaves
        outbuf.extend_from_within(cursor..canonical.end);

        // Step 4: keep the previous encoding if this one came out byte-identical.
        if outbuf[new_data_start..] == outbuf[existing_compressed.clone()] {
            outbuf.truncate(new_data_start);
        } else {
            bundled_subtree_offsets.last_mut().unwrap().1 = new_data_start..outbuf.len();
        }
    }
    /// Serialize a single node in canonical form and compute its node ID.
    ///
    /// The child IDs must already be computed in order to call this function,
    /// because they are embedded in the node's bytes.
    ///
    /// Appended to `outbuf`, in this order:
    ///
    /// 1. the header byte (`0x01` for a pure suffixes-only node, `0x09` when that
    ///    node also has no data and no leaves, in which case step 2 is skipped)
    /// 2. the data length, with `0x80` set when there are no leaves, followed by
    ///    the data bytes (the bytes themselves are left out of pure
    ///    suffixes-only nodes)
    /// 3. the child counts, tagged with `0x80` / `0xc0` when the keys are stored
    ///    as a short / long alphabet bitmap
    /// 4. the 48-bit child IDs, might-have-prefix children first
    /// 5. the child keys, either as a plain list or as bitmaps
    /// 6. the leaf sets, unless both are empty
    ///
    /// Returns the node ID (the `hasher` output masked to 47 bits) and the byte
    /// range of the node in `outbuf`, twice: once as the canonical range and once
    /// as the initial compressed range.
    ///
    /// # Panics
    ///
    /// Panics if `data` is longer than `0x3F` bytes or if there are more than
    /// 256 children in total.
    fn generate_node(
        might_have_prefix_child_nodeids: impl Iterator<Item = u64> + ExactSizeIterator,
        might_have_prefix_child_keys: impl Iterator<Item = u8> + ExactSizeIterator + Clone,
        suffix_only_child_nodeids: impl Iterator<Item = u64> + ExactSizeIterator,
        suffix_only_child_keys: impl Iterator<Item = u8> + ExactSizeIterator + Clone,
        data: impl Iterator<Item = u8> + ExactSizeIterator,
        whole: &[u32],
        suffix: &[u32],
        outbuf: &mut Vec<u8>,
        hasher: &mut dyn FnMut(&[u8]) -> u64,
    ) -> (u64, Range<usize>, Range<usize>) {
        // Write one `N`-byte bitmap with a bit set for the position of every key
        // in `alphabet`. Nothing is written for an empty key list.
        fn push_key_bitmap<const N: usize, K>(keys: K, alphabet: &[u8], outbuf: &mut Vec<u8>)
        where
            K: Iterator<Item = u8> + ExactSizeIterator,
        {
            if keys.len() == 0 {
                return;
            }
            let mut bitmap = [0u8; N];
            for key in keys {
                let bit = alphabet.binary_search(&key).unwrap();
                bitmap[bit >> 3] |= 1 << u8::try_from(bit & 0x07).unwrap();
            }
            outbuf.extend_from_slice(&bitmap);
        }

        // Write one leaf set. The general bitmap encoding is tried first; when it
        // is larger than a plain list and the set has fewer than 0x3a items, it is
        // replaced with one of two compact forms:
        // - 0xnn where nn < 0x3a = tiny set stored inline as 32 bit numbers
        // - 0xfn ... = tiny set with a 32 bit first item and 16 bit deltas
        fn write_leaf_set(domain: &[u32], outbuf: &mut Vec<u8>) {
            let mark = outbuf.len();
            crate::internals::encode::write_bitmap_to_bytes(domain, &mut *outbuf).unwrap();
            let bitmap_len = outbuf.len() - mark;
            if !(bitmap_len > domain.len() * 4 + 1 && domain.len() < 0x3a) {
                return;
            }
            outbuf.truncate(mark);
            let count = domain.len() as u8;
            let deltas_fit = (2..=0x0f).contains(&domain.len())
                && domain.windows(2).all(|pair| pair[1] - pair[0] <= 0xffff);
            if deltas_fit {
                outbuf.push(0xf0 | count);
                outbuf.extend_from_slice(&domain[0].to_le_bytes());
                for pair in domain.windows(2) {
                    let delta = u16::try_from(pair[1] - pair[0]).unwrap();
                    outbuf.extend_from_slice(&delta.to_le_bytes());
                }
            } else {
                outbuf.push(count);
                for item in domain {
                    outbuf.extend_from_slice(&item.to_le_bytes());
                }
            }
        }

        let mhp_len = might_have_prefix_child_keys.len();
        let so_len = suffix_only_child_keys.len();
        let has_no_leaves = whole.is_empty() && suffix.is_empty();
        let pure_suffixes_only_node = so_len < 0x80 && mhp_len == 0 && whole.is_empty();
        let new_data_start = outbuf.len();

        // Header and data.
        if has_no_leaves && pure_suffixes_only_node && data.len() == 0 {
            outbuf.push(0x09); // empty data = 0x08 | pure_suffixes_only_node = 0x01
        } else {
            outbuf.push(u8::from(pure_suffixes_only_node));
            assert!(data.len() <= 0x3F, "data too long");
            let len_byte = u8::try_from(data.len()).expect("data len must be less than 64");
            outbuf.push(if has_no_leaves { len_byte | 0x80 } else { len_byte });
            if !pure_suffixes_only_node {
                outbuf.extend(data);
            }
        }

        // Child counts, which also announce how the keys are going to be stored.
        assert!(mhp_len + so_len <= 256);
        let alphabitmap = if mhp_len == 256 || so_len == 256 {
            // special path for the unlikely case that a node has every child possible
            // if this happens every time, it means we can't compress
            //
            // since suffix_only keys and might_have_prefix keys are disjoint,
            // only one of the two lists can be this long
            assert!(!pure_suffixes_only_node);
            outbuf.extend_from_slice(&[0xff, 0xff]);
            None
        } else {
            let total = mhp_len + so_len;
            // A bitmap costs `bitmap_bytes` per non-empty key list, a plain list
            // costs one byte per key. Examples for the 3 byte bitmap:
            //
            // mhp | so | bitmap?
            // ----|----|--------
            //   1 |  0 | no
            //   3 |  0 | no
            //   4 |  0 | yes (3 bytes as bitmap, 4 as list)
            //   4 |  1 | no  (6 bytes as bitmap, 5 as list)
            //   4 |  2 | no  (6 bytes either way)
            //   4 |  3 | yes (6 bytes as bitmap, 7 as list)
            let bitmap_pays_off = |bitmap_bytes: usize, alphabet: &[u8]| {
                total <= bitmap_bytes * 8
                    && total > bitmap_bytes
                    && (mhp_len >= bitmap_bytes || mhp_len == 0 || total > bitmap_bytes * 2)
                    && (so_len >= bitmap_bytes || so_len == 0 || total > bitmap_bytes * 2)
                    && might_have_prefix_child_keys
                        .clone()
                        .chain(suffix_only_child_keys.clone())
                        .all(|key| alphabet.binary_search(&key).is_ok())
            };
            let choice = if bitmap_pays_off(3, &SHORT_ALPHABITMAP_CHARS[..]) {
                Some(AlphaBitmap::Short)
            } else if bitmap_pays_off(4, &LONG_ALPHABITMAP_CHARS[..]) {
                Some(AlphaBitmap::Long)
            } else {
                None
            };
            let bmflag = match choice {
                Some(AlphaBitmap::Short) => 0x80,
                Some(AlphaBitmap::Long) => 0xc0,
                None => 0x00,
            };
            if !pure_suffixes_only_node {
                outbuf.push(bmflag | u8::try_from(mhp_len).unwrap());
            }
            outbuf.push(bmflag | u8::try_from(so_len).unwrap());
            choice
        };

        // Child pointers.
        for nodeid in might_have_prefix_child_nodeids.chain(suffix_only_child_nodeids) {
            crate::internals::encode::write_48bit_to_bytes_be(nodeid, outbuf);
        }

        // Child keys.
        match alphabitmap {
            Some(AlphaBitmap::Short) => {
                push_key_bitmap::<3, _>(might_have_prefix_child_keys, &SHORT_ALPHABITMAP_CHARS[..], outbuf);
                push_key_bitmap::<3, _>(suffix_only_child_keys, &SHORT_ALPHABITMAP_CHARS[..], outbuf);
            }
            Some(AlphaBitmap::Long) => {
                push_key_bitmap::<4, _>(might_have_prefix_child_keys, &LONG_ALPHABITMAP_CHARS[..], outbuf);
                push_key_bitmap::<4, _>(suffix_only_child_keys, &LONG_ALPHABITMAP_CHARS[..], outbuf);
            }
            None => {
                outbuf.extend(might_have_prefix_child_keys);
                outbuf.extend(suffix_only_child_keys);
            }
        }

        // Leaves. When both sets are empty, the flag in the length byte (or the
        // 0x09 header) already says so and nothing is written.
        if !has_no_leaves {
            if !pure_suffixes_only_node {
                write_leaf_set(whole, outbuf);
            }
            write_leaf_set(suffix, outbuf);
        }

        let node_range = new_data_start..outbuf.len();
        let nodeid = hasher(&outbuf[node_range.clone()]) & 0x0000_7FFF_FFFF_FFFF;
        (nodeid, node_range.clone(), node_range)
    }
    let mut bundled_subtree_hashes: Vec<_> = Vec::with_capacity(tree.nodes.len() * 2 / 3);
    let mut bundled_subtree_offsets: Vec<(_, _)> = Vec::with_capacity(tree.nodes.len() * 2 / 3);
    let mut outbuf = Vec::new();
    let mut file_existence_cache = HashSet::new();
    generate_node_with_bundled_children(
        tree,
        0,
        0,
        false,
        file_reader,
        file_writer,
        &mut file_existence_cache,
        &mut bundled_subtree_hashes,
        &mut bundled_subtree_offsets,
        &mut outbuf,
        hasher,
    )?;
    Ok(subtree_to_bytes(
        &mut outbuf,
        bundled_subtree_hashes.into_iter().zip(bundled_subtree_offsets.into_iter()),
    ).to_vec())
}

/// Write a data column to disk.
///
/// A column is keyed by a contiguous ID number, and has a set of bytes as its value.
/// Empties are filtered out (presumably they are recorded in the returned bytes
/// rather than in any file; see [`write_data`] to confirm).
///
/// `dir_path` is created, along with any missing parents, and every file that
/// [`write_data`] emits is stored in it as `<filename>.js`. The return value is
/// whatever [`write_data`] returns.
///
/// # Errors
///
/// Fails if the directory cannot be created, if a file cannot be written, or
/// if [`write_data`] itself reports an error.
pub fn write_data_to_disk(
    data: &mut dyn Iterator<Item = impl AsRef<[u8]> + Clone>,
    dir_path: impl AsRef<Path>,
) -> Result<Vec<u8>, std::io::Error> {
    let out_dir = dir_path.as_ref();
    std::fs::create_dir_all(out_dir)?;
    let mut write_file = |filename: &str, contents: &[u8]| -> Result<(), std::io::Error> {
        std::fs::write(out_dir.join(format!("{filename}.js")), contents)
    };
    write_data(data, &mut write_file)
}

/// Serializes a data column into size-limited partitions.
///
/// Items are consumed from `data` in order; the position of an item in the
/// iterator is its ID. Empty items are not stored in any partition, only their
/// IDs are recorded. Every non-empty item is appended to the current partition
/// either as a vlqhex length followed by the raw bytes, or, when it equals one
/// of the (at most 16) most recently stored items of the same partition, as a
/// single back-reference byte `b'0' + index`.
///
/// A partition is flushed through `file_writer` before an item that would push
/// it past `FILE_SIZE_LIMIT` is added, and once more after the last item.
/// `file_writer` receives the partition's file name (12 hex digits taken from
/// the low 48 bits of the partition's hash) and the wrapped file contents.
///
/// # Returns
///
/// The column description fragment `"N":"…","E":"…","H":"…"`:
///
/// - `N`: the number of non-empty items in each partition, vlqhex-encoded;
/// - `E`: the base64-encoded bitmap of the IDs of empty items;
/// - `H`: the base64-encoded 48-bit big-endian hash of each partition.
///
/// # Errors
///
/// Returns the first error reported by `file_writer`.
///
/// # Panics
///
/// Panics if an item ID or an item length does not fit in a `u32`.
pub fn write_data<D: AsRef<[u8]> + Clone>(
    data: &mut dyn Iterator<Item = D>,
    file_writer: &mut dyn FnMut(&str, &[u8]) -> Result<(), std::io::Error>,
) -> Result<Vec<u8>, std::io::Error> {
    let mut empties: Vec<u32> = Vec::new();
    let mut partitions: Vec<(u64, u32)> = Vec::new();
    let mut current_partition: Vec<u8> = Vec::new();
    let mut current_partition_count = 0;
    let mut previous_data: VecDeque<D> = VecDeque::new();
    let mut filename = String::new();
    /// Writes the current partition out and resets all per-partition state.
    fn flush<D: AsRef<[u8]>>(
        current_partition: &mut Vec<u8>,
        current_partition_count: &mut u32,
        previous_data: &mut VecDeque<D>,
        partitions: &mut Vec<(u64, u32)>,
        filename: &mut String,
        file_writer: &mut dyn FnMut(&str, &[u8]) -> Result<(), std::io::Error>,
    ) -> Result<(), std::io::Error> {
        // Only the low 48 bits of the hash are kept; they name the file.
        let hash =
            siphash_of_bytes(current_partition.as_slice(), 0, 0) & 0x00_00_FF_FF_FF_FF_FF_FF;
        filename.clear();
        filename.reserve(15);
        write_hex_to_string((hash >> 40) as u8, filename);
        write_hex_to_string((hash >> 32) as u8, filename);
        write_hex_to_string((hash >> 24) as u8, filename);
        write_hex_to_string((hash >> 16) as u8, filename);
        write_hex_to_string((hash >> 8) as u8, filename);
        write_hex_to_string(hash as u8, filename);
        // The wrapper `rd_("` + `")` adds exactly 7 bytes around the payload.
        let mut output = Vec::with_capacity(current_partition.len() + 7);
        output.extend_from_slice(br#"rd_(""#);
        output.extend(current_partition.escape_ascii());
        output.extend_from_slice(br#"")"#);
        // If escaping inflated the payload by more than 50%, base64 is used instead.
        if (output.len() - 7) > (current_partition.len() * 3) / 2 {
            output.clear();
            output.extend_from_slice(br#"rb_(""#);
            write_base64_to_bytes(current_partition, &mut output);
            output.extend_from_slice(br#"")"#);
        }
        file_writer(&filename, &output)?;
        partitions.push((hash, *current_partition_count));
        *current_partition_count = 0;
        current_partition.clear();
        // Back-references never cross a partition boundary.
        previous_data.clear();
        Ok(())
    }
    for (id, item) in data.enumerate() {
        let bytes = item.as_ref();
        if bytes.is_empty() {
            empties.push(u32::try_from(id).expect("item id must fit in a u32"));
            continue;
        }
        // NOTE: the flush is unconditional, so an item that is larger than the
        // limit on its own and arrives while the partition is still empty makes
        // an empty partition (count 0) get written first.
        if current_partition.len() + bytes.len() > FILE_SIZE_LIMIT {
            flush(
                &mut current_partition,
                &mut current_partition_count,
                &mut previous_data,
                &mut partitions,
                &mut filename,
                file_writer,
            )?;
        }
        // Compare against the cached items by reference; no clone is needed
        // just to look at their bytes.
        let backref = previous_data
            .iter()
            .position(|prev| prev.as_ref() == bytes);
        if let Some(i) = backref {
            // `i` is below 16 because the cache never holds more than 16 items.
            current_partition.push(b'0' + (i as u8));
        } else {
            crate::internals::encode::write_vlqhex_to_bytes(
                u32::try_from(bytes.len()).expect("item length must fit in a u32"),
                &mut current_partition,
            );
            current_partition.extend_from_slice(bytes);
            previous_data.push_front(item);
            if previous_data.len() > 16 {
                previous_data.pop_back();
            }
        }
        current_partition_count += 1;
    }
    flush(
        &mut current_partition,
        &mut current_partition_count,
        &mut previous_data,
        &mut partitions,
        &mut filename,
        file_writer,
    )?;
    let mut result = Vec::new();
    result.extend_from_slice(br#""N":""#);
    for &(_, n) in &partitions {
        crate::internals::encode::write_vlqhex_to_bytes(n, &mut result);
    }
    result.extend_from_slice(br#"","E":""#);
    let mut empties_bytes = Vec::new();
    crate::internals::encode::write_bitmap_to_bytes(&empties, &mut empties_bytes).unwrap();
    crate::internals::encode::write_base64_to_bytes(&empties_bytes, &mut result);
    result.extend_from_slice(br#"","H":""#);
    let mut hash_buf = Vec::new();
    for &(h, _) in &partitions {
        hash_buf.clear();
        crate::internals::encode::write_48bit_to_bytes_be(h, &mut hash_buf);
        crate::internals::encode::write_base64_to_bytes(&hash_buf, &mut result);
    }
    result.push(b'"');
    Ok(result)
}

/// Error returned while reading a data column back.
///
/// `E` is the error type of the caller-supplied consumer callback.
#[derive(Debug)]
pub enum ReadDataError<E> {
    /// An I/O error, raised while reading the root file or by the partition loader.
    Io(std::io::Error),
    /// The root data could not be parsed, or the requested column (or one of
    /// its parts) could not be found in it.
    InvalidRoot,
    /// A partition file was malformed.
    ///
    /// `hash` is the 48-bit hash that names the partition file. `pos` is the
    /// byte offset in the decoded partition at which the problem was detected,
    /// or `!0` when the file's wrapper was not recognised at all.
    InvalidPartition { hash: u64, pos: usize },
    /// The consumer callback returned an error.
    ConsumerError(E),
}

impl<E> std::fmt::Display for ReadDataError<E>
where
    E: std::fmt::Display,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ReadDataError::Io(io) => <std::io::Error as std::fmt::Display>::fmt(io, f),
            ReadDataError::InvalidRoot => write!(f, "invalid root file"),
            ReadDataError::InvalidPartition { hash, pos } => {
                let mut filename = String::new();
                write_hex_to_string((*hash >> 40) as u8, &mut filename);
                write_hex_to_string((*hash >> 32) as u8, &mut filename);
                write_hex_to_string((*hash >> 24) as u8, &mut filename);
                write_hex_to_string((*hash >> 16) as u8, &mut filename);
                write_hex_to_string((*hash >> 8) as u8, &mut filename);
                write_hex_to_string(*hash as u8, &mut filename);
                write!(f, "invalid partition file: {filename} : {pos}")
            }
            ReadDataError::ConsumerError(e) => {
                write!(f, "data consumer error: {e}")
            }
        }
    }
}

/// Finds the description of one column inside the root data.
///
/// `root` must have the shape `rr_('{"<COL>":{<COLDATA>},…}')`. The columns are
/// scanned left to right, and the bytes between the braces of the column whose
/// name equals `column_name` are returned.
///
/// Returns `None` when `column_name` is empty or contains a byte other than
/// ASCII letters, digits, `-` and `_`, when `root` does not have the expected
/// shape, or when no column with that name exists.
pub fn scan_column_in_root<'root>(
    root: &'root [u8],
    column_name: &'root [u8],
) -> Option<&'root [u8]> {
    let name_is_valid = !column_name.is_empty()
        && column_name
            .iter()
            .all(|&c| c.is_ascii_alphanumeric() || c == b'-' || c == b'_');
    if !name_is_valid {
        return None;
    }
    let mut cursor = root
        .strip_prefix(br#"rr_('{"#)?
        .strip_suffix(br#"}')"#)?;
    loop {
        // Every column starts with its quoted name.
        let after_quote = cursor.strip_prefix(b"\"")?;
        let matched = after_quote
            .strip_prefix(column_name)
            .and_then(|rest| rest.strip_prefix(br#"":{"#));
        if let Some(contents) = matched {
            let close = contents.iter().position(|&c| c == b'}')?;
            return Some(&contents[..close]);
        }
        // Not the column we want: step over `name":{…},` and try the next one.
        let name_end = after_quote.iter().position(|&c| c == b'"')?;
        let object = after_quote[name_end + 1..].strip_prefix(br#":{"#)?;
        let object_end = object.iter().position(|&c| c == b'}')?;
        cursor = object[object_end + 1..].strip_prefix(b",")?;
    }
}

/// Parses the `N`, `E` and `H` fields of a single column description.
///
/// `column_root` is a comma-separated list of `"<KEY>":"<VALUE>"` pairs. Pairs
/// with a key other than `N`, `E` or `H` are skipped. As soon as all three
/// fields have been seen, they are decoded and returned as:
///
/// 1. the vlqhex-decoded numbers of `N`,
/// 2. the bitmap decoded from the base64 payload of `E`,
/// 3. the 48-bit big-endian values decoded from the base64 payload of `H`.
///
/// Returns `None` when the input is malformed, when any field fails to decode,
/// or when the input ends before all three fields were found.
pub fn scan_column_parts<'root>(
    mut column_root: &'root [u8],
) -> Option<(Vec<u32>, crate::internals::decode::RoaringBitmap, Vec<u64>)> {
    let mut counts_field: Option<&[u8]> = None;
    let mut empties_field: Option<&[u8]> = None;
    let mut hashes_field: Option<&[u8]> = None;
    loop {
        // Pick the slot the upcoming value belongs to, based on its key.
        let (slot, after_key) = if let Some(rest) = column_root.strip_prefix(br#""N":"#) {
            (&mut counts_field, rest)
        } else if let Some(rest) = column_root.strip_prefix(br#""E":"#) {
            (&mut empties_field, rest)
        } else if let Some(rest) = column_root.strip_prefix(br#""H":"#) {
            (&mut hashes_field, rest)
        } else {
            // Unknown key: step over `"key":"value",` and look at the next pair.
            let key = column_root.strip_prefix(b"\"")?;
            let key_end = key.iter().position(|&c| c == b'"')?;
            let value = key[key_end + 1..].strip_prefix(br#":""#)?;
            let value_end = value.iter().position(|&c| c == b'"')?;
            column_root = value[value_end + 1..].strip_prefix(b",")?;
            continue;
        };
        let value = after_key.strip_prefix(b"\"")?;
        let value_end = value.iter().position(|&c| c == b'"')?;
        *slot = Some(&value[..value_end]);
        if let (Some(counts), Some(empties), Some(hashes)) =
            (counts_field, empties_field, hashes_field)
        {
            // `N` is a plain concatenation of vlqhex numbers.
            let mut numbers = Vec::new();
            let mut pending = counts;
            while !pending.is_empty() {
                let (num, used) = crate::internals::decode::read_vlqhex_from_bytes(pending)?;
                pending = &pending[used..];
                numbers.push(num);
            }
            // `H` is base64 over consecutive 6-byte big-endian values.
            let mut hash_bytes = Vec::new();
            read_base64_from_bytes(hashes, &mut hash_bytes).ok()?;
            let hash_values: Vec<u64> = hash_bytes
                .chunks_exact(6)
                .map(|chunk| {
                    chunk
                        .iter()
                        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
                })
                .collect();
            // `E` is a base64-wrapped serialized bitmap.
            let mut bitmap_bytes = Vec::new();
            read_base64_from_bytes(empties, &mut bitmap_bytes).ok()?;
            let (bitmap, _) = crate::internals::decode::RoaringBitmap::from_bytes(&bitmap_bytes)?;
            return Some((numbers, bitmap, hash_values));
        }
        // More fields are needed: the closing quote must be followed by a comma.
        column_root = value[value_end..].strip_prefix(br#"","#)?;
    }
}

/// Reads a data column back from the filesystem.
///
/// The root file at `root_path` is read in full, and then handed to
/// [`read_data_from_column`] together with a loader that reads partition
/// `<filename>.js` from the directory `column_path`. `consumer` is forwarded
/// unchanged.
///
/// # Errors
///
/// Returns [`ReadDataError::Io`] if the root file cannot be read, and
/// otherwise whatever [`read_data_from_column`] returns.
pub fn read_data_from_disk_column<'consumer, E>(
    root_path: impl AsRef<Path>,
    column_name: &[u8],
    column_path: impl AsRef<Path>,
    consumer: &'consumer mut dyn for<'tmp> FnMut(u32, &'tmp [u8]) -> Result<(), E>,
) -> Result<(), ReadDataError<E>> {
    let root_bytes = std::fs::read(root_path).map_err(ReadDataError::Io)?;
    let partition_dir = column_path.as_ref();
    read_data_from_column(
        &root_bytes,
        column_name,
        &mut |filename, out| {
            let partition_file = partition_dir.join(format!("{filename}.js"));
            let mut file = std::fs::File::open(partition_file)?;
            // The file contents are appended to the buffer supplied by the caller.
            file.read_to_end(out).map(|_| ())
        },
        consumer,
    )
}

/// Reads every item of a data column, in ID order.
///
/// The column description is located in `root` with [`scan_column_in_root`]
/// and parsed with [`scan_column_parts`]. For every partition listed there,
/// `loader` is called with the partition's file name (12 hex digits of its
/// 48-bit hash, without extension) and a buffer to which it must append the
/// file contents. The partition is unwrapped (`rd_("…")` escaped ASCII or
/// `rb_("…")` base64) and its items are decoded one by one.
///
/// `consumer` is called once per ID, with an empty slice for the IDs recorded
/// as empty. Inside a partition, a byte in `b'0'..=b'?'` is a back-reference
/// to one of the 16 most recently decoded literal items of that partition;
/// anything else starts a vlqhex length followed by that many bytes.
///
/// # Errors
///
/// - [`ReadDataError::InvalidRoot`] if the column cannot be found or parsed.
/// - [`ReadDataError::Io`] if `loader` fails.
/// - [`ReadDataError::InvalidPartition`] if a partition is malformed: unknown
///   wrapper (`pos` is `!0`), undecodable payload, data that ends early, a
///   back-reference to an item that does not exist, or a length that runs past
///   the end of the partition.
/// - [`ReadDataError::ConsumerError`] if `consumer` fails.
pub fn read_data_from_column<'root, 'loader, 'consumer, E>(
    root: &'root [u8],
    column_name: &'root [u8],
    loader: &'loader mut dyn for<'tmp> FnMut(
        &'tmp str,
        &'tmp mut Vec<u8>,
    ) -> Result<(), std::io::Error>,
    consumer: &'consumer mut dyn for<'tmp> FnMut(u32, &'tmp [u8]) -> Result<(), E>,
) -> Result<(), ReadDataError<E>> {
    let column_root = scan_column_in_root(root, column_name).ok_or(ReadDataError::InvalidRoot)?;
    let (counts, empties, hashes) =
        scan_column_parts(column_root).ok_or(ReadDataError::InvalidRoot)?;
    let mut entry = 0;
    let mut bucket_raw = Vec::new();
    let mut bucket = Vec::new();
    let mut filename = String::new();
    for (&count, &hash) in counts.iter().zip(hashes.iter()) {
        filename.clear();
        filename.reserve(15);
        // Most significant byte first, six bytes in total.
        for shift in [40u32, 32, 24, 16, 8, 0] {
            write_hex_to_string((hash >> shift) as u8, &mut filename);
        }
        bucket_raw.clear();
        bucket.clear();
        loader(&filename, &mut bucket_raw).map_err(ReadDataError::Io)?;
        let mut pos: usize = 0;
        if let Some(inner) = bucket_raw
            .strip_prefix(br#"rd_(""#)
            .and_then(|bucket_raw| bucket_raw.strip_suffix(br#"")"#))
        {
            crate::internals::decode::unescape_ascii(inner, &mut bucket)
                .ok_or(ReadDataError::InvalidPartition { hash, pos })?;
        } else if let Some(inner) = bucket_raw
            .strip_prefix(br#"rb_(""#)
            .and_then(|bucket_raw| bucket_raw.strip_suffix(br#"")"#))
        {
            crate::internals::decode::read_base64_from_bytes(inner, &mut bucket)
                .map_err(|_| ReadDataError::InvalidPartition { hash, pos })?;
        } else {
            return Err(ReadDataError::InvalidPartition { hash, pos: !0 });
        }
        // Back-references are local to one partition.
        let mut backrefs: VecDeque<&[u8]> = VecDeque::new();
        for _ in 0..count {
            while empties.contains(entry) {
                consumer(entry, b"").map_err(ReadDataError::ConsumerError)?;
                entry += 1;
            }
            // The partition may hold fewer items than the root claims; report
            // that as a malformed partition instead of panicking.
            let c = *bucket
                .get(pos)
                .ok_or(ReadDataError::InvalidPartition { hash, pos })?;
            if (b'0'..=b'?').contains(&c) {
                // A back-reference may only point at an item already decoded.
                let item = *backrefs
                    .get(usize::from(c - b'0'))
                    .ok_or(ReadDataError::InvalidPartition { hash, pos })?;
                consumer(entry, item).map_err(ReadDataError::ConsumerError)?;
                pos += 1;
            } else {
                let (len, consumed) =
                    crate::internals::decode::read_vlqhex_from_bytes(&bucket[pos..])
                        .ok_or(ReadDataError::InvalidPartition { hash, pos })?;
                pos += consumed;
                // The declared length must stay inside the partition.
                let item = usize::try_from(len)
                    .ok()
                    .and_then(|len| pos.checked_add(len))
                    .and_then(|end| bucket.get(pos..end))
                    .ok_or(ReadDataError::InvalidPartition { hash, pos })?;
                consumer(entry, item).map_err(ReadDataError::ConsumerError)?;
                backrefs.push_front(item);
                if backrefs.len() > 16 {
                    backrefs.pop_back();
                }
                pos += item.len();
            }
            entry += 1;
        }
    }
    // Empty items that come after the last stored item.
    while empties.contains(entry) {
        consumer(entry, b"").map_err(ReadDataError::ConsumerError)?;
        entry += 1;
    }
    Ok(())
}

/// Computes a 64-bit SipHash-style digest of `input` under the key `(k0, k1)`.
///
/// Each full 8-byte little-endian word of the input is mixed in with one
/// compression round. The remaining 0 to 7 bytes are combined with the low
/// eight bits of the input length (placed in the top byte) and mixed in the
/// same way, after which three more rounds finalize the state.
fn siphash_of_bytes(input: &[u8], k0: u64, k1: u64) -> u64 {
    let mut state: [u64; 4] = [
        k0 ^ 0x736f6d6570736575,
        k1 ^ 0x646f72616e646f6d,
        k0 ^ 0x6c7967656e657261,
        k1 ^ 0x7465646279746573,
    ];
    let total = input.len();
    let tail_len = total & 0x7;
    let body_len = total - tail_len;
    // Full 8-byte words.
    for start in (0..body_len).step_by(8) {
        let word = u8_to_u64le(input, start, start + 8);
        state[3] ^= word;
        siphash_compress(&mut state);
        state[0] ^= word;
    }
    // Trailing bytes, with the length byte on top.
    let tail = u8_to_u64le(input, body_len, total);
    let last = (u64::try_from(total & 0xff).unwrap() << 56) | tail;
    state[3] ^= last;
    siphash_compress(&mut state);
    state[0] ^= last;
    // Finalization.
    state[2] ^= 0xff;
    for _ in 0..3 {
        siphash_compress(&mut state);
    }
    state.iter().fold(0, |acc, &word| acc ^ word)
}

/// Applies one SipRound to the four-word hash state, in place.
///
/// All additions wrap around on overflow.
fn siphash_compress(v: &mut [u64; 4]) {
    let [mut a, mut b, mut c, mut d] = *v;

    a = a.wrapping_add(b);
    b = b.rotate_left(13) ^ a;
    a = a.rotate_left(32);

    c = c.wrapping_add(d);
    d = d.rotate_left(16) ^ c;

    a = a.wrapping_add(d);
    d = d.rotate_left(21) ^ a;

    c = c.wrapping_add(b);
    b = b.rotate_left(17) ^ c;
    c = c.rotate_left(32);

    *v = [a, b, c, d];
}

/// Loads up to eight bytes of `array`, starting at `offset`, as a
/// little-endian `u64`.
///
/// Despite its name, `length` is an exclusive end index: the byte at
/// `offset + k` (for `k` in `0..8`) contributes only when `offset + k < length`,
/// and counts as zero otherwise.
///
/// # Panics
///
/// Panics if a contributing index is outside `array`.
fn u8_to_u64le(array: &[u8], offset: usize, length: usize) -> u64 {
    let mut value = 0u64;
    for lane in 0..8 {
        let index = offset + lane;
        if index < length {
            value |= u64::from(array[index]) << (8 * lane);
        }
    }
    value
}
