mirror of
https://github.com/SinTan1729/unscrambler-rust.git
synced 2025-04-19 09:20:02 -05:00
Add compression to dictionary data
This commit is contained in:
parent
ade3107107
commit
cf9e1f7775
8 changed files with 60 additions and 5 deletions
41
Cargo.lock
generated
41
Cargo.lock
generated
|
@ -2,6 +2,47 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.0.73"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.126"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lzma-sys"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e06754c4acf47d49c727d5665ca9fb828851cda315ed3bd51edd148ef78a8772"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
"pkg-config",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pkg-config"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unscrambler"
|
name = "unscrambler"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"xz2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xz2"
|
||||||
|
version = "0.1.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
|
||||||
|
dependencies = [
|
||||||
|
"lzma-sys",
|
||||||
|
]
|
||||||
|
|
|
@ -6,3 +6,4 @@ edition = "2021"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
xz2 = "0.1.7"
|
|
@ -6,4 +6,4 @@ I'm learning Rust, so this is just a rewrite of a simple old project in Rust.
|
||||||
|
|
||||||
### Note
|
### Note
|
||||||
|
|
||||||
The main wordlist was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. Processing code was really simple, so didn't put it up here.
|
The main `src/wordlist` was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. The processing code was really simple, so I didn't put it up here. The processing included pre-sorting each line in `src/wordlist` to create `src/wordlist_sorted` and then compressing both using `xz`.
|
BIN
src/dict/wordlist.txt.xz
Normal file
BIN
src/dict/wordlist.txt.xz
Normal file
Binary file not shown.
BIN
src/dict/wordlist_sorted.txt.xz
Normal file
BIN
src/dict/wordlist_sorted.txt.xz
Normal file
Binary file not shown.
21
src/main.rs
21
src/main.rs
|
@ -1,9 +1,22 @@
|
||||||
use std::io::{self, Write};
|
use std::io::{self, prelude::*, Write};
|
||||||
|
use xz2::read::XzDecoder;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
// read the dictionary files
|
// load the compressed dictionary files (embedded at compile time)
|
||||||
let wordlist = include_str!("data/wordlist.txt");
|
let wordlist_cmp: &[u8] = include_bytes!("dict/wordlist.txt.xz");
|
||||||
let wordlist_sorted = include_str!("data/wordlist_sorted.txt");
|
let wordlist_sorted_cmp: &[u8] = include_bytes!("dict/wordlist_sorted.txt.xz");
|
||||||
|
|
||||||
|
// decompress the dictionary files
|
||||||
|
let mut decompressor = XzDecoder::new(wordlist_cmp);
|
||||||
|
let mut decompressor_sorted = XzDecoder::new(wordlist_sorted_cmp);
|
||||||
|
let mut wordlist = String::new();
|
||||||
|
let mut wordlist_sorted = String::new();
|
||||||
|
decompressor.read_to_string(&mut wordlist).unwrap();
|
||||||
|
decompressor_sorted
|
||||||
|
.read_to_string(&mut wordlist_sorted)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// some formatting of the dictionary data
|
||||||
let wordlist = &[" ", &wordlist.replace("\n", " ")[..]].join("")[..];
|
let wordlist = &[" ", &wordlist.replace("\n", " ")[..]].join("")[..];
|
||||||
let wordlist_sorted = &[" ", &wordlist_sorted.replace("\n", " ")[..]].join("")[..];
|
let wordlist_sorted = &[" ", &wordlist_sorted.replace("\n", " ")[..]].join("")[..];
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue