mirror of
https://github.com/SinTan1729/unscrambler-rust.git
synced 2025-04-11 05:46:05 -05:00
Add compression to dictionary data
This commit is contained in:
parent
ade3107107
commit
cf9e1f7775
8 changed files with 60 additions and 5 deletions
41
Cargo.lock
generated
41
Cargo.lock
generated
|
@ -2,6 +2,47 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.126"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
|
||||
|
||||
[[package]]
|
||||
name = "lzma-sys"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e06754c4acf47d49c727d5665ca9fb828851cda315ed3bd51edd148ef78a8772"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
|
||||
[[package]]
|
||||
name = "unscrambler"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"xz2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xz2"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
|
||||
dependencies = [
|
||||
"lzma-sys",
|
||||
]
|
||||
|
|
|
@ -6,3 +6,4 @@ edition = "2021"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
xz2 = "0.1.7"
|
|
@ -6,4 +6,4 @@ I'm learning Rust, so this is just a rewrite of a simple old project in Rust.
|
|||
|
||||
### Note
|
||||
|
||||
The main wordlist was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. Processing code was really simple, so didn't put it up here.
|
||||
The main `src/wordlist` was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. The processing code was really simple, so I didn't put it up here. The processing included pre-sorting each line in `src/wordlist` to create `src/wordlist_sorted`, and then compressing both using `xz`.
|
BIN
src/dict/wordlist.txt.xz
Normal file
BIN
src/dict/wordlist.txt.xz
Normal file
Binary file not shown.
BIN
src/dict/wordlist_sorted.txt.xz
Normal file
BIN
src/dict/wordlist_sorted.txt.xz
Normal file
Binary file not shown.
21
src/main.rs
21
src/main.rs
|
@ -1,9 +1,22 @@
|
|||
use std::io::{self, Write};
|
||||
use std::io::{self, prelude::*, Write};
|
||||
use xz2::read::XzDecoder;
|
||||
|
||||
fn main() {
|
||||
// read the dictionary files
|
||||
let wordlist = include_str!("data/wordlist.txt");
|
||||
let wordlist_sorted = include_str!("data/wordlist_sorted.txt");
|
||||
// load the compressed dictionary files (embedded at compile time)
|
||||
let wordlist_cmp: &[u8] = include_bytes!("dict/wordlist.txt.xz");
|
||||
let wordlist_sorted_cmp: &[u8] = include_bytes!("dict/wordlist_sorted.txt.xz");
|
||||
|
||||
// decompress the dictionary files
|
||||
let mut decompressor = XzDecoder::new(wordlist_cmp);
|
||||
let mut decompressor_sorted = XzDecoder::new(wordlist_sorted_cmp);
|
||||
let mut wordlist = String::new();
|
||||
let mut wordlist_sorted = String::new();
|
||||
decompressor.read_to_string(&mut wordlist).unwrap();
|
||||
decompressor_sorted
|
||||
.read_to_string(&mut wordlist_sorted)
|
||||
.unwrap();
|
||||
|
||||
// some formatting of the dictionary data
|
||||
let wordlist = &[" ", &wordlist.replace("\n", " ")[..]].join("")[..];
|
||||
let wordlist_sorted = &[" ", &wordlist_sorted.replace("\n", " ")[..]].join("")[..];
|
||||
|
||||
|
|
Loading…
Reference in a new issue