Skip to content

Commit 41960de

Browse files
authored
Merge pull request #3 from dapper91/dev
- documentation fixed. - examples added.
2 parents 8324121 + c5243d4 commit 41960de

File tree

10 files changed

+392
-67
lines changed

10 files changed

+392
-67
lines changed

Cargo.toml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "ext-sort"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
edition = "2021"
55
license = "Unlicense"
66
description = "rust external sort algorithm implementation"
@@ -17,7 +17,7 @@ keywords = ["algorithms", "sort", "sorting", "external-sort", "external"]
1717
bytesize = { version = "^1.1", optional = true }
1818
deepsize = { version = "^0.2", optional = true }
1919
env_logger = { version = "^0.9", optional = true}
20-
log = "0.4"
20+
log = "^0.4"
2121
rayon = "^1.5"
2222
rmp-serde = "^0.15"
2323
serde = { version = "^1.0", features = ["derive"] }
@@ -33,3 +33,11 @@ memory-limit = ["deepsize"]
3333
[[example]]
3434
name = "quickstart"
3535
required-features = ["bytesize", "env_logger"]
36+
37+
[[example]]
38+
name = "custom_serializer"
39+
required-features = ["env_logger"]
40+
41+
[[example]]
42+
name = "custom_type"
43+
required-features = ["env_logger"]

README.md

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,82 @@
1+
[![Crates.io][crates-badge]][crates-url]
2+
[![License][licence-badge]][licence-url]
3+
[![Test Status][test-badge]][test-url]
4+
[![Documentation][doc-badge]][doc-url]
5+
6+
[crates-badge]: https://img.shields.io/crates/v/ext-sort.svg
7+
[crates-url]: https://crates.io/crates/ext-sort
8+
[licence-badge]: https://img.shields.io/badge/license-Unlicense-blue.svg
9+
[licence-url]: https://github.com/dapper91/ext-sort-rs/blob/master/LICENSE
10+
[test-badge]: https://github.com/dapper91/ext-sort-rs/actions/workflows/test.yml/badge.svg?branch=master
11+
[test-url]: https://github.com/dapper91/ext-sort-rs/actions/workflows/test.yml
12+
[doc-badge]: https://docs.rs/ext-sort/badge.svg
13+
[doc-url]: https://docs.rs/ext-sort
14+
15+
116
# Rust external sort
217

318
`ext-sort` is a rust external sort algorithm implementation.
419

5-
External sort algorithm implementation. External sorting is a class of sorting algorithms
6-
that can handle massive amounts of data. External sorting is required when the data being
7-
sorted do not fit into the main memory (RAM) of a computer and instead must be resided in
8-
slower external memory, usually a hard disk drive. Sorting is achieved in two passes.
9-
During the first pass it sorts chunks of data that each fit in RAM, during the second pass
10-
it merges the sorted chunks together.
11-
For more information see https://en.wikipedia.org/wiki/External_sorting.
20+
External sorting is a class of sorting algorithms that can handle massive amounts of data. External sorting
21+
is required when the data being sorted do not fit into the main memory (RAM) of a computer and instead must
22+
reside in slower external memory, usually a hard disk drive. Sorting is achieved in two passes. During the
23+
first pass it sorts chunks of data that each fit in RAM; during the second pass it merges the sorted chunks together.
24+
For more information see [External Sorting](https://en.wikipedia.org/wiki/External_sorting).
25+
26+
## Overview
1227

13-
## Features
28+
`ext-sort` supports the following features:
1429

1530
* **Data agnostic:**
16-
`ext-sort` support all data types that that implement `serde` serialization/deserialization.
31+
it supports all data types that implement `serde` serialization/deserialization by default,
32+
otherwise you can implement your own serialization/deserialization mechanism.
1733
* **Serialization format agnostic:**
18-
`ext-sort` use `MessagePack` serialization format by default, but it can be easily substituted by your custom one
19-
if `MessagePack` serialization/deserialization performance is not sufficient for your task.
34+
the library uses `MessagePack` serialization format by default, but it can be easily substituted by your custom one
35+
if `MessagePack` serialization/deserialization performance is not sufficient for your task.
2036
* **Multithreading support:**
21-
`ext-sort` support multithreading, which means data is sorted in multiple threads utilizing maximum CPU resources
37+
multi-threaded sorting is supported, which means data is sorted in multiple threads utilizing maximum CPU resources
2238
and reducing sorting time.
39+
* **Memory limit support:**
40+
memory-limited sorting is supported. It allows you to cap the memory consumed while sorting
41+
(`memory-limit` feature required).
2342

2443
# Basic example
2544

45+
Activate the `memory-limit` feature of the `ext-sort` crate in Cargo.toml:
46+
47+
```toml
48+
[dependencies]
49+
ext-sort = { version = "^0.1.1", features = ["memory-limit"] }
50+
```
51+
2652
``` rust
27-
use std::fs;
28-
use std::io::{self, prelude::*};
29-
use std::path;
30-
31-
use bytesize::MB;
32-
use env_logger;
33-
use log;
34-
35-
use ext_sort::buffer::mem::MemoryLimitedBufferBuilder;
36-
use ext_sort::{ExternalSorter, ExternalSorterBuilder};
37-
38-
fn main() {
39-
env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init();
40-
41-
let input_reader = io::BufReader::new(fs::File::open("input.txt").unwrap());
42-
let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap());
43-
44-
let sorter: ExternalSorter<String, io::Error, MemoryLimitedBufferBuilder> = ExternalSorterBuilder::new()
45-
.with_tmp_dir(path::Path::new("tmp"))
46-
.with_buffer(MemoryLimitedBufferBuilder::new(50 * MB))
47-
.build()
48-
.unwrap();
49-
50-
let sorted = sorter.sort(input_reader.lines()).unwrap();
51-
52-
for item in sorted.map(Result::unwrap) {
53-
output_writer.write_all(format!("{}\n", item).as_bytes()).unwrap();
54-
}
55-
output_writer.flush().unwrap();
53+
use std::fs;
54+
use std::io::{self, prelude::*};
55+
use std::path;
56+
57+
use bytesize::MB;
58+
use env_logger;
59+
use log;
60+
61+
use ext_sort::{buffer::mem::MemoryLimitedBufferBuilder, ExternalSorter, ExternalSorterBuilder};
62+
63+
fn main() {
64+
env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init();
65+
66+
let input_reader = io::BufReader::new(fs::File::open("input.txt").unwrap());
67+
let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap());
68+
69+
let sorter: ExternalSorter<String, io::Error, MemoryLimitedBufferBuilder> = ExternalSorterBuilder::new()
70+
.with_tmp_dir(path::Path::new("./"))
71+
.with_buffer(MemoryLimitedBufferBuilder::new(50 * MB))
72+
.build()
73+
.unwrap();
74+
75+
let sorted = sorter.sort(input_reader.lines()).unwrap();
76+
77+
for item in sorted.map(Result::unwrap) {
78+
output_writer.write_all(format!("{}\n", item).as_bytes()).unwrap();
5679
}
80+
output_writer.flush().unwrap();
81+
}
5782
```

examples/custom_serializer.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
use std::fs;
2+
use std::fs::File;
3+
use std::io::{self, prelude::*, BufReader, BufWriter, Take};
4+
use std::path;
5+
6+
use env_logger;
7+
use log;
8+
9+
use ext_sort::{ExternalChunk, ExternalSorter, ExternalSorterBuilder, LimitedBufferBuilder};
10+
11+
/// Chunk type used by this example to read `u32` items back from a chunk file.
struct CustomExternalChunk {
    /// Buffered, length-limited reader over the chunk file.
    reader: Take<BufReader<File>>,
}
14+
15+
impl ExternalChunk<u32> for CustomExternalChunk {
16+
type SerializationError = io::Error;
17+
type DeserializationError = io::Error;
18+
19+
fn new(reader: Take<BufReader<File>>) -> Self {
20+
CustomExternalChunk { reader }
21+
}
22+
23+
fn dump(
24+
chunk_writer: &mut BufWriter<File>,
25+
items: impl IntoIterator<Item = u32>,
26+
) -> Result<(), Self::SerializationError> {
27+
for item in items {
28+
chunk_writer.write_all(&item.to_le_bytes())?;
29+
}
30+
31+
return Ok(());
32+
}
33+
}
34+
35+
impl Iterator for CustomExternalChunk {
36+
type Item = Result<u32, io::Error>;
37+
38+
fn next(&mut self) -> Option<Self::Item> {
39+
if self.reader.limit() == 0 {
40+
None
41+
} else {
42+
let mut buf: [u8; 4] = [0; 4];
43+
match self.reader.read_exact(&mut buf.as_mut_slice()) {
44+
Ok(_) => Some(Ok(u32::from_le_bytes(buf))),
45+
Err(err) => Some(Err(err)),
46+
}
47+
}
48+
}
49+
}
50+
51+
fn main() {
52+
env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init();
53+
54+
let input_reader = io::BufReader::new(fs::File::open("input.txt").unwrap());
55+
let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap());
56+
57+
let sorter: ExternalSorter<u32, io::Error, LimitedBufferBuilder, CustomExternalChunk> =
58+
ExternalSorterBuilder::new()
59+
.with_tmp_dir(path::Path::new("./"))
60+
.with_buffer(LimitedBufferBuilder::new(1_000_000, true))
61+
.build()
62+
.unwrap();
63+
64+
let sorted = sorter
65+
.sort(input_reader.lines().map(|line| {
66+
let line = line.unwrap();
67+
let number = line.parse().unwrap();
68+
69+
return Ok(number);
70+
}))
71+
.unwrap();
72+
73+
for item in sorted.map(Result::unwrap) {
74+
output_writer.write_all(format!("{}\n", item).as_bytes()).unwrap();
75+
}
76+
output_writer.flush().unwrap();
77+
}

examples/custom_type.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
use std::cmp::Ordering;
2+
use std::error::Error;
3+
use std::fmt::{Display, Formatter};
4+
use std::fs;
5+
use std::io::{self, prelude::*};
6+
use std::path;
7+
8+
use env_logger;
9+
use log;
10+
use serde;
11+
12+
use ext_sort::{ExternalSorter, ExternalSorterBuilder, LimitedBufferBuilder};
13+
14+
/// Errors produced while turning a CSV row into a record.
#[derive(Debug)]
enum CsvParseError {
    /// The row as a whole is malformed; the payload describes the problem.
    RowError(String),
    /// A single column is malformed; the payload describes the problem.
    ColumnError(String),
}

/// Renders the error as `row format error: <details>` or `column format error: <details>`.
impl Display for CsvParseError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RowError(details) => write!(f, "row format error: {}", details),
            Self::ColumnError(details) => write!(f, "column format error: {}", details),
        }
    }
}

/// Lets `CsvParseError` be used wherever a standard error value is expected.
impl Error for CsvParseError {}
30+
31+
#[derive(PartialEq, Eq, serde::Serialize, serde::Deserialize)]
32+
struct Person {
33+
name: String,
34+
surname: String,
35+
age: u8,
36+
}
37+
38+
impl Person {
39+
fn as_csv(&self) -> String {
40+
format!("{},{},{}", self.name, self.surname, self.age)
41+
}
42+
43+
fn from_str(s: &str) -> Result<Self, CsvParseError> {
44+
let parts: Vec<&str> = s.split(',').collect();
45+
if parts.len() != 3 {
46+
Err(CsvParseError::RowError("wrong columns number".to_string()))
47+
} else {
48+
Ok(Person {
49+
name: parts[0].to_string(),
50+
surname: parts[1].to_string(),
51+
age: parts[2]
52+
.parse()
53+
.map_err(|err| CsvParseError::ColumnError(format!("age field format error: {}", err)))?,
54+
})
55+
}
56+
}
57+
}
58+
59+
impl PartialOrd for Person {
60+
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
61+
Some(self.cmp(&other))
62+
}
63+
}
64+
65+
impl Ord for Person {
66+
fn cmp(&self, other: &Self) -> Ordering {
67+
self.surname
68+
.cmp(&other.surname)
69+
.then(self.name.cmp(&other.name))
70+
.then(self.age.cmp(&other.age))
71+
}
72+
}
73+
74+
fn main() {
75+
env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init();
76+
77+
let input_reader = io::BufReader::new(fs::File::open("input.csv").unwrap());
78+
let mut output_writer = io::BufWriter::new(fs::File::create("output.csv").unwrap());
79+
80+
let sorter: ExternalSorter<Person, io::Error, LimitedBufferBuilder> = ExternalSorterBuilder::new()
81+
.with_tmp_dir(path::Path::new("./"))
82+
.with_buffer(LimitedBufferBuilder::new(1_000_000, true))
83+
.build()
84+
.unwrap();
85+
86+
let sorted = sorter
87+
.sort(
88+
input_reader
89+
.lines()
90+
.map(|line| line.map(|line| Person::from_str(&line).unwrap())),
91+
)
92+
.unwrap();
93+
94+
for item in sorted.map(Result::unwrap) {
95+
output_writer
96+
.write_all(format!("{}\n", item.as_csv()).as_bytes())
97+
.unwrap();
98+
}
99+
output_writer.flush().unwrap();
100+
}

examples/quickstart.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ use bytesize::MB;
66
use env_logger;
77
use log;
88

9-
use ext_sort::buffer::mem::MemoryLimitedBufferBuilder;
10-
use ext_sort::{ExternalSorter, ExternalSorterBuilder};
9+
use ext_sort::{buffer::mem::MemoryLimitedBufferBuilder, ExternalSorter, ExternalSorterBuilder};
1110

1211
fn main() {
1312
env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init();
@@ -16,7 +15,7 @@ fn main() {
1615
let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap());
1716

1817
let sorter: ExternalSorter<String, io::Error, MemoryLimitedBufferBuilder> = ExternalSorterBuilder::new()
19-
.with_tmp_dir(path::Path::new("tmp"))
18+
.with_tmp_dir(path::Path::new("./"))
2019
.with_buffer(MemoryLimitedBufferBuilder::new(50 * MB))
2120
.build()
2221
.unwrap();

0 commit comments

Comments
 (0)