|
| 1 | +[![Crates.io][crates-badge]][crates-url] |
| 2 | +[![License][licence-badge]][licence-url] |
| 3 | +[![Test Status][test-badge]][test-url] |
| 4 | +[![Documentation][doc-badge]][doc-url] |
| 5 | + |
| 6 | +[crates-badge]: https://img.shields.io/crates/v/ext-sort.svg |
| 7 | +[crates-url]: https://crates.io/crates/ext-sort |
| 8 | +[licence-badge]: https://img.shields.io/badge/license-Unlicense-blue.svg |
| 9 | +[licence-url]: https://github.com/dapper91/ext-sort-rs/blob/master/LICENSE |
| 10 | +[test-badge]: https://github.com/dapper91/ext-sort-rs/actions/workflows/test.yml/badge.svg?branch=master |
| 11 | +[test-url]: https://github.com/dapper91/ext-sort-rs/actions/workflows/test.yml |
| 12 | +[doc-badge]: https://docs.rs/ext-sort/badge.svg |
| 13 | +[doc-url]: https://docs.rs/ext-sort |
| 14 | + |
| 15 | + |
1 | 16 | # Rust external sort
|
2 | 17 |
|
3 | 18 | `ext-sort` is a Rust external sort algorithm implementation.
|
4 | 19 |
|
5 |
| -External sort algorithm implementation. External sorting is a class of sorting algorithms |
6 |
| -that can handle massive amounts of data. External sorting is required when the data being |
7 |
| -sorted do not fit into the main memory (RAM) of a computer and instead must be resided in |
8 |
| -slower external memory, usually a hard disk drive. Sorting is achieved in two passes. |
9 |
| -During the first pass it sorts chunks of data that each fit in RAM, during the second pass |
10 |
| -it merges the sorted chunks together. |
11 |
| -For more information see https://en.wikipedia.org/wiki/External_sorting. |
| 20 | +External sorting is a class of sorting algorithms that can handle massive amounts of data. External sorting |
| 21 | +is required when the data being sorted do not fit into the main memory (RAM) of a computer and must instead |
| 22 | +reside in slower external memory, usually a hard disk drive. Sorting is achieved in two passes: during the |
| 23 | +first pass it sorts chunks of data that each fit in RAM; during the second pass it merges the sorted chunks together. |
| 24 | +For more information see [External Sorting](https://en.wikipedia.org/wiki/External_sorting). |
| 25 | + |
| 26 | +## Overview |
12 | 27 |
|
13 |
| -## Features |
| 28 | +`ext-sort` supports the following features: |
14 | 29 |
|
15 | 30 | * **Data agnostic:**
|
16 |
| - `ext-sort` support all data types that that implement `serde` serialization/deserialization. |
| 31 | + it supports all data types that implement `serde` serialization/deserialization by default; |
| 32 | + otherwise, you can implement your own serialization/deserialization mechanism. |
17 | 33 | * **Serialization format agnostic:**
|
18 |
| - `ext-sort` use `MessagePack` serialization format by default, but it can be easily substituted by your custom one |
19 |
| - if `MessagePack` serialization/deserialization performance is not sufficient for your task. |
| 34 | + the library uses the `MessagePack` serialization format by default, but it can easily be replaced with a custom one |
| 35 | + if `MessagePack` serialization/deserialization performance is not sufficient for your task. |
20 | 36 | * **Multithreading support:**
|
21 |
| - `ext-sort` support multithreading, which means data is sorted in multiple threads utilizing maximum CPU resources |
| 37 | + multi-threaded sorting is supported, which means data is sorted in multiple threads utilizing maximum CPU resources |
22 | 38 | and reducing sorting time.
|
| 39 | +* **Memory limit support:** |
| 40 | + memory-limited sorting is supported, which lets you cap the amount of memory consumed while sorting |
| 41 | + (requires the `memory-limit` feature). |
23 | 42 |
|
24 | 43 | # Basic example
|
25 | 44 |
|
| 45 | +Activate the `memory-limit` feature of the `ext-sort` crate in `Cargo.toml`: |
| 46 | + |
| 47 | +```toml |
| 48 | +[dependencies] |
| 49 | +ext-sort = { version = "^0.1.1", features = ["memory-limit"] } |
| 50 | +``` |
| 51 | + |
26 | 52 | ``` rust
|
27 |
| - use std::fs; |
28 |
| - use std::io::{self, prelude::*}; |
29 |
| - use std::path; |
30 |
| - |
31 |
| - use bytesize::MB; |
32 |
| - use env_logger; |
33 |
| - use log; |
34 |
| - |
35 |
| - use ext_sort::buffer::mem::MemoryLimitedBufferBuilder; |
36 |
| - use ext_sort::{ExternalSorter, ExternalSorterBuilder}; |
37 |
| - |
38 |
| - fn main() { |
39 |
| - env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init(); |
40 |
| - |
41 |
| - let input_reader = io::BufReader::new(fs::File::open("input.txt").unwrap()); |
42 |
| - let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap()); |
43 |
| - |
44 |
| - let sorter: ExternalSorter<String, io::Error, MemoryLimitedBufferBuilder> = ExternalSorterBuilder::new() |
45 |
| - .with_tmp_dir(path::Path::new("tmp")) |
46 |
| - .with_buffer(MemoryLimitedBufferBuilder::new(50 * MB)) |
47 |
| - .build() |
48 |
| - .unwrap(); |
49 |
| - |
50 |
| - let sorted = sorter.sort(input_reader.lines()).unwrap(); |
51 |
| - |
52 |
| - for item in sorted.map(Result::unwrap) { |
53 |
| - output_writer.write_all(format!("{}\n", item).as_bytes()).unwrap(); |
54 |
| - } |
55 |
| - output_writer.flush().unwrap(); |
| 53 | +use std::fs; |
| 54 | +use std::io::{self, prelude::*}; |
| 55 | +use std::path; |
| 56 | + |
| 57 | +use bytesize::MB; |
| 58 | +use env_logger; |
| 59 | +use log; |
| 60 | + |
| 61 | +use ext_sort::{buffer::mem::MemoryLimitedBufferBuilder, ExternalSorter, ExternalSorterBuilder}; |
| 62 | + |
| 63 | +fn main() { |
| 64 | + env_logger::Builder::new().filter_level(log::LevelFilter::Debug).init(); |
| 65 | + |
| 66 | + let input_reader = io::BufReader::new(fs::File::open("input.txt").unwrap()); |
| 67 | + let mut output_writer = io::BufWriter::new(fs::File::create("output.txt").unwrap()); |
| 68 | + |
| 69 | + let sorter: ExternalSorter<String, io::Error, MemoryLimitedBufferBuilder> = ExternalSorterBuilder::new() |
| 70 | + .with_tmp_dir(path::Path::new("./")) |
| 71 | + .with_buffer(MemoryLimitedBufferBuilder::new(50 * MB)) |
| 72 | + .build() |
| 73 | + .unwrap(); |
| 74 | + |
| 75 | + let sorted = sorter.sort(input_reader.lines()).unwrap(); |
| 76 | + |
| 77 | + for item in sorted.map(Result::unwrap) { |
| 78 | + output_writer.write_all(format!("{}\n", item).as_bytes()).unwrap(); |
56 | 79 | }
|
| 80 | + output_writer.flush().unwrap(); |
| 81 | +} |
57 | 82 | ```
|
0 commit comments