Build status for all platforms: Commercial support:
This directory contains the JavaCPP Presets module for:
- SentencePiece 0.1.99 https://github.com/google/sentencepiece
Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
Java API documentation is available here:
Here is a simple example of SentencePiece ported to Java from this C++ example:
We can use Maven 3 to automatically download and install all the class files as well as the native binaries. To run this sample code, after creating the `pom.xml` and `SentencePieceExample.java` source files below, simply execute on the command line:
$ wget https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs10000.model
$ mvn compile exec:java -Dexec.args="en.wiki.bpe.vs10000.model"
<project>
<!-- Minimal Maven project used to compile and run SentencePieceExample.java. -->
<modelVersion>4.0.0</modelVersion>
<groupId>org.bytedeco.sentencepiece</groupId>
<artifactId>sentencepiece-example</artifactId>
<version>1.5.10-SNAPSHOT</version>
<properties>
<!-- Class launched by "mvn exec:java"; must match the example's class name. -->
<exec.mainClass>SentencePieceExample</exec.mainClass>
<!-- Compile the example for Java 8 source and bytecode level. -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<!-- JavaCPP Presets artifact for SentencePiece. The version pairs the
     SentencePiece release (0.1.99) with the presets version (1.5.10-SNAPSHOT). -->
<groupId>org.bytedeco</groupId>
<artifactId>sentencepiece-platform</artifactId>
<version>0.1.99-1.5.10-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<!-- Look for Java sources in the current directory, next to this pom.xml,
     instead of the default src/main/java. -->
<sourceDirectory>.</sourceDirectory>
</build>
</project>
import org.bytedeco.javacpp.*;
import org.bytedeco.sentencepiece.*;
/**
 * Minimal SentencePiece round trip: loads a model, encodes a sample sentence
 * into ids, prints the ids, then decodes them back into text and prints that.
 *
 * To try encoding you can download an existing model, i.e.
 * wget https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs10000.model
 * mvn compile exec:java -Dexec.args="en.wiki.bpe.vs10000.model"
 */
public final class SentencePieceExample {
    /**
     * Runs the example.
     *
     * @param args {@code args[0]} is the path of the SentencePiece model file to load
     * @throws IllegalArgumentException if no model path is given
     * @throws RuntimeException if loading, encoding or decoding reports a non-ok status
     */
    public static void main(String[] args) {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException
        // when the model path is missing from the command line.
        if (args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: SentencePieceExample <path to SentencePiece model file>");
        }

        SentencePieceProcessor processor = new SentencePieceProcessor();
        requireOk(processor.Load(args[0]));

        // Encode the sample sentence; the resulting ids are written into `ids`.
        IntVector ids = new IntVector();
        requireOk(processor.Encode("hello world!", ids));

        // Print the ids space-separated on a single line.
        for (int id : ids.get()) {
            System.out.print(id + " ");
        }
        System.out.println();

        // Decode the ids back into text; the result is written into `text`.
        BytePointer text = new BytePointer("");
        requireOk(processor.Decode(ids, text));
        System.out.println(text.getString());
    }

    /** Throws a RuntimeException carrying the status message unless the status is ok. */
    private static void requireOk(Status status) {
        if (!status.ok()) {
            throw new RuntimeException(status.ToString());
        }
    }
}