From badd195b90ac61823d464f1fe5a032c7d84e8c4d Mon Sep 17 00:00:00 2001 From: RobinLiew Date: Sun, 3 Dec 2017 15:34:56 +0800 Subject: [PATCH 1/8] robinliew first commit for RS erasure correction --- .../IRSErasureCorrection.java | 33 ++++ .../RSErasureCorrectionImpl.java | 155 ++++++++++++++++++ .../robinliew/dealbytesinterface/test.java | 43 +++++ 3 files changed, 231 insertions(+) create mode 100644 src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/IRSErasureCorrection.java create mode 100644 src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/RSErasureCorrectionImpl.java create mode 100644 src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/test.java diff --git a/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/IRSErasureCorrection.java b/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/IRSErasureCorrection.java new file mode 100644 index 0000000..123ab5e --- /dev/null +++ b/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/IRSErasureCorrection.java @@ -0,0 +1,33 @@ +package com.backblaze.erasure.robinliew.dealbytesinterface; +/** + * Codec interface of RS erasure checking algorithm(RS纠删校验算法编解码器接口) + * @author RobinLiew RobinLiew 2017.9.21 + * + */ +public interface IRSErasureCorrection { + /** + * 编码 + * @param srcBuffer Original data that needs to be erasure(需要进行纠删编码的原始数据) + * @param sliceLength The length of the file in the file block (the length of the file is consistent)(文件块中文件片长度(文件片的长度保持一致)) + * @param sliceCount The number of files in a file block(文件块中文件片的数量) + * @param fecSliceCount The number of pieces of erasure check in a file block(文件块内纠删校验的片的数量) + * @return The return value is the check data(返回值是校验数据) + */ + public byte[] rs_Encoder(byte[] srcBuffer,int sliceLength,int sliceCount,int fecSliceCount); + /** + * 解码 + * @param srcEraseBuff Received file blocks (including raw data and erasure check data)(接收到的文件块(包括原始数据和纠删校验数据)) + * @param sliceLen The length of the file in a file block(文件块中文件片的长度), + * @param sliceCount The number of files in a file block(文件块中文件片的数量) + * @param rsSliceCount The number of RS erasure check pieces in a file block(文件块内rs纠删校验片的数量) + * @param eraserFlag Erase the image, the array length is sliceCount+rsSliceCount, the true element represents the file pieces without being erased, + * false indicates that the file was wipe out(擦除样图,数组长度为sliceCount+rsSliceCount,其中元素true表示文件片未被擦除,false表示文件片被擦除) + * @return If the return value is 0 on behalf of success, + * that piece of data by right or wipe out the number of pieces in the allowable range of file transmission in the process, + * at the same time to write the original data deleted after srcEraseBuff correction; + * if non zero represents no success, which shows that the number of wipe out more than RS erasure ability + * (返回值如果是0代表成功,说明在传输过程中文件块数据正确或经擦出的片数在容许范围内,同时把纠删后的原始数据写入srcEraseBuff; + * 如果非零代表不成功,说明经擦出的片数超过了RS的纠删能力) + */ + public int rs_Decoder(byte[] srcEraseBuff,int sliceLen,int sliceCount,int rsSliceCount,boolean[] eraserFlag); +} diff --git a/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/RSErasureCorrectionImpl.java b/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/RSErasureCorrectionImpl.java new file mode 100644 index 0000000..ba53ba7 --- /dev/null +++ b/src/main/java/com/backblaze/erasure/robinliew/dealbytesinterface/RSErasureCorrectionImpl.java @@ -0,0 +1,155 @@ +package com.backblaze.erasure.robinliew.dealbytesinterface; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +import org.junit.Test; + +import com.backblaze.erasure.ReedSolomon; + +/** + * Implementation of RS algorithm codec interface + * Encoding: incoming byte[] data data containing N slice data, encoding the array of N+M slice data after encoding, and M as the number of erasure check pieces + * Decode: the data byte[] rs_data after the afferent code, and the information of the data sheet, the erasure check, the recorded lost data sheet + * RS算法编解码接口实现 + * 编码:传入包含N片数据的byte[] data数据,编码后生成N+M片数据数组,M为纠删校验片的数量 + * 解码:传入编码后的数据byte[] rs_data,以及数据片、纠删校验片、记录的丢失数据片的信息 + * @author RobinLiew 2017.9.21 + * + */ +public class RSErasureCorrectionImpl implements IRSErasureCorrection{ + + public int DATA_SHARDS = 4;//Default number of data slices(默认的数据片数量) + public int PARITY_SHARDS = 2;//Default number of checkout data(默认的校验片数据数量) + public int TOTAL_SHARDS = 6;//The total number of the default slices(默认的切片的总数量) + + public int BYTES_IN_INT = 4; + + @Override + public byte[] rs_Encoder(byte[] srcBuffer, int sliceLength, int sliceCount, + int fecSliceCount) { + + byte[] rsData=null; + + try{ + //The length of the data of the payload (equivalent to the length of the file)净荷的数据长度(相当于文件的长度) + final int dataSize = (int) srcBuffer.length; + DATA_SHARDS=sliceCount; + PARITY_SHARDS=fecSliceCount; + TOTAL_SHARDS=DATA_SHARDS+PARITY_SHARDS; + + + // Figure out how big each shard will be. The total size stored + final int storedSize = dataSize; //The total size of the incoming data(传入数据的总大小) + final int shardSize = (storedSize) / DATA_SHARDS;//The size of each piece of data(每片数据的大小) + + // Create a buffer holding the srcBuffer size, followed by + final int bufferSize = shardSize * DATA_SHARDS; + final byte [] allBytes = new byte[bufferSize]; + ByteBuffer.wrap(allBytes).putInt(dataSize); + InputStream in = new ByteArrayInputStream(srcBuffer); + int bytesRead = in.read(allBytes, 0, dataSize); + if (bytesRead != dataSize) { + throw new IOException("not enough bytes read"); + } + in.close(); + + // Make the buffers to hold the shards. + byte [] [] shards = new byte [TOTAL_SHARDS] [shardSize]; + + // Fill in the data shards + for (int i = 0; i < DATA_SHARDS; i++) { + System.arraycopy(allBytes, i * shardSize, shards[i], 0, shardSize); + } + + // Use Reed-Solomon to calculate the parity. + ReedSolomon reedSolomon = ReedSolomon.create(DATA_SHARDS, PARITY_SHARDS); + reedSolomon.encodeParity(shards, 0, shardSize); + + List list=new ArrayList<>(); + + rsData=new byte[TOTAL_SHARDS*shardSize]; + int index=0; + for(int i = 0; i < TOTAL_SHARDS; i++){ + for(int j=0;j Date: Sun, 3 Dec 2017 15:45:59 +0800 Subject: [PATCH 2/8] Update README.md --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 358959f..2729fac 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,8 @@ +## My adding interface and implemention +First, I read and learn the code of this excellent project. Then, on the basis of this great work, I added the interface and implementation of byte array data, so that we can implement the byte array erasure algorithm. You can apply it to the erasure processing of network data transmission, the efficiency and ability of the algorithm is great. + +首先,我阅读并学习这个优秀项目的代码,然后,我在这个很棒的工作的基础上添加了处理字节数组数据的接口和实现,以便我们可以实现字节数组纠删算法的处理。你可以将其应用到网络数据传输的纠删处理上,算法的效率和能力很棒。 + # JavaReedSolomon This is a simple and efficient Reed-Solomon implementation in Java, @@ -28,6 +33,7 @@ you may be interested in using the Intel SIMD instructions to speed up the Galois field multiplication. You can read more about that in the paper on [Screaming Fast Galois Field Arithmetic](http://www.kaymgee.com/Kevin_Greenan/Publications_files/plank-fast2013.pdf). + ## Performance Notes The performance of the inner loop depends on the specific processor From cb8e88f7f0864f8086a875d733a94241a022f432 Mon Sep 17 00:00:00 2001 From: RobinLiew <1498975990@qq.com> Date: Sat, 30 Dec 2017 17:42:04 +0800 Subject: [PATCH 3/8] Update README.md --- README.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/README.md b/README.md index 2729fac..e853c27 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,51 @@ First, I read and learn the code of this excellent project. Then, on the basis o 首先,我阅读并学习这个优秀项目的代码,然后,我在这个很棒的工作的基础上添加了处理字节数组数据的接口和实现,以便我们可以实现字节数组纠删算法的处理。你可以将其应用到网络数据传输的纠删处理上,算法的效率和能力很棒。 +## example +package com.backblaze.erasure.robinliew.dealbytesinterface; + +/** + * + * @author RobinLiew 2017.9.21 + * + */ +public class test { + public static void main(String[] args) { + + IRSErasureCorrection rsProcessor=new RSErasureCorrectionImpl(); + + byte[] data=new byte[1000]; + for(int i=0; i Date: Sat, 30 Dec 2017 17:43:08 +0800 Subject: [PATCH 4/8] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e853c27..358de7d 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,12 @@ public class test { byte[] en_data; en_data=rsProcessor.rs_Encoder(data, sliceLength, sliceCount, fecSliceCount); -//==================Test use: second pieces of data are lost, and the decoding code has the corresponding test code(测试使用:让第二片数据丢失,解码代码中也有对应的测试代码)=================== + //==================Test use: second pieces of data are lost, and the decoding code has the corresponding test code(测试使用:让第二片数据丢失,解码代码中也有对应的测试代码)=================== byte[] temp = new byte[250]; System.arraycopy(temp, 0, en_data, 250, 250); -//============================================================================================================================================================================ + //======================================================================================================================================= - boolean[] eraserFlag=new boolean[sliceCount+fecSliceCount]; + boolean[] eraserFlag=new boolean[sliceCount+fecSliceCount]; for(int i=0;i Date: Sat, 30 Dec 2017 17:44:55 +0800 Subject: [PATCH 5/8] Update README.md --- README.md | 70 +++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 358de7d..4f1a2de 100644 --- a/README.md +++ b/README.md @@ -11,42 +11,42 @@ package com.backblaze.erasure.robinliew.dealbytesinterface; * @author RobinLiew 2017.9.21 * */ -public class test { - public static void main(String[] args) { - - IRSErasureCorrection rsProcessor=new RSErasureCorrectionImpl(); - - byte[] data=new byte[1000]; - for(int i=0; i Date: Sat, 30 Dec 2017 17:45:28 +0800 Subject: [PATCH 6/8] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4f1a2de..7acb233 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ First, I read and learn the code of this excellent project. Then, on the basis o 首先,我阅读并学习这个优秀项目的代码,然后,我在这个很棒的工作的基础上添加了处理字节数组数据的接口和实现,以便我们可以实现字节数组纠删算法的处理。你可以将其应用到网络数据传输的纠删处理上,算法的效率和能力很棒。 ## example -package com.backblaze.erasure.robinliew.dealbytesinterface; + package com.backblaze.erasure.robinliew.dealbytesinterface; -/** - * - * @author RobinLiew 2017.9.21 - * - */ + /** + * + * @author RobinLiew 2017.9.21 + * + */ public class test { public static void main(String[] args) { From 0b31058edff778c3021d004c3e9247dcc3526806 Mon Sep 17 00:00:00 2001 From: RobinLiew <1498975990@qq.com> Date: Sat, 30 Dec 2017 17:46:31 +0800 Subject: [PATCH 7/8] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7acb233..cac1841 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,9 @@ First, I read and learn the code of this excellent project. Then, on the basis o public class test { public static void main(String[] args) { - IRSErasureCorrection rsProcessor=new RSErasureCorrectionImpl(); + IRSErasureCorrection rsProcessor=new RSErasureCorrectionImpl(); - byte[] data=new byte[1000]; + byte[] data=new byte[1000]; for(int i=0; i Date: Fri, 23 Mar 2018 09:48:02 +0800 Subject: [PATCH 8/8] Update README.md --- README.md | 61 ------------------------------------------------------- 1 file changed, 61 deletions(-) diff --git a/README.md b/README.md index cac1841..2825d16 100644 --- a/README.md +++ b/README.md @@ -48,64 +48,3 @@ First, I read and learn the code of this excellent project. Then, on the basis o } -# JavaReedSolomon - -This is a simple and efficient Reed-Solomon implementation in Java, -which was originally built at [Backblaze](https://www.backblaze.com). -There is an overview of how the algorithm works in my [blog -post](https://www.backblaze.com/blog/reed-solomon/). - -The ReedSolomon class does the encoding and decoding, and is supported -by Matrix, which does matrix arithmetic, and Galois, which is a finite -field over 8-bit values. - -For examples of how to use ReedSolomon, take a look at SampleEncoder -and SampleDecoder. They show, in a very simple way, how to break a -file into shards and encode parity, and then how to take a subset of -the shards and reconstruct the original file. - -There is a Gradle build file to make a jar and run the tests. Running -it is simple. Just type: `gradle build` - -We would like to send out a special thanks to James Plank at the -University of Tennessee at Knoxville for his useful papers on erasure -coding. If you'd like an intro into how it all works, take a look at -[this introductory paper](http://web.eecs.utk.edu/~plank/plank/papers/SPE-9-97.html). - -This project is limited to a pure Java implementation. If you need -more speed, and can handle some assembly-language programming, -you may be interested in using the Intel SIMD instructions to speed -up the Galois field multiplication. You can read more about that -in the paper on [Screaming Fast Galois Field Arithmetic](http://www.kaymgee.com/Kevin_Greenan/Publications_files/plank-fast2013.pdf). - - -## Performance Notes - -The performance of the inner loop depends on the specific processor -you're running on. There are twelve different permutations of the -loop in this library, and the ReedSolomonBenchmark class will tell -you which one is faster for your particular application. The number -of parity and data shards in the benchmark, as well as the buffer -sizes, match the usage at Backblaze. You can set the parameters of -the benchmark to match your specific use before choosing a loop -implementation. - -These are the speeds I got running the benchmark on a Backblaze -storage pod: - -``` - ByteInputOutputExpCodingLoop 95.2 MB/s - ByteInputOutputTableCodingLoop 107.0 MB/s - ByteOutputInputExpCodingLoop 130.3 MB/s - ByteOutputInputTableCodingLoop 181.4 MB/s - InputByteOutputExpCodingLoop 94.4 MB/s - InputByteOutputTableCodingLoop 138.3 MB/s - InputOutputByteExpCodingLoop 200.4 MB/s - InputOutputByteTableCodingLoop 525.7 MB/s - OutputByteInputExpCodingLoop 143.7 MB/s - OutputByteInputTableCodingLoop 209.5 MB/s - OutputInputByteExpCodingLoop 217.6 MB/s - OutputInputByteTableCodingLoop 515.7 MB/s -``` - -![Bar Chart of Benchmark Results](notes/benchmark_on_storage_pod.png)