lzo: update from N770FXXU8HVK5

This commit is contained in:
xxmustafacooTR 2022-12-25 20:20:21 +03:00
parent a1bab45510
commit a0affe59bb
No known key found for this signature in database
GPG key ID: 520B6FE385CBF5C9
10 changed files with 403 additions and 55 deletions

View file

@ -73,15 +73,33 @@ Description
They just have to "refill" this credit if they consume extra bytes. This is
an implementation design choice independent of the algorithm or encoding.
Versions
0: Original version
1: LZO-RLE
Version 1 of LZO implements an extension to encode runs of zeros using run
length encoding. This improves speed for data with many zeros, which is a
common case for zram. This modifies the bitstream in a backwards compatible way
(v1 can correctly decompress v0 compressed data, but v0 cannot read v1 data).
For maximum compatibility, both versions are available under different names
(lzo and lzo-rle). Differences in the encoding are marked in this document with
a note such as "version 1 only".
Byte sequences
First byte encoding :
0..17 : follow regular instruction encoding, see below. It is worth
noting that codes 16 and 17 will represent a block copy from
the dictionary which is empty, and that they will always be
0..16 : follow regular instruction encoding, see below. It is worth
noting that code 16 will represent a block copy from the
dictionary which is empty, and that it will always be
invalid at this place.
17 : bitstream version. If the first byte is 17, the next byte
gives the bitstream version (version 1 only). If the first byte
is not 17, the bitstream version is 0.
18..21 : copy 0..3 literals
state = (byte - 17) = 0..3 [ copy <state> literals ]
skip byte
@ -134,6 +152,11 @@ Byte sequences
state = S (copy S literals after this block)
End of stream is reached if distance == 16384
In version 1 only, this instruction is also used to encode a run of
zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
In this case, it is followed by a fourth byte, X.
run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
0 0 1 L L L L L (32..63)
Copy of small block within 16kB distance (preferably less than 34B)
length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
@ -158,7 +181,9 @@ Byte sequences
Authors
This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
analysis of the decompression code available in Linux 3.16-rc5. The code is
tricky, it is possible that this document contains mistakes or that a few
corner cases were overlooked. In any case, please report any doubt, fix, or
proposed updates to the author(s) so that the document can be updated.
analysis of the decompression code available in Linux 3.16-rc5, and updated
by Dave Rodgman <dave.rodgman@arm.com> on 2018/10/30 to introduce run-length
encoding. The code is tricky, it is possible that this document contains
mistakes or that a few corner cases were overlooked. In any case, please
report any doubt, fix, or proposed updates to the author(s) so that the
document can be updated.

View file

@ -116,7 +116,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
obj-$(CONFIG_CRYPTO_842) += 842.o

175
crypto/lzo-rle.c Normal file
View file

@ -0,0 +1,175 @@
/*
* Cryptographic API.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/lzo.h>
#include <crypto/internal/scompress.h>
/*
 * Private context of the "lzo-rle" crypto_alg transform (its size is what
 * .cra_ctxsize reports).
 */
struct lzorle_ctx {
/* Scratch buffer of LZO1X_MEM_COMPRESS bytes, passed to the compressor. */
void *lzorle_comp_mem;
};
/*
 * Allocate the compressor scratch buffer (LZO1X_MEM_COMPRESS bytes).
 *
 * @tfm is not used. Returns the buffer on success or ERR_PTR(-ENOMEM) when
 * the allocation fails; the buffer is released with lzorle_free_ctx().
 */
static void *lzorle_alloc_ctx(struct crypto_scomp *tfm)
{
	void *wrkmem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);

	return wrkmem ? wrkmem : ERR_PTR(-ENOMEM);
}
/*
 * cra_init hook: allocate the scratch buffer and store it in the transform
 * context. Returns 0 on success, -ENOMEM if the allocation failed.
 */
static int lzorle_init(struct crypto_tfm *tfm)
{
	struct lzorle_ctx *priv = crypto_tfm_ctx(tfm);
	void *wrkmem = lzorle_alloc_ctx(NULL);

	/* Stored unconditionally, exactly as returned by the allocator. */
	priv->lzorle_comp_mem = wrkmem;

	return IS_ERR(wrkmem) ? -ENOMEM : 0;
}
/*
 * Release a scratch buffer obtained from lzorle_alloc_ctx().
 * @tfm is not used.
 */
static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
	kvfree(ctx);
}
/* cra_exit hook: free the scratch buffer held in the transform context. */
static void lzorle_exit(struct crypto_tfm *tfm)
{
	struct lzorle_ctx *priv = crypto_tfm_ctx(tfm);

	kvfree(priv->lzorle_comp_mem);
}
/*
 * Common compression path for both crypto interfaces.
 *
 * @src/@slen: input buffer and its length
 * @dst:       output buffer
 * @dlen:      in: initial value for the length handed to the compressor;
 *             out: number of bytes produced (written only on success)
 * @ctx:       scratch buffer passed to lzorle1x_1_compress() as work memory
 *
 * Returns 0 on success, -EINVAL if the compressor did not return LZO_E_OK.
 */
static int __lzorle_compress(const u8 *src, unsigned int slen,
			     u8 *dst, unsigned int *dlen, void *ctx)
{
	/* The LZO API takes a size_t length; the crypto API uses unsigned int. */
	size_t out_len = *dlen;

	if (lzorle1x_1_compress(src, slen, dst, &out_len, ctx) != LZO_E_OK)
		return -EINVAL;

	*dlen = out_len;
	return 0;
}
/*
 * coa_compress hook: compress using the scratch buffer stored in the
 * transform context. Return value is that of __lzorle_compress().
 */
static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src,
			   unsigned int slen, u8 *dst, unsigned int *dlen)
{
	struct lzorle_ctx *priv = crypto_tfm_ctx(tfm);
	void *wrkmem = priv->lzorle_comp_mem;

	return __lzorle_compress(src, slen, dst, dlen, wrkmem);
}
/*
 * scomp compress hook: @ctx is used directly as the compressor scratch
 * buffer; @tfm is not used. Return value is that of __lzorle_compress().
 */
static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src,
			    unsigned int slen, u8 *dst, unsigned int *dlen,
			    void *ctx)
{
	int ret = __lzorle_compress(src, slen, dst, dlen, ctx);

	return ret;
}
/*
 * Common decompression path for both crypto interfaces.
 *
 * @src/@slen: compressed input and its length
 * @dst:       output buffer
 * @dlen:      in: value passed to the decompressor as the output length;
 *             out: length reported back by it (written only on success)
 *
 * Returns 0 on success, -EINVAL if lzo1x_decompress_safe() did not return
 * LZO_E_OK.
 */
static int __lzorle_decompress(const u8 *src, unsigned int slen,
			       u8 *dst, unsigned int *dlen)
{
	/* The LZO API takes a size_t length; the crypto API uses unsigned int. */
	size_t out_len = *dlen;

	if (lzo1x_decompress_safe(src, slen, dst, &out_len) != LZO_E_OK)
		return -EINVAL;

	*dlen = out_len;
	return 0;
}
/*
 * coa_decompress hook: @tfm is not used, decompression needs no context.
 * Return value is that of __lzorle_decompress().
 */
static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src,
			     unsigned int slen, u8 *dst, unsigned int *dlen)
{
	int ret = __lzorle_decompress(src, slen, dst, dlen);

	return ret;
}
/*
 * scomp decompress hook: neither @tfm nor @ctx is used.
 * Return value is that of __lzorle_decompress().
 */
static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
			      unsigned int slen, u8 *dst, unsigned int *dlen,
			      void *ctx)
{
	int ret = __lzorle_decompress(src, slen, dst, dlen);

	return ret;
}
/* Descriptor registering "lzo-rle" through the crypto_alg compress interface. */
static struct crypto_alg alg = {
	.cra_name	= "lzo-rle",
	.cra_flags	= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize	= sizeof(struct lzorle_ctx),
	.cra_module	= THIS_MODULE,
	.cra_init	= lzorle_init,
	.cra_exit	= lzorle_exit,
	.cra_u		= {
		.compress = {
			.coa_compress	= lzorle_compress,
			.coa_decompress	= lzorle_decompress,
		},
	},
};
/* Descriptor registering "lzo-rle" through the scomp interface. */
static struct scomp_alg scomp = {
	.alloc_ctx	= lzorle_alloc_ctx,
	.free_ctx	= lzorle_free_ctx,
	.compress	= lzorle_scompress,
	.decompress	= lzorle_sdecompress,
	.base		= {
		.cra_name		= "lzo-rle",
		.cra_driver_name	= "lzo-rle-scomp",
		.cra_module		= THIS_MODULE,
	},
};
/*
 * Module entry point: register both algorithm descriptors.
 *
 * If the scomp registration fails, the crypto_alg registration is rolled
 * back so that nothing stays registered. Returns 0 or the error code of the
 * registration call that failed.
 */
static int __init lzorle_mod_init(void)
{
	int err = crypto_register_alg(&alg);

	if (err)
		return err;

	err = crypto_register_scomp(&scomp);
	if (err)
		crypto_unregister_alg(&alg);

	return err;
}
/* Module exit point: unregister both descriptors registered at init time. */
static void __exit lzorle_mod_fini(void)
{
	crypto_unregister_alg(&alg);
	crypto_unregister_scomp(&scomp);
}
/* Hook the init/exit functions into module load and unload. */
module_init(lzorle_mod_init);
module_exit(lzorle_mod_fini);
/* Module metadata; the alias uses the algorithm name "lzo-rle". */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO-RLE Compression Algorithm");
MODULE_ALIAS_CRYPTO("lzo-rle");

View file

@ -73,7 +73,8 @@ static char *check[] = {
"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
"lzo", "lzo-rle", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384",
"sha3-512",
NULL
};

View file

@ -41,7 +41,7 @@ config ZRAM_MEMORY_TRACKING
config ZRAM_DEFAULT_COMP_ALGORITHM
string "Default ZRAM algorithm"
default "lz4"
default "lzo-rle"
config ZRAM_LRU_WRITEBACK
bool

View file

@ -20,6 +20,7 @@
static const char * const backends[] = {
"lzo",
"lzo-rle",
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
"lz4",
#endif

View file

@ -17,12 +17,16 @@
#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short))
#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS
#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2)
/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzo1x_1_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzorle1x_1_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
/* safe decompression with overrun testing */
int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len);

View file

@ -20,7 +20,8 @@
static noinline size_t
lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len,
size_t ti, void *wrkmem)
size_t ti, void *wrkmem, signed char *state_offset,
const unsigned char bitstream_version)
{
const unsigned char *ip;
unsigned char *op;
@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
ip += ti < 4 ? 4 - ti : 0;
for (;;) {
const unsigned char *m_pos;
const unsigned char *m_pos = NULL;
size_t t, m_len, m_off;
u32 dv;
u32 run_length = 0;
literal:
ip += 1 + ((ip - ii) >> 5);
next:
if (unlikely(ip >= ip_end))
break;
dv = get_unaligned_le32(ip);
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
m_pos = in + dict[t];
dict[t] = (lzo_dict_t) (ip - in);
if (unlikely(dv != get_unaligned_le32(m_pos)))
goto literal;
if (dv == 0 && bitstream_version) {
const unsigned char *ir = ip + 4;
const unsigned char *limit = ip_end
< (ip + MAX_ZERO_RUN_LENGTH + 1)
? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
defined(LZO_FAST_64BIT_MEMORY_ACCESS)
u64 dv64;
for (; (ir + 32) <= limit; ir += 32) {
dv64 = get_unaligned((u64 *)ir);
dv64 |= get_unaligned((u64 *)ir + 1);
dv64 |= get_unaligned((u64 *)ir + 2);
dv64 |= get_unaligned((u64 *)ir + 3);
if (dv64)
break;
}
for (; (ir + 8) <= limit; ir += 8) {
dv64 = get_unaligned((u64 *)ir);
if (dv64) {
# if defined(__LITTLE_ENDIAN)
ir += __builtin_ctzll(dv64) >> 3;
# elif defined(__BIG_ENDIAN)
ir += __builtin_clzll(dv64) >> 3;
# else
# error "missing endian definition"
# endif
break;
}
}
#else
while ((ir < (const unsigned char *)
ALIGN((uintptr_t)ir, 4)) &&
(ir < limit) && (*ir == 0))
ir++;
for (; (ir + 4) <= limit; ir += 4) {
dv = *((u32 *)ir);
if (dv) {
# if defined(__LITTLE_ENDIAN)
ir += __builtin_ctz(dv) >> 3;
# elif defined(__BIG_ENDIAN)
ir += __builtin_clz(dv) >> 3;
# else
# error "missing endian definition"
# endif
break;
}
}
#endif
while (likely(ir < limit) && unlikely(*ir == 0))
ir++;
run_length = ir - ip;
if (run_length > MAX_ZERO_RUN_LENGTH)
run_length = MAX_ZERO_RUN_LENGTH;
} else {
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
m_pos = in + dict[t];
dict[t] = (lzo_dict_t) (ip - in);
if (unlikely(dv != get_unaligned_le32(m_pos)))
goto literal;
}
ii -= ti;
ti = 0;
t = ip - ii;
if (t != 0) {
if (t <= 3) {
op[-2] |= t;
op[*state_offset] |= t;
COPY4(op, ii);
op += t;
} else if (t <= 16) {
@ -88,6 +147,17 @@ next:
}
}
if (unlikely(run_length)) {
ip += run_length;
run_length -= MIN_ZERO_RUN_LENGTH;
put_unaligned_le32((run_length << 21) | 0xfffc18
| (run_length & 0x7), op);
op += 4;
run_length = 0;
*state_offset = -3;
goto finished_writing_instruction;
}
m_len = 4;
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
@ -170,7 +240,6 @@ m_len_done:
m_off = ip - m_pos;
ip += m_len;
ii = ip;
if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
m_off -= 1;
*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
@ -207,29 +276,45 @@ m_len_done:
*op++ = (m_off << 2);
*op++ = (m_off >> 6);
}
*state_offset = -2;
finished_writing_instruction:
ii = ip;
goto next;
}
*out_len = op - out;
return in_end - (ii - ti);
}
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len,
void *wrkmem)
void *wrkmem, const unsigned char bitstream_version)
{
const unsigned char *ip = in;
unsigned char *op = out;
size_t l = in_len;
size_t t = 0;
signed char state_offset = -2;
unsigned int m4_max_offset;
// LZO v0 will never write 17 as first byte,
// so this is used to version the bitstream
if (bitstream_version > 0) {
*op++ = 17;
*op++ = bitstream_version;
m4_max_offset = M4_MAX_OFFSET_V1;
} else {
m4_max_offset = M4_MAX_OFFSET_V0;
}
while (l > 20) {
size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
uintptr_t ll_end = (uintptr_t) ip + ll;
if ((ll_end + ((t + ll) >> 5)) <= ll_end)
break;
BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem,
&state_offset, bitstream_version);
ip += ll;
op += *out_len;
l -= ll;
@ -242,7 +327,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
if (op == out && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[-2] |= t;
op[state_offset] |= t;
} else if (t <= 18) {
*op++ = (t - 3);
} else {
@ -273,7 +358,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
*out_len = op - out;
return LZO_E_OK;
}
/*
 * Compress with the generic compressor using bitstream version 0.
 * Arguments and return value are passed straight through to/from
 * lzogeneric1x_1_compress().
 */
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
		     unsigned char *out, size_t *out_len,
		     void *wrkmem)
{
	const unsigned char bitstream_version = 0;

	return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem,
				       bitstream_version);
}
/*
 * Compress with the generic compressor using bitstream version LZO_VERSION.
 * Arguments and return value are passed straight through to/from
 * lzogeneric1x_1_compress().
 */
int lzorle1x_1_compress(const unsigned char *in, size_t in_len,
			unsigned char *out, size_t *out_len,
			void *wrkmem)
{
	const unsigned char bitstream_version = LZO_VERSION;

	return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem,
				       bitstream_version);
}
EXPORT_SYMBOL_GPL(lzo1x_1_compress);
EXPORT_SYMBOL_GPL(lzorle1x_1_compress);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X-1 Compressor");

View file

@ -46,11 +46,23 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
const unsigned char * const ip_end = in + in_len;
unsigned char * const op_end = out + *out_len;
unsigned char bitstream_version;
op = out;
ip = in;
if (unlikely(in_len < 3))
goto input_overrun;
if (likely(*ip == 17)) {
bitstream_version = ip[1];
ip += 2;
if (unlikely(in_len < 5))
goto input_overrun;
} else {
bitstream_version = 0;
}
if (*ip > 17) {
t = *ip++ - 17;
if (t < 4) {
@ -154,32 +166,49 @@ copy_literal_run:
m_pos -= next >> 2;
next &= 3;
} else {
m_pos = op;
m_pos -= (t & 8) << 11;
t = (t & 7) + (3 - 1);
if (unlikely(t == 2)) {
size_t offset;
const unsigned char *ip_last = ip;
while (unlikely(*ip == 0)) {
ip++;
NEED_IP(1);
}
offset = ip - ip_last;
if (unlikely(offset > MAX_255_COUNT))
return LZO_E_ERROR;
offset = (offset << 8) - offset;
t += offset + 7 + *ip++;
NEED_IP(2);
}
NEED_IP(2);
next = get_unaligned_le16(ip);
ip += 2;
m_pos -= next >> 2;
next &= 3;
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
if (((next & 0xfffc) == 0xfffc) &&
((t & 0xf8) == 0x18) &&
likely(bitstream_version)) {
NEED_IP(3);
t &= 7;
t |= ip[2] << 3;
t += MIN_ZERO_RUN_LENGTH;
NEED_OP(t);
memset(op, 0, t);
op += t;
next &= 3;
ip += 3;
goto match_next;
} else {
m_pos = op;
m_pos -= (t & 8) << 11;
t = (t & 7) + (3 - 1);
if (unlikely(t == 2)) {
size_t offset;
const unsigned char *ip_last = ip;
while (unlikely(*ip == 0)) {
ip++;
NEED_IP(1);
}
offset = ip - ip_last;
if (unlikely(offset > MAX_255_COUNT))
return LZO_E_ERROR;
offset = (offset << 8) - offset;
t += offset + 7 + *ip++;
NEED_IP(2);
next = get_unaligned_le16(ip);
}
ip += 2;
m_pos -= next >> 2;
next &= 3;
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
}
}
TEST_LB(m_pos);
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)

View file

@ -12,9 +12,15 @@
*/
/* Version
* 0: original lzo version
* 1: lzo with support for RLE
*/
#define LZO_VERSION 1
#define COPY4(dst, src) \
put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
#if defined(__x86_64__)
#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define COPY8(dst, src) \
put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
#else
@ -24,19 +30,21 @@
#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
#error "conflicting endian definitions"
#elif defined(__x86_64__)
#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define LZO_USE_CTZ64 1
#define LZO_USE_CTZ32 1
#elif defined(__i386__) || defined(__powerpc__)
#define LZO_FAST_64BIT_MEMORY_ACCESS
#elif defined(CONFIG_X86) || defined(CONFIG_PPC)
#define LZO_USE_CTZ32 1
#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5)
#define LZO_USE_CTZ32 1
#endif
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
#define M4_MAX_OFFSET_V0 0xbfff
#define M4_MAX_OFFSET_V1 0xbffe
#define M1_MIN_LEN 2
#define M1_MAX_LEN 2
@ -52,6 +60,9 @@
#define M3_MARKER 32
#define M4_MARKER 16
#define MIN_ZERO_RUN_LENGTH 4
#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH)
#define lzo_dict_t unsigned short
#define D_BITS 13
#define D_SIZE (1u << D_BITS)