684b7a999f
* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
137 lines
4.3 KiB
Go
137 lines
4.3 KiB
Go
// Copyright 2015, Joe Tsai. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE.md file.
|
|
|
|
package prefix
|
|
|
|
import (
|
|
"sort"
|
|
|
|
"github.com/dsnet/compress/internal"
|
|
)
|
|
|
|
// The algorithm used to decode variable length codes is based on the lookup
|
|
// method in zlib. If the code is less-than-or-equal to maxChunkBits,
|
|
// then the symbol can be decoded using a single lookup into the chunks table.
|
|
// Otherwise, the links table will be used for a second level lookup.
|
|
//
|
|
// The chunks slice is keyed by the contents of the bit buffer ANDed with
|
|
// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple
|
|
// that is decoded as follow:
|
|
//
|
|
// var length = chunks[bitBuffer&chunkMask] & countMask
|
|
// var symbol = chunks[bitBuffer&chunkMask] >> countBits
|
|
//
|
|
// If the decoded length is larger than chunkBits, then an overflow link table
|
|
// must be used for further decoding. In this case, the symbol is actually the
|
|
// index into the links tables. The second-level links table returned is
|
|
// processed in the same way as the chunks table.
|
|
//
|
|
// if length > chunkBits {
|
|
// var index = symbol // Previous symbol is index into links tables
|
|
// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
|
|
// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
|
|
// }
|
|
//
|
|
// See the following:
|
|
// http://www.gzip.org/algorithm.txt
|
|
|
|
type Decoder struct {
|
|
chunks []uint32 // First-level lookup map
|
|
links [][]uint32 // Second-level lookup map
|
|
chunkMask uint32 // Mask the length of the chunks table
|
|
linkMask uint32 // Mask the length of the link table
|
|
chunkBits uint32 // Bit-length of the chunks table
|
|
|
|
MinBits uint32 // The minimum number of bits to safely make progress
|
|
NumSyms uint32 // Number of symbols
|
|
}
|
|
|
|
// Init initializes Decoder according to the codes provided.
|
|
func (pd *Decoder) Init(codes PrefixCodes) {
|
|
// Handle special case trees.
|
|
if len(codes) <= 1 {
|
|
switch {
|
|
case len(codes) == 0: // Empty tree (should error if used later)
|
|
*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
|
|
case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
|
|
pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
|
|
*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
|
|
default:
|
|
panic("invalid codes")
|
|
}
|
|
return
|
|
}
|
|
if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
|
|
panic("input codes is not sorted")
|
|
}
|
|
if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
|
|
panic("detected incomplete or overlapping codes")
|
|
}
|
|
|
|
var minBits, maxBits uint32 = valueBits, 0
|
|
for _, c := range codes {
|
|
if minBits > c.Len {
|
|
minBits = c.Len
|
|
}
|
|
if maxBits < c.Len {
|
|
maxBits = c.Len
|
|
}
|
|
}
|
|
|
|
// Allocate chunks table as needed.
|
|
const maxChunkBits = 9 // This can be tuned for better performance
|
|
pd.NumSyms = uint32(len(codes))
|
|
pd.MinBits = minBits
|
|
pd.chunkBits = maxBits
|
|
if pd.chunkBits > maxChunkBits {
|
|
pd.chunkBits = maxChunkBits
|
|
}
|
|
numChunks := 1 << pd.chunkBits
|
|
pd.chunks = allocUint32s(pd.chunks, numChunks)
|
|
pd.chunkMask = uint32(numChunks - 1)
|
|
|
|
// Allocate links tables as needed.
|
|
pd.links = pd.links[:0]
|
|
pd.linkMask = 0
|
|
if pd.chunkBits < maxBits {
|
|
numLinks := 1 << (maxBits - pd.chunkBits)
|
|
pd.linkMask = uint32(numLinks - 1)
|
|
|
|
var linkIdx uint32
|
|
for i := range pd.chunks {
|
|
pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
|
|
}
|
|
for _, c := range codes {
|
|
if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {
|
|
pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
|
|
linkIdx++
|
|
}
|
|
}
|
|
|
|
pd.links = extendSliceUint32s(pd.links, int(linkIdx))
|
|
linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))
|
|
for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
|
|
pd.links[i] = linksFlat[j : j+numLinks]
|
|
}
|
|
}
|
|
|
|
// Fill out chunks and links tables with values.
|
|
for _, c := range codes {
|
|
chunk := c.Sym<<countBits | c.Len
|
|
if c.Len <= pd.chunkBits {
|
|
skip := 1 << uint(c.Len)
|
|
for j := int(c.Val); j < len(pd.chunks); j += skip {
|
|
pd.chunks[j] = chunk
|
|
}
|
|
} else {
|
|
linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits
|
|
links := pd.links[linkIdx]
|
|
skip := 1 << uint(c.Len-pd.chunkBits)
|
|
for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
|
|
links[j] = chunk
|
|
}
|
|
}
|
|
}
|
|
}
|