Commit d558a595 authored by Zahoor Mohamed's avatar Zahoor Mohamed Committed by Felix Lange

swarm/storage: pyramid chunker re-write (#14382)

parent 3c865634
...@@ -30,3 +30,6 @@ build/_vendor/pkg ...@@ -30,3 +30,6 @@ build/_vendor/pkg
# travis # travis
profile.tmp profile.tmp
profile.cov profile.cov
# IdeaIDE
.idea
...@@ -29,12 +29,12 @@ import ( ...@@ -29,12 +29,12 @@ import (
// Handler for storage/retrieval related protocol requests // Handler for storage/retrieval related protocol requests
// implements the StorageHandler interface used by the bzz protocol // implements the StorageHandler interface used by the bzz protocol
type Depo struct { type Depo struct {
hashfunc storage.Hasher hashfunc storage.SwarmHasher
localStore storage.ChunkStore localStore storage.ChunkStore
netStore storage.ChunkStore netStore storage.ChunkStore
} }
func NewDepo(hash storage.Hasher, localStore, remoteStore storage.ChunkStore) *Depo { func NewDepo(hash storage.SwarmHasher, localStore, remoteStore storage.ChunkStore) *Depo {
return &Depo{ return &Depo{
hashfunc: hash, hashfunc: hash,
localStore: localStore, localStore: localStore,
......
...@@ -20,9 +20,9 @@ import ( ...@@ -20,9 +20,9 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"fmt" "fmt"
"hash"
"io" "io"
"sync" "sync"
"time"
) )
/* /*
...@@ -50,14 +50,6 @@ data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1} ...@@ -50,14 +50,6 @@ data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
The underlying hash function is configurable The underlying hash function is configurable
*/ */
const (
defaultHash = "SHA3"
// defaultHash = "BMTSHA3" // http://golang.org/pkg/hash/#Hash
// defaultHash = "SHA256" // http://golang.org/pkg/hash/#Hash
defaultBranches int64 = 128
// hashSize int64 = hasherfunc.New().Size() // hasher knows about its own length in bytes
// chunksize int64 = branches * hashSize // chunk is defined as this
)
/* /*
Tree chunker is a concrete implementation of data chunking. Tree chunker is a concrete implementation of data chunking.
...@@ -67,25 +59,19 @@ If all is well it is possible to implement this by simply composing readers so t ...@@ -67,25 +59,19 @@ If all is well it is possible to implement this by simply composing readers so t
The hashing itself does use extra copies and allocation though, since it does need it. The hashing itself does use extra copies and allocation though, since it does need it.
*/ */
type ChunkerParams struct { var (
Branches int64 errAppendOppNotSuported = errors.New("Append operation not supported")
Hash string errOperationTimedOut = errors.New("operation timed out")
} )
func NewChunkerParams() *ChunkerParams {
return &ChunkerParams{
Branches: defaultBranches,
Hash: defaultHash,
}
}
type TreeChunker struct { type TreeChunker struct {
branches int64 branches int64
hashFunc Hasher hashFunc SwarmHasher
// calculated // calculated
hashSize int64 // self.hashFunc.New().Size() hashSize int64 // self.hashFunc.New().Size()
chunkSize int64 // hashSize* branches chunkSize int64 // hashSize* branches
workerCount int workerCount int64 // the number of worker routines used
workerLock sync.RWMutex // lock for the worker count
} }
func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) { func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
...@@ -94,7 +80,8 @@ func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) { ...@@ -94,7 +80,8 @@ func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
self.branches = params.Branches self.branches = params.Branches
self.hashSize = int64(self.hashFunc().Size()) self.hashSize = int64(self.hashFunc().Size())
self.chunkSize = self.hashSize * self.branches self.chunkSize = self.hashSize * self.branches
self.workerCount = 1 self.workerCount = 0
return return
} }
...@@ -114,13 +101,31 @@ type hashJob struct { ...@@ -114,13 +101,31 @@ type hashJob struct {
parentWg *sync.WaitGroup parentWg *sync.WaitGroup
} }
func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) { func (self *TreeChunker) incrementWorkerCount() {
self.workerLock.Lock()
defer self.workerLock.Unlock()
self.workerCount += 1
}
// getWorkerCount returns the current value of workerCount, read while holding
// workerLock for reading.
func (self *TreeChunker) getWorkerCount() int64 {
	self.workerLock.RLock()
	current := self.workerCount
	self.workerLock.RUnlock()
	return current
}
// decrementWorkerCount lowers workerCount by one while holding workerLock for
// writing. hashWorker defers this call, so it runs when a worker returns.
func (self *TreeChunker) decrementWorkerCount() {
	self.workerLock.Lock()
	self.workerCount--
	self.workerLock.Unlock()
}
func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
if self.chunkSize <= 0 { if self.chunkSize <= 0 {
panic("chunker must be initialised") panic("chunker must be initialised")
} }
jobC := make(chan *hashJob, 2*processors)
jobC := make(chan *hashJob, 2*ChunkProcessors)
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
errC := make(chan error) errC := make(chan error)
quitC := make(chan bool) quitC := make(chan bool)
...@@ -129,6 +134,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s ...@@ -129,6 +134,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
if wwg != nil { if wwg != nil {
wwg.Add(1) wwg.Add(1)
} }
self.incrementWorkerCount()
go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg) go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
depth := 0 depth := 0
...@@ -157,10 +164,15 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s ...@@ -157,10 +164,15 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
close(errC) close(errC)
}() }()
//TODO: add a timeout
if err := <-errC; err != nil { defer close(quitC)
close(quitC) select {
return nil, err case err := <-errC:
if err != nil {
return nil, err
}
case <-time.NewTimer(splitTimeout).C:
return nil,errOperationTimedOut
} }
return key, nil return key, nil
...@@ -168,6 +180,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s ...@@ -168,6 +180,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) { func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {
//
for depth > 0 && size < treeSize { for depth > 0 && size < treeSize {
treeSize /= self.branches treeSize /= self.branches
depth-- depth--
...@@ -223,12 +237,15 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade ...@@ -223,12 +237,15 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
// parentWg.Add(1) // parentWg.Add(1)
// go func() { // go func() {
childrenWg.Wait() childrenWg.Wait()
if len(jobC) > self.workerCount && self.workerCount < processors {
worker := self.getWorkerCount()
if int64(len(jobC)) > worker && worker < ChunkProcessors {
if wwg != nil { if wwg != nil {
wwg.Add(1) wwg.Add(1)
} }
self.workerCount++ self.incrementWorkerCount()
go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg) go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
} }
select { select {
case jobC <- &hashJob{key, chunk, size, parentWg}: case jobC <- &hashJob{key, chunk, size, parentWg}:
...@@ -237,6 +254,8 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade ...@@ -237,6 +254,8 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
} }
func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) { func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
defer self.decrementWorkerCount()
hasher := self.hashFunc() hasher := self.hashFunc()
if wwg != nil { if wwg != nil {
defer wwg.Done() defer wwg.Done()
...@@ -249,7 +268,6 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC ...@@ -249,7 +268,6 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
return return
} }
// now we got the hashes in the chunk, then hash the chunks // now we got the hashes in the chunk, then hash the chunks
hasher.Reset()
self.hashChunk(hasher, job, chunkC, swg) self.hashChunk(hasher, job, chunkC, swg)
case <-quitC: case <-quitC:
return return
...@@ -260,9 +278,11 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC ...@@ -260,9 +278,11 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
// The treeChunkers own Hash hashes together // The treeChunkers own Hash hashes together
// - the size (of the subtree encoded in the Chunk) // - the size (of the subtree encoded in the Chunk)
// - the Chunk, ie. the contents read from the input reader // - the Chunk, ie. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) { func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
hasher.Write(job.chunk) hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
hasher.Write(job.chunk[8:]) // minus 8 []byte length
h := hasher.Sum(nil) h := hasher.Sum(nil)
newChunk := &Chunk{ newChunk := &Chunk{
Key: h, Key: h,
SData: job.chunk, SData: job.chunk,
...@@ -285,6 +305,10 @@ func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan * ...@@ -285,6 +305,10 @@ func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *
} }
} }
// Append is not implemented by TreeChunker. It has the same signature as the
// Append method declared in the Splitter interface, ignores every argument and
// always returns a nil Key together with errAppendOppNotSuported.
func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
return nil, errAppendOppNotSuported
}
// LazyChunkReader implements LazySectionReader // LazyChunkReader implements LazySectionReader
type LazyChunkReader struct { type LazyChunkReader struct {
key Key // root key key Key // root key
...@@ -298,7 +322,6 @@ type LazyChunkReader struct { ...@@ -298,7 +322,6 @@ type LazyChunkReader struct {
// implements the Joiner interface // implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader { func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
return &LazyChunkReader{ return &LazyChunkReader{
key: key, key: key,
chunkC: chunkC, chunkC: chunkC,
......
This diff is collapsed.
...@@ -76,7 +76,7 @@ func testStore(m ChunkStore, l int64, branches int64, t *testing.T) { ...@@ -76,7 +76,7 @@ func testStore(m ChunkStore, l int64, branches int64, t *testing.T) {
}() }()
chunker := NewTreeChunker(&ChunkerParams{ chunker := NewTreeChunker(&ChunkerParams{
Branches: branches, Branches: branches,
Hash: defaultHash, Hash: SHA3Hash,
}) })
swg := &sync.WaitGroup{} swg := &sync.WaitGroup{}
key, _ := chunker.Split(rand.Reader, l, chunkC, swg, nil) key, _ := chunker.Split(rand.Reader, l, chunkC, swg, nil)
......
...@@ -72,12 +72,12 @@ type DbStore struct { ...@@ -72,12 +72,12 @@ type DbStore struct {
gcPos, gcStartPos []byte gcPos, gcStartPos []byte
gcArray []*gcItem gcArray []*gcItem
hashfunc Hasher hashfunc SwarmHasher
lock sync.Mutex lock sync.Mutex
} }
func NewDbStore(path string, hash Hasher, capacity uint64, radius int) (s *DbStore, err error) { func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) {
s = new(DbStore) s = new(DbStore)
s.hashfunc = hash s.hashfunc = hash
......
...@@ -29,7 +29,7 @@ func initDbStore(t *testing.T) *DbStore { ...@@ -29,7 +29,7 @@ func initDbStore(t *testing.T) *DbStore {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
m, err := NewDbStore(dir, MakeHashFunc(defaultHash), defaultDbCapacity, defaultRadius) m, err := NewDbStore(dir, MakeHashFunc(SHA3Hash), defaultDbCapacity, defaultRadius)
if err != nil { if err != nil {
t.Fatal("can't create store:", err) t.Fatal("can't create store:", err)
} }
......
...@@ -28,7 +28,7 @@ type LocalStore struct { ...@@ -28,7 +28,7 @@ type LocalStore struct {
} }
// This constructor uses MemStore and DbStore as components // This constructor uses MemStore and DbStore as components
func NewLocalStore(hash Hasher, params *StoreParams) (*LocalStore, error) { func NewLocalStore(hash SwarmHasher, params *StoreParams) (*LocalStore, error) {
dbStore, err := NewDbStore(params.ChunkDbPath, hash, params.DbCapacity, params.Radius) dbStore, err := NewDbStore(params.ChunkDbPath, hash, params.DbCapacity, params.Radius)
if err != nil { if err != nil {
return nil, err return nil, err
......
...@@ -36,7 +36,7 @@ NetStore falls back to a backend (CloudStorage interface) ...@@ -36,7 +36,7 @@ NetStore falls back to a backend (CloudStorage interface)
implemented by bzz/network/forwarder. forwarder or IPFS or IPΞS implemented by bzz/network/forwarder. forwarder or IPFS or IPΞS
*/ */
type NetStore struct { type NetStore struct {
hashfunc Hasher hashfunc SwarmHasher
localStore *LocalStore localStore *LocalStore
cloud CloudStore cloud CloudStore
} }
...@@ -69,7 +69,7 @@ func NewStoreParams(path string) (self *StoreParams) { ...@@ -69,7 +69,7 @@ func NewStoreParams(path string) (self *StoreParams) {
// netstore constructor, takes path argument that is used to initialise dbStore, // netstore constructor, takes path argument that is used to initialise dbStore,
// the persistent (disk) storage component of LocalStore // the persistent (disk) storage component of LocalStore
// the second argument is the hive, the connection/logistics manager for the node // the second argument is the hive, the connection/logistics manager for the node
func NewNetStore(hash Hasher, lstore *LocalStore, cloud CloudStore, params *StoreParams) *NetStore { func NewNetStore(hash SwarmHasher, lstore *LocalStore, cloud CloudStore, params *StoreParams) *NetStore {
return &NetStore{ return &NetStore{
hashfunc: hash, hashfunc: hash,
localStore: lstore, localStore: lstore,
......
This diff is collapsed.
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"hash"
)
// Names of the chunk hash algorithms. The values equal the "SHA3" and "BMT"
// case labels that MakeHashFunc switches on.
const (
	// SHA3Hash selects the Keccak256-based hasher in MakeHashFunc.
	// See http://golang.org/pkg/hash/#Hash
	SHA3Hash = "SHA3"
	// BMTHash selects the hasher built from the bmt package in MakeHashFunc.
	BMTHash = "BMT"
)
// SwarmHash is a hash.Hash that can additionally be reset together with a
// length argument.
type SwarmHash interface {
	hash.Hash

	// ResetWithLength prepares the hasher for a new input and hands it that
	// input's length bytes. hashChunk calls it with the first 8 bytes of a
	// chunk; HashWithLength implements it as Reset followed by Write(length).
	ResetWithLength(length []byte)
}
// HashWithLength wraps a plain hash.Hash and adds the ResetWithLength method
// on top of the embedded hash's own methods.
type HashWithLength struct {
	hash.Hash
}

// ResetWithLength clears the embedded hash and then writes length into it, so
// the length bytes become the first bytes of the next digest input.
func (h *HashWithLength) ResetWithLength(length []byte) {
	h.Hash.Reset()
	h.Hash.Write(length)
}
...@@ -24,12 +24,13 @@ import ( ...@@ -24,12 +24,13 @@ import (
"io" "io"
"sync" "sync"
// "github.com/ethereum/go-ethereum/bmt" "github.com/ethereum/go-ethereum/bmt"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/crypto/sha3"
) )
type Hasher func() hash.Hash type Hasher func() hash.Hash
type SwarmHasher func() SwarmHash
// Peer is the recorded as Source on the chunk // Peer is the recorded as Source on the chunk
// should probably not be here? but network should wrap chunk object // should probably not be here? but network should wrap chunk object
...@@ -78,12 +79,18 @@ func IsZeroKey(key Key) bool { ...@@ -78,12 +79,18 @@ func IsZeroKey(key Key) bool {
var ZeroKey = Key(common.Hash{}.Bytes()) var ZeroKey = Key(common.Hash{}.Bytes())
func MakeHashFunc(hash string) Hasher { func MakeHashFunc(hash string) SwarmHasher {
switch hash { switch hash {
case "SHA256": case "SHA256":
return crypto.SHA256.New return func() SwarmHash { return &HashWithLength{crypto.SHA256.New()} }
case "SHA3": case "SHA3":
return sha3.NewKeccak256 return func() SwarmHash { return &HashWithLength{sha3.NewKeccak256()} }
case "BMT":
return func() SwarmHash {
hasher := sha3.NewKeccak256
pool := bmt.NewTreePool(hasher, bmt.DefaultSegmentCount, bmt.DefaultPoolSize)
return bmt.New(pool)
}
} }
return nil return nil
} }
...@@ -192,6 +199,13 @@ type Splitter interface { ...@@ -192,6 +199,13 @@ type Splitter interface {
A closed error signals process completion at which point the key can be considered final if there were no errors. A closed error signals process completion at which point the key can be considered final if there were no errors.
*/ */
Split(io.Reader, int64, chan *Chunk, *sync.WaitGroup, *sync.WaitGroup) (Key, error) Split(io.Reader, int64, chan *Chunk, *sync.WaitGroup, *sync.WaitGroup) (Key, error)
/* This is the first step in making files mutable (not chunks)..
Append allows adding more data chunks to the end of an already existing file.
The key of the root chunk is supplied to load the respective tree.
The rest of the parameters behave as in Split.
*/
Append(Key, io.Reader, chan *Chunk, *sync.WaitGroup, *sync.WaitGroup) (Key, error)
} }
type Joiner interface { type Joiner interface {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment