Commit d558a595 authored by Zahoor Mohamed's avatar Zahoor Mohamed Committed by Felix Lange

swarm/storage: pyramid chunker re-write (#14382)

parent 3c865634
......@@ -30,3 +30,6 @@ build/_vendor/pkg
# travis
profile.tmp
profile.cov
# IdeaIDE
.idea
......@@ -29,12 +29,12 @@ import (
// Handler for storage/retrieval related protocol requests
// implements the StorageHandler interface used by the bzz protocol
type Depo struct {
hashfunc storage.Hasher
hashfunc storage.SwarmHasher
localStore storage.ChunkStore
netStore storage.ChunkStore
}
func NewDepo(hash storage.Hasher, localStore, remoteStore storage.ChunkStore) *Depo {
func NewDepo(hash storage.SwarmHasher, localStore, remoteStore storage.ChunkStore) *Depo {
return &Depo{
hashfunc: hash,
localStore: localStore,
......
......@@ -20,9 +20,9 @@ import (
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"sync"
"time"
)
/*
......@@ -50,14 +50,6 @@ data_{i} := size(subtree_{i}) || key_{j} || key_{j+1} .... || key_{j+n-1}
The underlying hash function is configurable
*/
const (
defaultHash = "SHA3"
// defaultHash = "BMTSHA3" // http://golang.org/pkg/hash/#Hash
// defaultHash = "SHA256" // http://golang.org/pkg/hash/#Hash
defaultBranches int64 = 128
// hashSize int64 = hasherfunc.New().Size() // hasher knows about its own length in bytes
// chunksize int64 = branches * hashSize // chunk is defined as this
)
/*
Tree chunker is a concrete implementation of data chunking.
......@@ -67,25 +59,19 @@ If all is well it is possible to implement this by simply composing readers so t
The hashing itself does use extra copies and allocation though, since it does need it.
*/
type ChunkerParams struct {
Branches int64
Hash string
}
func NewChunkerParams() *ChunkerParams {
return &ChunkerParams{
Branches: defaultBranches,
Hash: defaultHash,
}
}
var (
errAppendOppNotSuported = errors.New("Append operation not supported")
errOperationTimedOut = errors.New("operation timed out")
)
type TreeChunker struct {
branches int64
hashFunc Hasher
hashFunc SwarmHasher
// calculated
hashSize int64 // self.hashFunc.New().Size()
chunkSize int64 // hashSize* branches
workerCount int
workerCount int64 // the number of worker routines used
workerLock sync.RWMutex // lock for the worker count
}
func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
......@@ -94,7 +80,8 @@ func NewTreeChunker(params *ChunkerParams) (self *TreeChunker) {
self.branches = params.Branches
self.hashSize = int64(self.hashFunc().Size())
self.chunkSize = self.hashSize * self.branches
self.workerCount = 1
self.workerCount = 0
return
}
......@@ -114,13 +101,31 @@ type hashJob struct {
parentWg *sync.WaitGroup
}
func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
func (self *TreeChunker) incrementWorkerCount() {
self.workerLock.Lock()
defer self.workerLock.Unlock()
self.workerCount += 1
}
func (self *TreeChunker) getWorkerCount() int64 {
self.workerLock.RLock()
defer self.workerLock.RUnlock()
return self.workerCount
}
func (self *TreeChunker) decrementWorkerCount() {
self.workerLock.Lock()
defer self.workerLock.Unlock()
self.workerCount -= 1
}
func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
if self.chunkSize <= 0 {
panic("chunker must be initialised")
}
jobC := make(chan *hashJob, 2*processors)
jobC := make(chan *hashJob, 2*ChunkProcessors)
wg := &sync.WaitGroup{}
errC := make(chan error)
quitC := make(chan bool)
......@@ -129,6 +134,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
if wwg != nil {
wwg.Add(1)
}
self.incrementWorkerCount()
go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
depth := 0
......@@ -157,10 +164,15 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
close(errC)
}()
//TODO: add a timeout
if err := <-errC; err != nil {
close(quitC)
return nil, err
defer close(quitC)
select {
case err := <-errC:
if err != nil {
return nil, err
}
case <-time.NewTimer(splitTimeout).C:
return nil,errOperationTimedOut
}
return key, nil
......@@ -168,6 +180,8 @@ func (self *TreeChunker) Split(data io.Reader, size int64, chunkC chan *Chunk, s
func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reader, size int64, jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, parentWg, swg, wwg *sync.WaitGroup) {
//
for depth > 0 && size < treeSize {
treeSize /= self.branches
depth--
......@@ -223,12 +237,15 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
// parentWg.Add(1)
// go func() {
childrenWg.Wait()
if len(jobC) > self.workerCount && self.workerCount < processors {
worker := self.getWorkerCount()
if int64(len(jobC)) > worker && worker < ChunkProcessors {
if wwg != nil {
wwg.Add(1)
}
self.workerCount++
self.incrementWorkerCount()
go self.hashWorker(jobC, chunkC, errC, quitC, swg, wwg)
}
select {
case jobC <- &hashJob{key, chunk, size, parentWg}:
......@@ -237,6 +254,8 @@ func (self *TreeChunker) split(depth int, treeSize int64, key Key, data io.Reade
}
func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC chan error, quitC chan bool, swg, wwg *sync.WaitGroup) {
defer self.decrementWorkerCount()
hasher := self.hashFunc()
if wwg != nil {
defer wwg.Done()
......@@ -249,7 +268,6 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
return
}
// now we got the hashes in the chunk, then hash the chunks
hasher.Reset()
self.hashChunk(hasher, job, chunkC, swg)
case <-quitC:
return
......@@ -260,9 +278,11 @@ func (self *TreeChunker) hashWorker(jobC chan *hashJob, chunkC chan *Chunk, errC
// The treeChunkers own Hash hashes together
// - the size (of the subtree encoded in the Chunk)
// - the Chunk, ie. the contents read from the input reader
func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
hasher.Write(job.chunk)
func (self *TreeChunker) hashChunk(hasher SwarmHash, job *hashJob, chunkC chan *Chunk, swg *sync.WaitGroup) {
hasher.ResetWithLength(job.chunk[:8]) // 8 bytes of length
hasher.Write(job.chunk[8:]) // minus 8 []byte length
h := hasher.Sum(nil)
newChunk := &Chunk{
Key: h,
SData: job.chunk,
......@@ -285,6 +305,10 @@ func (self *TreeChunker) hashChunk(hasher hash.Hash, job *hashJob, chunkC chan *
}
}
func (self *TreeChunker) Append(key Key, data io.Reader, chunkC chan *Chunk, swg, wwg *sync.WaitGroup) (Key, error) {
return nil, errAppendOppNotSuported
}
// LazyChunkReader implements LazySectionReader
type LazyChunkReader struct {
key Key // root key
......@@ -298,7 +322,6 @@ type LazyChunkReader struct {
// implements the Joiner interface
func (self *TreeChunker) Join(key Key, chunkC chan *Chunk) LazySectionReader {
return &LazyChunkReader{
key: key,
chunkC: chunkC,
......
This diff is collapsed.
......@@ -76,7 +76,7 @@ func testStore(m ChunkStore, l int64, branches int64, t *testing.T) {
}()
chunker := NewTreeChunker(&ChunkerParams{
Branches: branches,
Hash: defaultHash,
Hash: SHA3Hash,
})
swg := &sync.WaitGroup{}
key, _ := chunker.Split(rand.Reader, l, chunkC, swg, nil)
......
......@@ -72,12 +72,12 @@ type DbStore struct {
gcPos, gcStartPos []byte
gcArray []*gcItem
hashfunc Hasher
hashfunc SwarmHasher
lock sync.Mutex
}
func NewDbStore(path string, hash Hasher, capacity uint64, radius int) (s *DbStore, err error) {
func NewDbStore(path string, hash SwarmHasher, capacity uint64, radius int) (s *DbStore, err error) {
s = new(DbStore)
s.hashfunc = hash
......
......@@ -29,7 +29,7 @@ func initDbStore(t *testing.T) *DbStore {
if err != nil {
t.Fatal(err)
}
m, err := NewDbStore(dir, MakeHashFunc(defaultHash), defaultDbCapacity, defaultRadius)
m, err := NewDbStore(dir, MakeHashFunc(SHA3Hash), defaultDbCapacity, defaultRadius)
if err != nil {
t.Fatal("can't create store:", err)
}
......
......@@ -28,7 +28,7 @@ type LocalStore struct {
}
// This constructor uses MemStore and DbStore as components
func NewLocalStore(hash Hasher, params *StoreParams) (*LocalStore, error) {
func NewLocalStore(hash SwarmHasher, params *StoreParams) (*LocalStore, error) {
dbStore, err := NewDbStore(params.ChunkDbPath, hash, params.DbCapacity, params.Radius)
if err != nil {
return nil, err
......
......@@ -36,7 +36,7 @@ NetStore falls back to a backend (CloudStorage interface)
implemented by bzz/network/forwarder. forwarder or IPFS or IPΞS
*/
type NetStore struct {
hashfunc Hasher
hashfunc SwarmHasher
localStore *LocalStore
cloud CloudStore
}
......@@ -69,7 +69,7 @@ func NewStoreParams(path string) (self *StoreParams) {
// netstore contructor, takes path argument that is used to initialise dbStore,
// the persistent (disk) storage component of LocalStore
// the second argument is the hive, the connection/logistics manager for the node
func NewNetStore(hash Hasher, lstore *LocalStore, cloud CloudStore, params *StoreParams) *NetStore {
func NewNetStore(hash SwarmHasher, lstore *LocalStore, cloud CloudStore, params *StoreParams) *NetStore {
return &NetStore{
hashfunc: hash,
localStore: lstore,
......
This diff is collapsed.
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package storage
import (
"hash"
)
const (
BMTHash = "BMT"
SHA3Hash = "SHA3" // http://golang.org/pkg/hash/#Hash
)
type SwarmHash interface {
hash.Hash
ResetWithLength([]byte)
}
type HashWithLength struct {
hash.Hash
}
func (self *HashWithLength) ResetWithLength(length []byte) {
self.Reset()
self.Write(length)
}
......@@ -24,12 +24,13 @@ import (
"io"
"sync"
// "github.com/ethereum/go-ethereum/bmt"
"github.com/ethereum/go-ethereum/bmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto/sha3"
)
type Hasher func() hash.Hash
type SwarmHasher func() SwarmHash
// Peer is the recorded as Source on the chunk
// should probably not be here? but network should wrap chunk object
......@@ -78,12 +79,18 @@ func IsZeroKey(key Key) bool {
var ZeroKey = Key(common.Hash{}.Bytes())
func MakeHashFunc(hash string) Hasher {
func MakeHashFunc(hash string) SwarmHasher {
switch hash {
case "SHA256":
return crypto.SHA256.New
return func() SwarmHash { return &HashWithLength{crypto.SHA256.New()} }
case "SHA3":
return sha3.NewKeccak256
return func() SwarmHash { return &HashWithLength{sha3.NewKeccak256()} }
case "BMT":
return func() SwarmHash {
hasher := sha3.NewKeccak256
pool := bmt.NewTreePool(hasher, bmt.DefaultSegmentCount, bmt.DefaultPoolSize)
return bmt.New(pool)
}
}
return nil
}
......@@ -192,6 +199,13 @@ type Splitter interface {
A closed error signals process completion at which point the key can be considered final if there were no errors.
*/
Split(io.Reader, int64, chan *Chunk, *sync.WaitGroup, *sync.WaitGroup) (Key, error)
/* This is the first step in making files mutable (not chunks)..
Append allows adding more data chunks to the end of the already existsing file.
The key for the root chunk is supplied to load the respective tree.
Rest of the parameters behave like Split.
*/
Append(Key, io.Reader, chan *Chunk, *sync.WaitGroup, *sync.WaitGroup) (Key, error)
}
type Joiner interface {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment