From b7311a856f65e541c29543b97415f5b2c88e79b6 Mon Sep 17 00:00:00 2001 From: akash Date: Thu, 7 Dec 2023 21:43:14 +0600 Subject: [PATCH 1/3] update the inplace merge sstable process --- .gitignore | 3 +- docs/TODO.md | 5 +- internal/core/PriorityQueue.go | 1 + .../backgroundprocess/compaction.go | 147 ++++++++++++++++-- .../storageengine/channels/sharedchannel.go | 6 + .../configs/storageengineconfigs.go | 2 +- internal/storageengine/memtable/memtable.go | 78 ++++++---- internal/storageengine/sstable/sstable.go | 32 +++- internal/storageengine/store/Store.go | 3 +- main.go | 55 ++++--- 10 files changed, 261 insertions(+), 71 deletions(-) create mode 100644 internal/core/PriorityQueue.go diff --git a/.gitignore b/.gitignore index 5d252d7..8a05c0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -storage \ No newline at end of file +storage +*.pprof \ No newline at end of file diff --git a/docs/TODO.md b/docs/TODO.md index 37fbe1e..eec450e 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -1,5 +1,8 @@ +- Invoke cmd to access in memory db - Replace with bloom filters (fix length for block but think about length for sstable) - what about using btree instead of sstable - how rocksdb opens hundrade of table at once - transaction : keep snapshot of memtable -- locking issue \ No newline at end of file +- locking issue +- merge sorts while doing compaction +- don't make large arrary or map while doing compaction diff --git a/internal/core/PriorityQueue.go b/internal/core/PriorityQueue.go new file mode 100644 index 0000000..9a8bc95 --- /dev/null +++ b/internal/core/PriorityQueue.go @@ -0,0 +1 @@ +package core diff --git a/internal/storageengine/backgroundprocess/compaction.go b/internal/storageengine/backgroundprocess/compaction.go index 6f898f8..195d852 100644 --- a/internal/storageengine/backgroundprocess/compaction.go +++ b/internal/storageengine/backgroundprocess/compaction.go @@ -1,6 +1,7 @@ package backgroundprocess import ( + "container/heap" "pkvstore/internal/storageengine/channels" "pkvstore/internal/storageengine/configs" "pkvstore/internal/storageengine/lsmtree" @@ -15,27 +16,43 @@ type Compaction struct { } func NewCompaction(lsmtree *lsmtree.LSMTree) *Compaction { - return &Compaction{ + compaction := &Compaction{ lsmTree: lsmtree, sharedChan: channels.GetSharedChannel(), } + + go compaction.listenFlushMemtable() + go compaction.listenToCompact() + + return compaction } -func (compaction *Compaction) ListenToCompact() { - for event := range compaction.sharedChan.NewMutationEventChannel { - if event < 1 || !compaction.lsmTree.MemTable.ShouldFlush() { +func (compaction *Compaction) listenToCompact() { + + for event := range compaction.sharedChan.CompactionEvent { + + if event < 1 { continue } - compaction.flushMemtable() compaction.tryCompactionProcess() } } -func (compaction *Compaction) flushMemtable() { - oldMemtable := compaction.lsmTree.MemTable.ReplaceNewTable() - newSSTable := createSSTableFromMemtable(oldMemtable) - compaction.lsmTree.SSTables[newSSTable.Header.Level] = append(compaction.lsmTree.SSTables[newSSTable.Header.Level], newSSTable) +func (compaction *Compaction) listenFlushMemtable() { + + for event := range compaction.sharedChan.FlushMemtableEvent { + + if event < 1 || compaction.lsmTree.MemTable.ReadOnlyTable == nil { + continue + } + + newSSTable := createSSTableFromMemtable(compaction.lsmTree.MemTable.ReadOnlyTable) + compaction.lsmTree.SSTables[newSSTable.Header.Level] = append(compaction.lsmTree.SSTables[newSSTable.Header.Level], newSSTable) + compaction.lsmTree.MemTable.ClearReadOnlyMemtable() + + compaction.sharedChan.CompactionEvent <- 1 + } } func (compaction *Compaction) tryCompactionProcess() { @@ -57,16 +74,16 @@ func (compaction *Compaction) tryCompactionProcess() { } func (compaction *Compaction) mergeAndReplaceSSTables(currentLevel, newLevel uint8) { - mergedSSTable := mergeSSTables(compaction.lsmTree.SSTables[currentLevel], newLevel) + mergedSSTable := mergeGetSSTables(compaction.lsmTree.SSTables[currentLevel], newLevel) compaction.lsmTree.SSTables[currentLevel] = make([]*sstable.SSTable, 0) compaction.lsmTree.SSTables[newLevel] = append(compaction.lsmTree.SSTables[newLevel], mergedSSTable) } -func mergeSSTables(sstables []*sstable.SSTable, newLevel uint8) *sstable.SSTable { +func mergeSSTables(sstablesInLevel []*sstable.SSTable, newLevel uint8) *sstable.SSTable { keyValues := make(map[string]*sstable.SSTableEntry) // Deduplication - for _, ssTable := range sstables { + for _, ssTable := range sstablesInLevel { for _, block := range ssTable.Blocks { for _, entry := range block.Entries { keyValues[entry.Key] = entry @@ -87,13 +104,115 @@ func mergeSSTables(sstables []*sstable.SSTable, newLevel uint8) *sstable.SSTable return sstable.CreateSSTable(mergedSSTableEntries, newLevel) } -func createSSTableFromMemtable(memTable *memtable.FlushableMemTable) *sstable.SSTable { +func createSSTableFromMemtable(memTable map[string]*memtable.MemTableEntry) *sstable.SSTable { sstableEntries := make([]*sstable.SSTableEntry, 0) config := configs.GetStorageEngineConfig() - for k, v := range memTable.Table { + for k, v := range memTable { sstableEntries = append(sstableEntries, sstable.NewSSTableEntry(k, v.Value, v.IsTombstone)) } + sort.Slice(sstableEntries, func(i, j int) bool { + return sstableEntries[i].Key < sstableEntries[j].Key + }) + return sstable.CreateSSTable(sstableEntries, uint8(config.LSMTreeConfig.FirstLevel)) } + +type MergeItem struct { + Entry *sstable.SSTableEntry + SSTableID int + BlockID int + EntryID int + Index int +} + +type MergePriorityQueue []*MergeItem + +func (mpq MergePriorityQueue) Len() int { + return len(mpq) +} + +func (mpq MergePriorityQueue) Less(i, j int) bool { + if mpq[i].Entry.Key == mpq[j].Entry.Key { + return mpq[i].SSTableID > mpq[j].SSTableID + } + return mpq[i].Entry.Key < mpq[j].Entry.Key +} + +func (pq MergePriorityQueue) Swap(i, j int) { + pq[i], pq[j] = pq[j], pq[i] + pq[i].Index = i + pq[j].Index = j +} + +func (pq *MergePriorityQueue) Push(x any) { + n := len(*pq) + item := x.(*MergeItem) + item.Index = n + *pq = append(*pq, item) +} + +func (pq *MergePriorityQueue) Pop() any { + old := *pq + n := len(old) + item := old[n-1] + old[n-1] = nil // avoid memory leak + item.Index = -1 // for safety + *pq = old[0 : n-1] + return item +} + +func mergeGetSSTables(sstablesInLevel []*sstable.SSTable, newLevel uint8) *sstable.SSTable { + frontier := make(MergePriorityQueue, 0) + numberEntries := uint(0) + + for sstableID, ssTable := range sstablesInLevel { + numberEntries += ssTable.Header.NumberEntries + + firstBlock := ssTable.Blocks[0] + heap.Push(&frontier, &MergeItem{ + Entry: firstBlock.Entries[0], + SSTableID: sstableID, + BlockID: 0, + EntryID: 0, + }) + } + + newSSTable := sstable.OpenSSTable(newLevel, numberEntries) + lastKey := "" + + for len(frontier) > 0 { + item := heap.Pop(&frontier).(*MergeItem) + + // Deduplication + if lastKey != item.Entry.Key { + newSSTable.AddEntry(item.Entry) + lastKey = item.Entry.Key + } + + if item.EntryID+1 < len(sstablesInLevel[item.SSTableID].Blocks[item.BlockID].Entries) { + block := sstablesInLevel[item.SSTableID].Blocks[item.BlockID] + heap.Push(&frontier, &MergeItem{ + Entry: block.Entries[item.EntryID+1], + SSTableID: item.SSTableID, + BlockID: item.BlockID, + EntryID: item.EntryID + 1, + }) + + continue + } + + if item.BlockID+1 < len(sstablesInLevel[item.SSTableID].Blocks) { + block := sstablesInLevel[item.SSTableID].Blocks[item.BlockID+1] + heap.Push(&frontier, &MergeItem{ + Entry: block.Entries[0], + SSTableID: item.SSTableID, + BlockID: item.BlockID + 1, + EntryID: 0, + }) + } + } + + return newSSTable.CompleteSSTableCreation() +} diff --git a/internal/storageengine/channels/sharedchannel.go b/internal/storageengine/channels/sharedchannel.go index 2d6fa04..78622f7 100644 --- a/internal/storageengine/channels/sharedchannel.go +++ b/internal/storageengine/channels/sharedchannel.go @@ -4,11 +4,17 @@ import "sync" type SharedChannel struct { NewMutationEventChannel chan int + SwitchMemtableEvent chan int + FlushMemtableEvent chan int + CompactionEvent chan int } func newSharedChannel() *SharedChannel { return &SharedChannel{ NewMutationEventChannel: make(chan int, 10000), + SwitchMemtableEvent: make(chan int, 10000), + FlushMemtableEvent: make(chan int, 100), + CompactionEvent: make(chan int, 10), } } diff --git a/internal/storageengine/configs/storageengineconfigs.go b/internal/storageengine/configs/storageengineconfigs.go index a092a9e..9400061 100644 --- a/internal/storageengine/configs/storageengineconfigs.go +++ b/internal/storageengine/configs/storageengineconfigs.go @@ -40,7 +40,7 @@ func NewStorageEngineConfig() *StorageEngineConfig { config.SSTableConfig.BlockCapacity = 2048 //2048 config.SSTableConfig.BlockFilterFalsePositive = 0.001 // 1 in 1000, 3.59KiB, hash function 10 - config.MemTableConfig.MaxCapacity = 16384 //4096 + config.MemTableConfig.MaxCapacity = 2 //4096 return config } diff --git a/internal/storageengine/memtable/memtable.go b/internal/storageengine/memtable/memtable.go index 2517538..5c8e7b9 100644 --- a/internal/storageengine/memtable/memtable.go +++ b/internal/storageengine/memtable/memtable.go @@ -1,7 +1,9 @@ package memtable import ( + "os" "pkvstore/internal/models" + "pkvstore/internal/storageengine/channels" "pkvstore/internal/storageengine/configs" "sync" ) @@ -19,35 +21,26 @@ func NewMemTableEntry(value string) *MemTableEntry { } type MemTable struct { - Table map[string]*MemTableEntry - size uint32 - mutex sync.RWMutex -} - -type FlushableMemTable struct { - Table map[string]*MemTableEntry - size uint32 + Table map[string]*MemTableEntry + ReadOnlyTable map[string]*MemTableEntry + size uint32 + mutex sync.RWMutex + sharedChannel *channels.SharedChannel } func NewMemTable() *MemTable { - return &MemTable{ - Table: make(map[string]*MemTableEntry), - size: 0, + m := &MemTable{ + Table: make(map[string]*MemTableEntry), + ReadOnlyTable: nil, + size: 0, + sharedChannel: channels.GetSharedChannel(), } -} -func NewFlushableMemTable(table map[string]*MemTableEntry, size uint32) *FlushableMemTable { - return &FlushableMemTable{ - Table: table, - size: size, - } -} + var wg sync.WaitGroup -func (m *MemTable) Put(key string, value string) { - m.mutex.Lock() - defer m.mutex.Unlock() + go m.ListenSwitchTableEvent(&wg) - m.Table[key] = NewMemTableEntry(value) + return m } func (m *MemTable) Get(key string) *models.Result { @@ -66,6 +59,13 @@ func (m *MemTable) Get(key string) *models.Result { return models.NewNotFoundResult() } +func (m *MemTable) Put(key string, value string) { + m.mutex.Lock() + defer m.mutex.Unlock() + + m.Table[key] = NewMemTableEntry(value) +} + func (m *MemTable) Delete(key string) { m.mutex.Lock() defer m.mutex.Unlock() @@ -75,25 +75,41 @@ func (m *MemTable) Delete(key string) { } func (m *MemTable) Size() int { - m.mutex.Lock() - defer m.mutex.Unlock() return len(m.Table) } -func (m *MemTable) ShouldFlush() bool { +func (m *MemTable) ListenSwitchTableEvent(wg *sync.WaitGroup) { + + defer wg.Done() + + exitSignal := make(chan os.Signal, 1) + config := configs.GetStorageEngineConfig() - return m.Size() >= config.MemTableConfig.MaxCapacity + for { + select { + case switchevent := <-m.sharedChannel.SwitchMemtableEvent: + if switchevent < 0 || m.Size() < config.MemTableConfig.MaxCapacity || m.ReadOnlyTable != nil { + continue + } + + m.swtichMemtable() + + m.sharedChannel.FlushMemtableEvent <- 1 + case <-exitSignal: + return + } + } } -func (m *MemTable) ReplaceNewTable() *FlushableMemTable { +func (m *MemTable) swtichMemtable() { m.mutex.Lock() defer m.mutex.Unlock() - oldTable := NewFlushableMemTable(m.Table, m.size) - + m.ReadOnlyTable = m.Table m.Table = make(map[string]*MemTableEntry) - m.size = 0 +} - return oldTable +func (m *MemTable) ClearReadOnlyMemtable() { + m.ReadOnlyTable = nil } diff --git a/internal/storageengine/sstable/sstable.go b/internal/storageengine/sstable/sstable.go index c3f435c..2afc32c 100644 --- a/internal/storageengine/sstable/sstable.go +++ b/internal/storageengine/sstable/sstable.go @@ -15,6 +15,7 @@ type SSTableHeader struct { Version string BlockSize uint32 NumberEntries uint + sealed bool } // SSTableEntry represents an entry in an SSTable. @@ -26,7 +27,7 @@ type SSTableEntry struct { // SSTableBlock represents a block in an SSTable. type SSTableBlock struct { - sequence int + Sequence int Anchor *SSTableEntry Entries []*SSTableEntry Filter *core.BloomFilter @@ -53,6 +54,7 @@ func newSSTableHeader(level uint8, version string, blockSize uint32, numberEntri Version: version, BlockSize: blockSize, NumberEntries: numberEntries, + sealed: false, } } @@ -70,7 +72,7 @@ func newSSTableBlock(sequence int) *SSTableBlock { configs := configs.GetStorageEngineConfig() return &SSTableBlock{ - sequence: sequence, + Sequence: sequence, Entries: make([]*SSTableEntry, 0), Filter: core.NewBloomFilter(uint(configs.SSTableConfig.BlockCapacity), configs.SSTableConfig.BlockFilterFalsePositive, "optimal"), } @@ -94,6 +96,32 @@ func newSSTable(level uint8, numberOfEntries uint) *SSTable { } } +// region +func OpenSSTable(level uint8, NumberEntries uint) *SSTable { + configs := configs.GetStorageEngineConfig() + return &SSTable{ + Header: newSSTableHeader(level, configs.SSTableConfig.Version, uint32(configs.SSTableConfig.BlockCapacity), 0), + Blocks: make([]*SSTableBlock, 0), + Filter: core.NewBloomFilter(NumberEntries/10, configs.SSTableConfig.FilterFalsePositive, "optimal"), + } +} + +func (sstable *SSTable) AddEntry(newSSTableEntry *SSTableEntry) { + if sstable.Header.sealed { + return + } + + sstable.Header.NumberEntries += 1 + sstable.addEntry(newSSTableEntry) +} + +func (sstable *SSTable) CompleteSSTableCreation() *SSTable { + sstable.Header.sealed = true + return sstable +} + +//end region + // CreateSSTable creates an SSTable from SSTableEntries. func CreateSSTable(sstableEntries []*SSTableEntry, level uint8) *SSTable { newSSTable := newSSTable(level, uint(len(sstableEntries))) diff --git a/internal/storageengine/store/Store.go b/internal/storageengine/store/Store.go index bb1fa35..c0f03c3 100644 --- a/internal/storageengine/store/Store.go +++ b/internal/storageengine/store/Store.go @@ -16,7 +16,6 @@ type Store struct { func NewStore() *Store { lsm := lsmtree.NewLSMTree() compaction := backgroundprocess.NewCompaction(lsm) - go compaction.ListenToCompact() return &Store{ lsmTree: lsm, @@ -50,7 +49,7 @@ func (store *Store) Delete(key string) { func (store *Store) notifyWriteOperation() { - store.sharedChan.NewMutationEventChannel <- 1 + store.sharedChan.SwitchMemtableEvent <- 1 } func (store *Store) notifyReadOperation() { diff --git a/main.go b/main.go index 97e02e8..e6c1064 100644 --- a/main.go +++ b/main.go @@ -2,9 +2,7 @@ package main import ( "fmt" - "log" "math/rand" - "net/http" _ "net/http/pprof" "pkvstore/internal/storageengine/store" "strconv" @@ -17,46 +15,65 @@ func generateAndPutData(current int, lsm *store.Store, wg *sync.WaitGroup) { var key string - for i := int64(0); i < 10000; i++ { + for i := int64(0); i < 10; i++ { randomNumber := rand.Int63n(1<<63 - 1) key = strconv.FormatInt(randomNumber, 10) value := strconv.FormatInt(randomNumber, 10) - if i%2 == 0 { - lsm.Put(key, value) - } else { - lsm.Get(key) - } - + lsm.Put(key, value) } fmt.Printf("Goroutine %d completed\n", current) } func main() { // Start profiling server - go func() { - log.Println(http.ListenAndServe("localhost:6060", nil)) - }() + // go func() { + // log.Println(http.ListenAndServe("localhost:6060", nil)) + // }() store := store.NewStore() - store.Put("name", "akash") + // store.Put("name", "akash") + + // for i := int64(0); i < 2; i++ { + // // randomNumber := rand.Int63n(1<<63 - 1) + // // key := strconv.FormatInt(randomNumber, 10) + // // value := strconv.FormatInt(randomNumber, 10) + + // store.Put("name", "akash") + // store.Put("address", "dhanmandi") + // } + store.Put("name1", "akash") + + store.Put("address1", "dhanmandi") + + store.Put("name2", "akash") + + store.Put("address2", "dhanmandi") + + store.Put("name3", "akash") + + store.Put("address3", "dhanmandi") + + store.Put("name", "ckash") + fmt.Println("name: ", store.Get("name")) // Output: one + +} + +func newUser(store *store.Store, i int) { start := time.Now() - numGoroutines := 1000 + numGoroutines := 100 var wg sync.WaitGroup wg.Add(numGoroutines) + for j := 0; j < numGoroutines; j++ { go generateAndPutData(j, store, &wg) } wg.Wait() - fmt.Println("name: ", store.Get("name")) // Output: one - end := time.Now() duration := end.Sub(start) - fmt.Println("duration: ", duration.Minutes()) - - time.Sleep(time.Hour) + fmt.Println("duration: ", i, duration.Minutes()) } From 6e7b2bc30dede4582323e70de1edb271e0366981 Mon Sep 17 00:00:00 2001 From: akash Date: Tue, 12 Dec 2023 00:52:24 +0600 Subject: [PATCH 2/3] ok --- sop.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 sop.txt diff --git a/sop.txt b/sop.txt new file mode 100644 index 0000000..549f3ad --- /dev/null +++ b/sop.txt @@ -0,0 +1,15 @@ +In the face of the ongoing exponential data growth, the pressing questions arise: How can we read and write this increasing volume efficiently? Furthermore, how can we ensure the availability and reliability of this stored data? Inspired by these questions, my present research is dedicated to systems that address these challenges. + +Looking back at my undergraduate studies, a comprehensive curriculum introduced me to the captivating domain of distributed systems. As part of the assignment, I worked on a simple word count problem using Hadoop (HDS and MapReduce). Observing the harmony and coordination among these interconnected systems as they solve complex problems has sparked my passion. Motivated by a growing curiosity, I found myself navigating the details of Hadoop's inner workings, immersing deeply into the foundational papers that shaped its evolution—specifically, works on the Google File System and MapReduce. This exploration shifted me towards researching large-scale storage and processing systems. + +While pursuing my undergraduate degree, I had the opportunity to research horizontal auto-scaling of cloud infrastructure. Firstly, I discovered that many small businesses went online and started using cloud infrastructure during the COVID-19 lockdown. Still, most cloud providers didn’t provide proactive auto-scaling for all resources, and reactive scaling was challenging to manage for them. So, during my thesis, I led a team of two to work on proactive auto-scaling of cloud resources. We explored the publicly available web traces of websites, such as NASA and Wikipedia, and noticed a web usage pattern. Based on our observation, we designed a heuristic algorithm based on exponential smoothing to predict the pattern using system usage logs. Then, we designed a system consisting of five interconnected services based on the MAPE loop. They perform the required steps, such as collecting, storing, and analyzing the system logs and planning and executing the scaling decision. Segregation of the services provided us the options to run them on the optimized devices, such as running the analyzer on GPU or keeping the logs cache near the analyzer. This research led to a peer-reviewed journal paper titled An event-driven and lightweight proactive auto-scaling architecture for cloud applications. This architecture scales resources proactively based on real-time demand patterns, achieving cost efficiency, a seamless user experience, a smaller memory footprint, and low CPU usage. + +During my tenure as an R&D software engineer at Cefalo, I worked on a project, Spenn, which provides blockchain-based free mobile banking services across Africa. On top of the blockchain layer, partitioned databases acted as a global state for the blockchain ledger. This setup posed a risk of inconsistency because updates needed to be made in multiple places during an operation, both in the databases and blockchain. In the realm of financial technology applications, maintaining atomicity, consistency, and isolation is crucial. I played a part in addressing this challenge by exploring database transaction mechanisms and weighing their advantages and disadvantages. In the end, my choice leaned towards the saga transaction mechanism. Its long-running and non-blocking nature aligns well with blockchain's inherent time-consuming aspects, particularly in verifying and incorporating new operations. Consequently, this contribution ensured eventual consistency at the expected level. Otherwise, this could result in a lack of trust in the reliability of our service, hindering its adoption among African users. + +All my experiences collectively shaped my research interests and motivated me to pursue graduate studies. In preparation, I am currently doing a hands-on system design of a storage system focusing on enhancing the read-write performance in pluggable storage engines. I am experimenting with various data structures, compression algorithms, and memory for these storage systems. Moreover, I am looking into ensuring consistency and fault tolerance when these storage engines are deployed in heterogeneous multi-master systems environments. + +Furthermore, I had the privilege of contributing as one of the problem setters and judges in the esteemed SUST Inter-University Programming Contest. While authoring and reviewing the problems, I embraced the challenge of articulating them in a manner that allows contestants to understand the problem in the blink of an eye and invest their whole time in solving the problem. Besides, I also got an opportunity to take on the role of a trainer for Fresher Software Engineers at Cefalo, where my approach involved simplifying complex concepts, fostering comprehensive knowledge sharing, and progressively presenting them with real-world problems of escalating difficulty levels. + +The University of Texas Arlington’s robust research program and Lab support in this field seem tailor-made for my goal. ACES, DBXLAB, and Adaptive and Scalable Systems LAB can provide the perfect and cherished environment to collaborate with distinguished professors and peers who share my passion. Their past and current work indicates its members’ unique strengths in this field. I would be excited to work with Song Jiang, Hui Lui, Jia Rao, and Huang Jong. Specifically, Song Jiang has made outstanding contributions to storage research, especially in his recent work TurboHash, enhancing the scalability and reducing the search scope of a hash table. The sharding mechanism discussed in this work is captivating to me due to its utilization of 256B PMEM access. My research interests also considerably overlap with Hui Lu and Jia Rao’s work, such as P2CACHE, where they introduced an efficient buffering mechanism for well-tested kernel file systems to leverage persistent memory (PM). I found it fascinating since it doesn't require any modifications to applications or kernel systems but provide fast read and write performance on kernel file system. + +After graduate studies, I aim to pursue a career in academia so that I can continue to develop research to address these challenges and more. I also hope that my research will play a role in shaping the future of information technology, paving the way for innovative applications and services reliant on vast amounts of data. Pursuing further education at the University of Texas Arlington would be a significant stride toward achieving this goal. From 36780ab4935fe9dfa3f09180094b8960bb36df96 Mon Sep 17 00:00:00 2001 From: akash Date: Tue, 12 Dec 2023 19:11:27 +0600 Subject: [PATCH 3/3] update main and readme --- README.md | 2 +- main.go | 39 +++++++++++++++------------------------ sop.txt | 15 --------------- 3 files changed, 16 insertions(+), 40 deletions(-) delete mode 100644 sop.txt diff --git a/README.md b/README.md index 827b815..0519ecb 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# lsm-storage-engine + \ No newline at end of file diff --git a/main.go b/main.go index e6c1064..6af26e6 100644 --- a/main.go +++ b/main.go @@ -10,21 +10,6 @@ import ( "time" ) -func generateAndPutData(current int, lsm *store.Store, wg *sync.WaitGroup) { - defer wg.Done() - - var key string - - for i := int64(0); i < 10; i++ { - randomNumber := rand.Int63n(1<<63 - 1) - key = strconv.FormatInt(randomNumber, 10) - value := strconv.FormatInt(randomNumber, 10) - - lsm.Put(key, value) - } - fmt.Printf("Goroutine %d completed\n", current) -} - func main() { // Start profiling server // go func() { @@ -32,16 +17,7 @@ func main() { // }() store := store.NewStore() - // store.Put("name", "akash") - - // for i := int64(0); i < 2; i++ { - // // randomNumber := rand.Int63n(1<<63 - 1) - // // key := strconv.FormatInt(randomNumber, 10) - // // value := strconv.FormatInt(randomNumber, 10) - // store.Put("name", "akash") - // store.Put("address", "dhanmandi") - // } store.Put("name1", "akash") store.Put("address1", "dhanmandi") @@ -60,6 +36,21 @@ func main() { } +func generateAndPutData(current int, lsm *store.Store, wg *sync.WaitGroup) { + defer wg.Done() + + var key string + + for i := int64(0); i < 10; i++ { + randomNumber := rand.Int63n(1<<63 - 1) + key = strconv.FormatInt(randomNumber, 10) + value := strconv.FormatInt(randomNumber, 10) + + lsm.Put(key, value) + } + fmt.Printf("Goroutine %d completed\n", current) +} + func newUser(store *store.Store, i int) { start := time.Now() diff --git a/sop.txt b/sop.txt deleted file mode 100644 index 549f3ad..0000000 --- a/sop.txt +++ /dev/null @@ -1,15 +0,0 @@ -In the face of the ongoing exponential data growth, the pressing questions arise: How can we read and write this increasing volume efficiently? Furthermore, how can we ensure the availability and reliability of this stored data? Inspired by these questions, my present research is dedicated to systems that address these challenges. - -Looking back at my undergraduate studies, a comprehensive curriculum introduced me to the captivating domain of distributed systems. As part of the assignment, I worked on a simple word count problem using Hadoop (HDS and MapReduce). Observing the harmony and coordination among these interconnected systems as they solve complex problems has sparked my passion. Motivated by a growing curiosity, I found myself navigating the details of Hadoop's inner workings, immersing deeply into the foundational papers that shaped its evolution—specifically, works on the Google File System and MapReduce. This exploration shifted me towards researching large-scale storage and processing systems. - -While pursuing my undergraduate degree, I had the opportunity to research horizontal auto-scaling of cloud infrastructure. Firstly, I discovered that many small businesses went online and started using cloud infrastructure during the COVID-19 lockdown. Still, most cloud providers didn’t provide proactive auto-scaling for all resources, and reactive scaling was challenging to manage for them. So, during my thesis, I led a team of two to work on proactive auto-scaling of cloud resources. We explored the publicly available web traces of websites, such as NASA and Wikipedia, and noticed a web usage pattern. Based on our observation, we designed a heuristic algorithm based on exponential smoothing to predict the pattern using system usage logs. Then, we designed a system consisting of five interconnected services based on the MAPE loop. They perform the required steps, such as collecting, storing, and analyzing the system logs and planning and executing the scaling decision. Segregation of the services provided us the options to run them on the optimized devices, such as running the analyzer on GPU or keeping the logs cache near the analyzer. This research led to a peer-reviewed journal paper titled An event-driven and lightweight proactive auto-scaling architecture for cloud applications. This architecture scales resources proactively based on real-time demand patterns, achieving cost efficiency, a seamless user experience, a smaller memory footprint, and low CPU usage. - -During my tenure as an R&D software engineer at Cefalo, I worked on a project, Spenn, which provides blockchain-based free mobile banking services across Africa. On top of the blockchain layer, partitioned databases acted as a global state for the blockchain ledger. This setup posed a risk of inconsistency because updates needed to be made in multiple places during an operation, both in the databases and blockchain. In the realm of financial technology applications, maintaining atomicity, consistency, and isolation is crucial. I played a part in addressing this challenge by exploring database transaction mechanisms and weighing their advantages and disadvantages. In the end, my choice leaned towards the saga transaction mechanism. Its long-running and non-blocking nature aligns well with blockchain's inherent time-consuming aspects, particularly in verifying and incorporating new operations. Consequently, this contribution ensured eventual consistency at the expected level. Otherwise, this could result in a lack of trust in the reliability of our service, hindering its adoption among African users. - -All my experiences collectively shaped my research interests and motivated me to pursue graduate studies. In preparation, I am currently doing a hands-on system design of a storage system focusing on enhancing the read-write performance in pluggable storage engines. I am experimenting with various data structures, compression algorithms, and memory for these storage systems. Moreover, I am looking into ensuring consistency and fault tolerance when these storage engines are deployed in heterogeneous multi-master systems environments. - -Furthermore, I had the privilege of contributing as one of the problem setters and judges in the esteemed SUST Inter-University Programming Contest. While authoring and reviewing the problems, I embraced the challenge of articulating them in a manner that allows contestants to understand the problem in the blink of an eye and invest their whole time in solving the problem. Besides, I also got an opportunity to take on the role of a trainer for Fresher Software Engineers at Cefalo, where my approach involved simplifying complex concepts, fostering comprehensive knowledge sharing, and progressively presenting them with real-world problems of escalating difficulty levels. - -The University of Texas Arlington’s robust research program and Lab support in this field seem tailor-made for my goal. ACES, DBXLAB, and Adaptive and Scalable Systems LAB can provide the perfect and cherished environment to collaborate with distinguished professors and peers who share my passion. Their past and current work indicates its members’ unique strengths in this field. I would be excited to work with Song Jiang, Hui Lui, Jia Rao, and Huang Jong. Specifically, Song Jiang has made outstanding contributions to storage research, especially in his recent work TurboHash, enhancing the scalability and reducing the search scope of a hash table. The sharding mechanism discussed in this work is captivating to me due to its utilization of 256B PMEM access. My research interests also considerably overlap with Hui Lu and Jia Rao’s work, such as P2CACHE, where they introduced an efficient buffering mechanism for well-tested kernel file systems to leverage persistent memory (PM). I found it fascinating since it doesn't require any modifications to applications or kernel systems but provide fast read and write performance on kernel file system. - -After graduate studies, I aim to pursue a career in academia so that I can continue to develop research to address these challenges and more. I also hope that my research will play a role in shaping the future of information technology, paving the way for innovative applications and services reliant on vast amounts of data. Pursuing further education at the University of Texas Arlington would be a significant stride toward achieving this goal.