diff --git a/.github/workflows/build_dependencies.yml b/.github/workflows/build_dependencies.yml
index 6ffc6455e..f02d8f7b9 100644
--- a/.github/workflows/build_dependencies.yml
+++ b/.github/workflows/build_dependencies.yml
@@ -164,6 +164,13 @@ jobs:
         fail_on_cache_miss: true
      if: ${{ inputs.testing == 'True' && github.event_name != 'pull_request' && steps.restore-cache.outputs.cache-hit != 'true' }}
 
+    - name: Check disk space and mount
+      run: |
+        sudo mkdir -p /mnt/tmp
+        sudo chmod 1777 /mnt/tmp
+        sudo mount --bind /mnt/tmp /tmp
+        df -h /tmp
+
     - name: Code Coverage Run
       run: |
         conan install \
@@ -177,6 +184,13 @@ jobs:
           .
         conan build .
       if: ${{ inputs.testing == 'True' && inputs.platform == 'ubuntu-22.04' && inputs.build-type == 'Debug' && inputs.malloc-impl == 'libc' && inputs.prerelease == 'False' }}
+      continue-on-error: true
+
+    - name: Check disk space if previous step failed
+      if: failure() && steps.code_coverage_run.outcome == 'failure'
+      run: |
+        echo "Code Coverage Run. Checking disk space..."
+        df -h
 
     - name: Upload coverage reports to Codecov
       uses: codecov/codecov-action@v3
@@ -201,3 +215,10 @@ jobs:
           --build missing \
           .
       if: ${{ inputs.testing == 'True' && ( inputs.platform != 'ubuntu-22.04' || inputs.build-type != 'Debug' || inputs.malloc-impl != 'libc' || inputs.prerelease != 'False' ) }}
+      continue-on-error: true
+
+    - name: Check disk space if previous step failed
+      if: failure() && steps.create_and_test_package.outcome == 'failure'
+      run: |
+        echo "Create and Test Package step failed. Checking disk space..."
+        df -h
diff --git a/.github/workflows/pr_conan_build.yml b/.github/workflows/pr_conan_build.yml
index 2c3202822..fa4e7b6e9 100644
--- a/.github/workflows/pr_conan_build.yml
+++ b/.github/workflows/pr_conan_build.yml
@@ -1,6 +1,7 @@
 name: Homestore PR Build
 
 on:
+  workflow_dispatch:
   pull_request:
     branches:
       - master
diff --git a/conanfile.py b/conanfile.py
index 71c6ffc40..e1219a87e 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -2,7 +2,7 @@
 
 class HomestoreConan(ConanFile):
     name = "homestore"
-    version = "3.7.3"
+    version = "3.7.4"
 
     homepage = "https://github.corp.ebay.com/SDS/homestore"
     description = "HomeStore"
diff --git a/src/engine/blkalloc/varsize_blk_allocator.cpp b/src/engine/blkalloc/varsize_blk_allocator.cpp
index bb0b7a696..874d4680f 100644
--- a/src/engine/blkalloc/varsize_blk_allocator.cpp
+++ b/src/engine/blkalloc/varsize_blk_allocator.cpp
@@ -368,7 +368,9 @@ void VarsizeBlkAllocator::fill_cache_in_portion(const blk_num_t portion_num, blk
             HS_DBG_ASSERT_GE(end_blk_id, b.start_bit, "Expected start bit to be smaller than portion end bit");
             HS_DBG_ASSERT_GE(end_blk_id, (b.start_bit + b.nbits - 1),
                              "Expected end bit to be smaller than portion end bit");
+#ifndef NDEBUG
             HISTOGRAM_OBSERVE(m_metrics, frag_pct_distribution, 100 / (static_cast< double >(b.nbits)));
+#endif
 
             // Fill the blk cache and keep accounting of number of blks added
             fill_req.start_blk_num = b.start_bit;
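A note on the `#ifndef NDEBUG` guards introduced in this change: `HISTOGRAM_OBSERVE` sits on the allocator hot path, and release builds conventionally define `NDEBUG`, so the sample is compiled out entirely there. A minimal self-contained sketch of the pattern, with `Histogram` and `record_fragmentation` as illustrative stand-ins for the sisl metrics machinery:

```cpp
#include <iostream>

struct Histogram {
    void observe(double sample) { std::cout << "observed " << sample << '\n'; }
};

Histogram frag_pct_distribution;

void record_fragmentation([[maybe_unused]] double nbits) {
#ifndef NDEBUG
    // Debug builds record the sample; builds compiled with -DNDEBUG (the usual
    // Release default) drop this call and its argument evaluation entirely.
    frag_pct_distribution.observe(100.0 / nbits);
#endif
}

int main() {
    record_fragmentation(8.0); // observes 12.5 in Debug builds, a no-op in Release
}
```

The registration side is gated the same way in varsize_blk_allocator.h below, so Debug and Release builds stay consistent about which metrics exist.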
diff --git a/src/engine/blkalloc/varsize_blk_allocator.h b/src/engine/blkalloc/varsize_blk_allocator.h
index 2f48d2288..456902e47 100644
--- a/src/engine/blkalloc/varsize_blk_allocator.h
+++ b/src/engine/blkalloc/varsize_blk_allocator.h
@@ -196,10 +196,10 @@ class BlkAllocMetrics : public sisl::MetricsGroup {
        REGISTER_COUNTER(num_alloc_partial, "Number of blk alloc partial allocations");
        REGISTER_COUNTER(num_retries, "Number of times it retried because of empty cache");
        REGISTER_COUNTER(num_blks_alloc_direct, "Number of blks alloc attempt directly because of empty cache");
-
+#ifndef NDEBUG
        REGISTER_HISTOGRAM(frag_pct_distribution, "Distribution of fragmentation percentage",
                           HistogramBucketsType(LinearUpto64Buckets));
-
+#endif
 
        register_me_to_farm();
    }
diff --git a/src/engine/blkstore/blkstore.hpp b/src/engine/blkstore/blkstore.hpp
index db6141ea3..b9a658360 100644
--- a/src/engine/blkstore/blkstore.hpp
+++ b/src/engine/blkstore/blkstore.hpp
@@ -168,9 +168,8 @@ class BlkStoreMetrics : public sisl::MetricsGroupWrapper {
                           HistogramBucketsType(LinearUpto64Buckets))
        REGISTER_HISTOGRAM(blkstore_cache_read_latency, "BlkStore cache read latency");
        REGISTER_HISTOGRAM(blkstore_cache_write_latency, "BlkStore cache write latency");
-       REGISTER_HISTOGRAM(blkstore_drive_write_latency, "BlkStore drive write latency");
-       REGISTER_HISTOGRAM(blkstore_drive_read_latency, "BlkStore drive read latency");
-       REGISTER_HISTOGRAM(blkstore_wbcache_hold_time, "Time data is held in writeback cache before flush");
+       REGISTER_HISTOGRAM(blkstore_drive_write_latency, "BlkStore drive write latency", HistogramBucketsType(OpLatecyBuckets));
+       REGISTER_HISTOGRAM(blkstore_drive_read_latency, "BlkStore drive read latency", HistogramBucketsType(OpLatecyBuckets));
 
        register_me_to_farm();
    }
diff --git a/src/engine/device/device.h b/src/engine/device/device.h
index b18d571e1..aa5d38281 100644
--- a/src/engine/device/device.h
+++ b/src/engine/device/device.h
@@ -450,8 +450,8 @@ class PhysicalDevMetrics : public sisl::MetricsGroupWrapper {
        REGISTER_COUNTER(drive_spurios_events, "Total number of spurious events per drive");
        REGISTER_COUNTER(drive_skipped_chunk_bm_writes, "Total number of skipped writes for chunk bitmap");
 
-       REGISTER_HISTOGRAM(drive_write_latency, "BlkStore drive write latency in us");
-       REGISTER_HISTOGRAM(drive_read_latency, "BlkStore drive read latency in us");
+       REGISTER_HISTOGRAM(drive_write_latency, "BlkStore drive write latency in us", HistogramBucketsType(OpLatecyBuckets));
+       REGISTER_HISTOGRAM(drive_read_latency, "BlkStore drive read latency in us", HistogramBucketsType(OpLatecyBuckets));
 
        REGISTER_HISTOGRAM(write_io_sizes, "Write IO Sizes", "io_sizes", {"io_direction", "write"},
                           HistogramBucketsType(ExponentialOfTwoBuckets));
diff --git a/src/engine/index/indx_mgr.cpp b/src/engine/index/indx_mgr.cpp
index 9b9b5493f..d3a6a641a 100644
--- a/src/engine/index/indx_mgr.cpp
+++ b/src/engine/index/indx_mgr.cpp
@@ -1208,7 +1208,6 @@ void IndxMgr::unmap_indx_async(const indx_req_ptr& ireq) {
 void IndxMgr::do_remaining_unmap(void* unmap_meta_blk_cntx, const sisl::byte_array& key, const seq_id_t seqid,
                                  const std::shared_ptr< homeds::btree::BtreeQueryCursor >& btree_cur) {
     /* persist superblock */
-    COUNTER_INCREMENT(m_metrics, indx_unmap_async_count, 1);
     write_cp_unmap_sb(unmap_meta_blk_cntx, key->size, seqid, *(btree_cur.get()), key->bytes);
 #ifdef _PRERELEASE
     if (homestore_flip->test_flip("unmap_post_sb_write_abort")) {
diff --git a/src/engine/index/indx_mgr.hpp b/src/engine/index/indx_mgr.hpp
index 98332a18c..bfb56f9b6 100644
--- a/src/engine/index/indx_mgr.hpp
+++ b/src/engine/index/indx_mgr.hpp
@@ -563,7 +563,6 @@ class StaticIndxMgr {
 class IndxMgrMetrics : public sisl::MetricsGroupWrapper {
 public:
     explicit IndxMgrMetrics(const char* const indx_name) : sisl::MetricsGroupWrapper{"Index", indx_name} {
-        REGISTER_COUNTER(indx_unmap_async_count, "Total number of async unmaps");
         REGISTER_HISTOGRAM(btree_msg_time, "time spent in sending message to btree", "btree_msg_time");
         register_me_to_farm();
     }
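Several histograms in this change switch to `HistogramBucketsType(OpLatecyBuckets)`, presumably bucket boundaries tuned for operation latencies rather than the generic defaults. A self-contained sketch of what bucketed latency recording does; the boundaries below are illustrative, not the actual `OpLatecyBuckets` values:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

// Upper bounds in microseconds; anything larger lands in the overflow bucket.
constexpr std::array<std::uint64_t, 8> kBoundsUs{10, 50, 100, 500, 1000, 5000, 10000, 100000};

struct LatencyHistogram {
    std::array<std::uint64_t, kBoundsUs.size() + 1> counts{};

    void observe(std::uint64_t latency_us) {
        std::size_t i = 0;
        while (i < kBoundsUs.size() && latency_us > kBoundsUs[i]) { ++i; }
        ++counts[i]; // the last slot acts as the +Inf bucket
    }
};

int main() {
    LatencyHistogram h;
    for (std::uint64_t us : {7u, 42u, 480u, 12000u}) { h.observe(us); }
    for (std::size_t i = 0; i < h.counts.size(); ++i) {
        std::cout << "bucket " << i << ": " << h.counts[i] << '\n';
    }
}
```

The same bucket override is applied consistently to the blkstore, device, volume, and logstore latency histograms in this change.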
diff --git a/src/homeblks/home_blks.hpp b/src/homeblks/home_blks.hpp
index 5424db83a..84eb613ed 100644
--- a/src/homeblks/home_blks.hpp
+++ b/src/homeblks/home_blks.hpp
@@ -105,7 +105,6 @@ typedef std::map< boost::uuids::uuid, std::shared_ptr< homestore::Volume > > vol
 class HomeBlksMetrics : public sisl::MetricsGroupWrapper {
 public:
     explicit HomeBlksMetrics(const char* homeblks_name) : sisl::MetricsGroupWrapper("HomeBlks", homeblks_name) {
-        REGISTER_HISTOGRAM(scan_volumes_latency, "Scan Volumes latency");
         REGISTER_COUNTER(boot_cnt, "boot cnt", sisl::_publish_as::publish_as_gauge);
         REGISTER_GAUGE(recovery_phase0_latency, "recovery phase0 latency");
         REGISTER_GAUGE(recovery_phase1_latency, "recovery phase1 latency");
diff --git a/src/homeblks/volume/volume.cpp b/src/homeblks/volume/volume.cpp
index 77ff6fee7..6fe530b6d 100644
--- a/src/homeblks/volume/volume.cpp
+++ b/src/homeblks/volume/volume.cpp
@@ -171,7 +171,7 @@ Volume::Volume(const vol_params& params) :
         throw std::runtime_error("shutdown in progress");
     }
     m_sobject = m_hb->sobject_mgr()->create_object("volume", params.vol_name,
-                                                   std::bind(&Volume::get_status, this, std::placeholders::_1));
+                                                  std::bind(&Volume::get_status, this, std::placeholders::_1));
 
     m_state = vol_state::UNINITED;
 }
@@ -190,7 +190,7 @@ Volume::Volume(meta_blk* mblk_cookie, sisl::byte_view sb_buf) :
     HS_REL_ASSERT_EQ(sb->magic, vol_sb_magic, "magic mismatch");
     m_hb = HomeBlks::safe_instance();
     m_sobject = m_hb->sobject_mgr()->create_object("volume", sb->vol_name,
-                                                   std::bind(&Volume::get_status, this, std::placeholders::_1));
+                                                  std::bind(&Volume::get_status, this, std::placeholders::_1));
 }
 
 void Volume::init() {
@@ -612,9 +612,11 @@ bool Volume::check_and_complete_req(const volume_req_ptr& vreq, const std::error
         HISTOGRAM_OBSERVE(m_metrics, volume_pieces_per_write, vreq->vc_req_cnt);
         HISTOGRAM_OBSERVE(m_metrics, volume_write_latency, latency_us);
     } else if (vreq->is_unmap()) {
+#ifndef NDEBUG
         HISTOGRAM_OBSERVE(m_metrics, volume_unmap_latency, latency_us);
         COUNTER_INCREMENT(m_metrics, volume_unmap_size_total, size);
         HISTOGRAM_OBSERVE(m_metrics, volume_unmap_size_distribution, size);
+#endif
     }
     if (latency_us > 5000000) { THIS_VOL_LOG(WARN, , vreq, "vol req took time {} us", latency_us); }
@@ -924,11 +926,11 @@ sisl::status_response Volume::get_status(const sisl::status_request& request) {
     auto active_indx_json = get_active_indx()->sobject()->run_callback(request).json;
     if (!active_indx_json.empty()) { response.json["index"] = active_indx_json; }
 
-        response.json["name"] = sobject()->name();
+    response.json["name"] = sobject()->name();
     response.json["type"] = sobject()->type();
     response.json["uuid"] = boost::lexical_cast< std::string >(get_uuid());
     response.json["state"] = is_offline() ? "Offline" : "Online";
-    response.json["size"]= get_size();
+    response.json["size"] = get_size();
 
     return response;
 }
diff --git a/src/homeblks/volume/volume.hpp b/src/homeblks/volume/volume.hpp
index 10c2fbbf5..e9477fdce 100644
--- a/src/homeblks/volume/volume.hpp
+++ b/src/homeblks/volume/volume.hpp
@@ -152,7 +152,9 @@ class VolumeMetrics : public sisl::MetricsGroupWrapper {
         sisl::MetricsGroupWrapper("Volume", vol_name), m_volume(vol) {
         REGISTER_COUNTER(volume_read_count, "Total Volume read operations", "volume_op_count", {"op", "read"});
         REGISTER_COUNTER(volume_write_count, "Total Volume write operations", "volume_op_count", {"op", "write"});
+#ifndef NDEBUG
         REGISTER_COUNTER(volume_unmap_count, "Total Volume unmap operations", "volume_op_count", {"op", "unmap"});
+#endif
         REGISTER_COUNTER(volume_outstanding_data_read_count, "Total Volume data outstanding read cnt",
                          sisl::_publish_as::publish_as_gauge);
         REGISTER_COUNTER(volume_outstanding_data_write_count,
@@ -168,36 +170,42 @@ class VolumeMetrics : public sisl::MetricsGroupWrapper {
         REGISTER_COUNTER(volume_write_size_total, "Total Volume data size written", "volume_data_size",
                          {"op", "write"});
         REGISTER_COUNTER(volume_read_size_total, "Total Volume data size read", "volume_data_size", {"op", "read"});
+#ifndef NDEBUG
         REGISTER_COUNTER(volume_unmap_size_total, "Total Volume unmap size written", "volume_unmap_size",
                          {"op", "unmap"});
+#endif
         REGISTER_GAUGE(volume_data_used_size, "Total Volume data used size");
         REGISTER_GAUGE(volume_index_used_size, "Total Volume index used size");
         REGISTER_GAUGE(volume_state, "Volume state");
-        REGISTER_HISTOGRAM(volume_read_latency, "Volume overall read latency", "volume_op_latency", {"op", "read"});
-        REGISTER_HISTOGRAM(volume_write_latency, "Volume overall write latency", "volume_op_latency", {"op", "write"});
+        REGISTER_HISTOGRAM(volume_read_latency, "Volume overall read latency", "volume_op_latency", {"op", "read"}, HistogramBucketsType(OpLatecyBuckets));
+        REGISTER_HISTOGRAM(volume_write_latency, "Volume overall write latency", "volume_op_latency", {"op", "write"}, HistogramBucketsType(OpLatecyBuckets));
+#ifndef NDEBUG
         REGISTER_HISTOGRAM(volume_unmap_latency, "Volume overall unmap latency", "volume_op_latency", {"op", "unmap"});
+#endif
         REGISTER_HISTOGRAM(volume_data_read_latency, "Volume data blocks read latency", "volume_data_op_latency",
-                           {"op", "read"});
+                           {"op", "read"}, HistogramBucketsType(OpLatecyBuckets));
         REGISTER_HISTOGRAM(volume_data_write_latency, "Volume data blocks write latency", "volume_data_op_latency",
-                           {"op", "write"});
+                           {"op", "write"}, HistogramBucketsType(OpLatecyBuckets));
         REGISTER_HISTOGRAM(volume_map_read_latency, "Volume mapping read latency", "volume_map_op_latency",
-                           {"op", "read"});
+                           {"op", "read"}, HistogramBucketsType(OpLatecyBuckets));
         REGISTER_HISTOGRAM(volume_map_write_latency, "Volume mapping write latency", "volume_map_op_latency",
-                           {"op", "write"});
-        REGISTER_HISTOGRAM(volume_blkalloc_latency, "Volume block allocation latency (in ns)");
+                           {"op", "write"}, HistogramBucketsType(OpLatecyBuckets));
+        REGISTER_HISTOGRAM(volume_blkalloc_latency, "Volume block allocation latency (in ns)", HistogramBucketsType(OpLatecyBuckets));
         REGISTER_HISTOGRAM(volume_pieces_per_write, "Number of individual pieces per write",
                            HistogramBucketsType(LinearUpto64Buckets));
         REGISTER_COUNTER(volume_read_on_hole, "Number of reads from empty lba");
         REGISTER_HISTOGRAM(volume_pieces_per_read, "Number of individual pieces per read",
                            HistogramBucketsType(LinearUpto64Buckets));
         REGISTER_HISTOGRAM(volume_write_size_distribution, "Distribution of volume write sizes",
-                           HistogramBucketsType(ExponentialOfTwoBuckets));
+                           HistogramBucketsType(OpSizeBuckets));
+#ifndef NDEBUG
         REGISTER_HISTOGRAM(volume_unmap_size_distribution, "Distribution of volume unmap sizes",
                            HistogramBucketsType(ExponentialOfTwoBuckets));
+#endif
         REGISTER_HISTOGRAM(volume_read_size_distribution, "Distribution of volume read sizes",
-                           HistogramBucketsType(ExponentialOfTwoBuckets));
+                           HistogramBucketsType(OpSizeBuckets));
 
         register_me_to_farm();
         attach_gather_cb(std::bind(&VolumeMetrics::on_gather, this));
     }
diff --git a/src/homelogstore/log_store_mgr.cpp b/src/homelogstore/log_store_mgr.cpp
index 64b705efa..4d7e849a4 100644
--- a/src/homelogstore/log_store_mgr.cpp
+++ b/src/homelogstore/log_store_mgr.cpp
@@ -188,8 +188,8 @@ HomeLogStoreMgrMetrics::HomeLogStoreMgrMetrics() : sisl::MetricsGroup("LogStores
                      {"op", "write"});
     REGISTER_COUNTER(logstore_read_count, "Total number of read requests to log stores", "logstore_op_count",
                      {"op", "read"});
-    REGISTER_HISTOGRAM(logstore_append_latency, "Logstore append latency", "logstore_op_latency", {"op", "write"});
-    REGISTER_HISTOGRAM(logstore_read_latency, "Logstore read latency", "logstore_op_latency", {"op", "read"});
+    REGISTER_HISTOGRAM(logstore_append_latency, "Logstore append latency", "logstore_op_latency", {"op", "write"}, HistogramBucketsType(OpLatecyBuckets));
+    REGISTER_HISTOGRAM(logstore_read_latency, "Logstore read latency", "logstore_op_latency", {"op", "read"}, HistogramBucketsType(OpLatecyBuckets));
     REGISTER_HISTOGRAM(logdev_flush_size_distribution, "Distribution of flush data size",
                        HistogramBucketsType(ExponentialOfTwoBuckets));
     REGISTER_HISTOGRAM(logdev_flush_records_distribution, "Distribution of num records to flush",
@@ -198,7 +198,7 @@ HomeLogStoreMgrMetrics::HomeLogStoreMgrMetrics() : sisl::MetricsGroup("LogStores
                        HistogramBucketsType(ExponentialOfTwoBuckets));
     REGISTER_HISTOGRAM(logdev_flush_done_msg_time_ns, "Logdev flush completion msg time in ns");
     REGISTER_HISTOGRAM(logdev_post_flush_processing_latency,
-                       "Logdev post flush processing (including callbacks) latency");
+                       "Logdev post flush processing (including callbacks) latency", HistogramBucketsType(OpLatecyBuckets));
     REGISTER_HISTOGRAM(logdev_fsync_time_us, "Logdev fsync completion time in us");
 
     register_me_to_farm();
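All of the `OpLatecyBuckets` histograms are fed microsecond samples, e.g. `latency_us` in `Volume::check_and_complete_req`. A minimal sketch of producing such a sample with `std::chrono`; `observe_latency_us` is a stand-in for `HISTOGRAM_OBSERVE`:

```cpp
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

// Stand-in for HISTOGRAM_OBSERVE(m_metrics, volume_write_latency, latency_us).
void observe_latency_us(std::uint64_t us) { std::cout << "latency_us=" << us << '\n'; }

int main() {
    const auto start = std::chrono::steady_clock::now();
    std::this_thread::sleep_for(std::chrono::milliseconds(2)); // stand-in for the IO
    const auto elapsed = std::chrono::steady_clock::now() - start;
    observe_latency_us(static_cast<std::uint64_t>(
        std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()));
}
```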