Merge remote-tracking branch 'upstream/master' into HEAD
adamnovak committed Nov 8, 2024
2 parents 8e0373a + 62ccb55 commit a14a8d5
Showing 55 changed files with 884 additions and 1,012 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/testmac.yml
@@ -15,7 +15,7 @@ on:
jobs:
testmac:
name: Test on Mac
runs-on: macos-12
runs-on: macos-15

steps:
- name: Use cache
@@ -26,14 +26,14 @@ jobs:
lib
include
bin
key: ${{ runner.os }}-12-${{ github.ref }}
key: ${{ runner.os }}-15-${{ github.ref }}
# Restore keys are a "list", but really only a multiline string is
# accepted. Also we match by prefix. And the most recent cache is
# used, not the most specific.
# See: https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
restore-keys: |
${{ runner.os }}-12-${{ github.base_ref }}
${{ runner.os }}-12
${{ runner.os }}-15-${{ github.base_ref }}
${{ runner.os }}-15
- name: Checkout code without submodules
uses: actions/checkout@v2
4 changes: 2 additions & 2 deletions .gitlab-ci.yml
@@ -86,8 +86,6 @@ local-build-test-job:
script:
- THREADS=8
- nvm version
- python3 ./configure.py
- source ./source_me.sh
- make get-deps
- make -j${THREADS}
- echo Testing
@@ -96,6 +94,8 @@ local-build-test-job:
- make test
- make static -j${THREADS}
# Also test as a backend for the tube map
# Tube map expects vg on PATH
- export PATH="$(pwd)/bin:${PATH}"
- git clone https://github.com/vgteam/sequenceTubeMap.git
- cd sequenceTubeMap
# Tube map expects local IPv6 but Kubernetes won't let us have it
2 changes: 1 addition & 1 deletion .gitmodules
@@ -63,7 +63,7 @@
url = https://github.com/adamnovak/backward-cpp.git
[submodule "deps/elfutils"]
path = deps/elfutils
url = git://sourceware.org/git/elfutils.git
url = https://sourceware.org/git/elfutils.git
[submodule "deps/structures"]
path = deps/structures
url = https://github.com/vgteam/structures.git
7 changes: 3 additions & 4 deletions Dockerfile
@@ -48,7 +48,6 @@ RUN apt-get -qq -y update && apt-get -qq -y upgrade && apt-get -qq -y install \
###DEPS_END###

# Prepare to build submodule dependencies
COPY source_me.sh /vg/source_me.sh
COPY deps /vg/deps
# To increase portability of the docker image, when building for amd64, set the
# target CPU architecture to Nehalem (2008) rather than auto-detecting the
@@ -59,17 +58,17 @@ RUN if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then sed -i s/m
RUN find . -name CMakeCache.txt | xargs rm -f
# Build the dependencies
COPY Makefile /vg/Makefile
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" CFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) deps
RUN CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" CFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) deps

# Bring in the sources, which we need in order to build.
COPY src /vg/src

# Build all the object files for vg, but don't link.
# Also pass the arch here
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) objs
RUN CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) objs

# Do the final build and link, knowing the version. Trim down the resulting binary but make sure to include enough debug info for profiling.
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) static && strip -d bin/vg
RUN CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) static && strip -d bin/vg

# Ship the scripts
COPY scripts /vg/scripts
148 changes: 74 additions & 74 deletions Makefile

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions README.md
@@ -101,7 +101,7 @@ Note that a 64-bit OS is required. Ubuntu 20.04 should work.

#### Linux: Build

When you are ready, build with `. ./source_me.sh && make`. You can use `make -j16` to run 16 build threads at a time, which greatly accelerates the process. If you have more CPU cores, you can use higher numbers.
When you are ready, build with `make`. You can use `make -j16` to run 16 build threads at a time, which greatly accelerates the process. If you have more CPU cores, you can use higher numbers.

Note that vg can take anywhere from 10 minutes to more than an hour to compile depending on your machine and the number of threads used.

@@ -161,7 +161,7 @@ Homebrew provides another package management solution for OSX, and may be prefer

With dependencies installed, VG can now be built:

. ./source_me.sh && make
make

As with Linux, you can add `-j16` or other numbers at the end to run multiple build tasks at once, if your computer can handle them.

44 changes: 0 additions & 44 deletions configure.py

This file was deleted.

2 changes: 1 addition & 1 deletion deps/gcsa2
2 changes: 1 addition & 1 deletion deps/sublinear-Li-Stephens
2 changes: 1 addition & 1 deletion doc/wiki
Submodule wiki updated from f70ea3 to f28a1e
2 changes: 1 addition & 1 deletion scripts/setup-server
@@ -36,7 +36,7 @@ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.9 100 --slave
git clone --recursive https://github.com/vgteam/vg.git

# build vg
cd vg && source ./source_me.sh && make -j 32 static && sudo cp bin/vg /usr/local/bin/
cd vg && make -j 32 static && sudo cp bin/vg /usr/local/bin/
sudo cp scripts/vg_sim_pos_compare.py /usr/local/bin/
cd ~

27 changes: 4 additions & 23 deletions source_me.sh
mode changed 100755 → 100644
@@ -1,23 +1,4 @@
export LIBRARY_PATH=`pwd`/lib:$LIBRARY_PATH
export LD_LIBRARY_PATH=`pwd`/lib:$LD_LIBRARY_PATH
export DYLD_LIBRARY_PATH=`pwd`/lib:$DYLD_LIBRARY_PATH
export LD_INCLUDE_PATH=`pwd`/include:$LD_INCLUDE_PATH
# Setting include directories via C_INCLUDE_PATH/CPLUS_INCLUDE_PATH will
# automatically get them demoted to the end of the search list even if a -I
# option is passed to try and bump them up earlier, before other -I options.
# We leave the Makefile in charge of finding all the include directories.
export CFLAGS="-I $(pwd)/include ${CFLAGS}"
export CXXFLAGS="-I $(pwd)/include -I$(pwd)/include/dynamic ${CXXFLAGS}"
export PATH=`pwd`/bin:`pwd`/scripts:"$PATH"
export CC=$(which gcc)
export CXX=$(which g++)

#
# disable until file arguments work as in normal bash :(
#
# add bash autocompletion
#if test -n "$BASH_VERSION"
#then
#
# . ./autocomp.bash
#fi
# We used to have a script here to set up all the include and library search
# paths for the vg build. But now the Makefile knows how to do it all for the
# build, and the vg binary knows where to look for its dynamic libraries.
echo 1>&2 "Sourcing source_me.sh is no longer necessary"
160 changes: 97 additions & 63 deletions src/algorithms/subgraph.cpp
@@ -1,5 +1,6 @@
#include "subgraph.hpp"
#include "../path.hpp"
#include "../crash.hpp"

namespace vg {
namespace algorithms {
@@ -290,83 +291,116 @@ void extract_path_range(const PathPositionHandleGraph& source, path_handle_t pat
}
}

/// add subpaths to the subgraph, providing a concatenation of subpaths that are discontiguous over the subgraph
/// based on their order in the path position index provided by the source graph
/// will clear any path found in both graphs before writing the new steps into it
/// if subpath_naming is true, a suffix will be added to each path in the subgraph denoting its offset
/// in the source graph (unless the subpath was not cut up at all)
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph,
bool subpath_naming) {
std::unordered_map<std::string, std::map<uint64_t, handle_t> > subpaths;
void add_subpaths_to_subgraph(const PathPositionHandleGraph& source, MutablePathHandleGraph& subgraph) {

// We want to organize all visits by base path. This key type holds the
// sense, sample and locus names, haplotype, and phase block.
using base_metadata_t = std::tuple<PathSense, string, string, size_t, size_t>;

// This stores, for each source graph base path, for each start offset, the handle at that offset on the path.
std::unordered_map<base_metadata_t, std::map<uint64_t, handle_t> > subpaths;

// This stores information about base paths that don't have subranges, and
// their full lengths and circularity flags, so we can avoid generating new
// subrange metadata when we just have all of a path.
std::unordered_map<base_metadata_t, std::pair<size_t, bool>> full_path_info;

subgraph.for_each_handle([&](const handle_t& h) {
handlegraph::nid_t id = subgraph.get_id(h);
if (source.has_node(id)) {
handle_t handle = source.get_handle(id);
source.for_each_step_position_on_handle(handle, [&](const step_handle_t& step, const bool& is_rev, const uint64_t& pos) {
path_handle_t path = source.get_path_handle_of_step(step);
std::string path_name = source.get_path_name(path);
subpaths[path_name][pos] = is_rev ? subgraph.flip(h) : h;
// Figure out the base path this visit is on
base_metadata_t key = {source.get_sense(path), source.get_sample_name(path), source.get_locus_name(path), source.get_haplotype(path), source.get_phase_block(path)};
// Figure out the subrange of the base path it is relative to
subrange_t path_subrange = source.get_subrange(path);
uint64_t visit_offset = pos;
if (path_subrange != PathMetadata::NO_SUBRANGE) {
// If we have the position relative to a subrange, adjust by that subrange's offset.
visit_offset += path_subrange.first;
}
subpaths[key][visit_offset] = is_rev ? subgraph.flip(h) : h;

if (path_subrange == PathMetadata::NO_SUBRANGE) {
// There's no subrange set, so this path is full-length in the source graph.
// See if we know of this path as a full-length path or not
auto it = full_path_info.find(key);
if (it == full_path_info.end()) {
// We haven't recorded its length and circularity yet, so do it.
full_path_info.emplace_hint(it, key, std::make_pair(source.get_path_length(path), source.get_is_circular(path)));
}
}
return true;
});
}
});

function<path_handle_t(const string&, bool, size_t)> new_subpath =
[&subgraph](const string& path_name, bool is_circular, size_t subpath_offset) {
PathSense sense;
string sample;
string locus;
size_t haplotype;
size_t phase_block;
subrange_t subrange;
PathMetadata::parse_path_name(path_name, sense, sample, locus, haplotype, phase_block, subrange);
if (subrange == PathMetadata::NO_SUBRANGE) {
subrange.first = subpath_offset;
} else {
subrange.first += subpath_offset;
}
subrange.first = subpath_offset;
subrange.second = PathMetadata::NO_END_POSITION;
string subpath_name = PathMetadata::create_path_name(sense, sample, locus, haplotype, phase_block, subrange);
if (subgraph.has_path(subpath_name)) {
subgraph.destroy_path(subgraph.get_path_handle(subpath_name));
}
return subgraph.create_path_handle(subpath_name, is_circular);
};
for (auto& base_and_visits : subpaths) {
// For each base path
const base_metadata_t& base_path_metadata = base_and_visits.first;
const auto& start_to_handle = base_and_visits.second;
// If we didn't put anything in the visit collection, it shouldn't be here.
crash_unless(!start_to_handle.empty());

for (auto& subpath : subpaths) {
const std::string& path_name = subpath.first;
path_handle_t source_path_handle = source.get_path_handle(path_name);
// destroy the path if it exists
if (subgraph.has_path(path_name)) {
subgraph.destroy_path(subgraph.get_path_handle(path_name));
}
// create a new path. give it a subpath name if the flag's on and its smaller than original
path_handle_t path;
if (!subpath_naming || subpath.second.size() == source.get_step_count(source_path_handle) ||
subpath.second.empty()) {
path = subgraph.create_path_handle(path_name, source.get_is_circular(source_path_handle));
} else {
path = new_subpath(path_name, source.get_is_circular(source_path_handle), subpath.second.begin()->first);
}
for (auto p = subpath.second.begin(); p != subpath.second.end(); ++p) {
const handle_t& handle = p->second;
if (p != subpath.second.begin() && subpath_naming) {
auto prev = p;
--prev;
const handle_t& prev_handle = prev->second;
// distance from map
size_t delta = p->first - prev->first;
// what the distance should be if they're contiguous depends on relative orienations
size_t cont_delta = subgraph.get_length(prev_handle);
if (delta != cont_delta) {
// we have a discontinuity! we'll make a new path can continue from there
assert(subgraph.get_step_count(path) > 0);
path = new_subpath(path_name, subgraph.get_is_circular(path), p->first);
// We're going to walk over all the visits and find contiguous runs
auto run_start = start_to_handle.begin();
auto run_end = run_start;
size_t start_coordinate = run_start->first;
while (run_end != start_to_handle.end()) {
// Until we run out of runs
// Figure out where this node ends on the path
size_t stop_coordinate = run_end->first + subgraph.get_length(run_end->second);

// Look ahead
++run_end;

if (run_end != start_to_handle.end() && run_end->first == stop_coordinate) {
// The next visit is still contiguous, so advance.
continue;
}

// Otherwise we've reached a break in continuity. We have a
// contiguous run from run_start to run_end, visiting the subrange
// start_coordinate to stop_coordinate.

// Find out if we cover a full source graph path.
subrange_t run_subrange = {start_coordinate, stop_coordinate};
bool is_circular = false;
if (start_coordinate == 0) {
// We might be a full path
auto found_length_and_circularity = full_path_info.find(base_path_metadata);
if (found_length_and_circularity != full_path_info.end() && found_length_and_circularity->second.first == stop_coordinate) {
// We are a full path
run_subrange = PathMetadata::NO_SUBRANGE;
// We can be circular.
is_circular = found_length_and_circularity->second.second;
}
}
//fill in the path information
subgraph.append_step(path, handle);

// Make a path with all the metadata
path_handle_t new_path = subgraph.create_path(
std::get<0>(base_path_metadata),
std::get<1>(base_path_metadata),
std::get<2>(base_path_metadata),
std::get<3>(base_path_metadata),
std::get<4>(base_path_metadata),
run_subrange,
is_circular
);

for (auto it = run_start; it != run_end; ++it) {
// Copy the path's visits
subgraph.append_step(new_path, it->second);
}

// Set up the next subpath.
// Set where it starts.
run_start = run_end;
if (run_start != start_to_handle.end()) {
// And if it will exist, set its start coordinate.
start_coordinate = run_start->first;
}
}
}
}
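
To make the contiguity logic in the rewritten add_subpaths_to_subgraph() easier to follow, here is a minimal standalone C++ sketch of just the run-splitting step. It is not vg code: the map from start offset to node length stands in for the subgraph handles, the example offsets are hypothetical, and the printed subranges stand in for the subpaths the real function creates with create_path().

#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

int main() {
    // Start offset of each visited node on the base path -> node length.
    // Hypothetical data: two contiguous stretches separated by a gap.
    std::map<uint64_t, uint64_t> start_to_length = {
        {0, 10}, {10, 5}, {15, 3},   // one contiguous run covering [0, 18)
        {40, 7}, {47, 2}             // another covering [40, 49)
    };

    // Mirrors the crash_unless(!start_to_handle.empty()) guard above.
    if (start_to_length.empty()) {
        return 0;
    }

    std::vector<std::pair<uint64_t, uint64_t>> runs; // (start, stop) of each run

    auto run_start = start_to_length.begin();
    auto run_end = run_start;
    uint64_t start_coordinate = run_start->first;
    while (run_end != start_to_length.end()) {
        // Where the current node's visit ends on the base path.
        uint64_t stop_coordinate = run_end->first + run_end->second;
        // Look ahead.
        ++run_end;
        if (run_end != start_to_length.end() && run_end->first == stop_coordinate) {
            // The next visit starts exactly where this one stops: still contiguous.
            continue;
        }
        // Discontinuity (or no more visits): the run from run_start to run_end
        // covers the subrange [start_coordinate, stop_coordinate).
        runs.emplace_back(start_coordinate, stop_coordinate);
        run_start = run_end;
        if (run_start != start_to_length.end()) {
            start_coordinate = run_start->first;
        }
    }

    for (const auto& r : runs) {
        std::cout << "subrange [" << r.first << ", " << r.second << ")\n";
    }
    // Expected output:
    //   subrange [0, 18)
    //   subrange [40, 49)
    return 0;
}

Keying the visits by start offset keeps them sorted, so a single forward pass is enough to detect every discontinuity and emit one subrange per contiguous run.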

1 comment on commit a14a8d5

@adamnovak
Member Author

vg CI tests complete for branch lr-giraffe. View the full report here.

14 tests passed, 0 tests failed and 0 tests skipped in 15134 seconds
