Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .github/workflows/dnmtools_release_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ jobs:
apt-get update && apt-get install --no-install-recommends -y automake libgsl-dev && \
find /usr -name libz.so -exec rm {} \; && \
find /usr -name libgsl\*.so -exec rm {} \; && \
git clone https://github.com/ebiggers/libdeflate.git && \
cd libdeflate && \
cmake -B build \
-DLIBDEFLATE_BUILD_GZIP=off \
-DLIBDEFLATE_BUILD_TESTS=off \
-DLIBDEFLATE_BUILD_SHARED_LIB=off \
-DCMAKE_VERBOSE_MAKEFILE=on \
-DCMAKE_BUILD_TYPE=Release && \
cmake --build build -j4 && \
cmake --install build --prefix=/usr/local && \
cd .. && \
git clone --recursive https://github.com/samtools/htslib.git && \
cd htslib && \
autoreconf -i && \
Expand All @@ -35,14 +46,14 @@ jobs:
--disable-libcurl \
--disable-lzma \
--disable-ref-cache \
LDADD="-L/usr/local/lib" && \
--with-libdeflate && \
make -j4 CFLAGS="-Wall -O2 -fvisibility=hidden" libhts.a && \
cp libhts.a /usr/local/lib/ && \
cp -r ../htslib /usr/local/include/ && \
cd /workspace && \
autoreconf -i && \
mkdir build && cd build && \
../configure && \
../configure --with-libdeflate && \
make -j4 LDFLAGS="-static-libgcc -static-libstdc++ -s"
run: |
docker exec build-container bash -c "$SCRIPT"
Expand Down
21 changes: 18 additions & 3 deletions .github/workflows/dnmtools_release_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ jobs:
sudo cp $(brew --prefix gsl)/lib/*.a /opt/dnmtools/lib
sudo cp -r $(brew --prefix zlib)/include/* /opt/dnmtools/include
sudo cp -r $(brew --prefix gsl)/include/* /opt/dnmtools/include
- name: Build and install libdeflate
run: |
git clone https://github.com/ebiggers/libdeflate.git && \
cd libdeflate && \
cmake -B build \
-DLIBDEFLATE_BUILD_GZIP=off \
-DLIBDEFLATE_BUILD_TESTS=off \
-DLIBDEFLATE_BUILD_SHARED_LIB=off \
-DCMAKE_VERBOSE_MAKEFILE=on \
-DCMAKE_BUILD_TYPE=Release && \
cmake --build build -j4 && \
sudo cmake --install build --prefix=/opt/dnmtools
- name: Build and install HTSlib
run: |
git clone --recursive https://github.com/samtools/htslib.git
Expand All @@ -34,15 +46,18 @@ jobs:
--disable-libcurl \
--disable-lzma \
--disable-ref-cache \
--without-libdeflate \
LDADD="-L/usr/local/lib"
--with-libdeflate \
LDFLAGS="-L/opt/dnmtools/lib" CPPFLAGS="-I/opt/dnmtools/include"
make -j4 CFLAGS="-Wall -O2 -fvisibility=hidden" libhts.a
sudo cp libhts.a /opt/dnmtools/lib
- name: Build dnmtools
run: |
./autogen.sh
mkdir build && cd build
../configure CXX=g++-14 LDFLAGS="-L/opt/dnmtools/lib -static-libgcc -static-libstdc++ -Wl,-dead_strip" CPPFLAGS="-I/opt/dnmtools/include"
../configure --with-libdeflate \
CXX=g++-14 \
LDFLAGS="-L/opt/dnmtools/lib -static-libgcc -static-libstdc++ -Wl,-dead_strip" \
CPPFLAGS="-I/opt/dnmtools/include"
make -j4
- name: Rename the binary
run: mv build/dnmtools dnmtools_$(uname -m)
Expand Down
12 changes: 11 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,20 @@ the LDFLAGS and CPPFLAGS variables to specify the directories where the GSL
library and headers can be found.
"

dnl arg for using libdeflate, which might happen by default anyway
AC_ARG_WITH([libdeflate],
[AS_HELP_STRING([--with-libdeflate], [use libdeflate for BAM output])],
[with_libdeflate=yes], [with_libdeflate=no])

dnl check for required libraries
AC_SEARCH_LIBS([pthread_create], [pthread], [], [AC_MSG_FAILURE(["pthread library not found"])])
AC_SEARCH_LIBS([gzopen], [z], [], [AC_MSG_FAILURE(["Zlib library not found"])])
AC_SEARCH_LIBS([hts_version], [hts], [], [AC_MSG_FAILURE([$hts_fail_msg])], [-lz -lpthread])
AS_IF([test "x$with_libdeflate" = "xyes"],
[
AC_SEARCH_LIBS([libdeflate_deflate_compress], [deflate], [],
[AC_MSG_ERROR([--with-libdeflate specified but libdeflate not found])])
])
AC_SEARCH_LIBS([hts_version], [hts], [], [AC_MSG_FAILURE([$hts_fail_msg])])
AC_SEARCH_LIBS([cblas_dgemm], [gslcblas], [], [AC_MSG_FAILURE([$gsl_fail_msg])])
AC_SEARCH_LIBS([gsl_blas_dgemm], [gsl], [], [AC_MSG_FAILURE([$gsl_fail_msg])])

Expand Down
54 changes: 28 additions & 26 deletions src/analysis/nanopore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@
/* HTSlib */
#include <htslib/sam.h>

using bamxx::bam_header;
using bamxx::bam_rec;
using bamxx::bgzf_file;

[[nodiscard]] inline bool
is_cytosine(const char c) {
return c == 'c' || c == 'C';
Expand Down Expand Up @@ -73,7 +69,7 @@ is_cpg(const std::string &s, const std::size_t i) {
static void
read_fasta_file(const std::string &filename, std::vector<std::string> &names,
std::vector<std::string> &sequences) {
bgzf_file in(filename, "r");
bamxx::bgzf_file in(filename, "r");
if (!in)
throw std::runtime_error("error opening genome file: " + filename);
names.clear();
Expand All @@ -96,7 +92,7 @@ read_fasta_file(const std::string &filename, std::vector<std::string> &names,
}

[[nodiscard]] static std::string
get_basecall_model(const bam_header &hdr) {
get_basecall_model(const bamxx::bam_header &hdr) {
kstring_t ks{};

ks = {0, 0, nullptr};
Expand Down Expand Up @@ -583,7 +579,7 @@ struct match_counter {
};

static void
count_states_fwd(const bam_rec &aln, std::vector<CountSet> &counts,
count_states_fwd(const bamxx::bam_rec &aln, std::vector<CountSet> &counts,
mod_prob_buffer &mod_buf, const std::string &chrom,
match_counter &mc) {
/* Move through cigar, reference and read positions without
Expand Down Expand Up @@ -612,9 +608,12 @@ count_states_fwd(const bam_rec &aln, std::vector<CountSet> &counts,
const decltype(qpos) end_qpos = qpos + n;
for (; qpos < end_qpos; ++qpos) {
const auto r_nuc = *ref_itr++;
mc.add_fwd(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr,
seq_nt16_str[bam_seqi(seq, qpos)],
qpos == q_lim ? 'N' : seq_nt16_str[bam_seqi(seq, qpos + 1)]);
if (qpos + 1 < end_qpos) {
mc.add_fwd(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr,
seq_nt16_str[bam_seqi(seq, qpos)],
qpos == q_lim ? 'N'
: seq_nt16_str[bam_seqi(seq, qpos + 1)]);
}
counts[rpos++].add_count_fwd(*hydroxy_prob_itr, *methyl_prob_itr);
++methyl_prob_itr;
++hydroxy_prob_itr;
Expand All @@ -638,7 +637,7 @@ count_states_fwd(const bam_rec &aln, std::vector<CountSet> &counts,
}

static void
count_states_rev(const bam_rec &aln, std::vector<CountSet> &counts,
count_states_rev(const bamxx::bam_rec &aln, std::vector<CountSet> &counts,
mod_prob_buffer &mod_buf, const std::string &chrom,
match_counter &mc) {
/* Move through cigar, reference and (*backward*) through read
Expand Down Expand Up @@ -672,8 +671,10 @@ count_states_rev(const bam_rec &aln, std::vector<CountSet> &counts,
const auto r_nuc = *ref_itr++;
const auto q_nuc = seq_nt16_str[bam_seqi(seq, q_idx)];
++q_idx;
mc.add_rev(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr, q_nuc,
q_idx == l_qseq ? 'N' : seq_nt16_str[bam_seqi(seq, q_idx)]);
if (end_qpos + 1 < qpos)
mc.add_rev(r_nuc, ref_itr == end_ref ? 'N' : *ref_itr, q_nuc,
q_idx == l_qseq ? 'N'
: seq_nt16_str[bam_seqi(seq, q_idx)]);
--methyl_prob_itr;
--hydroxy_prob_itr;
counts[rpos++].add_count_rev(*hydroxy_prob_itr, *methyl_prob_itr);
Expand All @@ -699,7 +700,7 @@ count_states_rev(const bam_rec &aln, std::vector<CountSet> &counts,
[[nodiscard]] static std::tuple<std::map<std::int32_t, std::size_t>,
std::set<std::int32_t>>
get_tid_to_idx(
const bam_header &hdr,
const bamxx::bam_header &hdr,
const std::unordered_map<std::string, std::size_t> &name_to_idx) {
std::set<std::int32_t> missing_tids;
std::map<std::int32_t, std::size_t> tid_to_idx;
Expand All @@ -718,7 +719,7 @@ get_tid_to_idx(
}

[[nodiscard]] static bool
consistent_targets(const bam_header &hdr,
consistent_targets(const bamxx::bam_header &hdr,
const std::map<std::int32_t, std::size_t> &tid_to_idx,
const std::vector<std::string> &names,
const std::vector<std::size_t> &sizes) {
Expand All @@ -741,7 +742,8 @@ consistent_targets(const bam_header &hdr,

[[nodiscard]] static bool
consistent_existing_targets(
const bam_header &hdr, const std::map<std::int32_t, std::size_t> &tid_to_idx,
const bamxx::bam_header &hdr,
const std::map<std::int32_t, std::size_t> &tid_to_idx,
const std::vector<std::string> &names,
const std::vector<std::size_t> &sizes) {
const std::size_t n_targets = hdr.h->n_targets;
Expand Down Expand Up @@ -805,10 +807,10 @@ struct read_processor {
output_skipped_chromosome(
const std::int32_t tid,
const std::map<std::int32_t, std::size_t> &tid_to_idx,
const bam_header &hdr,
const bamxx::bam_header &hdr,
const std::vector<std::string>::const_iterator chroms_beg,
const std::vector<std::size_t> &chrom_sizes, std::vector<CountSet> &counts,
bgzf_file &out) const {
bamxx::bgzf_file &out) const {

// get the index of the next chrom sequence
const auto chrom_idx = tid_to_idx.find(tid);
Expand All @@ -831,7 +833,7 @@ struct read_processor {
}

void
write_output_all(const bam_header &hdr, bgzf_file &out,
write_output_all(const bamxx::bam_header &hdr, bamxx::bgzf_file &out,
const std::int32_t tid, const std::string &chrom,
const std::vector<CountSet> &counts) const {
static constexpr auto out_fmt = "%ld\t%c\t%s\t%.6g\t%d\t%.6g\t%.6g\n";
Expand Down Expand Up @@ -886,7 +888,7 @@ struct read_processor {
}

void
write_output_sym(const bam_header &hdr, bgzf_file &out,
write_output_sym(const bamxx::bam_header &hdr, bamxx::bgzf_file &out,
const std::int32_t tid, const std::string &chrom,
const std::vector<CountSet> &counts) const {
static constexpr auto out_fmt = "%ld\t+\tCpG\t%.6g\t%d\t%.6g\t%.6g\n";
Expand Down Expand Up @@ -952,8 +954,8 @@ struct read_processor {
}

void
write_output(const bam_header &hdr, bgzf_file &out, const std::int32_t tid,
const std::string &chrom,
write_output(const bamxx::bam_header &hdr, bamxx::bgzf_file &out,
const std::int32_t tid, const std::string &chrom,
const std::vector<CountSet> &counts) const {
if (symmetric)
write_output_sym(hdr, out, tid, chrom, counts);
Expand Down Expand Up @@ -989,7 +991,7 @@ struct read_processor {
if (!hts)
throw std::runtime_error("failed to open input file");
// load the input file's header
bam_header hdr(hts);
bamxx::bam_header hdr(hts);
if (!hdr)
throw std::runtime_error("failed to read header");

Expand All @@ -1011,7 +1013,7 @@ struct read_processor {

// open the output file
const std::string output_mode = compress_output ? "w" : "wu";
bgzf_file out(outfile, output_mode);
bamxx::bgzf_file out(outfile, output_mode);
if (!out)
throw std::runtime_error("error opening output file: " + outfile);

Expand Down Expand Up @@ -1044,7 +1046,7 @@ struct read_processor {

// now iterate over the reads, switching chromosomes and writing
// output as needed
bam_rec aln;
bamxx::bam_rec aln;
std::int32_t prev_tid = -1;

std::vector<std::string>::const_iterator chrom_itr{};
Expand Down Expand Up @@ -1190,7 +1192,7 @@ main_nanocount(int argc, const char **argv) {
throw std::runtime_error("thread count cannot be negative");

std::ostringstream cmd;
copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));
std::copy(argv, argv + argc, std::ostream_iterator<const char *>(cmd, " "));

// file types from HTSlib use "-" for the filename to go to stdout
if (outfile.empty())
Expand Down