TIP: if you're reading this on GitHub, I recommend starting a Binder session to have color output (and an interactive session!). Here is the link:
Alternatively, use nbviewer, which preserves the formatting.
UPDATE: I opened an issue, and it seems like the current master
version is the culprit (more specifically 57af16a). Reverting back to the 1.0.4
release fixes the problem. There is a new section at the end of this notebook with more details.
Last week Will Rowe posted the preprint for HULK, and in a comment in a related discussion Heng Li suggested using ntHash
to speed up hashing of k-mers, which makes more sense than using murmurhash3 when using sliding windows (since you can avoid recalculating the full hash all the time). He also linked to an answer on the bioinformatics stack exchange with a very clear explanation by Karel Brinda on how it works.
So clear was this explanation that I used it to implement a Rust version. And since the default template for libraries (when running cargo init --lib
) already includes a test example, I wrote a simple one that calculated the ntHash for a 5-mer and compared to the same value generated by the original implementation (in C++).
And... the test failed.
I spent some time figuring out if I did something wrong, if there was something weird with my for loop, up to going to a whiteboard and checking indexing on the string by hand. It seemed to be right, so maybe there was something I was misinterpreting in the Rust syntax?
Well, back to the comfort zone: let's try in Python!
Again, pretty straightforward to implement, but I had to make a rol
function to rotate a 64-bit integer. Here is the implementation:
!cat nthash.py
"""Reference (unoptimized) implementation of ntHash for a single k-mer.

Run as a script, it hashes the sequence given as the first command-line
argument (treated as one k-mer) and prints the canonical, forward and
reverse hash values. The functions can also be imported and called directly.
"""
import sys

# Python integers are unbounded, so rotations are masked back to 64 bits.
MASK64 = 2 ** 64 - 1

# Per-base seed used for the forward strand.
h = {
    'A': 0x3c8bfbb395c60474,
    'C': 0x3193c18562a02b4c,
    'G': 0x20323ed082572324,
    'T': 0x295549f54be24456,
    'N': 0,
}

# Seed of each base's complement (A<->T, C<->G), used for the reverse strand.
rc = {
    'A': 0x295549f54be24456,
    'C': 0x20323ed082572324,
    'G': 0x3193c18562a02b4c,
    'T': 0x3c8bfbb395c60474,
    'N': 0,
}


def rol(x, k):
    """Rotate the 64-bit value `x` left by `k` bits (`k` is taken modulo 64)."""
    k %= 64
    x &= MASK64
    return ((x << k) & MASK64) | (x >> (64 - k))


def f(s, idx, k):
    """Forward-strand hash of the k-mer `s[idx:idx + k]`.

    The base at 1-based position `i` contributes its seed rotated left by
    `k - i` bits; contributions are combined with XOR.
    """
    out = 0
    for i, v in enumerate(s[idx: idx + k], 1):
        out ^= rol(h[v], k - i)
    return out


def r(s, idx, k):
    """Reverse-strand hash of the k-mer `s[idx:idx + k]`.

    Walks the k-mer backwards using the complement seeds, i.e. hashes the
    reverse complement with the same rotation scheme as `f`.
    """
    out = 0
    for i, v in enumerate(reversed(s[idx: idx + k]), 1):
        out ^= rol(rc[v], k - i)
    return out


def nthash(s, k):
    """Return `(canonical, forward, reverse)` hashes of the first k-mer of `s`.

    The canonical hash is the smaller of the forward and reverse values.
    """
    fval = f(s, 0, k)
    rval = r(s, 0, k)
    return min(fval, rval), fval, rval


def main(argv):
    """Hash `argv[1]` as a single k-mer and print the three values."""
    if len(argv) < 2:
        print('usage: {} SEQUENCE'.format(argv[0] if argv else 'nthash.py'),
              file=sys.stderr)
        return 1

    seq = argv[1]
    hval, fval, rval = nthash(seq, len(seq))
    print('NTC64 0x{:0>16x}'.format(hval))
    print("fhVal 0x{:0>16x}".format(fval))
    print("rhVal 0x{:0>16x}".format(rval))
    return 0


# Guarded so that importing the module (e.g. from a test) has no side effects.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
So I ran it, and... it matches my Rust result, but doesn't match the original implementation. Hmm.
While re-reading the paper I looked into the supplementary materials, and they have the NT64
function defined (which is missing in the implementation on GitHub). So I used that to make the canonical version NTC64
, which takes the minimum hash from both forward (NT64F
) and reverse (NT64R
) strands with the same parameters from the current implementation (so we can compare them easily).
Here is the simple implementation, without worrying about optimizations:
!cat nthash_simple.hpp
#ifndef NTHASH_SIMPLE_HPP
#define NTHASH_SIMPLE_HPP

// Straightforward, unoptimized ntHash for a single k-mer, used as a reference
// to compare other implementations against.

#include <cstdint>
#include <string>

// Kept for backward compatibility: files that include this header may rely on
// names from std being visible unqualified.
using namespace std;

namespace nthash {

// Seed of a base on the forward strand. 'N' and any unrecognised character
// contribute 0.
inline uint64_t h(char i) {
    switch (i) {
        case 'A': return 0x3c8bfbb395c60474ULL;
        case 'C': return 0x3193c18562a02b4cULL;
        case 'G': return 0x20323ed082572324ULL;
        case 'T': return 0x295549f54be24456ULL;
        default:  return 0;
    }
}

// Seed of the complement of a base (A<->T, C<->G), used for the reverse
// strand. 'N' and any unrecognised character contribute 0.
inline uint64_t rc(char i) {
    switch (i) {
        case 'A': return 0x295549f54be24456ULL;
        case 'C': return 0x20323ed082572324ULL;
        case 'G': return 0x3193c18562a02b4cULL;
        case 'T': return 0x3c8bfbb395c60474ULL;
        default:  return 0;
    }
}

// Rotate v left by k bits, with k taken modulo 64.
// The k == 0 case is handled explicitly: the plain formula would evaluate
// v >> 64, which is undefined behaviour, and every k-mer hits it because the
// last base is rotated by 0.
inline uint64_t rol(uint64_t v, unsigned k) {
    k &= 63u;
    return k == 0 ? v : (v << k) | (v >> (64 - k));
}

// Forward-strand hash: base i contributes h(base) rotated left by k-1-i bits,
// and all contributions are XORed together.
inline uint64_t NT64(const char * kmerSeq, const unsigned k) {
    uint64_t hVal = 0;
    for (unsigned i = 0; i < k; i++)
        hVal ^= rol(h(kmerSeq[i]), k - 1 - i);
    return hVal;
}

// Forward-strand hash; same value as NT64.
inline uint64_t NTF64(const char * kmerSeq, const unsigned k) {
    return NT64(kmerSeq, k);
}

// Reverse-strand hash: walks the k-mer from its last base to its first, using
// the complement seeds and the same rotation scheme as NT64.
inline uint64_t NTR64(const char * kmerSeq, const unsigned k) {
    uint64_t hVal = 0;
    for (unsigned i = 0; i < k; i++)
        hVal ^= rol(rc(kmerSeq[k - i - 1]), k - 1 - i);
    return hVal;
}

// Canonical hash: the smaller of the forward and reverse hashes. Both strand
// hashes are also returned through fhVal and rhVal.
inline uint64_t NTC64(const char * kmerSeq, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
    fhVal = NTF64(kmerSeq, k);
    rhVal = NTR64(kmerSeq, k);
    return (rhVal < fhVal) ? rhVal : fhVal;
}

}  // namespace nthash

#endif  // NTHASH_SIMPLE_HPP
And... again it matches my Rust and Python code, but not the original implementation. To compare all of them I wrote small programs that take a sequence from the first command line argument and generate the ntHash, printing also both forward and reverse strand values. Here is the C++ version, taking a preprocessor directive (set by passing -DNTHASH_OPT
to the compiler) to compile with the original implementation or with the simple implementation based on the article:
!cat nt_main.cpp
#ifndef NTHASH_OPT #include "nthash_simple.hpp" using namespace nthash; #else #ifndef NTHASH_104 #include "nthash.hpp" #else #include "nthash_104.hpp" #endif #endif #include <string> #include <iostream> #include <iomanip> using namespace std; int main(int argc, char** argv) { string seq = argv[1]; uint64_t hVal, fhVal, rhVal; hVal = NTC64(seq.c_str(), seq.size(), fhVal, rhVal); // initial hash value cout << "NTC64 0x" << hex << setfill('0') << setw(16) << hVal << endl; cout << "fhVal 0x" << hex << setfill('0') << setw(16) << fhVal << endl; cout << "rhVal 0x" << hex << setfill('0') << setw(16) << rhVal << endl; return 0; }
The Python version is at the end of the nthash.py
file above. And finally the Rust version (using the nthash
crate from my repo):
!cat src/main.rs
extern crate nthash;

use nthash::{f, nthash, r};
use std::env;
use std::process;

/// Prints the canonical (`NTC64`), forward (`fhVal`) and reverse (`rhVal`)
/// ntHash of the sequence given as the first command-line argument, treating
/// the whole sequence as a single k-mer.
///
/// Exits with status 1 when no sequence is given, or when the sequence is too
/// long for the `u8` k-mer size that is passed to `nthash`.
fn main() {
    let seq = match env::args().nth(1) {
        Some(seq) => seq,
        None => {
            eprintln!("error: expected a sequence as the first argument");
            process::exit(1);
        }
    };
    let bytes = seq.as_bytes();
    let k = bytes.len();

    // The k-mer size is passed to `nthash` as a `u8`; a plain `as` cast would
    // silently wrap for longer input and hash with the wrong k.
    if k > usize::from(u8::max_value()) {
        eprintln!(
            "error: sequence has {} bases, at most {} are supported",
            k,
            u8::max_value()
        );
        process::exit(1);
    }

    println!("NTC64 0x{:0>16x}", nthash(bytes, k as u8)[0]);
    println!("fhVal 0x{:0>16x}", f(bytes, 0, k as u32));
    println!("rhVal 0x{:0>16x}", r(bytes, 0, k as u32));
}
I also put everything in one Makefile
, so it's easy to compile and run a basic test:
!make -B
g++ -O3 -DNTHASH_OPT -I. nt_main.cpp -o nt_opt g++ -O3 -I. nt_main.cpp -o nt_article g++ -O3 -DNTHASH_104 -I. nt_main.cpp -o nt_104
!make test
./nt_opt TGCAG NTC64 0x0bafa6628fc6dab7 fhVal 0x0bafa6628fc6dab7 rhVal 0x8cf2d41f2cca4802 ./nt_article TGCAG NTC64 0x0bafa6728fc6dabf fhVal 0x0bafa6728fc6dabf rhVal 0x8cf2d4072cca480e python nthash.py TGCAG NTC64 0x0bafa6728fc6dabf fhVal 0x0bafa6728fc6dabf rhVal 0x8cf2d4072cca480e cargo run -q TGCAG NTC64 0x0bafa6728fc6dabf fhVal 0x0bafa6728fc6dabf rhVal 0x8cf2d4072cca480e
But for running more tests it is a bit annoying to go in the Makefile and change it, so we can benefit from the IPython %%bash
magic:
%%bash -l
./nt_opt AAAAA
./nt_article AAAAA
python nthash.py AAAAA
cargo run -q AAAAA
NTC64 0x01542d2a1299ba7e fhVal 0x9b5384ab1b0279a4 rhVal 0x01542d2a1299ba7e NTC64 0x01542d341299ba71 fhVal 0x9b5384bf1b0279ae rhVal 0x01542d341299ba71 NTC64 0x01542d341299ba71 fhVal 0x9b5384bf1b0279ae rhVal 0x01542d341299ba71 NTC64 0x01542d341299ba71 fhVal 0x9b5384bf1b0279ae rhVal 0x01542d341299ba71
But we still have to set the sequence in 4 different places. Let's make a function to do that for us:
import numpy as np
import pandas as pd
from IPython.display import display_html
from functools import partial, reduce
def color_same(unique, s):
    """Return one CSS `color: ...` rule per value in `s`.

    Each distinct value listed in `unique` is assigned a colour, so equal
    values always get the same colour. Four colours are available because at
    most 4 different values are expected; if there are more, the palette is
    reused from the start instead of failing with a `KeyError`.
    """
    from itertools import cycle

    palette = ('#ff7f0e', '#2ca02c', '#1f77b4', '#d62728')
    # zip() stops when `unique` is exhausted, so cycling the palette is safe.
    colors = dict(zip(unique, cycle(palette)))
    return ['color: {}'.format(colors[v]) for v in s]
def run_for_seq(seq):
''' Returns a dataframe and a formatted styler (good for display) '''
opt = %sx ./nt_opt {seq}
simple = %sx ./nt_article {seq}
py = %sx python nthash.py {seq}
rust = %sx . ~/.cargo/env && cargo run -q {seq}
def parse(result):
out = {}
for line in result:
k, v = line.strip().split()
out[k] = int(v, 16)
return out
df = pd.DataFrame.from_dict({
'opt': parse(opt),
'simple': parse(simple),
'py': parse(py),
'rust': parse(rust)
}).T
unique_values = np.unique(df.values.flatten())
formatter = (df.style.format('0x{:0>16x}')
.apply(partial(color_same, unique_values), axis=1))
return df, formatter
With our new function run_for_seq
it is a bit easier to see patterns (using colors to highlight same values). For example, the output for our previous example (AAAAA
) is now
# Compare all implementations on the 5-mer from the previous example.
# `form` is left as the cell's last expression so the notebook displays it.
df, form = run_for_seq('AAAAA')
form
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x01542d2a1299ba7e | 0x9b5384ab1b0279a4 | 0x01542d2a1299ba7e |
simple | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
py | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
rust | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
Let's start with the simple case: what is the hash for the 1-mers?
# Hash every single-base sequence with all implementations and show each table.
for nt in ("A", "C", "G", "T"):
    df, form = run_for_seq(nt)
    print("Sequence: {}".format(nt))
    display(form)
Sequence: A
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
simple | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
py | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
rust | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
Sequence: C
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
simple | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
py | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
rust | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
Sequence: G
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
simple | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
py | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
rust | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
Sequence: T
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
simple | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
py | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
rust | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
So far so good, all values in the columns are the same,
and rhVal
is the canonical form for A
and C
and fhVal
is the canonical form for T
and G
(since the canonical value ends up being h(T)
and h(G)
).
Let's start with 2-mers then! One case to analyze is for palindrome 2-mers like GC
, where both forward and reverse strands are the same. We expect that all values will be the same, and indeed that's what we see for GC
:
# Compare all implementations on the palindromic 2-mer GC.
# `form` is left as the cell's last expression so the notebook displays it.
df, form = run_for_seq('GC')
form
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
simple | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
py | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
rust | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
But it's not what we see for the other cases (but at least opt
is consistent and is giving the same value for fhVal
and rhVal
):
# Show the comparison table for each palindromic 2-mer.
for seq in ['AT', 'TA', 'GC', 'CG']:
    df, form = run_for_seq(seq)
    print("Sequence: {}".format(seq))
    display(form)
Sequence: AT
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x5042be90606e4cbf | 0x5042be90606e4cbf | 0x5042be90606e4cbf |
simple | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
py | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
rust | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
Sequence: TA
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x6e21685b02028cd9 | 0x6e21685b02028cd9 | 0x6e21685b02028cd9 |
simple | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
py | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
rust | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
Sequence: GC
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
simple | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
py | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
rust | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
Sequence: CG
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x4315bdd8471775bd | 0x4315bdd8471775bd | 0x4315bdd8471775bd |
simple | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
py | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
rust | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
When we check 2-mers with the same base, something interesting: rhVal(CC)
matches fhVal(GG)
for all implementations, but for all the other cases we see the same pattern (of opt
disagreeing with the other implementations).
# Show the comparison table for each 2-mer made of a repeated base.
for seq in ['AA', 'TT', 'CC', 'GG']:
    df, form = run_for_seq(seq)
    print("Sequence: {}".format(seq))
    display(form)
Sequence: AA
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x459c0cd6be4a0c9d | 0x459c0cd6be4a0c9d | 0x7bffda1ddc26ccfb |
simple | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
py | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
rust | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
Sequence: TT
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x459c0cd6be4a0c9d | 0x7bffda1ddc26ccfb | 0x459c0cd6be4a0c9d |
simple | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
py | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
rust | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
Sequence: CC
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x52b4428da7e07dd5 | 0x52b4428da7e07dd5 | 0x6056437186f9656c |
simple | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
py | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
rust | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
Sequence: GG
| | NTC64 | fhVal | rhVal |
---|---|---|---|
opt | 0x52b4428da7e07dd5 | 0x6056437186f9656c | 0x52b4428da7e07dd5 |
simple | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
py | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
rust | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
It seems like there is something off with the optimizations implemented in ntHash
, but I'm not sure how to help to track down more than providing some reduced test cases.
From reading the original code we see that there are no tests (the nttest.cpp
code is for comparing ntHash
with other hash functions). Unit testing would be a good start (because it helps to catch simple bugs), but because the codebase went through an optimization phase there is an option that really shines for this use case: property based testing.
Unit tests are usually implemented by giving an input to a function and checking if the return value is correct. Property based testing extends this idea further by defining properties that the function must respect, and then generating random inputs to try to falsify the property. This post has a great explanation of the process.
I first heard about property based testing via Hypothesis, but sadly it is a Python library and I wasn't planning to write bindings for ntHash. There is an alternative for C++, autocheck, which is more similar to QuickCheck and, for this purpose, is good enough to demonstrate the idea.
For this specific use case, we can use an oracle to implement a property: the unoptimized implementation is our oracle, and the results for the optimized version must match the results for the unoptimized version. In autocheck
this property can be written like this:
// Oracle property: for a given sequence, the canonical hash from the global
// NTC64 must equal the one from nthash::NTC64.
struct prop_nt_oracle {
    bool operator () (const string& seq) {
        // The strand hashes are required by the NTC64 signature but are not
        // part of the comparison.
        uint64_t fwd, rev;
        const uint64_t optimized = NTC64(seq.c_str(), seq.size(), fwd, rev);
        const uint64_t reference = nthash::NTC64(seq.c_str(), seq.size(), fwd, rev);
        return optimized == reference;
    }
};
The full code (using catch
as a test runner) is in test/test.cpp
:
!cat test/test.cpp
#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() - only do this in one cpp file #include "catch.hpp" #include "autocheck/autocheck.hpp" #include "autocheck/check.hpp" namespace ac = autocheck; #include "nthash.hpp" #include "nthash_simple.hpp" static const char alnts[] = "ACGTN"; class seq_generator { public: typedef std::basic_string<char> result_type; result_type operator() (size_t size = 1) { result_type rv; rv.reserve(size); std::uniform_int_distribution<int> dist(0, 5); std::generate_n(std::back_insert_iterator<result_type>(rv), size, [&]{ return alnts[dist(ac::rng())]; }); return rv; } }; struct prop_nt_oracle { bool operator () (const string& seq) { uint64_t fhVal, rhVal, hValOpt, hValArticle; hValOpt = NTC64(seq.c_str(), seq.size(), fhVal, rhVal); hValArticle = nthash::NTC64(seq.c_str(), seq.size(), fhVal, rhVal); return hValOpt == hValArticle; } }; TEST_CASE( "oracle", "[nthash]" ) { ac::catch_reporter rep; SECTION("optimized matches simple implementation outputs?") { ac::check<std::string>(prop_nt_oracle(), 100, ac::make_arbitrary(seq_generator()), rep); } }
I defined a new input generator, seq_generator
, to create relevant genomic sequences for our test cases. It just takes a size
and randomly generates a string of that size with ACTGN
characters.
To run this test case, there is a Makefile in the test
directory that compiles everything:
! cd test/ && make
! test/test
make: 'test' is up to date. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ test is a Catch v2.4.0 host application. Run with -? for options ------------------------------------------------------------------------------- oracle optimized matches simple implementation outputs? ------------------------------------------------------------------------------- test.cpp:47 ............................................................................... autocheck/reporter.hpp:111: FAILED: explicitly with message: Falsifiable, after 7 tests: (NTC) =============================================================================== test cases: 1 | 1 failed assertions: 1 | 1 failed
And if the implementations don't match, it will keep finding inputs that falsify our property (in this case, any input where the optimized implementation doesn't match the simple one).
I opened an issue, and it seems like the current master
version is the culprit (more specifically 57af16a). Reverting back to the 1.0.4
release fixes the problem.
So, in the end it wasn't a case of an optimization bug, but the case of a versioning bug =]
Re-running the analysis code with the 1.0.4
matches expectations:
def run_for_seq(seq):
''' Returns a dataframe and a formatted styler (good for display) '''
opt = %sx ./nt_104 {seq}
simple = %sx ./nt_article {seq}
py = %sx python nthash.py {seq}
rust = %sx . ~/.cargo/env && cargo run -q {seq}
def parse(result):
out = {}
for line in result:
k, v = line.strip().split()
out[k] = int(v, 16)
return out
df = pd.DataFrame.from_dict({
'1.0.4': parse(opt),
'simple': parse(simple),
'py': parse(py),
'rust': parse(rust)
}).T
unique_values = np.unique(df.values.flatten())
formatter = (df.style.format('0x{:0>16x}')
.apply(partial(color_same, unique_values), axis=1))
return df, formatter
# Re-run the 5-mer comparison with the redefined run_for_seq.
# `form` is left as the cell's last expression so the notebook displays it.
df, form = run_for_seq('AAAAA')
form
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
simple | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
py | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
rust | 0x01542d341299ba71 | 0x9b5384bf1b0279ae | 0x01542d341299ba71 |
# Hash every single-base sequence with all implementations and show each table.
for nt in ("A", "C", "G", "T"):
    df, form = run_for_seq(nt)
    print("Sequence: {}".format(nt))
    display(form)
Sequence: A
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
simple | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
py | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
rust | 0x295549f54be24456 | 0x3c8bfbb395c60474 | 0x295549f54be24456 |
Sequence: C
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
simple | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
py | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
rust | 0x20323ed082572324 | 0x3193c18562a02b4c | 0x20323ed082572324 |
Sequence: G
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
simple | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
py | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
rust | 0x20323ed082572324 | 0x20323ed082572324 | 0x3193c18562a02b4c |
Sequence: T
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
simple | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
py | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
rust | 0x295549f54be24456 | 0x295549f54be24456 | 0x3c8bfbb395c60474 |
# Show the comparison table for each palindromic 2-mer.
for seq in ['AT', 'TA', 'GC', 'CG']:
    df, form = run_for_seq(seq)
    print("Sequence: {}".format(seq))
    display(form)
Sequence: AT
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
simple | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
py | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
rust | 0x5042be92606e4cbe | 0x5042be92606e4cbe | 0x5042be92606e4cbe |
Sequence: TA
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
simple | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
py | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
rust | 0x6e21685902028cd8 | 0x6e21685902028cd8 | 0x6e21685902028cd8 |
Sequence: GC
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
simple | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
py | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
rust | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 | 0x71f7bc24660e6d04 |
Sequence: CG
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
simple | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
py | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
rust | 0x4315bdda471775bc | 0x4315bdda471775bc | 0x4315bdda471775bc |
# Show the comparison table for each 2-mer made of a repeated base.
for seq in ['AA', 'TT', 'CC', 'GG']:
    df, form = run_for_seq(seq)
    print("Sequence: {}".format(seq))
    display(form)
Sequence: AA
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
simple | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
py | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
rust | 0x459c0cd4be4a0c9c | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa |
Sequence: TT
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
simple | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
py | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
rust | 0x459c0cd4be4a0c9c | 0x7bffda1fdc26ccfa | 0x459c0cd4be4a0c9c |
Sequence: CC
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
simple | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
py | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
rust | 0x52b4428fa7e07dd4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c |
Sequence: GG
| | NTC64 | fhVal | rhVal |
---|---|---|---|
1.0.4 | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
simple | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
py | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
rust | 0x52b4428fa7e07dd4 | 0x6056437186f9656c | 0x52b4428fa7e07dd4 |
! cd test && make test_104
! test/test_104
make: 'test_104' is up to date. OK, passed 10000 tests. =============================================================================== All tests passed (1 assertion in 1 test case)