import numpy as np
import h5py
#f = h5py.File("/oak/stanford/groups/akundaje/avsec/basepair/data/"
# +"processed/comparison/output/nexus,peaks,OSNK,0,10"
# +",1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE"
# +",FALSE,1/deeplift.imp_score.h5", "r")
# Load the Nanog "profile/wn" hypothetical importance scores and the input
# sequences, keeping only the rows whose 'interval_from_task' entry is 'Nanog'.
#
# The file is opened in a `with` block so the HDF5 handle is released as soon
# as the arrays have been copied into memory; it used to stay open for the
# rest of the session. `f` is therefore a closed file after this block.
with h5py.File("deeplift.imp_score.h5","r") as f:
    # NOTE(review): the comparison is against a str; if the installed h5py
    # returns bytes for this dataset the mask would select nothing - confirm.
    nanog_mask = np.array(f['metadata']['interval_from_task'][:]=='Nanog')
    nanog_profile_wn_hypimp = np.array(f["hyp_imp/Nanog/profile/wn"][:])[nanog_mask]
    onehot_seq = np.array(f["inputs/seq"][:])[nanog_mask]
# Element-wise product of the hypothetical scores with the input sequence
# array (passed to modisco as `one_hot`), i.e. the scores at the bases that
# are actually present.
nanog_profile_wn_contribs = nanog_profile_wn_hypimp*onehot_seq
import modisco
# Build the modisco track set for the single task "Nanog_profile_wn" from the
# contribution scores, the hypothetical contributions and the input sequences.
# The result is passed as `track_set` when the saved results are reloaded.
track_set = modisco.tfmodisco_workflow.workflow.prep_track_set(
task_names=["Nanog_profile_wn"],
contrib_scores={'Nanog_profile_wn': nanog_profile_wn_contribs},
hypothetical_contribs={'Nanog_profile_wn': nanog_profile_wn_hypimp},
one_hot=onehot_seq)
#grp = h5py.File("/oak/stanford/groups/akundaje/avsec/basepair/data/processed/comparison/output/nexus"
# +",peaks,OSNK,0,10,1,FALSE,same,0.5,64,25,0.004,9,FALSE,[1,50],TRUE,FALSE,1/deeplift"
# +"/Nanog/out/profile/wn/modisco.h5","r")
# Reload previously saved TF-MoDISco results from modisco.h5, attaching the
# track set built from the current score arrays.
# The `with` block closes the file even if from_hdf5 raises; the explicit
# grp.close() it replaces was skipped on error. `grp` is closed afterwards,
# exactly as before.
with h5py.File("modisco.h5","r") as grp:
    loaded_tfmodisco_results = (
        modisco.tfmodisco_workflow.workflow.TfModiscoResults.from_hdf5(
            grp, track_set=track_set))
# Patterns stored under the "metacluster_0" entry of the loaded results.
patterns = (loaded_tfmodisco_results
            .metacluster_idx_to_submetacluster_results["metacluster_0"]
            .seqlets_to_patterns_result.patterns)
# Notebook display expression: number of final seqlets in the loaded results.
# The bare literal on the following line is the value the notebook showed.
len(loaded_tfmodisco_results.multitask_seqlet_creation_results.final_seqlets)
98300
# Notebook display expression: shape of the Nanog contribution array, followed
# by the value the notebook showed.
# presumably (examples, positions, bases) - TODO confirm
nanog_profile_wn_contribs.shape
(55233, 1000, 4)
# Saving the seqlets (DISABLED).
# Everything below is wrapped in a bare triple-quoted string, so it is only
# evaluated as a string expression and never executed. If re-enabled it would:
#   * walk over every final seqlet of the loaded results,
#   * keep the seqlets whose start is >= window_around and whose end is
#     <= 1000 - window_around (1000 is hard-coded),
#   * slice the contribution, hypothetical and one-hot arrays from
#     start - window_around to end + window_around for that example,
#   * np.save the three stacked arrays to extracted_*.npy files.
"""extracted_contrib_scores = []
extracted_hypothetical_scores = []
extracted_onehot_seqs = []
seqlets_list = loaded_tfmodisco_results.multitask_seqlet_creation_results.final_seqlets
window_around = 50
#extract +/- 50bp around each seqlet
for seqlet in seqlets_list:
example_idx = seqlet.coor.example_idx
start = seqlet.coor.start
end = seqlet.coor.end
if ((start>=window_around) and (end<=1000-window_around)):
extracted_contrib_scores.append(
nanog_profile_wn_contribs[example_idx,start-window_around:end+window_around])
extracted_hypothetical_scores.append(
nanog_profile_wn_hypimp[example_idx,start-window_around:end+window_around])
extracted_onehot_seqs.append(
onehot_seq[example_idx,start-window_around:end+window_around])
np.save("extracted_contrib_scores.npy", np.array(extracted_contrib_scores))
np.save("extracted_hypothetical_scores.npy", np.array(extracted_hypothetical_scores))
np.save("extracted_onehot.npy", np.array(extracted_onehot_seqs))"""
'extracted_contrib_scores = []\nextracted_hypothetical_scores = []\nextracted_onehot_seqs = []\nseqlets_list = loaded_tfmodisco_results.multitask_seqlet_creation_results.final_seqlets\n\nwindow_around = 50\n\n#extract +/- 50bp around each seqlet\nfor seqlet in seqlets_list:\n example_idx = seqlet.coor.example_idx\n start = seqlet.coor.start\n end = seqlet.coor.end\n if ((start>=window_around) and (end<=1000-window_around)):\n extracted_contrib_scores.append(\n nanog_profile_wn_contribs[example_idx,start-window_around:end+window_around])\n extracted_hypothetical_scores.append(\n nanog_profile_wn_hypimp[example_idx,start-window_around:end+window_around])\n extracted_onehot_seqs.append(\n onehot_seq[example_idx,start-window_around:end+window_around])\n\nnp.save("extracted_contrib_scores.npy", np.array(extracted_contrib_scores))\nnp.save("extracted_hypothetical_scores.npy", np.array(extracted_hypothetical_scores))\nnp.save("extracted_onehot.npy", np.array(extracted_onehot_seqs))'
!ls
523a1a82c23711ea9f9157d4c043164e.bin 523a1a82c23711ea9f9157d4c043164e_graph.bin 523a1a82c23711ea9f9157d4c043164e_graph.weights 523a1a82c23711ea9f9157d4c043164e.tree agkm_50kseqlets_allpattern_hits.txt agkm_50kseqlets_esrrb_hits.txt agkm_50kseqlets_goingin.txt agkm_50kseqlets_laststage.txt agkmfewerseqles_newclustermerging.h5 agkmfewerseqles_noclustermerging.h5 agkmfewerseqles_withmotifsprereassignment.h5 agkm_fewerseqlets_allpattern_hits.txt agkm_fewerseqlets_esrrb_hits.txt agkm_fewerseqlets_goingin.txt agkm_fewerseqlets_laststage.txt agkm_r2_allpattern_hits.txt agkm_r2_esrrb_hits.txt agkm_r2_goingin.txt agkm_r2_laststage.txt BpNet-Viz.ipynb commit0975f52_l6g2m0_results.hdf5 commit20e04f_corrthresh0_nomemeinit_4layerembedding_results.hdf5 commit20e04f_corrthresh0p1_nomemeinit_4layerembedding_results.hdf5 commit20e04f_nomemeinit_4layerembedding_results.hdf5 commit891181_memeinit_l6g2m0_results.hdf5 commit891181_nomemeinit_l6g2m0_results.hdf5 commit97f7d5b_agkm_default_results.hdf5 commitb86c5a_l6g2m0_results.hdf5 CompareBPNetExemplarHitScoring.ipynb copy_data.py copy_model.sh dcfeda32c23611ea9f9157d4c043164e.bin deeplift.imp_score.h5 embedding_model.h5 esrrb_2pcseqlets_hits.txt ExemplarHitScoringDev.ipynb extracted_contrib_scores.npy extracted_hypothetical_scores.npy extracted_onehot.npy gappedkmers_l6g2m0_results.hdf5 instances.parq LocalNormSeqletIdentification-Copy1.ipynb MakeEmbeddingModelAndRunTfModisco_corrthresh-0.1.ipynb MakeEmbeddingModelAndRunTfModisco.ipynb meme_out model.h5 modisco.h5 myrun_results.hdf5 nanog_profile_wn_contribs_fewerseqlets.npy nanog_profile_wn_hypimp_fewerseqlets.npy null.deeplift.imp_score.h5 onehot_seq_fewerseqlets.npy presparserun_l8g3m2_results.hdf5 presparserun_results.hdf5 ReplaceSeqletsWithAggregates-Copy1.ipynb ReplaceSeqletsWithAggregates.ipynb retained_hits.txt runme.sh sparserun_l6g2m0_results.hdf5 sparserun_results.hdf5 subset2pc_commit5fe538_agkm_default_results.hdf5 subset2pc_commitaadf4e_agkm_default_results.hdf5 
tmp_hits.txt trial1.deeplift.imp_score.h5 trial1.modisco.h5 TryBpNet_AGKM_embeddings_fewerseqlets-Copy1.ipynb TryBpNet_AGKM_embeddings_fewerseqlets.ipynb TryBpNet_AGKM_embeddings.ipynb TryBpNet.ipynb TryBpNet-MEMEinit.ipynb TryBpNet_reducemem.ipynb TryBpNet_v0.5.10.2.ipynb TryBpNet_v0.5.11.0.ipynb Trying_Advanced_GappedKmerEmbedding.ipynb Trying_FullConv_FilterEmbedding.ipynb Untitled.ipynb v0.5.10.0.hdf5 v0.5.10.0_variablelen.hdf5 v0.5.8.0_agkm_default_results.hdf5 VariableLengthSeqletIdentification.ipynb
# Disabled: plot the forward "Nanog_profile_wn_contrib_scores" track of the
# first 10 final seqlets.
#for i in range(10):
# seqlets_list = loaded_tfmodisco_results.multitask_seqlet_creation_results.final_seqlets
# modisco.visualization.viz_sequence.plot_weights(seqlets_list[i]["Nanog_profile_wn_contrib_scores"].fwd)
# Visualize the saved patterns (DISABLED).
# The code is held in a bare triple-quoted string and therefore never runs.
# If re-enabled it would, for every entry of `patterns`, print its index and
# seqlet count and plot its forward contribution-score and sequence tracks.
"""%matplotlib inline
from modisco.visualization import viz_sequence
for idx,pattern in enumerate(patterns):
print("pattern idx",idx)
print(len(pattern.seqlets))
viz_sequence.plot_weights(
pattern["Nanog_profile_wn_contrib_scores"].fwd)
viz_sequence.plot_weights(pattern["sequence"].fwd)"""
'%matplotlib inline\nfrom modisco.visualization import viz_sequence\nfor idx,pattern in enumerate(patterns):\n print("pattern idx",idx)\n print(len(pattern.seqlets))\n viz_sequence.plot_weights(\n pattern["Nanog_profile_wn_contrib_scores"].fwd)\n viz_sequence.plot_weights(pattern["sequence"].fwd)'
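# Print the checked-out tfmodisco commit so this run can be tied to a code version, then change back to the experiment directory.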
%cd /users/avanti/tfmodisco
!git log -n 1
%cd /users/avanti/tfmodisco_bio_experiments/bpnet/trial1
from importlib import reload
%matplotlib inline
import h5py
import numpy as np
import modisco
# Force-reload the modisco modules so this cell runs against the code that is
# currently on disk (useful while the library itself is being edited).
#
# One ordered list replaces the former run of copy-pasted import/reload
# pairs. The reload order is exactly the order the pairs had. Each module is
# fetched with importlib.import_module before being reloaded, so every entry
# is guaranteed to be imported first; "modisco.util" used to be reloaded
# without an import of its own.
import importlib
import modisco

_MODISCO_MODULES_TO_RELOAD = (
    "modisco.seqlet_embedding.advanced_gapped_kmer",
    "modisco.seqlet_embedding",
    "modisco",
    "modisco.util",
    "modisco.cluster.phenograph.core",
    "modisco.cluster.phenograph.cluster",
    "modisco.cluster.phenograph",
    "modisco.cluster.core",
    "modisco.cluster",
    "modisco.affinitymat.core",
    "modisco.affinitymat.transformers",
    "modisco.tfmodisco_workflow.seqlets_to_patterns",
    "modisco.tfmodisco_workflow.workflow",
    "modisco.nearest_neighbors",
    "modisco.affinitymat",
    "modisco.aggregator",
    "modisco.value_provider",
    "modisco.core",
    "modisco.coordproducers",
    "modisco.metaclusterers",
    "modisco.clusterinit.memeinit",
)
for _module_name in _MODISCO_MODULES_TO_RELOAD:
    importlib.reload(importlib.import_module(_module_name))
%matplotlib inline
# Worker count handed to both the embedder (n_jobs) and the
# seqlets-to-patterns factory (n_cores).
N_CORES = 10

# Advanced gapped k-mer embedder factory, built first because it is an
# argument of the seqlets-to-patterns factory below.
_agkm_embedder_factory = (
    modisco.seqlet_embedding
    .advanced_gapped_kmer
    .AdvancedGappedKmerEmbedderFactory(n_jobs=N_CORES))

# Settings for the seqlets-to-patterns stage.
_seqlets_to_patterns_factory = (
    modisco.tfmodisco_workflow.seqlets_to_patterns
    .TfModiscoSeqletsToPatternsFactory(
        # Alternative MEME-based cluster initialisation, currently disabled:
        #initclusterer_factory=modisco.clusterinit.memeinit.MemeInitClustererFactory(
        # meme_command="/software/meme/5.0.1/bin/meme",
        # base_outdir="meme_out",
        # num_seqlets_to_use=10000,
        # nmotifs=20, n_jobs=4),
        use_louvain=False,
        trim_to_window_size=30,
        initial_flank_to_add=10,
        embedder_factory=_agkm_embedder_factory,
        # Explicit gapped k-mer settings, currently disabled:
        #kmer_len=6,
        #num_gaps=2,
        #num_mismatches=0,
        n_cores=N_CORES,
        final_min_cluster_size=60,
    ))

# Top-level TF-MoDISco workflow object; calling it starts the run.
# min_ and max_passing_windows_frac are deliberately given the same value.
workflow = modisco.tfmodisco_workflow.workflow.TfModiscoWorkflow(
    sliding_window_size=21,#[5,9,13,17,21],
    flank_size=10,
    target_seqlet_fdr=0.01,
    min_passing_windows_frac=0.03,
    max_passing_windows_frac=0.03,
    min_metacluster_size=2000,
    min_metacluster_size_frac=0.02,
    max_seqlets_per_metacluster=50000,
    seqlets_to_patterns_factory=_seqlets_to_patterns_factory,
)
# Run the configured workflow on the Nanog tracks (same task name and arrays
# that were used to build `track_set`).
# Long-running: in the recorded output each of the two clustering rounds
# spent about an hour in the Leiden step alone.
results = workflow(
task_names=["Nanog_profile_wn"],
contrib_scores={'Nanog_profile_wn': nanog_profile_wn_contribs},
hypothetical_contribs={'Nanog_profile_wn': nanog_profile_wn_hypimp},
one_hot=onehot_seq)
/mnt/lab_data2/avanti/tfmodisco commit 99299097c6ed0904960c15bf601fb951cf6949fd (HEAD -> reducemem3, origin/reducemem3) Author: Av Shrikumar <avanti.shrikumar@gmail.com> Date: Wed Feb 10 10:21:20 2021 -0800 further fixes /mnt/lab_data2/avanti/tfmodisco_bio_experiments/bpnet/trial1 MEMORY 4.227170304 On task Nanog_profile_wn Computing windowed sums on original Generating null dist peak(mu)= 0.00754788601747714 Computing threshold Subsampling! For increasing = True , the minimum IR precision was 0.37577224214168486 occurring at 0.0 implying a frac_neg of 0.6019793855866568 To be conservative, adjusted frac neg is 0.95 For increasing = False , the minimum IR precision was 0.48571764379950555 occurring at -1.4484976418316364e-07 implying a frac_neg of 0.9444571409915278 To be conservative, adjusted frac neg is 0.95 Thresholds from null dist were -0.026598883792757988 and 0.25093573331832886 with frac passing 0.047041 Passing windows frac was 0.047041 , which is above 0.03 ; adjusting New thresholds are 0.31916173219680793 and -0.31916173219680793 Final raw thresholds are -0.31916173219680793 and 0.31916173219680793 Final transformed thresholds are -0.9701270905407408 and 0.9701270905407408
Got 97968 coords After resolving overlaps, got 97968 seqlets Across all tasks, the weakest transformed threshold used was: 0.9700270905407408 MEMORY 6.195666944 97968 identified in total 1 activity patterns with support >= 2000 out of 2 possible patterns Metacluster sizes: [97965] Idx to activities: {0: '1'} MEMORY 6.196334592 On metacluster 0 Metacluster size 97965 limited to 50000 Relevant tasks: ('Nanog_profile_wn',) Relevant signs: (1,) TfModiscoSeqletsToPatternsFactory: seed=1234 (Round 1) num seqlets: 50000 (Round 1) Computing coarse affmat MEMORY 6.196334592 Beginning embedding computation MEMORY 6.196334592
[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers. [Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 2.1s [Parallel(n_jobs=10)]: Done 290 tasks | elapsed: 4.6s [Parallel(n_jobs=10)]: Done 790 tasks | elapsed: 9.2s [Parallel(n_jobs=10)]: Done 1490 tasks | elapsed: 15.4s [Parallel(n_jobs=10)]: Done 2390 tasks | elapsed: 23.1s [Parallel(n_jobs=10)]: Done 3490 tasks | elapsed: 32.6s [Parallel(n_jobs=10)]: Done 4790 tasks | elapsed: 43.7s [Parallel(n_jobs=10)]: Done 6290 tasks | elapsed: 56.1s [Parallel(n_jobs=10)]: Done 7990 tasks | elapsed: 1.2min [Parallel(n_jobs=10)]: Done 9890 tasks | elapsed: 1.4min [Parallel(n_jobs=10)]: Done 11950 tasks | elapsed: 1.8min [Parallel(n_jobs=10)]: Done 14250 tasks | elapsed: 2.1min [Parallel(n_jobs=10)]: Done 16750 tasks | elapsed: 2.6min [Parallel(n_jobs=10)]: Done 19450 tasks | elapsed: 3.1min [Parallel(n_jobs=10)]: Done 22350 tasks | elapsed: 3.7min [Parallel(n_jobs=10)]: Done 25410 tasks | elapsed: 4.3min [Parallel(n_jobs=10)]: Done 28710 tasks | elapsed: 4.8min [Parallel(n_jobs=10)]: Done 32210 tasks | elapsed: 5.3min [Parallel(n_jobs=10)]: Done 35910 tasks | elapsed: 5.8min [Parallel(n_jobs=10)]: Done 39770 tasks | elapsed: 6.4min [Parallel(n_jobs=10)]: Done 43870 tasks | elapsed: 7.0min [Parallel(n_jobs=10)]: Done 48170 tasks | elapsed: 7.6min [Parallel(n_jobs=10)]: Done 50000 out of 50000 | elapsed: 8.0min finished [Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers. 
[Parallel(n_jobs=10)]: Done 40 tasks | elapsed: 0.5s [Parallel(n_jobs=10)]: Done 340 tasks | elapsed: 3.4s [Parallel(n_jobs=10)]: Done 840 tasks | elapsed: 7.8s [Parallel(n_jobs=10)]: Done 1540 tasks | elapsed: 14.1s [Parallel(n_jobs=10)]: Done 2440 tasks | elapsed: 21.7s [Parallel(n_jobs=10)]: Done 3540 tasks | elapsed: 31.2s [Parallel(n_jobs=10)]: Done 4840 tasks | elapsed: 42.2s [Parallel(n_jobs=10)]: Done 6340 tasks | elapsed: 54.6s [Parallel(n_jobs=10)]: Done 8040 tasks | elapsed: 1.1min [Parallel(n_jobs=10)]: Done 9940 tasks | elapsed: 1.4min [Parallel(n_jobs=10)]: Done 12400 tasks | elapsed: 1.8min [Parallel(n_jobs=10)]: Done 17000 tasks | elapsed: 2.5min [Parallel(n_jobs=10)]: Done 22000 tasks | elapsed: 3.2min [Parallel(n_jobs=10)]: Done 27400 tasks | elapsed: 3.9min [Parallel(n_jobs=10)]: Done 33030 tasks | elapsed: 4.8min [Parallel(n_jobs=10)]: Done 39210 tasks | elapsed: 5.6min [Parallel(n_jobs=10)]: Done 45810 tasks | elapsed: 6.6min [Parallel(n_jobs=10)]: Done 50000 out of 50000 | elapsed: 7.2min finished [Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers. [Parallel(n_jobs=1)]: Done 50000 out of 50000 | elapsed: 3.1min finished
Constructing csr matrix... csr matrix made in 14.020972967147827 s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers. [Parallel(n_jobs=1)]: Done 50000 out of 50000 | elapsed: 2.9min finished
Constructing csr matrix... csr matrix made in 14.557479858398438 s Finished embedding computation in 1360.85 s MEMORY 14.074732544 Starting affinity matrix computations MEMORY 14.074732544 Batching in slices of size 1342
100%|██████████| 38/38 [13:01<00:00, 20.57s/it]
Finished affinity matrix computations in 782.17 s MEMORY 14.075088896
(Round 1) Computed coarse affmat MEMORY 13.474430976 (Round 1) Computing affinity matrix on nearest neighbors MEMORY 13.474430976 Launching nearest neighbors affmat calculation job MEMORY 13.474430976 Parallel runs completed MEMORY 13.922717696 Job completed in: 388.36 s MEMORY 13.922455552 Launching nearest neighbors affmat calculation job MEMORY 13.922455552 Parallel runs completed MEMORY 14.351634432 Job completed in: 366.18 s MEMORY 14.351634432 (Round 1) Computed affinity matrix on nearest neighbors in 773.31 s MEMORY 14.31883776 Filtered down to 41997 of 50000 (Round 1) Retained 41997 rows out of 50000 after filtering MEMORY 14.31916544 (Round 1) Computing density adapted affmat MEMORY 14.31916544 [t-SNE] Computed conditional probabilities for sample 1000 / 41997 [t-SNE] Computed conditional probabilities for sample 2000 / 41997 [t-SNE] Computed conditional probabilities for sample 3000 / 41997 [t-SNE] Computed conditional probabilities for sample 4000 / 41997 [t-SNE] Computed conditional probabilities for sample 5000 / 41997 [t-SNE] Computed conditional probabilities for sample 6000 / 41997 [t-SNE] Computed conditional probabilities for sample 7000 / 41997 [t-SNE] Computed conditional probabilities for sample 8000 / 41997 [t-SNE] Computed conditional probabilities for sample 9000 / 41997 [t-SNE] Computed conditional probabilities for sample 10000 / 41997 [t-SNE] Computed conditional probabilities for sample 11000 / 41997 [t-SNE] Computed conditional probabilities for sample 12000 / 41997 [t-SNE] Computed conditional probabilities for sample 13000 / 41997 [t-SNE] Computed conditional probabilities for sample 14000 / 41997 [t-SNE] Computed conditional probabilities for sample 15000 / 41997 [t-SNE] Computed conditional probabilities for sample 16000 / 41997 [t-SNE] Computed conditional probabilities for sample 17000 / 41997 [t-SNE] Computed conditional probabilities for sample 18000 / 41997 [t-SNE] Computed conditional probabilities for sample 19000 / 41997 
[t-SNE] Computed conditional probabilities for sample 20000 / 41997 [t-SNE] Computed conditional probabilities for sample 21000 / 41997 [t-SNE] Computed conditional probabilities for sample 22000 / 41997 [t-SNE] Computed conditional probabilities for sample 23000 / 41997 [t-SNE] Computed conditional probabilities for sample 24000 / 41997 [t-SNE] Computed conditional probabilities for sample 25000 / 41997 [t-SNE] Computed conditional probabilities for sample 26000 / 41997 [t-SNE] Computed conditional probabilities for sample 27000 / 41997 [t-SNE] Computed conditional probabilities for sample 28000 / 41997 [t-SNE] Computed conditional probabilities for sample 29000 / 41997 [t-SNE] Computed conditional probabilities for sample 30000 / 41997 [t-SNE] Computed conditional probabilities for sample 31000 / 41997 [t-SNE] Computed conditional probabilities for sample 32000 / 41997 [t-SNE] Computed conditional probabilities for sample 33000 / 41997 [t-SNE] Computed conditional probabilities for sample 34000 / 41997 [t-SNE] Computed conditional probabilities for sample 35000 / 41997 [t-SNE] Computed conditional probabilities for sample 36000 / 41997 [t-SNE] Computed conditional probabilities for sample 37000 / 41997 [t-SNE] Computed conditional probabilities for sample 38000 / 41997 [t-SNE] Computed conditional probabilities for sample 39000 / 41997 [t-SNE] Computed conditional probabilities for sample 40000 / 41997 [t-SNE] Computed conditional probabilities for sample 41000 / 41997 [t-SNE] Computed conditional probabilities for sample 41997 / 41997 [t-SNE] Mean sigma: 0.207874 (Round 1) Computing clustering MEMORY 14.31916544 Beginning preprocessing + Leiden
0%| | 0/50 [00:00<?, ?it/s]
Quality: 0.7561596221748645
12%|█▏ | 6/50 [09:47<1:12:26, 98.79s/it]
Quality: 0.756257737241806
26%|██▌ | 13/50 [17:40<43:49, 71.06s/it]
Quality: 0.7563831136214795
60%|██████ | 30/50 [37:33<21:53, 65.67s/it]
Quality: 0.7564157093885451
70%|███████ | 35/50 [42:12<14:24, 57.64s/it]
Quality: 0.7566731316389486
100%|██████████| 50/50 [1:00:10<00:00, 72.20s/it]
Got 36 clusters after round 1 Counts: {17: 919, 7: 2063, 10: 1803, 12: 1581, 3: 2693, 8: 1961, 4: 2527, 11: 1774, 13: 1546, 5: 2225, 16: 1084, 14: 1380, 9: 1944, 1: 3419, 0: 4668, 20: 657, 2: 3028, 15: 1165, 18: 766, 6: 2137, 19: 683, 21: 592, 31: 20, 30: 40, 23: 344, 35: 10, 28: 58, 27: 64, 24: 133, 32: 20, 25: 93, 29: 44, 22: 446, 26: 83, 34: 12, 33: 15} MEMORY 14.06961664 (Round 1) Aggregating seqlets in each cluster MEMORY 14.06961664 Aggregating for cluster 0 with 4668 seqlets MEMORY 14.06961664 Trimming eliminated 0 seqlets out of 4668 Skipped 1 seqlets Removed 12 duplicate seqlets Aggregating for cluster 1 with 3419 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 3419 Skipped 1 seqlets Removed 7 duplicate seqlets Aggregating for cluster 2 with 3028 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 3028 Removed 14 duplicate seqlets Aggregating for cluster 3 with 2693 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 2693 Removed 27 duplicate seqlets Aggregating for cluster 4 with 2527 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 2527 Removed 22 duplicate seqlets Aggregating for cluster 5 with 2225 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 2225 Skipped 2 seqlets Removed 7 duplicate seqlets Aggregating for cluster 6 with 2137 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 2137 Aggregating for cluster 7 with 2063 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 2063 Skipped 3 seqlets Removed 19 duplicate seqlets Aggregating for cluster 8 with 1961 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1961 Skipped 1 seqlets Removed 13 duplicate seqlets Aggregating for cluster 9 with 1944 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1944 Removed 5 duplicate seqlets Aggregating for cluster 10 with 1803 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1803 Skipped 1 seqlets Removed 10 duplicate 
seqlets Aggregating for cluster 11 with 1774 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1774 Removed 29 duplicate seqlets Aggregating for cluster 12 with 1581 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1581 Removed 5 duplicate seqlets Removed 1 duplicate seqlets Aggregating for cluster 13 with 1546 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1546 Removed 5 duplicate seqlets Aggregating for cluster 14 with 1380 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1380 Removed 4 duplicate seqlets Aggregating for cluster 15 with 1165 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1165 Skipped 1 seqlets Removed 3 duplicate seqlets Aggregating for cluster 16 with 1084 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 1084 Removed 4 duplicate seqlets Aggregating for cluster 17 with 919 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 919 Removed 3 duplicate seqlets Aggregating for cluster 18 with 766 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 766 Aggregating for cluster 19 with 683 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 683 Removed 1 duplicate seqlets Aggregating for cluster 20 with 657 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 657 Removed 2 duplicate seqlets Aggregating for cluster 21 with 592 seqlets MEMORY 14.069620736 Trimming eliminated 0 seqlets out of 592 Removed 1 duplicate seqlets Aggregating for cluster 22 with 446 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 446 Aggregating for cluster 23 with 344 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 344 Aggregating for cluster 24 with 133 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 133 Aggregating for cluster 25 with 93 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 93 Removed 2 duplicate seqlets Aggregating for cluster 26 with 83 seqlets MEMORY 
14.069624832 Trimming eliminated 0 seqlets out of 83 Aggregating for cluster 27 with 64 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 64 Removed 3 duplicate seqlets Aggregating for cluster 28 with 58 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 58 Aggregating for cluster 29 with 44 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 44 Aggregating for cluster 30 with 40 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 40 Removed 4 duplicate seqlets Aggregating for cluster 31 with 20 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 20 Aggregating for cluster 32 with 20 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 20 Aggregating for cluster 33 with 15 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 15 Aggregating for cluster 34 with 12 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 12 Aggregating for cluster 35 with 10 seqlets MEMORY 14.069624832 Trimming eliminated 0 seqlets out of 10 (Round 2) num seqlets: 41729 (Round 2) Computing coarse affmat MEMORY 14.069624832 Beginning embedding computation MEMORY 14.069624832
[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers. [Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 3.1s [Parallel(n_jobs=10)]: Done 470 tasks | elapsed: 6.2s [Parallel(n_jobs=10)]: Done 1470 tasks | elapsed: 13.3s [Parallel(n_jobs=10)]: Done 2870 tasks | elapsed: 23.2s [Parallel(n_jobs=10)]: Done 4670 tasks | elapsed: 36.1s [Parallel(n_jobs=10)]: Done 6870 tasks | elapsed: 52.2s [Parallel(n_jobs=10)]: Done 9470 tasks | elapsed: 1.2min [Parallel(n_jobs=10)]: Done 12470 tasks | elapsed: 1.5min [Parallel(n_jobs=10)]: Done 15870 tasks | elapsed: 1.8min [Parallel(n_jobs=10)]: Done 19670 tasks | elapsed: 2.2min [Parallel(n_jobs=10)]: Done 23870 tasks | elapsed: 2.6min [Parallel(n_jobs=10)]: Done 28310 tasks | elapsed: 3.2min [Parallel(n_jobs=10)]: Done 33310 tasks | elapsed: 3.7min [Parallel(n_jobs=10)]: Done 38710 tasks | elapsed: 4.2min [Parallel(n_jobs=10)]: Done 41710 out of 41729 | elapsed: 4.6min remaining: 0.1s [Parallel(n_jobs=10)]: Done 41729 out of 41729 | elapsed: 4.6min finished [Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers. 
[Parallel(n_jobs=10)]: Done 40 tasks | elapsed: 0.3s [Parallel(n_jobs=10)]: Done 620 tasks | elapsed: 4.4s [Parallel(n_jobs=10)]: Done 1620 tasks | elapsed: 11.5s [Parallel(n_jobs=10)]: Done 3020 tasks | elapsed: 21.3s [Parallel(n_jobs=10)]: Done 4820 tasks | elapsed: 34.3s [Parallel(n_jobs=10)]: Done 7020 tasks | elapsed: 49.7s [Parallel(n_jobs=10)]: Done 9490 tasks | elapsed: 1.2min [Parallel(n_jobs=10)]: Done 12490 tasks | elapsed: 1.5min [Parallel(n_jobs=10)]: Done 15890 tasks | elapsed: 1.8min [Parallel(n_jobs=10)]: Done 19690 tasks | elapsed: 2.2min [Parallel(n_jobs=10)]: Done 23890 tasks | elapsed: 2.7min [Parallel(n_jobs=10)]: Done 28490 tasks | elapsed: 3.1min [Parallel(n_jobs=10)]: Done 33490 tasks | elapsed: 3.7min [Parallel(n_jobs=10)]: Done 38760 tasks | elapsed: 4.4min [Parallel(n_jobs=10)]: Done 41729 out of 41729 | elapsed: 4.7min finished [Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers. [Parallel(n_jobs=1)]: Done 41729 out of 41729 | elapsed: 2.4min finished
Constructing csr matrix... csr matrix made in 11.846987962722778 s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers. [Parallel(n_jobs=1)]: Done 41729 out of 41729 | elapsed: 2.5min finished
Constructing csr matrix... csr matrix made in 11.435955047607422 s Finished embedding computation in 927.76 s MEMORY 15.297540096 Starting affinity matrix computations MEMORY 15.297277952 Batching in slices of size 1608
100%|██████████| 26/26 [07:26<00:00, 17.18s/it]
Finished affinity matrix computations in 447.25 s MEMORY 15.297277952
(Round 2) Computed coarse affmat MEMORY 14.800371712 (Round 2) Computing affinity matrix on nearest neighbors MEMORY 14.800371712 Launching nearest neighbors affmat calculation job MEMORY 15.107506176 Parallel runs completed MEMORY 15.232884736 Job completed in: 435.44 s MEMORY 15.232622592 Launching nearest neighbors affmat calculation job MEMORY 15.232622592 Parallel runs completed MEMORY 15.419973632 Job completed in: 518.4 s MEMORY 15.419711488 (Round 2) Computed affinity matrix on nearest neighbors in 967.19 s MEMORY 15.006138368 Not applying filtering for rounds above first round MEMORY 15.006138368 (Round 2) Computing density adapted affmat MEMORY 15.037239296 [t-SNE] Computed conditional probabilities for sample 1000 / 41729 [t-SNE] Computed conditional probabilities for sample 2000 / 41729 [t-SNE] Computed conditional probabilities for sample 3000 / 41729 [t-SNE] Computed conditional probabilities for sample 4000 / 41729 [t-SNE] Computed conditional probabilities for sample 5000 / 41729 [t-SNE] Computed conditional probabilities for sample 6000 / 41729 [t-SNE] Computed conditional probabilities for sample 7000 / 41729 [t-SNE] Computed conditional probabilities for sample 8000 / 41729 [t-SNE] Computed conditional probabilities for sample 9000 / 41729 [t-SNE] Computed conditional probabilities for sample 10000 / 41729 [t-SNE] Computed conditional probabilities for sample 11000 / 41729 [t-SNE] Computed conditional probabilities for sample 12000 / 41729 [t-SNE] Computed conditional probabilities for sample 13000 / 41729 [t-SNE] Computed conditional probabilities for sample 14000 / 41729 [t-SNE] Computed conditional probabilities for sample 15000 / 41729 [t-SNE] Computed conditional probabilities for sample 16000 / 41729 [t-SNE] Computed conditional probabilities for sample 17000 / 41729 [t-SNE] Computed conditional probabilities for sample 18000 / 41729 [t-SNE] Computed conditional probabilities for sample 19000 / 41729 [t-SNE] Computed conditional 
probabilities for sample 20000 / 41729 [t-SNE] Computed conditional probabilities for sample 21000 / 41729 [t-SNE] Computed conditional probabilities for sample 22000 / 41729 [t-SNE] Computed conditional probabilities for sample 23000 / 41729 [t-SNE] Computed conditional probabilities for sample 24000 / 41729 [t-SNE] Computed conditional probabilities for sample 25000 / 41729 [t-SNE] Computed conditional probabilities for sample 26000 / 41729 [t-SNE] Computed conditional probabilities for sample 27000 / 41729 [t-SNE] Computed conditional probabilities for sample 28000 / 41729 [t-SNE] Computed conditional probabilities for sample 29000 / 41729 [t-SNE] Computed conditional probabilities for sample 30000 / 41729 [t-SNE] Computed conditional probabilities for sample 31000 / 41729 [t-SNE] Computed conditional probabilities for sample 32000 / 41729 [t-SNE] Computed conditional probabilities for sample 33000 / 41729 [t-SNE] Computed conditional probabilities for sample 34000 / 41729 [t-SNE] Computed conditional probabilities for sample 35000 / 41729 [t-SNE] Computed conditional probabilities for sample 36000 / 41729 [t-SNE] Computed conditional probabilities for sample 37000 / 41729 [t-SNE] Computed conditional probabilities for sample 38000 / 41729 [t-SNE] Computed conditional probabilities for sample 39000 / 41729 [t-SNE] Computed conditional probabilities for sample 40000 / 41729 [t-SNE] Computed conditional probabilities for sample 41000 / 41729 [t-SNE] Computed conditional probabilities for sample 41729 / 41729 [t-SNE] Mean sigma: 0.219380 (Round 2) Computing clustering MEMORY 15.11489536 Beginning preprocessing + Leiden
0%| | 0/50 [00:00<?, ?it/s]
Quality: 0.7322010420713774
2%|▏ | 1/50 [00:57<46:33, 57.01s/it]
Quality: 0.7340107999623549
6%|▌ | 3/50 [03:35<53:47, 68.67s/it]
Quality: 0.7342343444473574
8%|▊ | 4/50 [04:54<55:06, 71.89s/it]
Quality: 0.7342852098214336
10%|█ | 5/50 [06:08<54:17, 72.38s/it]
Quality: 0.7348613971762101
46%|████▌ | 23/50 [30:09<33:16, 73.95s/it]
Quality: 0.735031852765332
72%|███████▏ | 36/50 [44:46<13:54, 59.64s/it]
Quality: 0.7350370206481343
100%|██████████| 50/50 [1:00:58<00:00, 73.17s/it]
Got 39 clusters after round 2 Counts: {0: 4751, 3: 2771, 13: 1420, 1: 3509, 7: 2057, 11: 1767, 5: 2354, 12: 1686, 34: 20, 24: 256, 20: 524, 4: 2508, 14: 1185, 17: 820, 21: 496, 10: 1802, 8: 1994, 25: 177, 19: 542, 2: 3118, 6: 2065, 16: 874, 9: 1885, 30: 72, 36: 11, 15: 1028, 23: 263, 18: 758, 37: 10, 31: 46, 32: 44, 29: 76, 22: 438, 27: 119, 38: 10, 28: 81, 35: 16, 26: 156, 33: 20} MEMORY 15.078539264 (Round 2) Aggregating seqlets in each cluster MEMORY 15.078539264 Aggregating for cluster 0 with 4751 seqlets MEMORY 15.078539264 Trimming eliminated 0 seqlets out of 4751 Removed 216 duplicate seqlets Aggregating for cluster 1 with 3509 seqlets MEMORY 15.078539264 Trimming eliminated 0 seqlets out of 3509 Skipped 1 seqlets Removed 96 duplicate seqlets Aggregating for cluster 2 with 3118 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 3118 Removed 74 duplicate seqlets Aggregating for cluster 3 with 2771 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 2771 Removed 226 duplicate seqlets Aggregating for cluster 4 with 2508 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 2508 Skipped 1 seqlets Removed 170 duplicate seqlets Aggregating for cluster 5 with 2354 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 2354 Removed 210 duplicate seqlets Aggregating for cluster 6 with 2065 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 2065 Removed 84 duplicate seqlets Aggregating for cluster 7 with 2057 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 2057 Skipped 1 seqlets Removed 144 duplicate seqlets Aggregating for cluster 8 with 1994 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1994 Removed 132 duplicate seqlets Aggregating for cluster 9 with 1885 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1885 Removed 73 duplicate seqlets Aggregating for cluster 10 with 1802 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1802 Skipped 1 seqlets 
Removed 88 duplicate seqlets Aggregating for cluster 11 with 1767 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1767 Skipped 1 seqlets Removed 105 duplicate seqlets Aggregating for cluster 12 with 1686 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1686 Removed 288 duplicate seqlets Aggregating for cluster 13 with 1420 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1420 Removed 92 duplicate seqlets Aggregating for cluster 14 with 1185 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1185 Removed 93 duplicate seqlets Aggregating for cluster 15 with 1028 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 1028 Removed 51 duplicate seqlets Aggregating for cluster 16 with 874 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 874 Removed 53 duplicate seqlets Aggregating for cluster 17 with 820 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 820 Removed 38 duplicate seqlets Aggregating for cluster 18 with 758 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 758 Removed 91 duplicate seqlets Aggregating for cluster 19 with 542 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 542 Skipped 1 seqlets Removed 33 duplicate seqlets Aggregating for cluster 20 with 524 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 524 Removed 13 duplicate seqlets Aggregating for cluster 21 with 496 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 496 Removed 24 duplicate seqlets Aggregating for cluster 22 with 438 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 438 Removed 3 duplicate seqlets Aggregating for cluster 23 with 263 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 263 Removed 15 duplicate seqlets Aggregating for cluster 24 with 256 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 256 Removed 5 duplicate seqlets Aggregating for cluster 25 with 177 seqlets MEMORY 15.07854336 
Trimming eliminated 0 seqlets out of 177 Removed 7 duplicate seqlets Aggregating for cluster 26 with 156 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 156 Removed 38 duplicate seqlets Aggregating for cluster 27 with 119 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 119 Removed 1 duplicate seqlets Aggregating for cluster 28 with 81 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 81 Removed 1 duplicate seqlets Aggregating for cluster 29 with 76 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 76 Aggregating for cluster 30 with 72 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 72 Removed 1 duplicate seqlets Aggregating for cluster 31 with 46 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 46 Aggregating for cluster 32 with 44 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 44 Aggregating for cluster 33 with 20 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 20 Removed 3 duplicate seqlets Aggregating for cluster 34 with 20 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 20 Aggregating for cluster 35 with 16 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 16 Aggregating for cluster 36 with 11 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 11 Aggregating for cluster 37 with 10 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 10 Removed 1 duplicate seqlets Aggregating for cluster 38 with 10 seqlets MEMORY 15.07854336 Trimming eliminated 0 seqlets out of 10 Got 39 clusters Splitting into subclusters... 
MEMORY 15.07854336 Inspecting for spurious merging Wrote graph to binary file in 35.29324698448181 seconds MEMORY 16.045576192 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0035167 After 3 runs, maximum modularity is Q = 0.00353631 After 4 runs, maximum modularity is Q = 0.0036014 After 10 runs, maximum modularity is Q = 0.00360187 Louvain completed 30 runs in 82.25335907936096 seconds Similarity is 0.93307614; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 19.931432723999023 seconds MEMORY 15.64338176 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00602221 After 3 runs, maximum modularity is Q = 0.00602287 After 18 runs, maximum modularity is Q = 0.00602288 After 36 runs, maximum modularity is Q = 0.00602289 Louvain completed 56 runs in 76.18261122703552 seconds Similarity is 0.8876958; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 16.203216552734375 seconds MEMORY 15.5198464 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0079132 After 2 runs, maximum modularity is Q = 0.00821755 Louvain completed 22 runs in 26.59377408027649 seconds Similarity is 0.9483532; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 10.944892406463623 seconds MEMORY 15.3812992 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00570488 After 4 runs, maximum modularity is Q = 0.00570495 After 9 runs, maximum modularity is Q = 0.00570496 Louvain completed 29 runs in 30.494176864624023 seconds Similarity is 0.82006204; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 9.015119791030884 seconds MEMORY 15.327481856 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00549338 After 3 runs, maximum modularity is Q = 0.00549365 After 5 runs, maximum modularity is Q = 0.00549389 After 6 
runs, maximum modularity is Q = 0.00549405 After 9 runs, maximum modularity is Q = 0.00549411 Louvain completed 29 runs in 27.354629516601562 seconds Similarity is 0.8526772; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 7.72732400894165 seconds MEMORY 15.277051904 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00529494 After 2 runs, maximum modularity is Q = 0.0052952 Louvain completed 22 runs in 19.65471863746643 seconds Similarity is 0.84580207; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 6.407199382781982 seconds MEMORY 15.242080256 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00454161 After 4 runs, maximum modularity is Q = 0.00454167 After 18 runs, maximum modularity is Q = 0.00454203 Louvain completed 38 runs in 36.75714039802551 seconds Similarity is 0.8743118; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 5.9826507568359375 seconds MEMORY 15.213617152 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0050971 After 2 runs, maximum modularity is Q = 0.00509741 After 3 runs, maximum modularity is Q = 0.00509749 Louvain completed 23 runs in 19.226727724075317 seconds Similarity is 0.86179835; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 5.7875940799713135 seconds MEMORY 15.207579648 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00744901 Louvain completed 21 runs in 16.012732982635498 seconds Similarity is 0.7859383; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 1.831265926361084 seconds MEMORY 15.132643328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00531805 Louvain completed 21 runs in 11.87748384475708 seconds Similarity is 0.86380917; is_dissimilar is False Inspecting for spurious merging 
Wrote graph to binary file in 1.202864646911621 seconds MEMORY 15.132643328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00570864 After 2 runs, maximum modularity is Q = 0.00570877 Louvain completed 22 runs in 11.651382446289062 seconds Similarity is 0.86664426; is_dissimilar is False Got 2 subclusters Inspecting for spurious merging Wrote graph to binary file in 5.769506454467773 seconds MEMORY 15.195742208 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00697174 After 2 runs, maximum modularity is Q = 0.006972 After 3 runs, maximum modularity is Q = 0.00697203 Louvain completed 23 runs in 17.48375964164734 seconds Similarity is 0.8594708; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 5.136689901351929 seconds MEMORY 15.243132928 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00382016 After 2 runs, maximum modularity is Q = 0.00382029 Louvain completed 22 runs in 17.78551983833313 seconds Similarity is 0.94444716; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 4.841457366943359 seconds MEMORY 15.236653056 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00462296 After 2 runs, maximum modularity is Q = 0.00462366 After 9 runs, maximum modularity is Q = 0.00462373 After 17 runs, maximum modularity is Q = 0.00462376 Louvain completed 37 runs in 29.013113975524902 seconds Similarity is 0.8680991; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 3.4617502689361572 seconds MEMORY 15.183781888 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0114846 Louvain completed 21 runs in 13.263652324676514 seconds Similarity is 0.90738046; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 3.095365047454834 seconds MEMORY 15.164993536 Running Louvain modularity 
optimization After 1 runs, maximum modularity is Q = 0.0051117 After 2 runs, maximum modularity is Q = 0.00511176 After 6 runs, maximum modularity is Q = 0.00511185 After 8 runs, maximum modularity is Q = 0.00511186 After 11 runs, maximum modularity is Q = 0.00511198 After 31 runs, maximum modularity is Q = 0.00511209 Louvain completed 51 runs in 33.09899711608887 seconds Similarity is 0.8621207; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 2.0984137058258057 seconds MEMORY 15.1139328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00578566 Louvain completed 21 runs in 11.835179805755615 seconds Similarity is 0.83924794; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 1.6748793125152588 seconds MEMORY 15.1139328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00733574 Louvain completed 21 runs in 11.3117036819458 seconds Similarity is 0.8994125; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 1.1274290084838867 seconds MEMORY 15.1139328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00697723 After 2 runs, maximum modularity is Q = 0.00697724 Louvain completed 22 runs in 11.939918994903564 seconds Similarity is 0.79851484; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.2937178611755371 seconds MEMORY 15.1139328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0054777 After 2 runs, maximum modularity is Q = 0.00637933 Louvain completed 22 runs in 11.615338325500488 seconds Similarity is 0.8410438; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.2917516231536865 seconds MEMORY 15.1139328 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00427911 After 2 runs, maximum modularity is Q = 0.00439068 After 4 runs, 
maximum modularity is Q = 0.00440417 Louvain completed 24 runs in 12.178392887115479 seconds Similarity is 0.88598204; is_dissimilar is False Got 2 subclusters Inspecting for spurious merging Wrote graph to binary file in 1.050516128540039 seconds MEMORY 15.067369472 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00990382 After 2 runs, maximum modularity is Q = 0.00990383 Louvain completed 22 runs in 11.805655717849731 seconds Similarity is 0.85944176; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.7751307487487793 seconds MEMORY 15.067369472 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0185604 Louvain completed 21 runs in 10.46213960647583 seconds Similarity is 0.8311509; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.45108938217163086 seconds MEMORY 15.067369472 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00444484 Louvain completed 21 runs in 10.02882981300354 seconds Similarity is 0.87520206; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.4597961902618408 seconds MEMORY 15.067369472 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0167757 Louvain completed 21 runs in 10.812129974365234 seconds Similarity is 0.913223; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.39180707931518555 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00568533 After 3 runs, maximum modularity is Q = 0.00611416 After 4 runs, maximum modularity is Q = 0.00611956 After 12 runs, maximum modularity is Q = 0.00612232 Louvain completed 32 runs in 17.815216779708862 seconds Similarity is 0.86466414; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.32753634452819824 seconds MEMORY 15.067373568 
Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0126803 Louvain completed 21 runs in 10.553840160369873 seconds Similarity is 0.6034484; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.14125800132751465 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0148422 Louvain completed 21 runs in 10.55634593963623 seconds Similarity is 0.49824274; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.05579185485839844 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00979133 Louvain completed 21 runs in 10.474316596984863 seconds Similarity is 0.7196858; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.024448871612548828 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00569521 After 5 runs, maximum modularity is Q = 0.00577104 Louvain completed 25 runs in 13.017910242080688 seconds Similarity is 0.7600992; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.01274418830871582 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00409699 After 2 runs, maximum modularity is Q = 0.0043624 After 3 runs, maximum modularity is Q = 0.00476095 After 7 runs, maximum modularity is Q = 0.0049405 Louvain completed 27 runs in 14.48758840560913 seconds Similarity is 0.7136414; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.007965803146362305 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00620169 After 3 runs, maximum modularity is Q = 0.00628582 Louvain completed 23 runs in 11.745251893997192 seconds Similarity is 0.8443024; is_dissimilar is False Inspecting for spurious merging 
Wrote graph to binary file in 0.021912574768066406 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00372796 After 2 runs, maximum modularity is Q = 0.00381008 Louvain completed 22 runs in 11.620916604995728 seconds Similarity is 0.9270509; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.04184293746948242 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0124428 After 3 runs, maximum modularity is Q = 0.0124549 Louvain completed 23 runs in 11.995532274246216 seconds Similarity is 0.864118; is_dissimilar is False Got 6 subclusters Inspecting for spurious merging Wrote graph to binary file in 0.10737204551696777 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0100902 Louvain completed 21 runs in 10.48461127281189 seconds Similarity is 0.6885791; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.05280637741088867 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0039414 After 2 runs, maximum modularity is Q = 0.00455923 After 7 runs, maximum modularity is Q = 0.00463545 Louvain completed 27 runs in 14.33355712890625 seconds Similarity is 0.8942932; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.012663125991821289 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0154124 Louvain completed 21 runs in 10.761322975158691 seconds Similarity is 0.8473745; is_dissimilar is False Got 2 subclusters Inspecting for spurious merging Wrote graph to binary file in 0.11027860641479492 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0125176 Louvain completed 21 runs in 10.373656272888184 seconds Similarity is 
0.74810433; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.05240893363952637 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0132504 Louvain completed 21 runs in 10.413550853729248 seconds Similarity is 0.7806651; is_dissimilar is True Inspecting for spurious merging Wrote graph to binary file in 0.03396010398864746 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00549504 Louvain completed 21 runs in 10.551645040512085 seconds Similarity is 0.81753504; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.012459754943847656 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0144228 After 2 runs, maximum modularity is Q = 0.0144318 Louvain completed 22 runs in 11.2331702709198 seconds Similarity is 0.8495423; is_dissimilar is False Got 3 subclusters Inspecting for spurious merging Wrote graph to binary file in 0.06672430038452148 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.00922513 Louvain completed 21 runs in 10.726106643676758 seconds Similarity is 0.83493507; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.025907039642333984 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0193431 Louvain completed 21 runs in 10.302609920501709 seconds Similarity is 0.90042657; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.025609970092773438 seconds MEMORY 15.067373568 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0130489 After 2 runs, maximum modularity is Q = 0.0168431 Louvain completed 22 runs in 11.548471927642822 seconds Similarity is 0.8715037; is_dissimilar is False Inspecting for 
spurious merging Wrote graph to binary file in 0.012748241424560547 seconds MEMORY 15.067377664 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0146273 After 2 runs, maximum modularity is Q = 0.0349524 Louvain completed 22 runs in 11.304418087005615 seconds Similarity is 0.52975154; is_dissimilar is True Got 2 subclusters Inspecting for spurious merging Wrote graph to binary file in 0.01141214370727539 seconds MEMORY 15.067377664 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0177272 Louvain completed 21 runs in 10.573816776275635 seconds Similarity is 0.9128522; is_dissimilar is False Inspecting for spurious merging Wrote graph to binary file in 0.017812728881835938 seconds MEMORY 15.067377664 Running Louvain modularity optimization After 1 runs, maximum modularity is Q = 0.0326612 After 2 runs, maximum modularity is Q = 0.0327666 Louvain completed 22 runs in 11.538895845413208 seconds Similarity is 0.61184216; is_dissimilar is True Got 2 subclusters Merging on 51 clusters MEMORY 15.067377664 On merging iteration 1 Numbers for each pattern pre-subsample: [4535, 3412, 3044, 2545, 2337, 2144, 1981, 1912, 1025, 837, 1812, 1713, 1661, 1398, 1328, 1092, 977, 412, 409, 782, 667, 508, 511, 472, 46, 151, 108, 62, 34, 34, 168, 80, 138, 79, 34, 170, 118, 118, 40, 40, 76, 37, 34, 46, 44, 17, 20, 16, 11, 9, 10] Numbers after subsampling: [300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 46, 151, 108, 62, 34, 34, 168, 80, 138, 79, 34, 170, 118, 118, 40, 40, 76, 37, 34, 46, 44, 17, 20, 16, 11, 9, 10] Cluster sizes [4535 3412 3044 2545 2337 2144 1981 1912 1025 837 1812 1713 1661 1398 1328 1092 977 412 409 782 667 508 511 472 46 151 108 62 34 34 168 80 138 79 34 170 118 118 40 40 76 37 34 46 44 17 20 16 11 9 10] Cross-contamination matrix: [[1. 0.67 0.01 ... 0.05 0.09 0.01] [0.73 1. 0.03 ... 0.06 0.04 0.02] [0.08 0.11 1. ... 
0.1 0.16 0.51] ... [0. 0. 0. ... 1. 0. 0. ] [0. 0. 0. ... 0. 1. 0. ] [0. 0. 0. ... 0. 0. 1. ]] Pattern-to-pattern sim matrix: [[1. 0.82 0.11 ... 0.37 0.29 0.12] [0.82 1. 0.21 ... 0.39 0.31 0.19] [0.11 0.21 1. ... 0.2 0.28 0.42] ... [0.39 0.41 0.21 ... 1. 0.2 0.23] [0.3 0.35 0.28 ... 0.2 1. 0.16] [0.13 0.21 0.42 ... 0.24 0.16 1. ]] Collapsing 0 & 32 with crosscontam 0.935315618357488 and sim 0.9644776816770225 Collapsing 1 & 22 with crosscontam 0.7439919150617285 and sim 0.9015113876542068 Collapsing 1 & 32 with crosscontam 0.8395530428126271 and sim 0.9000446419643353 Collapsing 5 & 28 with crosscontam 0.8875731057398738 and sim 0.8900539593410017 Collapsing 10 & 21 with crosscontam 0.7261318312345679 and sim 0.8759707915253263 Collapsing 7 & 32 with crosscontam 0.722864054749388 and sim 0.8717348540396053 Collapsing 6 & 10 with crosscontam 0.7507281966666667 and sim 0.8620943508535983 Collapsing 22 & 34 with crosscontam 0.6206372754860499 and sim 0.8620724275442557 Collapsing 10 & 15 with crosscontam 0.7340165951234568 and sim 0.8620617408222342 Collapsing 1 & 7 with crosscontam 0.7630233908641976 and sim 0.861149841684182 Collapsing 10 & 11 with crosscontam 0.6840879422222224 and sim 0.8559102885032961 Aborting collapse as 11 & 21 have cross-contam 0.09513696543209893 and sim 0.4481399034518663 Collapsing 22 & 32 with crosscontam 0.6840055933977456 and sim 0.8509872840240437 Trimming eliminated 0 seqlets out of 4673 Removed 47 duplicate seqlets Trimming eliminated 1 seqlets out of 3923 Removed 10 duplicate seqlets Trimming eliminated 0 seqlets out of 8538 Skipped 6 seqlets Removed 44 duplicate seqlets Trimming eliminated 0 seqlets out of 2178 Removed 12 duplicate seqlets Trimming eliminated 0 seqlets out of 2320 Removed 8 duplicate seqlets Trimming eliminated 0 seqlets out of 10400 Removed 23 duplicate seqlets Removed 3 duplicate seqlets Trimming eliminated 0 seqlets out of 4293 Skipped 1 seqlets Removed 19 duplicate seqlets Trimming eliminated 0 seqlets out of 
10408 Removed 36 duplicate seqlets Trimming eliminated 0 seqlets out of 5365 Skipped 1 seqlets Removed 19 duplicate seqlets Removed 4 duplicate seqlets Unmerged patterns remapping: OrderedDict([(2, 2), (3, 3), (4, 4), (8, 10), (9, 12), (11, 6), (12, 7), (13, 8), (14, 9), (16, 11), (17, 16), (18, 17), (19, 13), (20, 14), (23, 15), (24, 28), (25, 20), (26, 23), (27, 27), (29, 34), (30, 19), (31, 24), (33, 25), (35, 18), (36, 21), (37, 22), (38, 31), (39, 32), (40, 26), (41, 33), (42, 35), (43, 29), (44, 30), (45, 37), (46, 36), (47, 38), (48, 39), (49, 41), (50, 40)]) Time spent on merging iteration: 1264.02308511734 On merging iteration 2 Numbers for each pattern pre-subsample: [10372, 5341, 3044, 2545, 2337, 2166, 1713, 1661, 1398, 1328, 1025, 977, 837, 782, 667, 472, 412, 409, 170, 168, 151, 118, 118, 108, 80, 79, 76, 62, 46, 46, 44, 40, 40, 37, 34, 34, 20, 17, 16, 11, 10, 9] Numbers after subsampling: [300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 170, 168, 151, 118, 118, 108, 80, 79, 76, 62, 46, 46, 44, 40, 40, 37, 34, 34, 20, 17, 16, 11, 10, 9] Cluster sizes [10372 5341 3044 2545 2337 2166 1713 1661 1398 1328 1025 977 837 782 667 472 412 409 170 168 151 118 118 108 80 79 76 62 46 46 44 40 40 37 34 34 20 17 16 11 10 9] Cross-contamination matrix: [[1. 0.57 0.52 ... 0.62 0.51 0.49] [0.48 1. 0.35 ... 1. 0.61 0.82] [0.09 0.07 1. ... 0.1 0.51 0.16] ... [0. 0. 0. ... 1. 0. 0. ] [0. 0. 0. ... 0. 1. 0. ] [0. 0. 0. ... 0. 0. 1. ]] Pattern-to-pattern sim matrix: [[1. 0.45 0.16 ... 0.4 0.15 0.24] [0.45 1. 0.16 ... 0.71 0.31 0.58] [0.16 0.16 1. ... 0.2 0.42 0.28] ... [0.41 0.74 0.21 ... 1. 0.23 0.2 ] [0.16 0.34 0.42 ... 0.24 1. 0.16] [0.27 0.65 0.28 ... 0.2 0.16 1. 
]] Collapsing 1 & 6 with crosscontam 0.7592684666666666 and sim 0.8654233820389916 Collapsing 1 & 28 with crosscontam 0.8554507093504287 and sim 0.8401876731063171 Trimming eliminated 0 seqlets out of 7054 Skipped 2 seqlets Removed 7 duplicate seqlets Removed 5 duplicate seqlets Trimming eliminated 0 seqlets out of 7086 Removed 4 duplicate seqlets Unmerged patterns remapping: OrderedDict([(0, 0), (2, 2), (3, 3), (4, 4), (5, 5), (7, 6), (8, 7), (9, 8), (10, 9), (11, 10), (12, 11), (13, 12), (14, 13), (15, 14), (16, 15), (17, 16), (18, 17), (19, 18), (20, 19), (21, 20), (22, 21), (23, 22), (24, 23), (25, 24), (26, 25), (27, 26), (29, 27), (30, 28), (31, 29), (32, 30), (33, 31), (34, 32), (35, 33), (36, 34), (37, 35), (38, 36), (39, 37), (40, 38), (41, 39)]) Time spent on merging iteration: 164.63235759735107 On merging iteration 3 Numbers for each pattern pre-subsample: [10372, 7082, 3044, 2545, 2337, 2166, 1661, 1398, 1328, 1025, 977, 837, 782, 667, 472, 412, 409, 170, 168, 151, 118, 118, 108, 80, 79, 76, 62, 46, 44, 40, 40, 37, 34, 34, 20, 17, 16, 11, 10, 9] Numbers after subsampling: [300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 170, 168, 151, 118, 118, 108, 80, 79, 76, 62, 46, 44, 40, 40, 37, 34, 34, 20, 17, 16, 11, 10, 9] Cluster sizes [10372 7082 3044 2545 2337 2166 1661 1398 1328 1025 977 837 782 667 472 412 409 170 168 151 118 118 108 80 79 76 62 46 44 40 40 37 34 34 20 17 16 11 10 9] Cross-contamination matrix: [[1. 0.51 0.52 ... 0.62 0.51 0.49] [0.79 1. 0.72 ... 1. 0.98 1. ] [0.09 0.06 1. ... 0.1 0.51 0.16] ... [0. 0. 0. ... 1. 0. 0. ] [0. 0. 0. ... 0. 1. 0. ] [0. 0. 0. ... 0. 0. 1. ]] Pattern-to-pattern sim matrix: [[1. 0.46 0.16 ... 0.4 0.15 0.24] [0.46 1. 0.16 ... 0.71 0.31 0.56] [0.16 0.16 1. ... 0.2 0.42 0.28] ... [0.41 0.73 0.21 ... 1. 0.23 0.2 ] [0.16 0.33 0.42 ... 0.24 1. 0.16] [0.27 0.63 0.28 ... 0.2 0.16 1. 
]] Got 40 patterns after merging MEMORY 15.068553216 Performing seqlet reassignment MEMORY 15.068553216 Cross contin jaccard time taken: 0.44 s Cross contin jaccard time taken: 0.48 s Discarded 10 seqlets Removed 1 duplicate seqlets Removed 1 duplicate seqlets Removed 15 duplicate seqlets Removed 1 duplicate seqlets Removed 1 duplicate seqlets Removed 9 duplicate seqlets Got 27 patterns after reassignment MEMORY 15.068684288 Total time taken is 15990.08s MEMORY 15.068684288
import h5py
import modisco.util
reload(modisco.util)
import os
# Write `results` to a fresh HDF5 file. `results` is defined earlier in the
# file (not visible here) -- presumably the TF-MoDISco workflow output.
file_path = "v0.5.11.0_reducemem.hdf5"

# Delete any file left over from a previous run before writing.
if os.path.exists(file_path):
    os.remove(file_path)

# The context manager guarantees the handle is closed even if save_hdf5
# raises; a leaked write handle would otherwise keep the file open for the
# rest of the session.
with h5py.File(file_path, "w") as grp:
    results.save_hdf5(grp)
from modisco.visualization import viz_sequence
%matplotlib inline
# Walk the saved results file and, for every pattern of every metacluster,
# print its seqlet count and plot its stored tracks.

# (metacluster_name, pattern_name) pairs in the order they are visited.
all_patterns = []
# Not used in this block; kept because later code may reference it.
# NOTE(review): looks like per-base background frequencies -- confirm.
background = np.array([0.27, 0.23, 0.23, 0.27])

# Open read-only inside a context manager so the file is closed even if a
# lookup or a plotting call raises part-way through.
with h5py.File(file_path, "r") as hdf5_results:
    # Names are read as bytes and decoded to str for use as group keys.
    metacluster_names = [
        x.decode("utf-8") for x in
        hdf5_results["metaclustering_results"]["all_metacluster_names"][:]]

    for metacluster_name in metacluster_names:
        print(metacluster_name)
        metacluster_grp = (
            hdf5_results["metacluster_idx_to_submetacluster_results"]
            [metacluster_name])
        print("activity pattern:",metacluster_grp["activity_pattern"][:])

        patterns_grp = metacluster_grp["seqlets_to_patterns_result"]["patterns"]
        all_pattern_names = [
            x.decode("utf-8") for x in patterns_grp["all_pattern_names"][:]]
        if not all_pattern_names:
            print("No motifs found for this activity pattern")

        for pattern_name in all_pattern_names:
            print(metacluster_name, pattern_name)
            all_patterns.append((metacluster_name, pattern_name))
            pattern = patterns_grp[pattern_name]
            print("total seqlets:",len(pattern["seqlets_and_alnmts"]["seqlets"]))
            print("Task 0 hypothetical scores:")
            viz_sequence.plot_weights(
                pattern["Nanog_profile_wn_hypothetical_contribs"]["fwd"])
            print("Task 0 actual importance scores:")
            viz_sequence.plot_weights(
                pattern["Nanog_profile_wn_contrib_scores"]["fwd"])
            print("onehot, fwd and rev:")
            viz_sequence.plot_weights(np.array(pattern["sequence"]["fwd"]))
            viz_sequence.plot_weights(np.array(pattern["sequence"]["rev"]))
metacluster_0 activity pattern: [1] metacluster_0 pattern_0 total seqlets: 10449 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_1 total seqlets: 7117 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_2 total seqlets: 3051 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_3 total seqlets: 2581 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_4 total seqlets: 2353 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_5 total seqlets: 2172 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_6 total seqlets: 1665 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_7 total seqlets: 1399 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_8 total seqlets: 1350 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_9 total seqlets: 1025 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_10 total seqlets: 983 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_11 total seqlets: 841 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_12 total seqlets: 790 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_13 total seqlets: 668 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_14 total seqlets: 476 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_15 total seqlets: 415 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_16 total seqlets: 416 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_17 total seqlets: 178 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_18 total seqlets: 171 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_19 total seqlets: 170 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_20 total seqlets: 126 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_21 total seqlets: 129 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_22 total seqlets: 119 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_23 total seqlets: 83 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_24 total seqlets: 88 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_25 total seqlets: 83 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev:
metacluster_0 pattern_26 total seqlets: 66 Task 0 hypothetical scores:
Task 0 actual importance scores:
onehot, fwd and rev: