#!/usr/bin/env python3
# import pp
import os
import instagraal.pyramid_sparse as pyr
# import Image
# from OpenGL.GL import *
import OpenGL.GL
from OpenGL.arrays import vbo
import numpy as np
from instagraal.cuda_lib_gl_single import sampler as sampler_lib
# from cuda_lib_gl import sampler as sampler_lib
import matplotlib.pyplot as plt
from instagraal import log
from instagraal.log import logger
logger.setLevel(log.CURRENT_LOG_LEVEL)
# cuda.init()
[docs]def kth_diag_indices(a, k):
rows, cols = np.diag_indices_from(a)
if k < 0:
return rows[:k], cols[-k:]
elif k > 0:
return rows[k:], cols[:-k]
else:
return rows, cols
[docs]class simulation:
def __init__(
self,
name,
folder_path,
fasta,
level,
n_iterations,
is_simu,
gl_window,
use_rippe,
gl_size_im,
thresh_factor=1,
output_folder=None,
):
self.name = name
self.use_rippe = use_rippe
self.str_sub_level = str(level - 1)
self.str_level = str(level)
self.thresh_factor = thresh_factor
self.data_set = name
toolbox_directory = os.path.dirname(os.path.abspath(__file__))
self.data_set_root = toolbox_directory
self.dir_home = toolbox_directory
self.data_set_root = toolbox_directory
self.fasta = fasta
# default_level = size_pyramid - 1
# self.base_folder = os.path.join(
# self.data_set_root, self.data_set
# )
self.base_folder = folder_path
if output_folder is None:
self.output_folder = os.path.join(self.data_set_root, "results")
else:
self.output_folder = output_folder
self.select_data_set(name)
self.n_iterations = n_iterations
self.gl_size_im = gl_size_im
self.int4 = np.dtype(
[
("x", np.int32),
("y", np.int32),
("z", np.int32),
("w", np.int32),
],
align=True,
)
self.float3 = np.dtype(
[("x", np.float32), ("y", np.float32), ("z", np.float32)],
align=True,
)
self.float4 = np.dtype(
[
("x", np.float32),
("y", np.float32),
("z", np.float32),
("w", np.float32),
],
align=True,
)
self.int3 = np.dtype(
[("x", np.int32), ("y", np.int32), ("z", np.int32)], align=True
)
self.int2 = np.dtype([("x", np.int32), ("y", np.int32)], align=True)
self.level = self.hic_pyr.get_level(level)
self.level.build_seq_per_bin(genome_fasta=self.fasta)
self.sub_level = self.hic_pyr.get_level(level - 1)
self.n_frags = self.level.n_frags
self.create_sub_frags()
self.mean_squared_frags_per_bin = np.float32(
(self.collect_accu_frags.mean()) ** 2
)
logger.info(
"mean frag area = {}".format(self.mean_squared_frags_per_bin)
)
self.gl_window = gl_window
# DEFINE REPEATED SEQ ####
(
self.candidate_dup,
self.data_candidate_dup,
self.sub_candidates_dup,
self.sub_candidates_output_data,
) = self.select_repeated_frags()
self.modify_vect_frags()
self.blacklist_contig()
# self.discard_low_coverage_frags()
self.load_gl_buffers()
self.create_new_sub_frags()
self.modify_sub_vect_frags()
self.gl_size_im = min(gl_size_im, self.n_frags)
self.init_gl_image()
self.sampler = sampler_lib(
use_rippe,
self.new_S_o_A_frags,
self.collector_id_repeats,
self.frag_dispatcher,
self.candidate_dup,
self.frag_blacklisted,
self.init_n_frags,
self.n_frags,
self.init_n_sub_frags,
self.n_new_sub_frags,
self.rep_sub_frags_id,
self.level.sparse_mat_csr,
self.np_sub_frags_len_bp,
self.np_sub_frags_id,
self.np_sub_frags_accu,
self.np_sub_frags_2_frags,
self.mean_squared_frags_per_bin,
self.norm_vect,
self.sub_candidates_dup,
self.sub_candidates_output_data,
self.new_sub_S_o_A_frags,
self.sub_collector_id_repeats,
self.sub_frag_dispatcher,
self.sub_level.sparse_mat_csr,
self.sub_level.mean_value_trans,
n_iterations,
is_simu,
self.gl_window,
self.pos_vbo,
self.col_vbo,
self.vel,
self.pos,
self.raw_im_init,
self.pbo_im_buffer,
self.gl_size_im,
)
# display_graph = False
display_graph = True
id_start = np.nonzero(self.sampler.gpu_vect_frags.start_bp == 0)[0]
max_dist_kb = (
self.sampler.gpu_vect_frags.l_cont_bp[id_start].max() / 1000.
)
logger.info("max dist kb = {}".format(max_dist_kb))
# size_bin_kb = self.sampler.gpu_vect_frags.len_bp.mean() / 1000.0
mean_size_bin_kb = self.new_sub_S_o_A_frags["len_bp"].mean() / 1000.0
logger.info("mean size kb = {}".format(mean_size_bin_kb))
logger.info(
"min fragment length = {}".format(
self.new_sub_S_o_A_frags["len_bp"].min() / 1000.0
)
)
if is_simu:
self.sampler.simulate_rippe_contacts(
100, 9.6, -1.5, 0.5, 1, 800, 200
)
else:
if self.use_rippe:
self.sampler.estimate_parameters_rippe(
max_dist_kb, mean_size_bin_kb / 2., display_graph
)
else:
self.sampler.estimate_parameters(
max_dist_kb, mean_size_bin_kb / 2, display_graph
)
# self.sampler.setup_texture()
[docs] def blacklist_contig(self):
# list_blacklist_manual = raw_input("ids (separated by space): ")
list_blacklist_manual = ""
if list_blacklist_manual != "":
list_blacklist_manual = list_blacklist_manual.split(" ")
candidates_blacklist = [int(i) for i in list_blacklist_manual]
else:
candidates_blacklist = []
init_vect_frags = self.level.S_o_A_frags
list_id_c = init_vect_frags["id_c"]
# candidates_blacklist = []
frag_blacklisted = []
for id_c_black in candidates_blacklist:
id_black_list = np.nonzero(list_id_c == id_c_black)[0]
for init_f in id_black_list:
dis = self.frag_dispatcher[init_f]
ids = self.collector_id_repeats[dis["x"] : dis["y"]]
frag_blacklisted.extend(list(ids))
self.frag_blacklisted = frag_blacklisted
for id_f_black in self.frag_blacklisted:
self.col_vect_frags_4_GL[id_f_black, 0] = np.float32(0)
self.col_vect_frags_4_GL[id_f_black, 1] = np.float32(0)
self.col_vect_frags_4_GL[id_f_black, 2] = np.float32(0)
self.col_vect_frags_4_GL[id_f_black, 3] = np.float32(0)
[docs] def discard_low_coverage_frags(self):
mat = np.copy(self.level.im_curr)
mat_norm = np.array(
self.norm_vect.T * self.norm_vect, dtype=np.float32
)
self.matrix_normalized = mat / mat_norm
coverage = self.matrix_normalized.sum(axis=1)
mean_coverage = coverage.mean()
std_coverage = coverage.std()
mean_coverage_ext = mean_coverage - 0.1 * std_coverage
candidates_low = np.nonzero(coverage < mean_coverage_ext)[0]
logger.info(
"n discarded frag of low coverage = {}".format(
candidates_low.shape[0]
)
)
for init_f in candidates_low:
dis = self.frag_dispatcher[init_f]
ids = self.collector_id_repeats[dis["x"] : dis["y"]]
self.frag_blacklisted.extend(list(ids))
[docs] def modify_vect_frags(self):
"include repeated frags"
modified_vect_frags = dict()
init_vect_frags = self.level.S_o_A_frags
# init_max_id_d = init_vect_frags["id"].max()
max_id_F = len(init_vect_frags["id"])
max_id_C = init_vect_frags["id_c"].max() + 1
# HSV_tuples = [(x*1.0/(max_id_C - 1), 0.5, 0.5) for x in range(0,
# (max_id_C-1))]
# cmap = plt.cm.gist_ncar
cmap = plt.cm.prism
# extract all colors from the .jet map
cmaplist = [cmap(i) for i in range(cmap.N)]
id_smple = np.linspace(0, cmap.N, num=max_id_C)
RGB_tuples = []
for i in range(0, max_id_C - 1):
RGB_tuples.append(cmaplist[int(id_smple[i])])
# RGB_tuples = map(lambda x: colorsys.hsv_to_rgb(*x), HSV_tuples)
self.init_n_frags = len(init_vect_frags["id"])
modified_vect_frags["pos"] = list(init_vect_frags["pos"])
modified_vect_frags["sub_pos"] = list(init_vect_frags["sub_pos"])
modified_vect_frags["id_c"] = list(init_vect_frags["id_c"])
modified_vect_frags["start_bp"] = list(init_vect_frags["start_bp"])
modified_vect_frags["len_bp"] = list(init_vect_frags["len_bp"])
modified_vect_frags["sub_len"] = list(init_vect_frags["sub_len"])
modified_vect_frags["circ"] = list(init_vect_frags["circ"])
modified_vect_frags["id"] = list(init_vect_frags["id"])
modified_vect_frags["prev"] = list(init_vect_frags["prev"])
modified_vect_frags["next"] = list(init_vect_frags["next"])
modified_vect_frags["l_cont"] = list(init_vect_frags["l_cont"])
modified_vect_frags["sub_l_cont"] = list(init_vect_frags["sub_l_cont"])
modified_vect_frags["l_cont_bp"] = list(init_vect_frags["l_cont_bp"])
modified_vect_frags["n_accu"] = list(init_vect_frags["n_accu"])
modified_vect_frags["rep"] = list(np.zeros(max_id_F, dtype=np.int32))
modified_vect_frags["activ"] = list(np.ones(max_id_F, dtype=np.int32))
modified_vect_frags["id_d"] = list(init_vect_frags["id"])
for data_dup in self.data_candidate_dup:
n_dup = int(data_dup[1])
id_f = data_dup[0]
for k in range(0, n_dup):
modified_vect_frags["pos"].append(0)
modified_vect_frags["sub_pos"].append(0)
modified_vect_frags["id_c"].append(max_id_C)
modified_vect_frags["start_bp"].append(0)
modified_vect_frags["len_bp"].append(
init_vect_frags["len_bp"][id_f]
)
modified_vect_frags["sub_len"].append(
init_vect_frags["sub_len"][id_f]
)
modified_vect_frags["circ"].append(
init_vect_frags["circ"][id_f]
)
modified_vect_frags["id"].append(max_id_F)
modified_vect_frags["prev"].append(-1)
modified_vect_frags["next"].append(-1)
modified_vect_frags["l_cont"].append(1)
modified_vect_frags["sub_l_cont"].append(
init_vect_frags["sub_len"][id_f]
)
modified_vect_frags["l_cont_bp"].append(
init_vect_frags["len_bp"][id_f]
)
modified_vect_frags["n_accu"].append(
init_vect_frags["n_accu"][id_f]
)
modified_vect_frags["rep"].append(1)
modified_vect_frags["activ"].append(1)
modified_vect_frags["id_d"].append(init_vect_frags["id"][id_f])
max_id_F += 1
max_id_C += 1
modified_vect_frags["pos"] = np.array(
modified_vect_frags["pos"], dtype=np.int32
)
modified_vect_frags["sub_pos"] = np.array(
modified_vect_frags["sub_pos"], dtype=np.int32
)
modified_vect_frags["id_c"] = np.array(
modified_vect_frags["id_c"], dtype=np.int32
)
modified_vect_frags["start_bp"] = np.array(
modified_vect_frags["start_bp"], dtype=np.int32
)
modified_vect_frags["len_bp"] = np.array(
modified_vect_frags["len_bp"], dtype=np.int32
)
modified_vect_frags["sub_len"] = np.array(
modified_vect_frags["sub_len"], dtype=np.int32
)
modified_vect_frags["circ"] = np.array(
modified_vect_frags["circ"], dtype=np.int32
)
modified_vect_frags["id"] = np.array(
modified_vect_frags["id"], dtype=np.int32
)
modified_vect_frags["prev"] = np.array(
modified_vect_frags["prev"], dtype=np.int32
)
modified_vect_frags["next"] = np.array(
modified_vect_frags["next"], dtype=np.int32
)
modified_vect_frags["l_cont"] = np.array(
modified_vect_frags["l_cont"], dtype=np.int32
)
modified_vect_frags["sub_l_cont"] = np.array(
modified_vect_frags["sub_l_cont"], dtype=np.int32
)
modified_vect_frags["l_cont_bp"] = np.array(
modified_vect_frags["l_cont_bp"], dtype=np.int32
)
modified_vect_frags["n_accu"] = np.array(
modified_vect_frags["n_accu"], dtype=np.int32
)
modified_vect_frags["rep"] = np.array(
modified_vect_frags["rep"], dtype=np.int32
)
modified_vect_frags["activ"] = np.array(
modified_vect_frags["activ"], dtype=np.int32
)
modified_vect_frags["id_d"] = np.array(
modified_vect_frags["id_d"], dtype=np.int32
)
id_x = 0
collector_id_repeats = []
frag_dispatcher = []
# BUILD LINK BETWEEN FRAGS AND SUB FRAGS
for id_f in range(0, self.init_n_frags):
if id_f in self.candidate_dup:
id_start = id_x
id_dup = np.nonzero(modified_vect_frags["id_d"] == id_f)[0]
collector_id_repeats.extend(list(id_dup))
n_rep = len(id_dup)
frag_dispatcher.append(
(np.int32(id_start), np.int32(id_start + n_rep))
)
id_x += n_rep
else:
id_start = id_x
n_rep = 1
frag_dispatcher.append(
(np.int32(id_start), np.int32(id_start + n_rep))
)
collector_id_repeats.append(id_f)
id_x += 1
self.collector_id_repeats = np.array(
collector_id_repeats, dtype=np.int32
)
self.frag_dispatcher = np.array(frag_dispatcher, dtype=self.int2)
self.n_frags = len(modified_vect_frags["id"])
pos_vect_frags_4_GL = np.ndarray((self.n_frags, 4), dtype=np.float32)
col_vect_frags_4_GL = np.ndarray((self.n_frags, 4), dtype=np.float32)
for id_f_curr in range(0, self.n_frags):
id_d = modified_vect_frags["id_d"][id_f_curr]
id_c = init_vect_frags["id_c"][id_d]
pos_vect_frags_4_GL[id_f_curr, 0] = modified_vect_frags["pos"][
id_f_curr
]
pos_vect_frags_4_GL[id_f_curr, 1] = modified_vect_frags["id_c"][
id_f_curr
]
pos_vect_frags_4_GL[id_f_curr, 2] = 0.
pos_vect_frags_4_GL[id_f_curr, 3] = np.float32(1.0)
col_vect_frags_4_GL[id_f_curr, 0] = np.float32(
RGB_tuples[id_c - 1][0]
)
col_vect_frags_4_GL[id_f_curr, 1] = np.float32(
RGB_tuples[id_c - 1][1]
)
col_vect_frags_4_GL[id_f_curr, 2] = np.float32(
RGB_tuples[id_c - 1][2]
)
col_vect_frags_4_GL[id_f_curr, 3] = np.float32(1.0)
self.col_vect_frags_4_GL = col_vect_frags_4_GL
self.pos_vect_frags_4_GL = pos_vect_frags_4_GL
self.new_S_o_A_frags = modified_vect_frags
[docs] def modify_sub_vect_frags(self):
"include repeated frags"
modified_vect_frags = dict()
init_vect_frags = self.sub_level.S_o_A_frags
# init_max_id_d = init_vect_frags["id"].max()
max_id_F = len(init_vect_frags["id"])
max_id_C = init_vect_frags["id_c"].max() + 1
# HSV_tuples = [(x*1.0/(max_id_C - 1), 0.5, 0.5) for x in range(0,
# (max_id_C-1))]
# cmap = plt.cm.gist_ncar
cmap = plt.cm.prism
# extract all colors from the .jet map
cmaplist = [cmap(i) for i in range(cmap.N)]
id_smple = np.linspace(0, cmap.N, num=max_id_C)
RGB_tuples = []
for i in range(0, max_id_C - 1):
RGB_tuples.append(cmaplist[int(id_smple[i])])
# RGB_tuples = map(lambda x: colorsys.hsv_to_rgb(*x), HSV_tuples)
self.init_n_sub_frags = len(init_vect_frags["id"])
modified_vect_frags["pos"] = list(init_vect_frags["pos"])
modified_vect_frags["sub_pos"] = list(init_vect_frags["sub_pos"])
modified_vect_frags["id_c"] = list(init_vect_frags["id_c"])
modified_vect_frags["start_bp"] = list(init_vect_frags["start_bp"])
modified_vect_frags["len_bp"] = list(init_vect_frags["len_bp"])
modified_vect_frags["sub_len"] = list(init_vect_frags["sub_len"])
modified_vect_frags["circ"] = list(init_vect_frags["circ"])
modified_vect_frags["id"] = list(init_vect_frags["id"])
modified_vect_frags["prev"] = list(init_vect_frags["prev"])
modified_vect_frags["next"] = list(init_vect_frags["next"])
modified_vect_frags["l_cont"] = list(init_vect_frags["l_cont"])
modified_vect_frags["sub_l_cont"] = list(init_vect_frags["sub_l_cont"])
modified_vect_frags["l_cont_bp"] = list(init_vect_frags["l_cont_bp"])
modified_vect_frags["n_accu"] = list(init_vect_frags["n_accu"])
modified_vect_frags["rep"] = list(np.zeros(max_id_F, dtype=np.int32))
modified_vect_frags["activ"] = list(np.ones(max_id_F, dtype=np.int32))
modified_vect_frags["id_d"] = list(init_vect_frags["id"])
# WARNING IMPLICT BREAKING OF THE CONTIGS
for data_dup in self.sub_candidates_output_data:
n_dup = int(data_dup[1])
id_f = data_dup[0]
for k in range(0, n_dup):
modified_vect_frags["pos"].append(0)
modified_vect_frags["sub_pos"].append(0)
modified_vect_frags["id_c"].append(max_id_C)
modified_vect_frags["start_bp"].append(0)
modified_vect_frags["len_bp"].append(
init_vect_frags["len_bp"][id_f]
)
modified_vect_frags["sub_len"].append(
init_vect_frags["sub_len"][id_f]
)
modified_vect_frags["circ"].append(
init_vect_frags["circ"][id_f]
)
modified_vect_frags["id"].append(max_id_F)
modified_vect_frags["prev"].append(-1)
modified_vect_frags["next"].append(-1)
modified_vect_frags["l_cont"].append(1)
modified_vect_frags["sub_l_cont"].append(
init_vect_frags["sub_len"][id_f]
)
modified_vect_frags["l_cont_bp"].append(
init_vect_frags["len_bp"][id_f]
)
modified_vect_frags["n_accu"].append(
init_vect_frags["n_accu"][id_f]
)
modified_vect_frags["rep"].append(1)
modified_vect_frags["activ"].append(1)
modified_vect_frags["id_d"].append(init_vect_frags["id"][id_f])
max_id_F += 1
max_id_C += 1
logger.info("MAX ID CONTIG = {}".format(max_id_C))
modified_vect_frags["pos"] = np.array(
modified_vect_frags["pos"], dtype=np.int32
)
modified_vect_frags["sub_pos"] = np.array(
modified_vect_frags["sub_pos"], dtype=np.int32
)
modified_vect_frags["id_c"] = np.array(
modified_vect_frags["id_c"], dtype=np.int32
)
modified_vect_frags["start_bp"] = np.array(
modified_vect_frags["start_bp"], dtype=np.int32
)
modified_vect_frags["len_bp"] = np.array(
modified_vect_frags["len_bp"], dtype=np.int32
)
modified_vect_frags["sub_len"] = np.array(
modified_vect_frags["sub_len"], dtype=np.int32
)
modified_vect_frags["circ"] = np.array(
modified_vect_frags["circ"], dtype=np.int32
)
modified_vect_frags["id"] = np.array(
modified_vect_frags["id"], dtype=np.int32
)
modified_vect_frags["prev"] = np.array(
modified_vect_frags["prev"], dtype=np.int32
)
modified_vect_frags["next"] = np.array(
modified_vect_frags["next"], dtype=np.int32
)
modified_vect_frags["l_cont"] = np.array(
modified_vect_frags["l_cont"], dtype=np.int32
)
modified_vect_frags["sub_l_cont"] = np.array(
modified_vect_frags["sub_l_cont"], dtype=np.int32
)
modified_vect_frags["l_cont_bp"] = np.array(
modified_vect_frags["l_cont_bp"], dtype=np.int32
)
modified_vect_frags["n_accu"] = np.array(
modified_vect_frags["n_accu"], dtype=np.int32
)
modified_vect_frags["rep"] = np.array(
modified_vect_frags["rep"], dtype=np.int32
)
modified_vect_frags["activ"] = np.array(
modified_vect_frags["activ"], dtype=np.int32
)
modified_vect_frags["id_d"] = np.array(
modified_vect_frags["id_d"], dtype=np.int32
)
id_x = 0
collector_id_repeats = []
frag_dispatcher = []
for id_f in range(0, self.init_n_sub_frags):
if id_f in self.sub_candidates_dup:
id_start = id_x
id_dup = np.nonzero(modified_vect_frags["id_d"] == id_f)[0]
collector_id_repeats.extend(list(id_dup))
n_rep = len(id_dup)
frag_dispatcher.append(
(np.int32(id_start), np.int32(id_start + n_rep))
)
id_x += n_rep
else:
id_start = id_x
n_rep = 1
frag_dispatcher.append(
(np.int32(id_start), np.int32(id_start + n_rep))
)
collector_id_repeats.append(id_f)
id_x += 1
self.sub_collector_id_repeats = np.array(
collector_id_repeats, dtype=np.int32
)
self.sub_frag_dispatcher = np.array(frag_dispatcher, dtype=self.int2)
self.sub_n_frags = len(modified_vect_frags["id"])
# pos_vect_frags_4_GL = np.ndarray((self.n_frags, 4), dtype=np.float32)
# col_vect_frags_4_GL = np.ndarray((self.n_frags, 4), dtype=np.float32)
#
# for id_f_curr in xrange(0 , self.sub_n_frags):
# id_d = modified_vect_frags['id_d'][id_f_curr]
# id_c = init_vect_frags['id_c'][id_d]
# pos_vect_frags_4_GL[id_f_curr, 0] =
# modified_vect_frags['pos'][id_f_curr]
# pos_vect_frags_4_GL[id_f_curr, 1] =
# modified_vect_frags['id_c'][id_f_curr]
# pos_vect_frags_4_GL[id_f_curr, 2] = 0.
# pos_vect_frags_4_GL[id_f_curr, 3] = np.float32(1.0)
#
# col_vect_frags_4_GL[id_f_curr, 0] =
# np.float32(RGB_tuples[id_c - 1][0])
# col_vect_frags_4_GL[id_f_curr, 1] =
# np.float32(RGB_tuples[id_c - 1][1])
# col_vect_frags_4_GL[id_f_curr, 2] =
# np.float32(RGB_tuples[id_c - 1][2])
# col_vect_frags_4_GL[id_f_curr, 3] =
# np.float32(1.0)
#
# self.sub_col_vect_frags_4_GL = col_vect_frags_4_GL
# self.sub_pos_vect_frags_4_GL = pos_vect_frags_4_GL
self.new_sub_S_o_A_frags = modified_vect_frags
[docs] def select_repeated_frags(self): #
# collect_cov = []
# step = 0
# p = ProgressBar('green', width=20, block='▣', empty='□')
# for i in range(0, self.level.n_frags):
# v_r = self.level.sparse_mat_csr[i, :]
# v_c = self.level.sparse_mat_csc[:, i]
# non_zeros = v_c.nnz + v_r.nnz
# collect_cov.append(non_zeros)
# step += 1
# if step%1000 == 0:
# pt = step * 100 / self.level.n_frags
# p.render(pt, 'step %s\nProcessing...\nDescription: computing
# coverage per frag.' % step)
coverage = np.array(self.level.sparse_mat_csr.sum(axis=0))[0]
coverage += np.array(
self.level.sparse_mat_csr.transpose().sum(axis=0)
)[0]
mean_coverage = coverage.mean()
std_coverage = coverage.std()
mean_coverage_ext = mean_coverage + 3 * std_coverage
candidates_dup = np.nonzero(coverage > mean_coverage_ext)[0]
# plt.figure()
# plt.hist(coverage, 100)
# plt.figure()
# plt.plot(coverage)
# plt.axhline(mean_coverage_ext, color='g')
# # plt.show()
# plt.figure()
# n, bins, patches = plt.hist(coverage, 10000, normed=1,
# facecolor='blue', alpha=0.75)
# # add a 'best fit' line
# (mu, sigma) = norm.fit(coverage)
# y = mlab.normpdf( bins, mu, sigma)
# l = plt.plot(bins, y, 'r--', linewidth=2)
# plt.axvline(mean_coverage_ext, color='g', linewidth=2)
# #plot
# plt.xlabel('Raw contacts frequency')
# plt.ylabel('Probability')
# plt.title(r'$\mathrm{Histogram\ of\ HiC\ contact\ (data\ %s):}\
# \mu=%.3f,\ \sigma=%.3f$' %(self.name, mu, sigma))
# plt.legend(["gaussian fit","duplication limit","exp distribution"],
# prop={'size':15})
# plt.grid(True)
#
# plt.show()
# DEBUGGGG ###############s##########
# candidates_dup = []
# print "candidate frags for duplication = ", candidates_dup
test = ""
# test = raw_input("ok?")
if not (test == ""):
# print "---enter id duplicated frags--"
list_dup_manual = input("ids (separated by space): ")
if list_dup_manual != "":
list_dup_manual = list_dup_manual.split(" ")
candidates_dup = [int(i) for i in list_dup_manual]
else:
candidates_dup = []
# DEBUGGGG #########################
# candidates_dup = range(880, 890)
# candidates_dup = range(663, 674)
# candidates_dup = range(0, 800)
# candidates_dup = range(1204, 1217)
candidates_dup = []
# candidates_dup = range(1978, 2009)
# print "you have selected: ", candidates_dup
output_data = []
sub_candidates_dup = []
sub_candidates_output_data = []
for ele in candidates_dup:
cov_ele = coverage[ele]
estim_n_dup = np.max(
[1, np.round(cov_ele / mean_coverage_ext) - 1]
)
# estim_n_dup = np.max([1, np.ceil(cov_ele / mean_coverage_ext) -
# 1])
output_data.append((ele, estim_n_dup))
# print "duplicated data = ", output_data
tmp_sub_ids = self.np_sub_frags_id[ele]
n_subs = tmp_sub_ids[-1]
for i in range(0, n_subs):
sub_candidates_dup.append(tmp_sub_ids[i])
sub_candidates_output_data.append(
(tmp_sub_ids[i], estim_n_dup)
)
logger.info("N frag duplicated = {}".format(len(candidates_dup)))
return (
candidates_dup,
output_data,
sub_candidates_dup,
sub_candidates_output_data,
)
[docs] def select_data_set(self, name):
size_pyramid = 9
factor = 3
self.hic_pyr = pyr.build_and_filter(
self.base_folder,
size_pyramid,
factor,
thresh_factor=self.thresh_factor,
)
logger.info("pyramid loaded")
if not (os.path.exists(self.output_folder)):
os.mkdir(self.output_folder)
self.output_folder = os.path.join(self.output_folder, self.data_set)
if not (os.path.exists(self.output_folder)):
os.mkdir(self.output_folder)
self.output_folder = os.path.join(
self.output_folder, "test_mcmc_" + self.str_level
)
if not (os.path.exists(self.output_folder)):
os.mkdir(self.output_folder)
self.new_fasta = os.path.join(self.output_folder, "genome.fasta")
self.info_frags = os.path.join(self.output_folder, "info_frags.txt")
self.output_matrix_em = os.path.join(
self.output_folder, "post_em.tiff"
)
self.output_matrix_mcmc = os.path.join(
self.output_folder, "post_mcmc.tiff"
)
self.input_matrix = os.path.join(self.output_folder, "pre_simu.tiff")
self.scrambled_input_matrix = os.path.join(
self.output_folder, "scrambled_simu.tiff"
)
[docs] def load_gl_buffers(self):
num = self.n_frags
pos = np.ndarray((num, 4), dtype=np.float32)
seed = np.random.rand(2, num)
pos[:, 0] = seed[0, :]
pos[:, 1] = 0.0
pos[:, 2] = seed[1, :] # z pos
pos[:, 3] = 1. # velocity
# num = self.n_frags
# pos = np.ndarray((num, 4), dtype=np.float32)
# pos[:,1] = np.sin(np.arange(0., num) * 2.001 * np.pi / (10*num))
# pos[:,1] *= np.random.random_sample((num,)) / 3. - 0.2
# pos[:,2] = np.cos(np.arange(0., num) * 2.001 * np.pi /(10* num))
# pos[:,2] *= np.random.random_sample((num,)) / 3. - 0.2
# pos[:,0] = 0. # z pos
# pos[:,3] = 1. # velocity
self.pos = pos
self.pos_vbo = vbo.VBO(
data=self.pos,
usage=OpenGL.GL.GL_DYNAMIC_DRAW,
target=OpenGL.GL.GL_ARRAY_BUFFER,
)
# self.pos_vbo = vbo.VBO(data=self.pos_vect_frags_4_GL,
# usage=GL_DYNAMIC_DRAW, target=GL_ARRAY_BUFFER)
self.pos_vbo.bind()
self.col_vbo = vbo.VBO(
data=self.col_vect_frags_4_GL,
usage=OpenGL.GL.GL_DYNAMIC_DRAW,
target=OpenGL.GL.GL_ARRAY_BUFFER,
)
self.col_vbo.bind()
self.vel = np.ndarray((self.n_frags, 4), dtype=np.float32)
self.vel[:, 2] = self.pos[:, 2] * 2.
self.vel[:, 1] = self.pos[:, 1] * 2.
self.vel[:, 0] = 3.
self.vel[:, 3] = np.random.random_sample((self.n_frags,))
[docs] def init_gl_image(self,):
self.texid = 0
self.pbo_im_buffer = OpenGL.GL.glGenBuffers(
1
) # generate 1 buffer reference
OpenGL.GL.glBindBuffer(
OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, self.pbo_im_buffer
) # binding to this buffer
# self.raw_im_init = np.uint8(np.random.rand(self.gl_size_im,
# self.gl_size_im))
self.raw_im_init = np.zeros(
(self.gl_size_im, self.gl_size_im), dtype=np.uint8
)
OpenGL.GL.glBufferData(
OpenGL.GL.GL_PIXEL_UNPACK_BUFFER,
self.gl_size_im * self.gl_size_im,
self.raw_im_init,
OpenGL.GL.GL_STREAM_DRAW,
) # Allocate the buffer
OpenGL.GL.glGetBufferParameteriv(
OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, OpenGL.GL.GL_BUFFER_SIZE
) # Check allocated buffer size
# try:
# assert(bsize == self.gl_size_im * self.gl_size_im)
# except AssertionError as e:
# print str(e)
OpenGL.GL.glBindBuffer(OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, 0) # Unbind
OpenGL.GL.glGenTextures(1, self.texid) # generate 1 texture reference
OpenGL.GL.glBindTexture(
OpenGL.GL.GL_TEXTURE_2D, self.texid
) # binding to this texture
OpenGL.GL.glTexParameteri(
OpenGL.GL.GL_TEXTURE_2D,
OpenGL.GL.GL_TEXTURE_MAG_FILTER,
OpenGL.GL.GL_LINEAR,
)
OpenGL.GL.glTexParameteri(
OpenGL.GL.GL_TEXTURE_2D,
OpenGL.GL.GL_TEXTURE_MIN_FILTER,
OpenGL.GL.GL_LINEAR,
)
# glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, self.gl_size_im,
# self.gl_size_im, 0, GL_LUMINANCE, GL_FLOAT, self.raw_im_init) #
# Allocate the texture
OpenGL.GL.glTexImage2D(
OpenGL.GL.GL_TEXTURE_2D,
0,
OpenGL.GL.GL_LUMINANCE,
self.gl_size_im,
self.gl_size_im,
0,
OpenGL.GL.GL_LUMINANCE,
OpenGL.GL.GL_UNSIGNED_BYTE,
None,
) # Allocate the texture
OpenGL.GL.glBindTexture(OpenGL.GL.GL_TEXTURE_2D, 0) # Unbind
OpenGL.GL.glPixelStorei(
OpenGL.GL.GL_UNPACK_ALIGNMENT, 1
) # 1-byte row alignment
OpenGL.GL.glPixelStorei(
OpenGL.GL.GL_PACK_ALIGNMENT, 1
) # 1-byte row alignment
[docs] def create_sub_frags(self):
self.sub_frags_len_bp = []
self.sub_frags_id = []
self.sub_frags_accu = []
unkb = np.float32(1000.0)
self.collect_accu_frags = []
self.norm_vect = []
n_sub_frags = 0
self.sub_frags_2_frags = []
for i in range(0, self.n_frags):
tmp = [
self.hic_pyr.spec_level[self.str_level]["fragments_dict"][
i + 1
]["sub_low_index"]
- 1,
self.hic_pyr.spec_level[self.str_level]["fragments_dict"][
i + 1
]["sub_high_index"]
- 1,
]
# !!! warning!!! m
n_sub = tmp[1] - tmp[0] + 1
v_len = [0, 0, 0]
v_accu = [0, 0, 0]
v_id = [0, 0, 0, n_sub]
n_sub_frags += n_sub
for j in range(0, n_sub):
v_len[j] = (
np.float32(
self.sub_level.vect_frag_np[tmp[0] + j]["len_bp"]
)
/ unkb
)
v_id[j] = np.int32(tmp[0] + j)
v_accu[j] = np.int32(
self.sub_level.vect_frag_np[tmp[0] + j]["n_accu"]
)
self.collect_accu_frags.append(v_accu[j])
# create index to go from high res frags to low res frags
tmp_len = np.array(v_len[0:n_sub], dtype=np.float32)
for j in range(0, n_sub):
w_d = np.sum(tmp_len[0:j]) + tmp_len[j] / 2. # watson distance
c_d = (
np.sum(tmp_len[list(range(n_sub - 1, j, -1))])
+ tmp_len[j] / 2.
) # crick distance
# dat = (i, w_d, c_d)
dat = (i, w_d, c_d, j)
self.sub_frags_2_frags.append(dat)
self.norm_vect.append(np.sum(v_accu))
self.sub_frags_len_bp.append(tuple(v_len))
self.sub_frags_id.append(tuple(v_id))
self.sub_frags_accu.append(tuple(v_accu))
# self.np_sub_frags_2_frags = np.array(self.sub_frags_2_frags,
# dtype=self.float3)
self.np_sub_frags_2_frags = np.array(
self.sub_frags_2_frags, dtype=self.float4
)
self.np_sub_frags_len_bp = np.array(
self.sub_frags_len_bp, dtype=self.float3
)
self.np_sub_frags_accu = np.array(self.sub_frags_accu, dtype=self.int3)
self.np_sub_frags_id = np.array(self.sub_frags_id, dtype=self.int4)
self.collect_accu_frags = np.array(
self.collect_accu_frags, dtype=np.float32
)
self.norm_vect = np.mat(self.norm_vect)
self.init_n_sub_frags = n_sub_frags
[docs] def create_new_sub_frags(self,):
out = 0
rep_sub_frags_id = []
idx = 0
for i in range(0, self.n_frags):
id_d = self.new_S_o_A_frags["id_d"][i]
n_sub = self.np_sub_frags_id[id_d]["w"]
v_id = [0, 0, 0, n_sub]
for j in range(0, n_sub):
v_id[j] = np.int32(idx)
idx += 1
rep_sub_frags_id.append(tuple(v_id))
out += n_sub
self.n_new_sub_frags = out
self.rep_sub_frags_id = np.array(rep_sub_frags_id, dtype=self.int4)
[docs] def plot_info_simu(
self,
collect_likelihood_input,
collect_n_contigs_input,
file_plot,
title_ax,
):
collect_likelihood = np.array(collect_likelihood_input)
collect_n_contigs = np.array(collect_n_contigs_input)
len_collect = len(collect_likelihood)
if len_collect > 1000:
idx_2_plot = np.arange(1000, len_collect)
else:
idx_2_plot = np.arange(0, len_collect)
fig = plt.figure(figsize=(10, 10), dpi=100)
ax1 = fig.add_subplot(111)
ax1.plot(collect_likelihood[idx_2_plot], "r-")
ax1.set_xlabel("iterations")
# Make the y-axis label and tick labels match the line color.
ax1.set_ylabel("likelihood", color="r")
for tl in ax1.get_yticklabels():
tl.set_color("r")
ax2 = ax1.twinx()
# if title_ax == "distance from init genome":
# ax2.semilogy(collect_n_contigs, 'b-')
# else:
ax2.plot(collect_n_contigs[idx_2_plot], "b-")
ax2.set_ylabel(title_ax, color="b")
for tl in ax2.get_yticklabels():
tl.set_color("b")
plt.show()
fig.savefig(file_plot)
if len_collect > 1000:
plt.figure()
plt.hist(collect_n_contigs[idx_2_plot], 100)
plt.title("histogram " + title_ax)
plt.xlabel(title_ax)
plt.ylabel("counts")
plt.show()
# def plot_all_info_simu(self, all_data, headers):
# print "here we go!!"
# folder = self.output_folder
# ## generate files ##
# n_vals =
# # the main axes is subplot(111) by default
# plt.plot(t, s)
# plt.axis([0, 1, 1.1*amin(s), 2*amax(s) ])
# plt.xlabel('time (s)')
# plt.ylabel('current (nA)')
# plt.title('Gaussian colored noise')
#
# # this is an inset axes over the main axes
# a = plt.axes([.65, .6, .2, .2], axisbg='w')
# n, bins, patches = plt.hist(s, 400, normed=1)
# plt.title('Counts')
# plt.setp(a, xticks=[], yticks=[])
# plt.show()
[docs] def export_new_fasta(self):
self.sampler.gpu_vect_frags.copy_from_gpu()
self.level.generate_new_fasta(
self.sampler.gpu_vect_frags, self.new_fasta, self.info_frags
)
[docs] def release(self):
self.sampler.free_gpu()