Source code for instagraal.instagraal

#!/usr/bin/env python3

"""Large genome reassembly based on Hi-C data.

Usage:
    instagraal <hic_folder> <reference.fa> [<output_folder>]
               [--level=4] [--cycles=100] [--coverage-std=1]
               [--neighborhood=5] [--device=0] [--circular] [--bomb]
               [--save-matrix] [--pyramid-only] [--save-pickle] [--simple]
               [--quiet] [--debug]

Options:
    -h, --help              Display this help message.
    --version               Display the program's current version.
    -l 4, --level 4         Level (resolution) of the contact map.
                            Increasing level by one means a threefold smaller
                            resolution but also a threefold faster computation
                            time. [default: 4]
    -n 100, --cycles 100    Number of iterations to perform for each bin.
                            (row/column of the contact map). A high number of
                            cycles has diminishing returns but there is a
                            necessary minimum for assembly convergence.
                            [default: 100]
    -c 1, --coverage-std 1  Number of standard deviations below the mean.
                            coverage, below which fragments should be filtered
                            out prior to binning. [default: 1]
    -N 5, --neighborhood 5  Number of neighbors to sample for potential
                            mutations for each bin. [default: 5]
    --device 0              If multiple graphic cards are available, select
                            a specific device (numbered from 0). [default: 0]
    -C, --circular          Indicates genome is circular. [default: False]
    -b, --bomb              Explode the genome prior to scaffolding.
                            [default: False]
    --pyramid-only          Only build multi-resolution contact maps (pyramids)
                            and don't do any scaffolding. [default: False]
    --save-pickle           Dump all info from the instaGRAAL run into a
                            pickle. Primarily for development purposes, but
                            also for advanced post hoc introspection.
                            [default: False]
    --save-matrix           Saves a preview of the contact map after each
                            cycle. [default: False]
    --simple                Only perform operations at the edge of the contigs.
                            [default: False]
    --quiet                 Only display warnings and errors as outputs.
                            [default: False]
    --debug                 Display debug information. For development purposes
                            only. Mutually exclusive with --quiet, and will
                            override it. [default: False]

"""

import sys
import os
import docopt

import pycuda.driver as cuda
import pycuda.gl as cudagl

# from OpenGL.GL import *
# from OpenGL.GLU import *
# from OpenGL.GLUT import *

import OpenGL.GL
import OpenGL.GLU
import OpenGL.GLUT

# helper modules
from instagraal import glutil
from instagraal.vector import Vec
import numpy as np
import matplotlib.pyplot as plt
from instagraal.simu_single import simulation

import pickle
import logging
from instagraal import log
from instagraal.log import logger

VERSION_NUMBER = "0.1.2"

DEFAULT_CYCLES = 100
DEFAULT_LEVEL = 4
DEFAULT_NEIGHBOURS = 5
DEFAULT_BOMB = True
DEFAULT_ITERATIONS_MCMC = 100
DEFAULT_ITERATIONS_EM = 30
DEFAULT_BURN_IN_CYCLES = 2
DEFAULT_COVERAGE_STDS = 1
DEFAULT_CIRCULAR = False

DEFAULT_GL_SIZE_IM = 1000


[docs]class window: """A window displaying the live movie of the calculations performed by the scaffolder. [description] Parameters ---------- name : str The name of the project. Will determine the window title. folder_path : str or pathlib.Path The directory containing the Hi-C conact map. fasta : str or pathlib.Path The path to the reference genome in FASTA format. device : int The identifier of the graphic card to be used, numbered from 0. If only one is available, it should be 0. level : int The level (resolution) at which to perform scaffolding. n_iterations_em : int The number of EM (expectation maximization) iterations. n_iterations_mcmc : int The number of MCMC (Markov chain Monte-Carlo) iterations. is_simu : bool Whether the parameters should be simulated. Mutually exclusive with use_rippe and will override it. scrambled : bool Whether to scramble the genome. perform_em : bool Whether to perform EM (expectation maximization). use_rippe : bool Whether to explicitly use the model from Rippe et al., 2001. gl_size_im : int The size of the window to be displayed. sample_param : bool Whether to sample the parameters. thresh_factor : float The sparsity (coverage) threshold below which fragments are discarded, as a number of standard deviations below the mean. output_folder : str or pathlib.Path The path to the output folder where the scaffolded genome and other relevant information will be saved. """ def __init__( self, name, folder_path, fasta, device, level, n_iterations_em, n_iterations_mcmc, is_simu, scrambled, perform_em, use_rippe, gl_size_im, sample_param, thresh_factor, output_folder, ): """Initialize parameters """ self.device = device # mouse handling for transforming scene self.mouse_down = False self.size_points = 6 self.mouse_old = Vec([0.0, 0.0]) self.rotate = Vec([0.0, 0.0, 0.0]) self.translate = Vec([0.0, 0.0, 0.0]) self.initrans = Vec([0.0, 0.0, -2.0]) self.scrambled = scrambled self.width = 400 self.height = 400 self.n_iterations_em = n_iterations_em self.n_iterations_mcmc = n_iterations_mcmc self.sample_param = sample_param self.dt = np.float32(0.01) self.white = 1 self.gl_size_im = gl_size_im OpenGL.GLUT.glutInit(sys.argv) OpenGL.GLUT.glutInitDisplayMode( OpenGL.GLUT.GLUT_RGBA | OpenGL.GLUT.GLUT_DOUBLE | OpenGL.GLUT.GLUT_DEPTH ) OpenGL.GLUT.glutInitWindowSize(self.width, self.height) OpenGL.GLUT.glutInitWindowPosition(0, 0) if perform_em: name_window = "Expectation Maximization : " + name else: name_window = "MCMC Metropolis-Hasting : " + name self.win = OpenGL.GLUT.glutCreateWindow(name_window) # manava = raw_input("do we have a deal?") # gets called by GLUT every frame OpenGL.GLUT.glutDisplayFunc(self.draw) # handle user input OpenGL.GLUT.glutKeyboardFunc(self.on_key) OpenGL.GLUT.glutMouseFunc(self.on_click) OpenGL.GLUT.glutMotionFunc(self.on_mouse_motion) # this will call draw every 30 ms OpenGL.GLUT.glutTimerFunc(30, self.timer, 30) # setup OpenGL scene self.glinit() # setup CUDA self.cuda_gl_init() # set up initial conditions self.use_rippe = use_rippe self.simulation = simulation( name, folder_path, fasta, level, n_iterations_em, is_simu, self, use_rippe, gl_size_im, thresh_factor, output_folder=output_folder, ) self.gl_size_im = self.simulation.gl_size_im self.texid = self.simulation.texid self.init_n_frags = self.simulation.init_n_frags self.pbo_im_buffer = self.simulation.pbo_im_buffer self.pos_vbo = self.simulation.pos_vbo self.col_vbo = self.simulation.col_vbo self.n_frags = self.simulation.sampler.n_new_frags self.str_likelihood = "likelihood = " + str(0) self.str_n_contigs = "n contigs = " + str(0) self.str_curr_id_frag = "current id frag = " + str(0) self.str_curr_cycle = "current cycle = " + str(0) self.str_curr_temp = "current temperature = " + str(0) self.str_curr_dist = "current dist = " + str(0) # if perform_em: # self.start_EM() # else: # self.start_MCMC() self.collect_likelihood = [] self.collect_n_contigs = [] self.collect_mean_len = [] self.collect_op_sampled = [] # self.collect_id_f_sampled = [] self.collect_id_fA_sampled = [] self.collect_id_fB_sampled = [] self.collect_full_likelihood = [] self.collect_dist_from_init_genome = [] self.collect_fact = [] self.collect_slope = [] self.collect_d = [] self.collect_d_nuc = [] self.collect_d_max = [] self.collect_likelihood_nuisance = [] self.collect_success = [] self.collect_all = [] # self.simulation.sampler.eval_likelihood() # h = self.simulation.sampler.gpu_full_expected_lin.get() # print "non zero h = ", np.nonzero(h>0)[0] # self.simulation.sampler.step_nuisance_parameters(0, 0, 0) # self.test_model(665, 600) self.file_mean_len = os.path.join( self.simulation.output_folder, "behaviour_mean_len.pdf" ) self.file_n_contigs = os.path.join( self.simulation.output_folder, "behaviour_n_contigs.pdf" ) self.file_fact = os.path.join( self.simulation.output_folder, "behaviour_fact.pdf" ) self.file_slope = os.path.join( self.simulation.output_folder, "behaviour_slope.pdf" ) self.file_d_nuc = os.path.join( self.simulation.output_folder, "behaviour_d_nuc.pdf" ) self.file_d = os.path.join( self.simulation.output_folder, "behaviour_d.pdf" ) self.file_d_max = os.path.join( self.simulation.output_folder, "behaviour_d_max.pdf" ) self.file_dist_init_genome = os.path.join( self.simulation.output_folder, "behaviour_dist_init_genome.pdf" ) self.txt_file_mean_len = os.path.join( self.simulation.output_folder, "list_mean_len.txt" ) self.txt_file_n_contigs = os.path.join( self.simulation.output_folder, "list_n_contigs.txt" ) self.txt_file_dist_init_genome = os.path.join( self.simulation.output_folder, "list_dist_init_genome.txt" ) self.txt_file_likelihood = os.path.join( self.simulation.output_folder, "list_likelihood.txt" ) self.txt_file_fact = os.path.join( self.simulation.output_folder, "list_fact.txt" ) self.txt_file_slope = os.path.join( self.simulation.output_folder, "list_slope.txt" ) self.txt_file_d_nuc = os.path.join( self.simulation.output_folder, "list_d_nuc.txt" ) self.txt_file_d = os.path.join( self.simulation.output_folder, "list_d.txt" ) self.txt_file_d_max = os.path.join( self.simulation.output_folder, "list_d_max.txt" ) self.txt_file_success = os.path.join( self.simulation.output_folder, "list_success.txt" ) self.txt_file_list_mutations = os.path.join( self.simulation.output_folder, "list_mutations.txt" ) self.file_all_data = os.path.join( self.simulation.output_folder, "behaviour_all.txt" ) # self.remote_update() # id_f_ins = 250 # self.simulation.sampler.insert_repeats(id_f_ins) # self.setup_simu(id_f_ins) # self.start_EM() # self.start_MCMC()
[docs] def replay_simu( self, file_simu, file_likelihood, file_n_contigs, file_distances ): h = open(file_simu, "r") all_lines = h.readlines() list_id_fA = [] list_id_fB = [] list_op_sampled = [] for i in range(1, len(all_lines)): line = all_lines[i] data = line.split("\t") id_fA = int(data[0]) id_fB = int(data[1]) id_op = int(data[2]) list_id_fA.append(id_fA) list_id_fB.append(id_fB) list_op_sampled.append(id_op) h.close() h_likeli = open(file_likelihood, "r") all_lines_likeli = h_likeli.readlines() list_likelihood = [] for i in range(0, len(all_lines_likeli)): line = all_lines_likeli[i] likeli = np.float32(line) list_likelihood.append(likeli) h_likeli.close() h_n_contigs = open(file_n_contigs, "r") list_n_contigs = [] all_lines_contigs = h_n_contigs.readlines() for i in range(0, len(all_lines_contigs)): line = all_lines_contigs[i] n_contigs = np.float32(line) list_n_contigs.append(n_contigs) h_n_contigs.close() h_distances = open(file_distances, "r") list_distances = [] all_lines_distances = h_distances.readlines() for i in range(0, len(all_lines_distances)): line = all_lines_distances[i] distance = np.float32(line) list_distances.append(distance) h_distances.close() for i in range(0, 10): self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) self.remote_update() self.str_n_contigs = "n contigs = " + str( self.simulation.sampler.gpu_vect_frags.id_c.max() ) # self.simulation.sampler.explode_genome(self.dt) self.simulation.sampler.bomb_the_genome() # print list_likelihood for i in range(0, len(list_id_fA)): self.simulation.sampler.apply_replay_simu( list_id_fA[i], list_id_fB[i], list_op_sampled[i], self.dt ) self.str_curr_cycle = "cycle = " + str(int(i)) self.str_likelihood = "likelihood = " + str(list_likelihood[i]) self.str_n_contigs = "n contigs = " + str(list_n_contigs[i]) self.str_curr_id_frag = "current frag = " + str(list_id_fA[i]) self.str_curr_dist = "current dist = " + str(list_distances[i]) self.str_curr_temp = "current temperature = " + str(0)
[docs] def start_EM(self,): logger.info("start expectation maximization ... ") delta = 15 logger.info(self.simulation.n_iterations) delta = np.int32( np.floor(np.linspace(3, 5, np.floor(self.n_iterations_em / 3.0))) ) # param ok simu # delta = np.int32(np.floor(np.linspace(3, 4, # np.floor(self.n_iterations_em / 2.)))) delta = list(delta) d_ext = list( np.floor( np.linspace(10, 15, np.floor(self.n_iterations_em / 3.0) + 1) ) ) # param ok simu delta.extend(d_ext) logger.info(delta) logger.info(("len delta = ", len(delta))) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.input_matrix ) self.simulation.sampler.init_likelihood() self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) # ready = 0 # while ready != '1': # ready = raw_input("ready to start?") # self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) # self.remote_update() if self.scrambled: # self.simulation.sampler.modify_genome(500) self.simulation.sampler.explode_genome(self.dt) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.scrambled_input_matrix ) list_frags = np.arange( 0, self.simulation.sampler.n_new_frags, dtype=np.int32 ) iteration = 0 n_iter = np.float32(self.n_iterations_em) self.bins_rippe = self.simulation.sampler.bins for j in range(0, self.n_iterations_em): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) # d = self.simulation.sampler.step_nuisance_parameters(0, 0, 0) for i in list_frags: # print "id_frag =", i if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() # if j == 0 and iter == 0: # raw_input("ready ?") ( o, n_contigs, min_len, mean_len, max_len, op_sampled, id_f_sampled, dist, temp, ) = self.simulation.sampler.step_max_likelihood( i, delta[j], 512, self.dt, np.float32(j), n_iter ) # o, n_contigs, min_len, mean_len, max_len = # self.simulation.sampler.new_sample_fi(i, delta[j], 512, 200) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(n_contigs) self.str_curr_id_frag = "current frag = " + str(i) self.str_curr_dist = "current dist = " + str(dist) self.str_curr_temp = "current temperature = " + str(temp) # self.str_curr_d = "current d = "+ str(d) self.collect_full_likelihood.append( self.simulation.sampler.likelihood_t ) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_op_sampled.append(op_sampled) self.collect_id_fB_sampled.append(id_f_sampled) self.collect_id_fA_sampled.append(i) self.collect_dist_from_init_genome.append(dist) iteration += 1 # sampling nuisance parameters ( fact, d, d_max, d_nuc, slope, likeli, success, y_eval, ) = self.simulation.sampler.step_nuisance_parameters( self.dt, np.float32(j), n_iter ) self.collect_fact.append(fact) self.collect_d.append(d) self.collect_d_max.append(d_max) self.collect_d_nuc.append(d_nuc) self.collect_slope.append(slope) self.collect_likelihood_nuisance.append(likeli) self.collect_success.append(success) self.y_eval = y_eval o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_em ) self.simulation.export_new_fasta() self.simulation.plot_info_simu( self.collect_likelihood, self.collect_n_contigs, self.file_n_contigs, "n_contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_mean_len, self.file_mean_len, "mean length contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_dist_from_init_genome, self.file_dist_init_genome, "distance from init genome", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_slope, self.file_slope, "slope", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_fact, self.file_fact, "scale factor", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_d_nuc, self.file_d_nuc, "val trans", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_d, self.file_d, "d" ) self.save_behaviour_to_txt()
[docs] def start_EM_all(self,): logger.info("start expectation maximization ... ") delta = 15 logger.info((self.simulation.n_iterations)) delta = np.int32( np.floor(np.linspace(3, 4, np.floor(self.n_iterations_em / 2.0))) ) # param ok simu # delta = np.int32(np.floor(np.linspace(3, 4, # np.floor(self.n_iterations_em / 2.)))) delta = list(delta) d_ext = list( np.floor( np.linspace(5, 10, np.floor(self.n_iterations_em / 2.0) + 1) ) ) # param ok simu # d_ext = list(np.floor(np.linspace(10, 15, # np.floor(self.n_iterations_em / 2.) + 1))) delta.extend(d_ext) logger.info(delta) logger.info(("len delta = ", len(delta))) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.input_matrix ) self.simulation.sampler.init_likelihood() self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) if self.scrambled: self.simulation.sampler.explode_genome(self.dt) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.scrambled_input_matrix ) list_frags = np.arange( 0, self.simulation.sampler.n_new_frags, dtype=np.int32 ) iteration = 0 n_iter = np.float32(self.n_iterations_em) self.bins_rippe = self.simulation.sampler.bins self.headers = [ "likelihood", "n_contigs", "min_len_contigs", "mean_len_contigs", "max_len_contigs", "operation_sampled", "id_f_sampled", "distance_from_init", "scale factor", "d", "max_distance_intra", "n_contacts_inter", "slope", "success", ] ( fact, d, d_max, d_nuc, slope, likeli, success, y_eval, ) = self.simulation.sampler.step_nuisance_parameters( self.dt, np.float32(0), n_iter ) for j in range(0, self.n_iterations_em): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) # d = self.simulation.sampler.step_nuisance_parameters(0, 0, 0) for i in list_frags: # print "id_frag =", i if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() # if j == 0 and iter == 0: # raw_input("ready ?") ( o, n_contigs, min_len, mean_len, max_len, op_sampled, id_f_sampled, dist, temp, ) = self.simulation.sampler.step_max_likelihood( i, delta[j], 512, self.dt, np.float32(j), n_iter ) success = 1 vect_collect = ( o, n_contigs, min_len, mean_len, max_len, op_sampled, id_f_sampled, dist, fact, d, d_max, d_nuc, slope, success, ) self.collect_all.append(vect_collect) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(n_contigs) self.str_curr_id_frag = "current frag = " + str(i) self.str_curr_dist = "current dist = " + str(dist) self.str_curr_temp = "current temperature = " + str(temp) # self.str_curr_d = "current d = "+ str(d) self.collect_full_likelihood.append( self.simulation.sampler.likelihood_t ) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_op_sampled.append(op_sampled) self.collect_id_fB_sampled.append(id_f_sampled) self.collect_id_fA_sampled.append(i) self.collect_dist_from_init_genome.append(dist) iteration += 1 # sampling nuisance parameters ( fact, d, d_max, d_nuc, slope, likeli, success, y_eval, ) = self.simulation.sampler.step_nuisance_parameters( self.dt, np.float32(j), n_iter ) vect_collect = ( likeli, n_contigs, min_len, mean_len, max_len, op_sampled, id_f_sampled, dist, fact, d, d_max, d_nuc, slope, success, ) self.collect_all.append(vect_collect) self.collect_fact.append(fact) self.collect_d.append(d) self.collect_d_max.append(d_max) self.collect_d_nuc.append(d_nuc) self.collect_slope.append(slope) self.collect_likelihood_nuisance.append(likeli) self.collect_success.append(success) self.y_eval = y_eval o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_em ) self.simulation.export_new_fasta() self.simulation.plot_all_info_simu(self.collect_all, self.header)
[docs] def start_EM_nuisance(self,): logger.info("start expectation maximization ... ") delta = 15 logger.info((self.simulation.n_iterations)) delta = np.int32( np.floor(np.linspace(3, 4, np.floor(self.n_iterations_em / 2.0))) ) # param ok simu # delta = np.int32(np.floor(np.linspace(3, 4, # np.floor(self.n_iterations_em / 2.)))) delta = list(delta) d_ext = list( np.floor( np.linspace(5, 10, np.floor(self.n_iterations_em / 2.0) + 1) ) ) # param ok simu # d_ext = list(np.floor(np.linspace(10, 15, # np.floor(self.n_iterations_em / 2.) + 1))) delta.extend(d_ext) logger.info(delta) logger.info(("len delta = ", len(delta))) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.input_matrix ) self.simulation.sampler.init_likelihood() self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) # ready = 0 # while ready != '1': # ready = raw_input("ready to start?") # self.simulation.sampler.modify_gl_cuda_buffer(0, self.dt) # self.remote_update() if self.scrambled: # self.simulation.sampler.modify_genome(500) self.simulation.sampler.explode_genome(self.dt) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.scrambled_input_matrix ) list_frags = np.arange( 0, self.simulation.sampler.n_new_frags, dtype=np.int32 ) # iter = 0 n_iter = np.float32(self.n_iterations_em) self.bins_rippe = self.simulation.sampler.bins for j in range(0, self.n_iterations_em): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) # d = self.simulation.sampler.step_nuisance_parameters(0, 0, 0) for i in list_frags: # # print "id_frag =", i # if bool(glutMainLoopEvent): # glutMainLoopEvent() # else: # glutCheckLoop() # # if j == 0 and iter == 0: # # raw_input("ready ?") # o, n_contigs, min_len, mean_len, max_len, op_sampled, # id_f_sampled, dist, temp = # self.simulation.sampler.step_max_likelihood(i, delta[j], 512, # self.dt, np.float32(j), n_iter) # # o, n_contigs, min_len, mean_len, max_len = # self.simulation.sampler.new_sample_fi(i, delta[j], 512, 200) # self.str_likelihood = "likelihood = " + str(o) # self.str_n_contigs = "n contigs = " + str(n_contigs) # self.str_curr_id_frag = "current frag = "+ str(i) # self.str_curr_dist = "current dist = "+ str(dist) # self.str_curr_temp = "current temperature = "+str(temp) # # self.str_curr_d = "current d = "+ str(d) # self.collect_full_likelihood.append(self.simulation.sampler. # likelihood_t) # self.collect_likelihood.append(o) # self.collect_n_contigs.append(n_contigs) # self.collect_mean_len.append(mean_len) # self.collect_op_sampled.append(op_sampled) # self.collect_id_fB_sampled.append(id_f_sampled) # self.collect_id_fA_sampled.append(i) # self.collect_dist_from_init_genome.append(dist) # iter += 1 # # sampling nuisance parameters ( fact, d, d_max, d_nuc, slope, likeli, success, y_eval, ) = self.simulation.sampler.step_nuisance_parameters( self.dt, np.float32(j), n_iter ) self.collect_fact.append(fact) self.collect_d.append(d) self.collect_d_max.append(d_max) self.collect_d_nuc.append(d_nuc) self.collect_slope.append(slope) self.collect_likelihood_nuisance.append(likeli) self.collect_success.append(success) self.y_eval = y_eval o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_em ) self.simulation.export_new_fasta()
# self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_n_contigs, self.file_n_contigs, "n_contigs") # self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_mean_len, self.file_mean_len, "mean length contigs") # self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_dist_from_init_genome, self.file_dist_init_genome, # "distance from init genome") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_slope, self.file_slope, "slope") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_fact, self.file_fact, "scale factor") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_d_nuc, self.file_d_nuc, "val trans") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_d, self.file_d, "d") # self.save_behaviour_to_txt()
[docs] def start_EM_no_scrambled(self,): logger.info("start expectation maximization ... ") delta = 15 logger.info((self.simulation.n_iterations)) # delta = np.int32(np.floor(np.linspace(3, 4, # np.floor(self.n_iterations_em / 2.)))) # param ok simu delta = np.int32( np.floor(np.linspace(3, 4, np.floor(self.n_iterations_em / 2.0))) ) delta = list(delta) # d_ext = list(np.floor(np.linspace(5, 10, # np.floor(self.n_iterations_em / 2.) + 1))) # param ok simu d_ext = list( np.floor( np.linspace(10, 15, np.floor(self.n_iterations_em / 2.0) + 1) ) ) delta.extend(d_ext) logger.info(delta) logger.info(("len delta = ", len(delta))) o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.input_matrix ) # self.simulation.sampler.init_likelihood() o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.scrambled_input_matrix ) list_frags = np.arange( 0, self.simulation.sampler.n_new_frags, dtype=np.int32 ) iter = 0 n_iter = np.float32(self.n_iterations_em) for j in range(0, self.n_iterations_em): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) # d = self.simulation.sampler.step_nuisance_parameters(0, 0, 0) for i in list_frags: # print "id_frag =", i if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() # if j == 0 and iter == 0: # raw_input("ready ?") ( o, n_contigs, min_len, mean_len, max_len, op_sampled, id_f_sampled, dist, temp, ) = self.simulation.sampler.step_max_likelihood( i, delta[j], 512, self.dt, np.float32(j), n_iter ) # o, n_contigs, min_len, mean_len, max_len = # self.simulation.sampler.new_sample_fi(i, delta[j], 512, 200) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(n_contigs) self.str_curr_id_frag = "current frag = " + str(i) self.str_curr_dist = "current dist = " + str(dist) self.str_curr_temp = "current temperature = " + str(temp) # self.str_curr_d = "current d = "+ str(d) self.collect_full_likelihood.append( self.simulation.sampler.likelihood_t ) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_op_sampled.append(op_sampled) self.collect_id_fB_sampled.append(id_f_sampled) self.collect_id_fA_sampled.append(i) self.collect_dist_from_init_genome.append(dist) iter += 1 o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_em ) self.simulation.export_new_fasta() self.simulation.plot_info_simu( self.collect_likelihood, self.collect_n_contigs, self.file_n_contigs, "n_contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_mean_len, self.file_mean_len, "mean length contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_dist_from_init_genome, self.file_dist_init_genome, "distance from init genome", ) self.save_behaviour_to_txt()
[docs] def save_behaviour_to_txt(self): list_file = [ self.txt_file_mean_len, self.txt_file_n_contigs, self.txt_file_dist_init_genome, self.txt_file_likelihood, self.txt_file_fact, self.txt_file_slope, self.txt_file_d_max, self.txt_file_d_nuc, self.txt_file_d, self.txt_file_success, ] list_data = [ self.collect_mean_len, self.collect_n_contigs, self.collect_dist_from_init_genome, self.collect_likelihood, self.collect_fact, self.collect_slope, self.collect_d_max, self.collect_d_nuc, self.collect_d, self.collect_success, ] for d in range(0, len(list_file)): thefile = list_file[d] h = open(thefile, "w") data = list_data[d] for item in data: h.write("%s\n" % item) h.close() f_mutations = open(self.txt_file_list_mutations, "w") f_mutations.write("%s\t%s\t%s\n" % ("id_fA", "id_fB", "id_mutation")) for i in range(0, len(self.collect_id_fA_sampled)): id_fA = self.collect_id_fA_sampled[i] id_fB = self.collect_id_fB_sampled[i] id_mut = self.collect_op_sampled[i] f_mutations.write("%s\t%s\t%s\n" % (id_fA, id_fB, id_mut)) f_mutations.close()
[docs] def start_MCMC(self,): logger.info("set jumping distribution...") delta = 5 self.simulation.sampler.set_jumping_distributions_parameters(delta) self.simulation.sampler.init_likelihood() logger.info("start sampling launched ... ") logger.info((self.simulation.n_iterations)) delta = list(range(5, 5 + self.simulation.n_iterations * 2, 2)) logger.info(delta) # if self.scrambled: # self.simulation.sampler.modify_genome(500) # o, d, d_high = # self.simulation.sampler.display_current_matrix( # self.simulation.input_matrix # ) n_iter = np.float32(self.simulation.n_iterations) list_frags = np.arange(0, self.n_frags, dtype=np.int32) for j in range(0, self.n_iterations_mcmc): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) for i in list_frags: # print "id_frag =", i if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() ( o, n_contigs, min_len, mean_len, max_len, temp, dist, ) = self.simulation.sampler.step_metropolis_hastings_s_a( i, np.float32(j), n_iter, self.dt ) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(n_contigs) self.str_curr_id_frag = "current frag = " + str(i) self.str_curr_temp = "current temperature = " + str(temp) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_dist_from_init_genome.append(dist) self.simulation.export_new_fasta() o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_mcmc ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_n_contigs, self.file_n_contigs, "n_contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_mean_len, self.file_mean_len, "mean length contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_dist_from_init_genome, self.file_dist_init_genome, "distance from init genome", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_slope, self.file_slope, "slope", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_fact, self.file_fact, "scale factor", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_d_nuc, self.file_d_nuc, "val trans", ) self.save_behaviour_to_txt()
[docs] def start_MTM(self,): logger.info("set jumping distribution...") delta = 5 self.simulation.sampler.set_jumping_distributions_parameters(delta) self.simulation.sampler.init_likelihood() logger.info("start sampling launched ... ") logger.info((self.simulation.n_iterations)) delta = list(range(5, 5 + self.simulation.n_iterations * 2, 2)) logger.info(delta) # if self.scrambled: # self.simulation.sampler.modify_genome(500) # o, d, d_high = # self.simulation.sampler.display_current_matrix( # self.simulation.input_matrix # ) if self.scrambled: # self.simulation.sampler.modify_genome(500) self.simulation.sampler.explode_genome(self.dt) n_iter = np.float32(self.simulation.n_iterations) list_frags = np.arange(0, self.n_frags, dtype=np.int32) for j in range(0, self.n_iterations_mcmc): logger.info("cycle = {}".format(j)) self.str_curr_cycle = "current cycle = " + str(j) np.random.shuffle(list_frags) for i in list_frags: # print "id_frag =", i if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() ( o, n_contigs, min_len, mean_len, max_len, temp, dist, ) = self.simulation.sampler.step_mtm( i, np.float32(j), n_iter, self.dt ) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(n_contigs) self.str_curr_id_frag = "current frag = " + str(i) self.str_curr_temp = "current temperature = " + str(temp) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_dist_from_init_genome.append(dist) self.simulation.export_new_fasta() o, d, d_high = self.simulation.sampler.display_current_matrix( self.simulation.output_matrix_mcmc ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_n_contigs, self.file_n_contigs, "n_contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_mean_len, self.file_mean_len, "mean length contigs", ) self.simulation.plot_info_simu( self.collect_likelihood, self.collect_dist_from_init_genome, self.file_dist_init_genome, "distance from init genome", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_slope, self.file_slope, "slope", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_fact, self.file_fact, "scale factor", ) self.simulation.plot_info_simu( self.collect_likelihood_nuisance, self.collect_d_nuc, self.file_d_nuc, "val trans", ) self.save_behaviour_to_txt()
[docs] def remote_update(self): if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop()
[docs] def setup_simu(self, id_f_ins): self.simulation.sampler.insert_repeats(id_f_ins) self.simulation.sampler.simulate_rippe_contacts() plt.imshow( self.simulation.sampler.hic_matrix, interpolation="nearest", vmin=0, vmax=100, ) plt.show()
[docs] def test_model(self, id_fi, delta): id_fi = np.int32(id_fi) id_neighbors = np.copy( self.simulation.sampler.return_neighbours(id_fi, delta) ) id_neighbors.sort() np.sort(id_neighbors) logger.info(("physic model = ", self.simulation.sampler.param_simu)) j = 0 n_iter = self.n_iterations_em self.simulation.sampler.step_max_likelihood( id_fi, delta, 512, self.dt, np.float32(j), n_iter ) nscore = np.copy(self.simulation.sampler.score) plt.figure() plt.plot( id_neighbors, nscore[ list( range(0, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(1, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(2, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(3, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(4, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(5, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(6, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(7, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) # plt.plot(id_neighbors, nscore[range(12, len(nscore), # self.simulation.sampler.n_tmp_struct)], '-*', markersize=10) plt.legend( [self.simulation.sampler.modification_str[i] for i in range(0, 8)] ) plt.ylabel("log likelihood") plt.xlabel("fragments id") # plt.show() plt.figure() for i in range(0, self.simulation.sampler.n_tmp_struct): logger.info(i) plt.plot( id_neighbors, nscore[ list( range( i, len(nscore), self.simulation.sampler.n_tmp_struct, ) ) ], "-", markersize=10, ) plt.legend(self.simulation.sampler.modification_str) plt.show()
[docs] def debug_test_model(self, id_fi, delta): id_fi = np.int32(id_fi) id_neighbors = np.copy( self.simulation.sampler.return_neighbours(id_fi, delta) ) id_neighbors.sort() np.sort(id_neighbors) logger.info(("physic model = ", self.simulation.sampler.param_simu)) self.simulation.sampler.debug_step_max_likelihood( id_fi, delta, 512, self.dt ) nscore = np.copy(self.simulation.sampler.score) plt.figure() plt.plot( id_neighbors, nscore[ list( range(0, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(1, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(2, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(3, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(4, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(5, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.plot( id_neighbors, nscore[ list( range(6, len(nscore), self.simulation.sampler.n_tmp_struct) ) ], "-*", markersize=10, ) plt.legend( [self.simulation.sampler.modification_str[i] for i in range(0, 7)] ) # plt.show() plt.figure() for i in range(0, self.simulation.sampler.n_tmp_struct): logger.info(i) plt.plot( id_neighbors, nscore[ list( range( i, len(nscore), self.simulation.sampler.n_tmp_struct, ) ) ], "-*", markersize=10, ) plt.legend(self.simulation.sampler.modification_str) plt.show()
[docs] def debug_test_EM(self, delta): for id_fA in range(0, self.simulation.sampler.n_new_frags): self.simulation.sampler.debug_step_max_likelihood( id_fA, delta, 512, self.dt )
[docs] def cuda_gl_init(self,): cuda.init() if bool(OpenGL.GLUT.glutMainLoopEvent): id_gpu = self.device curr_gpu = cuda.Device(id_gpu) logger.info("Selected_device: {}".format(curr_gpu.name())) self.ctx_gl = cudagl.make_context( curr_gpu, flags=cudagl.graphics_map_flags.NONE ) else: import pycuda.gl.autoinit curr_gpu = cudagl.autoinit.device self.ctx_gl = cudagl.make_context( curr_gpu, flags=cudagl.graphics_map_flags.NONE )
[docs] def glut_print(self, x, y, font, text, r, g, b, a): blending = False if OpenGL.GL.glIsEnabled(OpenGL.GL.GL_BLEND): blending = True OpenGL.GL.glEnable(OpenGL.GL.GL_BLEND) OpenGL.GL.glColor3f(r, g, b) OpenGL.GL.glRasterPos2f(x, y) for ch in text: OpenGL.GLUT.glutBitmapCharacter( font, OpenGL.GLUT.ctypes.c_int(ord(ch)) ) if not blending: OpenGL.GL.glDisable(OpenGL.GL.GL_BLEND)
[docs] def glinit(self): OpenGL.GL.glViewport(0, 0, self.width, self.height) OpenGL.GL.glMatrixMode(OpenGL.GL.GL_PROJECTION) OpenGL.GL.glLoadIdentity() OpenGL.GLU.gluPerspective( 60.0, self.width / float(self.height), 0.1, 1000.0 ) OpenGL.GL.glMatrixMode(OpenGL.GL.GL_MODELVIEW)
# GL CALLBACKS
[docs] def timer(self, t): OpenGL.GLUT.glutTimerFunc(t, self.timer, t) OpenGL.GLUT.glutPostRedisplay()
[docs] def on_key(self, *args): ESCAPE = "\033" if args[0] == ESCAPE: self.simulation.release() sys.exit() elif args[0] == "p": self.size_points += 1 elif args[0] == "m": self.size_points -= 1 elif args[0] == "s": self.start_EM() elif args[0] == "w": self.white *= -1 # elif args[0] == 'd': # self.simulation.sampler.thresh += 1 # elif args[0] == 'c': # self.simulation.sampler.thresh -=1 elif args[0] == "b": self.modify_image_thresh(-1) elif args[0] == "d": self.modify_image_thresh(1)
# elif args[0] == 'e': # self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_n_contigs, self.file_n_contigs, "n_contigs") # self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_mean_len, self.file_mean_len, "mean length contigs") # self.simulation.plot_info_simu(self.collect_likelihood, # self.collect_dist_from_init_genome, self.file_dist_init_genome, # "distance from init genome") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_slope, self.file_slope, "slope") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_fact, self.file_fact, "scale factor") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_d_nuc, self.file_d_nuc, "val trans") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_d, self.file_d, "d") # self.simulation.plot_info_simu(self.collect_likelihood_nuisance, # self.collect_d_max, self.file_d_max, "dist max intra") plt.figure() # plt.loglog(self.bins_rippe, self.y_eval, '-b') # plt.loglog(self.bins_rippe, self.simulation.sampler.mean_contacts, # '-*r') plt.xlabel("genomic separation ( kb)") plt.ylabel("n # contacts") plt.title("rippe curve") plt.legend(["fit", "obs"]) # plt.show() # self.simulation.sampler.display_modif_vect(0, 0, -1, # 100) self.save_behaviour_to_txt()
[docs] def on_click(self, button, state, x, y): if state == OpenGL.GLUT.GLUT_DOWN: self.mouse_down = True self.button = button else: self.mouse_down = False self.mouse_old.x = x self.mouse_old.y = y
[docs] def on_mouse_motion(self, x, y): dx = x - self.mouse_old.x dy = y - self.mouse_old.y if self.mouse_down and self.button == 0: # left button self.rotate.x += dy * 0.2 self.rotate.y += dx * 0.2 elif self.mouse_down and self.button == 2: # right button self.translate.z -= dy * 0.01 self.mouse_old.x = x self.mouse_old.y = y
# END GL CALLBACKS
[docs] def modify_image_thresh(self, val): """ modify threshold of the matrix """ self.simulation.sampler.modify_image_thresh(val)
# print "threshold = ", self.simulation.sampler.im_thresh
[docs] def draw(self): """Render the particles""" # update or particle positions by calling the OpenCL kernel # self.cle.execute(10) OpenGL.GL.glFlush() OpenGL.GL.glClear( OpenGL.GL.GL_COLOR_BUFFER_BIT | OpenGL.GL.GL_DEPTH_BUFFER_BIT ) if self.white == -1: OpenGL.GL.glClearColor(1, 1, 1, 1) else: OpenGL.GL.glClearColor(0, 0, 0, 0) OpenGL.GL.glMatrixMode(OpenGL.GL.GL_MODELVIEW) OpenGL.GL.glLoadIdentity() # handle mouse transformations OpenGL.GL.glTranslatef( self.initrans.x, self.initrans.y, self.initrans.z ) OpenGL.GL.glRotatef(self.rotate.x, 1, 0, 0) OpenGL.GL.glRotatef( self.rotate.y, 0, 1, 0 ) # we switched around the axis so make this rotate_z OpenGL.GL.glTranslatef( self.translate.x, self.translate.y, self.translate.z ) # render the matrix self.render_image() # render the particles self.render() # draw the x, y and z axis as lines glutil.draw_axes() # enable 2d display # OpenGL.GL.glMatrixMode(OpenGL.GL.GL_PROJECTION) OpenGL.GL.glPushMatrix() OpenGL.GL.glLoadIdentity() OpenGL.GL.glOrtho(0.0, self.width, self.height, 0.0, -1.0, 10.0) OpenGL.GL.glMatrixMode(OpenGL.GL.GL_MODELVIEW) # glPushMatrix() ----Not sure if I need this OpenGL.GL.glLoadIdentity() OpenGL.GL.glDisable(OpenGL.GL.GL_CULL_FACE) OpenGL.GL.glClear(OpenGL.GL.GL_DEPTH_BUFFER_BIT) # glBegin(GL_LINES) # glColor3f(0.0, 1.0, 0.0) # glVertex2f(0.0, 0.0) # glVertex2f(100.0, 0.0) # glVertex2f(100.0, 100.0) # glVertex2f(0.0, 100.0) # glEnd() if self.white == 1: self.glut_print( 10, 15, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_cycle, 0.0, 1.0, 0.0, 1.0, ) self.glut_print( 10, 30, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_temp, 0.0, 1.0, 0.0, 1.0, ) self.glut_print( 10, 45, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_id_frag, 0.0, 1.0, 0.0, 1.0, ) self.glut_print( 10, 60, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_likelihood, 0.0, 1.0, 0.0, 1.0, ) self.glut_print( 10, 75, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_n_contigs, 0.0, 1.0, 0.0, 1.0, ) self.glut_print( 10, 90, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_dist, 0.0, 1.0, 0.0, 1.0, ) else: self.glut_print( 10, 15, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_cycle, 0.0, 0.0, 0.0, 1.0, ) self.glut_print( 10, 30, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_temp, 0.0, 0.0, 0.0, 1.0, ) self.glut_print( 10, 45, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_id_frag, 0.0, 0.0, 0.0, 1.0, ) self.glut_print( 10, 60, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_likelihood, 0.0, 0.0, 0.0, 1.0, ) self.glut_print( 10, 75, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_n_contigs, 0.0, 0.0, 0.0, 1.0, ) self.glut_print( 10, 90, OpenGL.GLUT.GLUT_BITMAP_9_BY_15, self.str_curr_dist, 0.0, 0.0, 0.0, 1.0, ) # self.glut_print( 10 , 15 , GLUT_BITMAP_9_BY_15 , self.str_curr_cycle # , 1.0 , 1.0 , 1.0 , 1.0) self.glut_print( 10 , 30 , # GLUT_BITMAP_9_BY_15 , self.str_curr_temp , 1.0 , 1.0 , 1.0 , 1.0) # self.glut_print( 10 , 45 , GLUT_BITMAP_9_BY_15 , # self.str_curr_id_frag , 1.0 , 1.0 , 1.0 , 1.0) self.glut_print( 10 , # 60 , GLUT_BITMAP_9_BY_15 , self.str_likelihood , 1.0 , 1.0 , 1.0 , # 1.0) self.glut_print( 10.1 , 60.1 , GLUT_BITMAP_9_BY_15 , # self.str_likelihood , 1.0 , 1.0 , 1.0 , 1.0) self.glut_print( 9.9 , # 59.9, GLUT_BITMAP_9_BY_15 , self.str_likelihood , 1.0 , 1.0 , 1.0 , # 1.0) self.glut_print( 10 , 75 , GLUT_BITMAP_9_BY_15 , # self.str_n_contigs , 1.0 , 1.0 , 1.0 , 1.0) self.glut_print( 10 , 90 # , GLUT_BITMAP_9_BY_15 , self.str_curr_dist , 1.0 , 1.0 , 1.0 , 1.0) # self.glut_print( 10 , 15 , GLUT_BITMAP_9_BY_15 , self.str_curr_cycle # , 0.0 , 0.0 , 0.0 , 1.0) self.glut_print( 10 , 30 , # GLUT_BITMAP_9_BY_15 , self.str_curr_temp , 0.0 , 0.0 , 0.0 , 1.0) # self.glut_print( 10 , 45 , GLUT_BITMAP_9_BY_15 , # self.str_curr_id_frag , 0.0 , 0.0 , 0.0 , 1.0) self.glut_print( 10 , # 60 , GLUT_BITMAP_9_BY_15 , self.str_likelihood , 0.0 , 0.0 , 0.0 , # 1.0) self.glut_print( 10 , 75 , GLUT_BITMAP_9_BY_15 , # self.str_n_contigs , 0.0 , 0.0 , 0.0 , 1.0) self.glut_print( 10 , 90 # , GLUT_BITMAP_9_BY_15 , self.str_curr_dist , 0.0 , 0.0 , 0.0 , 1.0) # Making sure we can render 3d again OpenGL.GL.glMatrixMode(OpenGL.GL.GL_PROJECTION) OpenGL.GL.glPopMatrix() OpenGL.GL.glMatrixMode(OpenGL.GL.GL_MODELVIEW) # glPopMatrix()## ----and this? ############# OpenGL.GLUT.glutSwapBuffers()
[docs] def render(self): OpenGL.GL.glEnable(OpenGL.GL.GL_POINT_SMOOTH) OpenGL.GL.glEnable(OpenGL.GL.GL_BLEND) OpenGL.GL.glBlendFunc( OpenGL.GL.GL_SRC_ALPHA, OpenGL.GL.GL_ONE_MINUS_SRC_ALPHA ) # glBegin(GL_POINTS) # glEnable(GL_POINT_SMOOTH) OpenGL.GL.glHint(OpenGL.GL.GL_POINT_SMOOTH_HINT, OpenGL.GL.GL_NICEST) # glHint(GL_POINT_SMOOTH_HINT, GL_NICEST); # glHint(GL_LINE_SMOOTH_HINT, GL_NICEST); # glHint(GL_POLYGON_SMOOTH_HINT, GL_NICEST); # OpenGL.GL.glPointSize(self.size_points) # glEnable(GL_BLEND) # glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) # setup the VBOs self.col_vbo.bind() OpenGL.GL.glColorPointer(4, OpenGL.GL.GL_FLOAT, 0, self.col_vbo) self.pos_vbo.bind() OpenGL.GL.glVertexPointer(4, OpenGL.GL.GL_FLOAT, 0, self.pos_vbo) OpenGL.GL.glEnableClientState(OpenGL.GL.GL_VERTEX_ARRAY) OpenGL.GL.glEnableClientState(OpenGL.GL.GL_COLOR_ARRAY) # draw the VBOs OpenGL.GL.glDrawArrays( OpenGL.GL.GL_POINTS, 0, int(self.simulation.sampler.n_new_frags) ) OpenGL.GL.glDisableClientState(OpenGL.GL.GL_COLOR_ARRAY) OpenGL.GL.glDisableClientState(OpenGL.GL.GL_VERTEX_ARRAY) # glEnd(GL_POINTS) OpenGL.GL.glDisable(OpenGL.GL.GL_BLEND)
[docs] def render_image(self): blending = False if OpenGL.GL.glIsEnabled(OpenGL.GL.GL_BLEND): blending = True else: OpenGL.GL.glEnable(OpenGL.GL.GL_BLEND) OpenGL.GL.glColor4f(1, 1, 1, 1) OpenGL.GL.glEnable(OpenGL.GL.GL_TEXTURE_2D) OpenGL.GL.glBindBuffer( OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, self.pbo_im_buffer ) OpenGL.GL.glBindTexture(OpenGL.GL.GL_TEXTURE_2D, self.texid) OpenGL.GL.glGetBufferParameteriv( OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, OpenGL.GL.GL_BUFFER_SIZE ) # Copyng from buffer to texture OpenGL.GL.glTexSubImage2D( OpenGL.GL.GL_TEXTURE_2D, 0, 0, 0, self.gl_size_im, self.gl_size_im, OpenGL.GL.GL_LUMINANCE, OpenGL.GL.GL_UNSIGNED_BYTE, None, ) OpenGL.GL.glBindBuffer(OpenGL.GL.GL_PIXEL_UNPACK_BUFFER, 0) # Unbind # glDisable(GL_DEPTH_TEST) OpenGL.GL.glEnable(OpenGL.GL.GL_TEXTURE_2D) OpenGL.GL.glTexParameterf( OpenGL.GL.GL_TEXTURE_2D, OpenGL.GL.GL_TEXTURE_MIN_FILTER, OpenGL.GL.GL_LINEAR, ) OpenGL.GL.glTexParameterf( OpenGL.GL.GL_TEXTURE_2D, OpenGL.GL.GL_TEXTURE_MAG_FILTER, OpenGL.GL.GL_LINEAR, ) OpenGL.GL.glTexParameterf( OpenGL.GL.GL_TEXTURE_2D, OpenGL.GL.GL_TEXTURE_WRAP_S, OpenGL.GL.GL_REPEAT, ) OpenGL.GL.glTexParameterf( OpenGL.GL.GL_TEXTURE_2D, OpenGL.GL.GL_TEXTURE_WRAP_T, OpenGL.GL.GL_REPEAT, ) # OpenGL.GL.glBegin(OpenGL.GL.GL_QUADS) # glVertex2f(0-1, 0-1) # glTexCoord2f(0-1, 0-1) # glVertex2f(0-1, 1-1) # glTexCoord2f(1-1, 0-1) # glVertex2f(1-1, 1-1) # glTexCoord2f(1-1, 1-1) # glVertex2f(1-1, 0-1) # glTexCoord2f(0-1, 1-1) OpenGL.GL.glVertex2f(-1, 0) OpenGL.GL.glTexCoord2f(-1, 0) OpenGL.GL.glVertex2f(0, 0) OpenGL.GL.glTexCoord2f(0, 0) OpenGL.GL.glVertex2f(0, 1) OpenGL.GL.glTexCoord2f(0, 1) OpenGL.GL.glVertex2f(-1, 1) OpenGL.GL.glTexCoord2f(-1, 1) # rotation 45 deg # glVertex2f(-1, 0.) # glTexCoord2f(0, 0) # # glVertex2f(0, 1) # glTexCoord2f(0, 1) # # glVertex2f(1, 0.) # glTexCoord2f(1, 1) # # glVertex2f(0, -1) # glTexCoord2f(1, 0) OpenGL.GL.glEnd() # glBindTexture(GL_TEXTURE_2D, 0) # glutSwapBuffers() # glutPostRedisplay() OpenGL.GL.glDisable(OpenGL.GL.GL_TEXTURE_2D) # glEnable(GL_DEPTH_TEST) if not blending: OpenGL.GL.glDisable(OpenGL.GL.GL_BLEND)
[docs] def simple_start(self, n_cycles, n_neighbours, bomb): sampler = self.simulation.sampler if bomb: sampler.bomb_the_genome() sampler.gpu_vect_frags.copy_from_gpu() list_frags_extremities = list( np.nonzero(sampler.gpu_vect_frags.prev == -1)[0] ) # n_neighbours = 5 for j in range(0, n_cycles): sampler.gpu_vect_frags.copy_from_gpu() if j > 0: list_frags_extremities = list( np.nonzero(sampler.gpu_vect_frags.prev == -1)[0] ) id_extrem_right = list( np.nonzero(sampler.gpu_vect_frags.__next__ == -1)[0] ) list_frags_extremities.extend(id_extrem_right) list_frags_extremities = np.array( list_frags_extremities, dtype=np.int32 ) np.random.shuffle(list_frags_extremities) logger.info("cycle = {}".format(j)) # np.random.shuffle(list_frags) for id_frag in list_frags_extremities: if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() ( o, dist, op_sampled, id_f_sampled, mean_len, n_contigs, ) = sampler.step_sampler(id_frag, n_neighbours, self.dt) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(sampler.n_contigs) self.str_curr_id_frag = "current frag = " + str(id_frag) self.str_curr_dist = "current dist = " + str(dist) self.str_curr_cycle = "current cycle = " + str(j)
[docs] def full_em( self, n_cycles, n_neighbours, bomb, id_start_sample_param, save_matrix=False, ): sampler = self.simulation.sampler if bomb: sampler.bomb_the_genome() list_frags = np.arange(0, sampler.n_new_frags) t = 0 n_iter = n_cycles * sampler.n_new_frags for j in range(0, n_cycles): sampler.gpu_vect_frags.copy_from_gpu() np.random.shuffle(list_frags) logger.info("cycle = {}".format(j)) # np.random.shuffle(list_frags) for id_frag in list_frags: if bool(OpenGL.GLUT.glutMainLoopEvent): OpenGL.GLUT.glutMainLoopEvent() else: OpenGL.GLUT.glutCheckLoop() ( o, dist, op_sampled, id_f_sampled, mean_len, n_contigs, ) = sampler.step_sampler(id_frag, n_neighbours, self.dt) self.collect_likelihood.append(o) self.collect_n_contigs.append(n_contigs) self.collect_mean_len.append(mean_len) self.collect_op_sampled.append(op_sampled) self.collect_id_fB_sampled.append(id_f_sampled) self.collect_id_fA_sampled.append(id_frag) self.collect_dist_from_init_genome.append(dist) self.str_likelihood = "likelihood = " + str(o) self.str_n_contigs = "n contigs = " + str(sampler.n_contigs) self.str_curr_id_frag = "current frag = " + str(id_frag) self.str_curr_dist = "current dist = " + str(dist) self.str_curr_cycle = "current cycle = " + str(j) if self.sample_param and j > id_start_sample_param: ( fact, d, d_max, d_nuc, slope, self.likelihood_t_nuis, success, y_rippe, ) = sampler.step_nuisance_parameters(self.dt, t, n_iter) self.collect_fact.append(fact) self.collect_d.append(d) self.collect_d_max.append(d_max) self.collect_d_nuc.append(d_nuc) self.collect_slope.append(slope) self.collect_likelihood_nuisance.append( self.likelihood_t_nuis ) self.collect_success.append(success) self.y_eval = y_rippe t += 1 c = sampler.gpu_vect_frags c.copy_from_gpu() file_out = os.path.join( self.simulation.output_folder, "save_simu_step_" + str(j) + ".txt", ) h = open(file_out, "w") for pos, start_bp, id_c, ori in zip( c.pos, c.start_bp, c.id_c, c.ori ): h.write( str(pos) + "\t" + str(start_bp) + "\t" + str(id_c) + "\t" + str(ori) + "\n" ) h.close() try: self.simulation.export_new_fasta() self.save_behaviour_to_txt() except OSError as e: logger.warning( "Warning, could not write output files at {}: {}".format( j, e ) ) try: if save_matrix: my_file_path = os.path.join( self.simulation.output_folder, "matrix_cycle_" + str(j) + ".png", ) matrix = self.simulation.sampler.gpu_im_gl.get() plt.gca().set_axis_off() plt.subplots_adjust( top=1, bottom=0, right=1, left=0, hspace=0, wspace=0 ) matrix = matrix + matrix.T - np.diag(np.diag(matrix)) plt.margins(0, 0) plt.gca().xaxis.set_major_locator(plt.NullLocator()) plt.gca().yaxis.set_major_locator(plt.NullLocator()) plt.figure() plt.imshow( matrix, vmax=np.percentile(matrix, 99), cmap="Reds" ) plt.axis("off") plt.savefig( my_file_path, bbox_inches="tight", pad_inches=0.0, dpi=300, ) plt.close() except OSError as e: logger.warning( "Could not write matrix at cycle {} " "due to error: {}".format(j, e) ) self.save_behaviour_to_txt()
[docs]def main(): arguments = docopt.docopt(__doc__, version=VERSION_NUMBER) # print(arguments) project_folder = arguments["<hic_folder>"] output_folder = arguments["<output_folder>"] reference_fasta = arguments["<reference.fa>"] number_cycles = int(arguments["--cycles"]) level = int(arguments["--level"]) thresh_factor = float(arguments["--coverage-std"]) neighborhood = int(arguments["--neighborhood"]) device = int(arguments["--device"]) circ = arguments["--circular"] bomb = arguments["--bomb"] save_matrix = arguments["--save-matrix"] simple = arguments["--simple"] quiet = arguments["--quiet"] debug = arguments["--debug"] pyramid_only = arguments["--pyramid-only"] pickle_name = arguments["--save-pickle"] log_level = logging.INFO if quiet: log_level = logging.WARNING if debug: log_level = logging.DEBUG logger.setLevel(log_level) log.CURRENT_LOG_LEVEL = log_level name = os.path.basename(os.path.normpath(project_folder)) is_simu = False scrambled = False n_iterations_em = 100 n_iterations_mcmc = 30 perform_em = False use_rippe = True gl_size_im = 1000 sample_param = True if not output_folder: output_folder = None p2 = window( name=name, folder_path=project_folder, fasta=reference_fasta, device=device, level=level, n_iterations_em=DEFAULT_ITERATIONS_EM, n_iterations_mcmc=DEFAULT_ITERATIONS_MCMC, is_simu=False, scrambled=False, perform_em=False, use_rippe=True, gl_size_im=DEFAULT_GL_SIZE_IM, sample_param=True, thresh_factor=thresh_factor, output_folder=output_folder, ) if circ: p2.simulation.level.S_o_A_frags["circ"] += 1 if not pyramid_only: if not simple: p2.full_em( n_cycles=number_cycles, n_neighbours=neighborhood, bomb=bomb, id_start_sample_param=4, save_matrix=save_matrix, ) else: p2.simple_start( n_cycles=number_cycles, n_neighbours=neighborhood, bomb=bomb ) if pickle_name: with open("graal.pkl", "wb") as pickle_handle: pickle.dump(p2, pickle_handle) p2.ctx_gl.pop()
# sampler.step_sampler(50) # sampler.gpu_vect_frags.copy_from_gpu() # max_id = sampler.gpu_vect_frags.id_c.max() # frag_a = 0 # frag_b = 1 # id_c_a = sampler.gpu_vect_frags.id_c[frag_a] # id_c_b = sampler.gpu_vect_frags.id_c[frag_b] # print "id_frag a =", frag_a, "id contig = ", id_c_a # print "id_frag b =", frag_b, "id contig = ", id_c_b # ############################################################# # sampler.perform_mutations(frag_a, frag_b, max_id, 1 == 0) # ############################################################# # flip_eject = 1 # sampler.extract_uniq_mutations(frag_a, frag_b, flip_eject) # t0 = time.time() # l = sampler.eval_likelihood() # t1 = time.time() # sampler.slice_sparse_mat(id_c_a, id_c_b) # sampler.extract_current_sub_likelihood() # v_l = sampler.eval_all_sub_likelihood() # t2 = time.time() # print "single likelihood = ", l # print "vect likelihood = ", v_l # print "Time single = ", t1 - t0 # print "Time all_mut = ", t2 - t1 # print "###################################################" # # t0 = time.time() # # l = sampler.eval_likelihood() # t1 = time.time() # # sampler.slice_sparse_mat(id_c_a, id_c_b) # v_l = sampler.eval_all_sub_likelihood() # t2 = time.time() # # print "single likelihood = ", l # print "vect likelihood = ", v_l # # print "Time single = ", t1 - t0 # print "Time all_mut = ", t2 - t1 # n_neighbours = 5 # # sampler.explode_genome(p2.dt) # # sampler.bomb_the_genome() if __name__ == "__main__": main()