instagraal package

Submodules

instagraal.cuda_lib_gl_single module

class instagraal.cuda_lib_gl_single.sampler(use_rippe, S_o_A_frags, collector_id_repeats, frag_dispatcher, id_frag_duplicated, id_frags_blacklisted, n_frags, n_new_frags, init_n_sub_frags, n_new_sub_frags, np_rep_sub_frags_id, sub_sampled_sparse_matrix, np_sub_frags_len_bp, np_sub_frags_id, np_sub_frags_accu, np_sub_frags_2_frags, mean_squared_frags_per_bin, norm_vect_accu, sub_candidates_dup, sub_candidates_output_data, S_o_A_sub_frags, sub_collector_id_repeats, sub_frag_dispatcher, sparse_matrix, mean_value_trans, n_iterations, is_simu, gl_window, pos_vbo, col_vbo, vel, pos, raw_im_init, pbo_im_buffer, gl_size_im)[source]

Bases: object

apply_replay_simu(id_fA, id_fB, op_sampled, dt)[source]
approx_all_likelihood_on_zeros()[source]
approx_single_likelihood_on_zeros()[source]
approx_single_likelihood_on_zeros_mut()[source]
approx_single_likelihood_on_zeros_nuisance()[source]
bomb_the_genome()[source]
create_gpu_struct(data)[source]
display_current_matrix(filename)[source]
dist_inter_genome(tmp_gpu_vect_frags)[source]
estimate_parameters(max_dist_kb, size_bin_kb, display_graph)[source]

Estimation of the Rippe parameters by least-squares optimization on the experimental data.

Parameters:
  • max_dist_kb
  • size_bin_kb

estimate_parameters_rippe(max_dist_kb, size_bin_kb, display_graph)[source]

Estimation of the Rippe parameters by least-squares optimization on the experimental data.

Parameters:
  • max_dist_kb
  • size_bin_kb

eval_all_sub_likelihood()[source]
eval_likelihood()[source]
eval_likelihood_4_nuisance()[source]
eval_likelihood_init()[source]
eval_likelihood_on_mut(id_mut)[source]
explode_genome(dt)[source]
extract_current_sub_likelihood()[source]
extract_uniq_mutations(id_fi, id_fj, flip_eject)[source]
f_hic(x, param)[source]
f_rippe(x, param)[source]
fill_dist_all_mut()[source]
fill_dist_single()[source]
fill_dist_single_mut(id_mut)[source]
free_gpu()[source]
genome_content()[source]
insert_blocks(id_fA, id_fB, max_id)[source]
insert_repeats(id_f_ins)[source]
loadProgram(filename)[source]
load_gl_cuda_tex_buffer(im_init)[source]
load_gl_cuda_vbo()[source]
local_flip(id_fA, mode, max_id)[source]
modify_genome(n)[source]
modify_gl_cuda_buffer(id_fi, dt)[source]
modify_image_thresh(val)[source]
modify_param_simu(param_simu, id_val, val)[source]
perform_mutations(id_fA, id_fB, max_id, is_first)[source]
pop_out_pop_in(id_f_pop, id_f_ins, mode, max_id)[source]
prepare_sparse_call()[source]
return_neighbours(id_fA, delta0)[source]
return_rippe_vals(p0)[source]
set_jumping_distributions_parameters(delta)[source]
setup_all_gpu_struct()[source]
setup_distri_frags()[source]
setup_model_parameters(param, d_max)[source]
setup_rippe_parameters(param, d_max)[source]
setup_rippe_parameters_4_simu(kuhn, lm, slope, d, val_inter, d_max)[source]
setup_thrust_modules()[source]
show_sub_slice(id_ctg1, id_ctg2, id_frag_a, id_frag_b)[source]
slice_sparse_mat(id_ctg1, id_ctg2, id_fragA, id_fragB)[source]
sparse_data_2_gpu()[source]
sparse_data_4_gl(precision)[source]
step_nuisance_parameters(dt, t, n_step)[source]
step_sampler(id_frag, n_neighbours, dt)[source]
step_sampler_debug(id_frag, n_neighbours)[source]
temperature(t, n_step)[source]
test_copy_struct(id_fA, id_f_sampled, mode, max_id)[source]
test_thrust()[source]
transloc(id_fA, id_fB, max_id)[source]
update_neighbourhood()[source]

instagraal.fragment module

class instagraal.fragment.basic_fragment[source]

Bases: object

classmethod initiate(np_id_abs, id_init, init_contig, curr_id, start_pos, end_pos, length_kb, gc_content, init_frag_start, init_frag_end, sub_frag_start, sub_frag_end, super_index, id_contig, n_accu_frags)[source]
class instagraal.fragment.fragment[source]

Bases: object

classmethod copy(frag)[source]
classmethod initiate(np_id_abs, id_init, init_contig, curr_id, start_pos, end_pos, length_kb, gc_content)[source]
update_name(contig_id)[source]

instagraal.glutil module

instagraal.glutil.draw_axes()[source]
instagraal.glutil.draw_line(v1, v2)[source]
instagraal.glutil.init(width, height)[source]
instagraal.glutil.lights()[source]

instagraal.gpustruct module

class instagraal.gpustruct.GPUStruct(objs)[source]

Bases: object

copy_from_gpu(skip=None)[source]
copy_to_gpu(skip=None)[source]
get_packed()[source]
get_ptr()[source]

instagraal.init_nuisance module

instagraal.init_nuisance.estimate_max_dist_intra(p, val_inter)[source]
instagraal.init_nuisance.estimate_param_hic(y_meas, x_bins)[source]
instagraal.init_nuisance.log_residuals(param, y, x)[source]
instagraal.init_nuisance.log_residuals_4_min(param, y, x)[source]
instagraal.init_nuisance.peval(x, param)[source]
instagraal.init_nuisance.residual_4_max_dist(x, p)[source]
instagraal.init_nuisance.residuals(param, y, x)[source]

instagraal.instagraal module

Large genome reassembly based on Hi-C data.

Usage:
instagraal <hic_folder> <reference.fa> [<output_folder>]
[--level=4] [--cycles=100] [--coverage-std=1] [--neighborhood=5] [--device=0] [--circular] [--bomb] [--save-matrix] [--pyramid-only] [--save-pickle] [--simple] [--quiet] [--debug]
Options:
-h, --help Display this help message.
--version Display the program’s current version.
-l 4, --level 4 Level (resolution) of the contact map.
Increasing level by one means a threefold smaller resolution but also a threefold faster computation time. [default: 4]
-n 100, --cycles 100 Number of iterations to perform for each bin
(row/column of the contact map). A high number of cycles has diminishing returns but there is a necessary minimum for assembly convergence. [default: 100]
-c 1, --coverage-std 1 Number of standard deviations below the mean
coverage, below which fragments should be filtered out prior to binning. [default: 1]
-N 5, --neighborhood 5 Number of neighbors to sample for potential
mutations for each bin. [default: 5]
--device 0 If multiple graphic cards are available, select
a specific device (numbered from 0). [default: 0]
-C, --circular Indicates genome is circular. [default: False]
-b, --bomb Explode the genome prior to scaffolding. [default: False]
--pyramid-only Only build multi-resolution contact maps (pyramids) and don’t do any scaffolding. [default: False]
--save-pickle Dump all info from the instaGRAAL run into a pickle. Primarily for development purposes, but also for advanced post hoc introspection. [default: False]
--save-matrix Saves a preview of the contact map after each cycle. [default: False]
--simple Only perform operations at the edge of the contigs. [default: False]
--quiet Only display warnings and errors as outputs. [default: False]
--debug Display debug information. For development purposes only. Mutually exclusive with --quiet, and will override it. [default: False]
instagraal.instagraal.main()[source]
class instagraal.instagraal.window(name, folder_path, fasta, device, level, n_iterations_em, n_iterations_mcmc, is_simu, scrambled, perform_em, use_rippe, gl_size_im, sample_param, thresh_factor, output_folder)[source]

Bases: object

A window displaying the live movie of the calculations performed by the scaffolder.

[description]

Parameters:
  • name (str) – The name of the project. Will determine the window title.
  • folder_path (str or pathlib.Path) – The directory containing the Hi-C contact map.
  • fasta (str or pathlib.Path) – The path to the reference genome in FASTA format.
  • device (int) – The identifier of the graphic card to be used, numbered from 0. If only one is available, it should be 0.
  • level (int) – The level (resolution) at which to perform scaffolding.
  • n_iterations_em (int) – The number of EM (expectation maximization) iterations.
  • n_iterations_mcmc (int) – The number of MCMC (Markov chain Monte-Carlo) iterations.
  • is_simu (bool) – Whether the parameters should be simulated. Mutually exclusive with use_rippe and will override it.
  • scrambled (bool) – Whether to scramble the genome.
  • perform_em (bool) – Whether to perform EM (expectation maximization).
  • use_rippe (bool) – Whether to explicitly use the model from Rippe et al., 2001.
  • gl_size_im (int) – The size of the window to be displayed.
  • sample_param (bool) – Whether to sample the parameters.
  • thresh_factor (float) – The sparsity (coverage) threshold below which fragments are discarded, as a number of standard deviations below the mean.
  • output_folder (str or pathlib.Path) – The path to the output folder where the scaffolded genome and other relevant information will be saved.
cuda_gl_init()[source]
debug_test_EM(delta)[source]
debug_test_model(id_fi, delta)[source]
draw()[source]

Render the particles

full_em(n_cycles, n_neighbours, bomb, id_start_sample_param, save_matrix=False)[source]
glinit()[source]
glut_print(x, y, font, text, r, g, b, a)[source]
modify_image_thresh(val)[source]

modify threshold of the matrix

on_click(button, state, x, y)[source]
on_key(*args)[source]
on_mouse_motion(x, y)[source]
remote_update()[source]
render()[source]
render_image()[source]
replay_simu(file_simu, file_likelihood, file_n_contigs, file_distances)[source]
save_behaviour_to_txt()[source]
setup_simu(id_f_ins)[source]
simple_start(n_cycles, n_neighbours, bomb)[source]
start_EM()[source]
start_EM_all()[source]
start_EM_no_scrambled()[source]
start_EM_nuisance()[source]
start_MCMC()[source]
start_MTM()[source]
test_model(id_fi, delta)[source]
timer(t)[source]

instagraal.leastsqbound module

Constrained multivariate Levenberg-Marquardt optimization

An updated version of this file can be found at https://github.com/jjhelmus/leastsqbound-scipy

The version here has known bugs which have been fixed in the updated version linked above; proceed at your own risk.

instagraal.leastsqbound.calc_cov_x(infodic, p)[source]

Calculate cov_x from fjac, ipvt and p as is done in leastsq

instagraal.leastsqbound.err(p, bounds, efunc, args)[source]
instagraal.leastsqbound.external2internal(xe, bounds)[source]

Convert a series of external variables to internal variables

instagraal.leastsqbound.i2e_cov_x(xi, bounds, cov_x)[source]
instagraal.leastsqbound.internal2external(xi, bounds)[source]

Convert a series of internal variables to external variables

instagraal.leastsqbound.internal2external_grad(xi, bounds)[source]

Calculate the internal to external gradient

Calculates the partial of external over internal

instagraal.leastsqbound.leastsqbound(func, x0, bounds, args=(), **kw)[source]

Constrained multivariate Levenberg-Marquardt optimization

Minimize the sum of squares of a given function using the Levenberg-Marquardt algorithm. Constraints on parameters are enforced using variable transformations as described in the MINUIT User’s Guide by Fred James and Matthias Winkler.

Parameters:

  • func functions to call for optimization.
  • x0 Starting estimate for the minimization.
  • bounds (min,max) pair for each element of x, defining the bounds on
    that parameter. Use None for one of min or max when there is no bound in that direction.
  • args Any extra arguments to func are placed in this tuple.

Returns: (x,{cov_x,infodict,mesg},ier)

Return is described in the scipy.optimize.leastsq function. x and cov_x are corrected to take into account the parameter transformation, infodict is not corrected.

Additional keyword arguments are passed directly to the scipy.optimize.leastsq algorithm.

instagraal.linkage module

instagraal.log module

Basic logging setup for instaGRAAL.

Logging level can be set by the user and determines the verbosity of the whole program.

instagraal.optim_rippe_curve_update module

instagraal.optim_rippe_curve_update.estimate_max_dist_intra(p, val_inter)[source]
instagraal.optim_rippe_curve_update.estimate_max_dist_intra_nuis(p, val_inter, old_s)[source]
instagraal.optim_rippe_curve_update.estimate_param_rippe(y_meas, x_bins)[source]
instagraal.optim_rippe_curve_update.log_peval(x, param)[source]
instagraal.optim_rippe_curve_update.log_residuals(p, y, x)[source]
instagraal.optim_rippe_curve_update.peval(x, param)[source]
instagraal.optim_rippe_curve_update.residual_4_max_dist(x, p)[source]
instagraal.optim_rippe_curve_update.residuals(p, y, x)[source]

instagraal.parse_info_frags module

instagraal.pyramid_sparse module

Pyramid library

Create and handle so-called ‘pyramid’ objects, i.e. a series of decreasing-resolution contact maps in hdf5 format.

instagraal.pyramid_sparse.abs_contact_2_coo_file(abs_contact_file, coo_file)[source]

Convert contact maps between old-style and new-style formats.

A legacy function that converts contact maps from the older GRAAL format to the simpler instaGRAAL format. This is useful with datasets generated by Hi-C box.

Parameters:
  • abs_contact_file (str, file or pathlib.Path) – The input old-style contact map.
  • coo_file (str, file, or pathlib.Path) – The output path to the generated contact map; must be writable.
instagraal.pyramid_sparse.build(base_folder, size_pyramid, factor, min_bin_per_contig)[source]

Build a pyramid of contact maps

Build a fragment pyramid for multi-scale analysis

Parameters:
  • base_folder (str or pathlib.Path) – Where to create the hdf5 files containing the matrices.
  • size_pyramid (int) – How many levels (contact maps of decreasing resolution) to generate.
  • factor (int) – Subsampling factor (binning) from one level to the next.
  • min_bin_per_contig (int) – The minimum number of bins per contig below which binning shall not be performed.
instagraal.pyramid_sparse.build_and_filter(base_folder, size_pyramid, factor, thresh_factor=1)[source]

Build a filtered pyramid of contact maps

Build a fragment pyramid for multi-scale analysis and remove high sparsity (i.e. low-coverage) and short fragments.

Parameters:
  • base_folder (str or pathlib.Path) – Where to create the hdf5 files containing the matrices.
  • size_pyramid (int) – How many levels (contact maps of decreasing resolution) to generate.
  • factor (int) – Subsampling factor (binning) from one level to the next.
  • thresh_factor (float, optional) – Number of standard deviations below the mean coverage beyond which lesser covered fragments will be discarded. Default is 1.
Returns:

obj_pyramid – The pyramid object containing all the levels.

Return type:

Pyramid

instagraal.pyramid_sparse.file_len(fname)[source]
instagraal.pyramid_sparse.fill_sparse_pyramid_level(pyramid_handle, level, contact_file, nfrags)[source]

Fill a level with sparse contact map data

Fill values from the simple text matrix file to the hdf5-based pyramid level with contact data.

Parameters:
  • pyramid_handle (h5py.File) – The hdf5 file handle containing the whole dataset.
  • level (int) – The level (resolution) to be filled with contact data.
  • contact_file (str, file or pathlib.Path) – The binned contact map file to be converted to hdf5 data.
  • nfrags (int) – The number of fragments/bins in that specific level.
instagraal.pyramid_sparse.get_contig_info_from_file(contig_info)[source]
instagraal.pyramid_sparse.get_frag_info_from_fil(fragments_list)[source]
instagraal.pyramid_sparse.init_frag_list(fragment_list, new_frag_list)[source]

Adapt the original fragment list to fit the build function requirements

Parameters:
  • fragment_list (str, file or pathlib.Path) – The input fragment list.
  • new_frag_list (str, file or pathlib.Path) – The output fragment list to be written.
Returns:

i – The number of records processed this way.

Return type:

int

class instagraal.pyramid_sparse.level(pyramid, level)[source]

Bases: object

build_seq_per_bin(genome_fasta)[source]
define_inter_chrom_coord()[source]
generate_new_fasta(vect_frags, new_fasta, info_frags)[source]
init_data()[source]
load_data(pyramid)[source]
Parameters: pyramid – Hi-C pyramid
instagraal.pyramid_sparse.main()[source]
instagraal.pyramid_sparse.new_remove_problematic_fragments(contig_info, fragments_list, abs_fragments_contacts, new_contig_list_file, new_fragments_list_file, new_abs_fragments_contacts_file, pyramid)[source]
class instagraal.pyramid_sparse.pyramid(pyramid_folder, n_levels)[source]

Bases: object

build_frag_dictionnary(fragments_list, level)[source]
close()[source]
full_zoom_in_frag(curr_frag)[source]
Parameters:curr_frag
get_level(level_id)[source]
load_reference_sequence(genome_fasta)[source]
update_super_index(dict_frag, super_index_file)[source]
update_super_index_in_dict_contig(dict_frag, dict_contig)[source]
zoom_in_area(area)[source]

zoom in area

zoom_in_frag(curr_frag)[source]
Parameters:curr_frag
zoom_in_pixel(curr_pixel)[source]

return the curr_frag at a higher resolution

zoom_out_frag(curr_frag)[source]
Parameters:curr_frag
zoom_out_pixel(curr_pixel)[source]

return the curr_frag at a lower resolution

instagraal.pyramid_sparse.remove_problematic_fragments(contig_info, fragments_list, abs_fragments_contacts, new_contig_list_file, new_fragments_list_file, new_abs_fragments_contacts_file, pyramid, thresh_factor=1)[source]
instagraal.pyramid_sparse.subsample_data_set(contig_info, fragments_list, fact_sub_sample, abs_fragments_contacts, new_abs_fragments_contacts_file, min_bin_per_contig, new_contig_list_file, new_fragments_list_file, old_2_new_file)[source]

instagraal.simu_single module

instagraal.simu_single.kth_diag_indices(a, k)[source]
class instagraal.simu_single.simulation(name, folder_path, fasta, level, n_iterations, is_simu, gl_window, use_rippe, gl_size_im, thresh_factor=1, output_folder=None)[source]

Bases: object

blacklist_contig()[source]
create_new_sub_frags()[source]
create_sub_frags()[source]
discard_low_coverage_frags()[source]
export_new_fasta()[source]
init_gl_image()[source]
load_gl_buffers()[source]
modify_sub_vect_frags()[source]

include repeated frags

modify_vect_frags()[source]

include repeated frags

plot_info_simu(collect_likelihood_input, collect_n_contigs_input, file_plot, title_ax)[source]
release()[source]
select_data_set(name)[source]
select_repeated_frags()[source]

instagraal.vector module

class instagraal.vector.Vec[source]

Bases: numpy.ndarray

props = ['x', 'y', 'z', 'w']
instagraal.vector.normalize(u)[source]

instagraal.version module

Module contents