Source code for instagraal.parse_info_frags

#!/usr/bin/env python
# coding: utf-8

"""
A couple of functions to manipulate info_frags.txt files and BED files.

This module started as a couple scripts to flip back artefact inversions
introduced by the GRAAL assembler and its derivatives (see:
https://github.com/koszullab/GRAAL, https://github.com/koszullab/instaGRAAL).
It has greatly evolved since, and provides a range of functions to polish
assemblies and correct potential missassemblies.


"""

import argparse
import copy
import itertools
import operator
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq, IUPAC
from matplotlib import pyplot as plt

# Defaults:
# -Scaffolds below DEFAULT_MIN_SCAFFOLD_SIZE are not considered for polishing
# -Defaut names for output files
# -Junction sequence between two bins that have been stitched together
# in order to reflect that some base pairs may be missing
# -Two default schemes ('criteria') for each inversion corrector
DEFAULT_MIN_SCAFFOLD_SIZE = 0
DEFAULT_NEW_INFO_FRAGS_NAME = "new_info_frags.txt"
DEFAULT_NEW_GENOME_NAME = "new_genome.fa"
DEFAULT_JUNCTION_SEQUENCE = Seq("NNNNNN", IUPAC.ambiguous_dna)
DEFAULT_CRITERION = "colinear"
DEFAULT_CRITERION_2 = "blocks"


[docs]def parse_info_frags(info_frags): """Import an info_frags.txt file and return a dictionary where each key is a newly formed scaffold and each value is the list of bins and their origin on the initial scaffolding. """ new_scaffolds = {} with open(info_frags, "r") as info_frags_handle: current_new_contig = None for line in info_frags_handle: if line.startswith(">"): current_new_contig = str(line[1:-1]) new_scaffolds[current_new_contig] = [] elif line.startswith("init_contig"): pass else: (init_contig, id_frag, orientation, pos_start, pos_end) = str( line[:-1] ).split("\t") start = int(pos_start) end = int(pos_end) ori = int(orientation) fragid = int(id_frag) assert start < end assert ori in {-1, 1} new_scaffolds[current_new_contig].append( [init_contig, fragid, start, end, ori] ) return new_scaffolds
# The format of info_frags.txt files looks a lot like the BED format, so this # function may come in handy
[docs]def parse_bed(bed_file): """Import a BED file (where the data entries are analogous to what may be expected in an info_frags.txt file) and return a scaffold dictionary, similarly to parse_info_frags. """ new_scaffolds = {} with open(bed_file) as bed_handle: for line in bed_handle: chrom, start, end, query, qual, strand = line.split()[:7] if strand == "+": ori = 1 elif strand == "-": ori = -1 else: raise ValueError( "Error when parsing strand " "orientation: {}".format(strand) ) if int(qual) > 0: bed_bin = [query, -2, int(start), int(end), ori] try: new_scaffolds[chrom].append(bed_bin) except KeyError: new_scaffolds[chrom] = [bed_bin] return new_scaffolds
[docs]def correct_scaffolds(scaffolds, corrector): """Unfinished """ new_scaffolds = {} def are_overlapping(bin1, bin2): """Check for overlapping regions between two regions - necessary requirement before potentially merging """ if bin2 is None: return False init1, _, start1, end1, _ = bin1 init2, _, start2, end2, _ = bin2 if init1 != init2: return False else: return (start2 <= start1 <= end2) or (start1 <= start2 <= end1) def merge_bins(bin1, bin2, ignore_ori=True): """Painstakingly check for every edge case in order to properly merge two overlapping bins. """ init1, _, start1, end1, ori1 = bin1 init2, _, start2, end2, ori2 = bin2 assert init1 == init2 start = min(start1, start2) end = max(end1, end2) if ignore_ori: ori = ori1 else: len1 = end1 - start1 len2 = end2 - start2 if len2 > len1: ori = ori2 else: ori = ori1 new_bin = [init1, -3, start, end, ori] return new_bin corrector_bins = copy.deepcopy(corrector) for name, scaffold in scaffolds.items(): new_scaffold = [] for _, blocks in itertools.groupby(scaffold, operator.itemgetter(0)): merged_bin = None while "Reading blocks": try: my_bin = next(blocks) if are_overlapping(my_bin, merged_bin): merged_bin = merge_bins(my_bin, merged_bin) continue else: if merged_bin is not None: new_scaffold.append(merged_bin) merged_bin = my_bin i = 0 for correct_bin in corrector_bins: if are_overlapping(my_bin, correct_bin): merged_bin = merge_bins(my_bin, correct_bin) corrector_bins.pop(i) i -= 1 i += 1 except StopIteration: if merged_bin is not None: new_scaffold.append(merged_bin) break new_scaffolds[name] = new_scaffold return new_scaffolds
[docs]def format_info_frags(info_frags): """A function to seamlessly run on either scaffold dictionaries or info_frags.txt files without having to check the input first. """ if isinstance(info_frags, dict): return info_frags else: try: scaffolds = parse_info_frags(info_frags) return scaffolds except OSError: print("Error when opening info_frags.txt") raise
[docs]def plot_info_frags(scaffolds): """A crude way to visualize new scaffolds according to their origin on the initial scaffolding. Each scaffold spawns a new plot. Orientations are represented by different colors. """ scaffolds = format_info_frags(scaffolds) for name, scaffold in scaffolds.items(): plt.figure() xs = range(len(scaffold)) color = [] names = {} ys = [] for my_bin in scaffold: current_color = "r" if my_bin[4] > 0 else "g" color += [current_color] name = my_bin[0] if name in names: ys.append(names[name]) else: names[name] = len(names) ys.append(names[name]) plt.scatter(xs, ys, c=color) plt.show()
[docs]def remove_spurious_insertions(scaffolds): """Remove all bins whose left and right neighbors belong to the same, different scaffold. Example with three such insertions in two different scaffolds: >>> scaffolds = { ... "scaffold1": [ ... ["contig1", 0, 0, 100, 1], ... ["contig1", 1, 100, 200, 1], ... ["contig23", 53, 1845, 2058, -1], # <-- insertion ... ["contig1", 4, 254, 408, 1], ... ["contig1", 7, 805, 1253, 1], ... ["contig5", 23, 1500, 1605, -1], ... ["contig65", 405, 32145, 45548, -1], # <-- insertion ... ["contig5", 22, 1385, 1499, -1], ... ], ... "scaffold2": [ ... ["contig8", 0, 0, 250, 1], ... ["contig17", 2454, 8754, -1], # <-- insertion ... ["contig8", 2, 320, 480, 1], ... ], ... } >>> new_scaffolds = remove_spurious_insertions(scaffolds) >>> for my_bin in new_scaffolds['scaffold1']: ... print(my_bin) ... ['contig1', 0, 0, 100, 1] ['contig1', 1, 100, 200, 1] ['contig1', 4, 254, 408, 1] ['contig1', 7, 805, 1253, 1] ['contig5', 23, 1500, 1605, -1] ['contig5', 22, 1385, 1499, -1] >>> for my_bin in new_scaffolds['scaffold2']: ... print(my_bin) ... ['contig8', 0, 0, 250, 1] ['contig8', 2, 320, 480, 1] """ scaffolds = format_info_frags(scaffolds) new_scaffolds = {} for name, scaffold in scaffolds.items(): new_scaffold = [] if len(scaffold) > 2: for i in range(len(scaffold)): # First take care of edge cases: *-- or --* if i == 0: if not ( scaffold[i][0] != scaffold[i + 1][0] and scaffold[i + 1][0] == scaffold[i + 2][0] ): new_scaffold.append(scaffold[i]) elif i == len(scaffold) - 1: if not ( scaffold[i][0] != scaffold[i - 1][0] and scaffold[i - 1][0] == scaffold[i - 2][0] ): new_scaffold.append(scaffold[i]) # Otherwise, looking for -*- else: if not ( scaffold[i - 1][0] == scaffold[i + 1][0] and scaffold[i - 1][0] != scaffold[i][0] ): new_scaffold.append(scaffold[i]) else: # Can't remove insertions if 2 bins or less new_scaffold = copy.deepcopy(scaffold) new_scaffolds[name] = new_scaffold return new_scaffolds
[docs]def correct_spurious_inversions(scaffolds, criterion="colinear"): """Invert bins based on orientation neighborhoods. Neighborhoods can be defined by three criteria: -a 'cis' neighborhood is a group of bins belonging to the same initial contig -a 'colinear' neighborhood is a 'cis' neighborhood where bins are ordered the same way they were on the initial contig -a 'contiguous' neighborhood is a 'colinear' neighborhood where all bins are exactly consecutive, i.e. the end position of each bin matches the starting position of the next bin This function looks for such neighborhoods and orients all bins in it according to the majority orientation. An example with three inversions, one for each criterion: >>> scaffolds = { ... "scaffold1": [ ... ["contig1", 1, 100, 200, 1], ... ["contig1", 2, 200, 300, 1], ... ["contig1", 3, 300, 400, -1], # <-- inversion (contiguous) ... ["contig1", 4, 400, 500, 1], ... ["contig1", 10, 1500, 1605, 1], ... ["contig1", 12, 1750, 1850, -1], # <-- inversion (colinear) ... ["contig1", 23, 2100, 2499, 1], ... ["contig1", 28, 2850, 3000, 1], ... ["contig1", 0, 0, 100, -1], # <-- inversion (cis) ... ["contig2", 554, 1850, 1900, -1], ... ], ... } With the 'cis' criterion, pretty much all bins from "contig1" get inverted to the majority orientation (+): >>> sc_cis = correct_spurious_inversions(scaffolds, "cis") >>> for my_bin in sc_cis['scaffold1']: ... print(my_bin) ... ['contig1', 1, 100, 200, 1] ['contig1', 2, 200, 300, 1] ['contig1', 3, 300, 400, 1] ['contig1', 4, 400, 500, 1] ['contig1', 10, 1500, 1605, 1] ['contig1', 12, 1750, 1850, 1] ['contig1', 23, 2100, 2499, 1] ['contig1', 28, 2850, 3000, 1] ['contig1', 0, 0, 100, 1] ['contig2', 554, 1850, 1900, -1] With the 'colinear' criterion, the bin ['contig1', 0, 0, 100, -1] is treated as a different neighborhood from the rest (as it is not colinear with the other bins from 'contig1') and remains untouched: >>> sc_colinear = correct_spurious_inversions(scaffolds, "colinear") >>> for my_bin in sc_colinear['scaffold1']: ... print(my_bin) ... ['contig1', 1, 100, 200, 1] ['contig1', 2, 200, 300, 1] ['contig1', 3, 300, 400, 1] ['contig1', 4, 400, 500, 1] ['contig1', 10, 1500, 1605, 1] ['contig1', 12, 1750, 1850, 1] ['contig1', 23, 2100, 2499, 1] ['contig1', 28, 2850, 3000, 1] ['contig1', 0, 0, 100, -1] ['contig2', 554, 1850, 1900, -1] With the 'contiguous' criterion, the ['contig1', 12, 1750, 1850, -1] breaks with the contiguous region spanning from 100 to 400 bp on 'contig1' and so is treated as a different neighborhood as well: >>> sc_cont = correct_spurious_inversions(scaffolds, "contiguous") >>> for my_bin in sc_cont['scaffold1']: ... print(my_bin) ... ['contig1', 1, 100, 200, 1] ['contig1', 2, 200, 300, 1] ['contig1', 3, 300, 400, 1] ['contig1', 4, 400, 500, 1] ['contig1', 10, 1500, 1605, 1] ['contig1', 12, 1750, 1850, -1] ['contig1', 23, 2100, 2499, 1] ['contig1', 28, 2850, 3000, 1] ['contig1', 0, 0, 100, -1] ['contig2', 554, 1850, 1900, -1] Note that 'contig2' remains untouched at all times since bins in it are never in the same neighborhood as those from 'contig1'. """ scaffolds = format_info_frags(scaffolds) new_scaffolds = {} def is_cis(bin1, bin2): return bin1[0] == bin2[0] def is_contiguous(bin1, bin2): return is_cis(bin1, bin2) and bin1[3] == bin2[2] def is_colinear(bin1, bin2): return is_cis(bin1, bin2) and bin1[3] <= bin2[2] condition_callables = { "cis": is_cis, "colinear": is_colinear, "contiguous": is_contiguous, } block_test = condition_callables.get(criterion, "colinear") for name, scaffold in scaffolds.items(): new_scaffold = [] block_cumulative_ori = 0 if len(scaffold) > 2: current_bin = scaffold[0] block_buffer = [] for my_bin in scaffold: if not block_buffer: new_bin = copy.deepcopy(my_bin) block_buffer.append(new_bin) block_cumulative_ori = my_bin[-1] continue elif not block_test(current_bin, my_bin): for my_buf_bin in block_buffer: new_bin = copy.deepcopy(my_buf_bin) if block_cumulative_ori >= 0: new_bin[-1] = 1 else: new_bin[-1] = -1 new_scaffold.append(new_bin) block_cumulative_ori = my_bin[-1] current_bin = copy.deepcopy(my_bin) block_buffer = copy.deepcopy([my_bin]) else: block_cumulative_ori += my_bin[-1] new_bin = copy.deepcopy(my_bin) block_buffer.append(new_bin) current_bin = my_bin for my_bin in block_buffer: new_bin = copy.deepcopy(my_bin) if block_cumulative_ori >= 0: new_bin[-1] = 1 else: new_bin[-1] = -1 new_scaffold.append(new_bin) new_scaffolds[name] = copy.deepcopy(new_scaffold) else: new_scaffolds[name] = copy.deepcopy(scaffold) return new_scaffolds
[docs]def rearrange_intra_scaffolds(scaffolds): """Rearranges all bins within each scaffold such that all bins belonging to the same initial contig are grouped together in the same order. When two such groups are found, the smaller one is moved to the larger one. """ scaffolds = format_info_frags(scaffolds) new_scaffolds = {} ordering = dict() for name, scaffold in scaffolds.items(): new_scaffold = [] ordering = dict() order = 0 my_blocks = [] for _, my_block in itertools.groupby(scaffold, operator.itemgetter(0)): my_bins = list(my_block) my_blocks.append(my_bins) block_length = len(my_bins) block_name = my_bins[0][0] if block_name in ordering.keys(): if block_length > ordering[block_name][1]: ordering[block_name] = (order, block_length) else: ordering[block_name] = (order, block_length) order += 1 def block_order(block): return ordering[block[0][0]][0] for my_block in sorted(my_blocks, key=block_order): for my_bin in my_block: new_scaffold.append(my_bin) new_scaffolds[name] = copy.deepcopy(new_scaffold) return new_scaffolds
[docs]def reorient_consecutive_blocks(scaffolds, mode="blocks"): scaffolds = format_info_frags(scaffolds) new_scaffolds = {} for name, scaffold in scaffolds.items(): # print(scaffold) new_scaffold = [] for _, my_block in itertools.groupby(scaffold, operator.itemgetter(0)): my_bins = list(my_block) if mode == "sequences": if len(my_bins) < 2: new_scaffold.append(my_bins[0]) continue else: previous_bin = [] end_bin = [-2, -2, -2, -2, -2] current_ori = 0 # print(my_bins + end_bin) for my_bin in my_bins + [end_bin]: if not previous_bin: # print("début") previous_bin = copy.copy(my_bin) continue elif my_bin[1] == previous_bin[1] + 1: current_ori = 1 previous_bin[-1] = 1 new_scaffold.append(previous_bin) previous_bin = copy.copy(my_bin) elif my_bin[1] == previous_bin[1] - 1: current_ori = -1 previous_bin[-1] = -1 new_scaffold.append(previous_bin) previous_bin = copy.copy(my_bin) else: if current_ori == 0: new_scaffold.append(previous_bin) else: previous_bin[-1] = current_ori new_scaffold.append(previous_bin) current_ori = 0 previous_bin = copy.copy(my_bin) assert previous_bin[0] == -2 elif mode == "blocks": total_ori = sum([my_bin[-1] for my_bin in my_bins]) if total_ori >= 0: block_ori = 1 sorted_block = sorted(my_bins, key=operator.itemgetter(1)) else: block_ori = -1 sorted_block = sorted( my_bins, key=operator.itemgetter(1), reverse=True ) for my_bin in sorted_block: my_bin[-1] = block_ori new_scaffold.append(my_bin) new_scaffolds[name] = copy.deepcopy(new_scaffold) return new_scaffolds
[docs]def write_info_frags(scaffolds, output="new_info_frags.txt"): scaffolds = format_info_frags(scaffolds) with open(output, "w") as info_frags_handle: for new_name, scaffold in scaffolds.items(): info_frags_handle.write(">{}\n".format(new_name)) header_line = "\t".join( ["init_contig", "id_frag", "orientation", "start", "end"] ) info_frags_handle.write("{}\n".format(header_line)) for my_bin in scaffold: init_contig, id_frag, pos_start, pos_end, orientation = my_bin assert orientation in {-1, 1} my_line = "\t".join( [ str(init_contig), str(id_frag), str(orientation), str(pos_start), str(pos_end), ] ) info_frags_handle.write("{}\n".format(my_line))
[docs]def write_fasta( init_fasta, info_frags, output=DEFAULT_NEW_GENOME_NAME, junction=False ): """Convert an info_frags.txt file into a fasta file given a reference. Optionally adds junction sequences to reflect the possibly missing base pairs between two newly joined scaffolds. """ init_genome = { record.id: record.seq for record in SeqIO.parse(init_fasta, "fasta") } my_new_records = [] with open(info_frags, "r") as info_frags_handle: current_seq = "" current_id = None previous_contig = None for line in info_frags_handle: if line.startswith(">"): previous_contig = None if current_id is not None: new_record = SeqRecord( current_seq, id=current_id, description="" ) my_new_records.append(new_record) current_seq = "" current_id = str(line[1:]) elif line.startswith("init_contig"): previous_contig = None else: (init_contig, _, orientation, pos_start, pos_end) = str( line[:-1] ).split("\t") start = int(pos_start) end = int(pos_end) ori = int(orientation) assert start < end assert ori in {-1, 1} seq_to_add = init_genome[init_contig][start:end] if ori == 1: current_seq += seq_to_add elif ori == -1: current_seq += seq_to_add.reverse_complement() if junction and previous_contig not in {init_contig, None}: error_was_raised = False try: extra_seq = Seq(junction, IUPAC.ambiguous_dna) current_seq = extra_seq + current_seq except TypeError: if not error_was_raised: print("Invalid junction sequence") error_was_raised = True previous_contig = init_contig new_record = SeqRecord(current_seq, id=current_id, description="") my_new_records.append(new_record) SeqIO.write(my_new_records, output, "fasta")
[docs]def find_lost_dna(init_fasta, scaffolds, output_file=None): my_scaffolds = format_info_frags(scaffolds) my_records = sorted( SeqIO.parse(init_fasta, "fasta"), reverse=True, key=len ) that_which_was_removed = {} fasta_dict = {} def consecutiveness(key_base_tuple): key, base = key_base_tuple return base - key for record in my_records: fasta_dict[record.id] = record.seq remaining_regions_ordered = range(len(record)) remaining_regions = set(remaining_regions_ordered) regions = [ my_bin for scaffold in my_scaffolds.values() for my_bin in scaffold if my_bin[0] == record.id ] for region in regions: start, end = region[2], region[3] remaining_regions -= set( remaining_regions_ordered[start : end + 1] ) sorted_regions = sorted(remaining_regions) for _, g in itertools.groupby( enumerate(sorted_regions), consecutiveness ): swath = list(map(operator.itemgetter(1), g)) start = min(swath) end = max(swath) + 1 ori = 1 my_bin = [record.id, -1, start, end, ori] try: that_which_was_removed[record.id].append(my_bin) except KeyError: that_which_was_removed[record.id] = [my_bin] if output_file: try: with open(output_file, "w") as output_handle: for name, chunks in that_which_was_removed.items(): for chunk in chunks: try: start, end = chunk[2], chunk[3] except ValueError: continue header_line = ">{}_{}_{}\n".format(name, start, end) output_handle.write(header_line) sequence = fasta_dict[name][start:end] sequence_line = "{}\n".format(sequence) output_handle.write(sequence_line) except OSError: print("Couldn't write fasta file.") return that_which_was_removed
[docs]def integrate_lost_dna(scaffolds, lost_dna_positions): scaffolds = format_info_frags(scaffolds) remaining_dna_positions = copy.deepcopy(lost_dna_positions) new_scaffolds = {} for name, scaffold in scaffolds.items(): scaffold_to_modify = copy.deepcopy(scaffold) i = 0 for my_bin in scaffold: init_name = my_bin[0] try: lost_dna_chunk = lost_dna_positions[init_name] start = my_bin[2] end = my_bin[3] ori = my_bin[4] for lost_bin in lost_dna_chunk: lost_start = lost_bin[2] lost_end = lost_bin[3] if end == lost_start - 1: # print( # "A bin was reintegrated after position {}" # " in scaffold {}".format(my_bin[3], init_name) # ) bin_to_add = [ init_name, -1, lost_start - 1, lost_end + 1, ori, ] scaffold_to_modify.insert( i + 1 - (ori < 0), bin_to_add ) remaining_dna_positions.pop(init_name) i += 1 elif start in (lost_end, lost_end - 1, lost_end + 1): # print( # "A bin was reintegrated before position {}" # " in scaffold {}".format(my_bin[3], init_name) # ) bin_to_add = [init_name, -1, lost_start, lost_end, ori] scaffold_to_modify.insert(i - 1, bin_to_add) remaining_dna_positions.pop(init_name) i += 1 except (ValueError, KeyError): i += 1 continue i += 1 new_scaffolds[name] = copy.deepcopy(scaffold_to_modify) print("Appending the rest...") for remaining_name, remaining_bins in remaining_dna_positions.items(): for my_bin in remaining_bins: try: remaining_bin = [remaining_name, -1, my_bin[2], my_bin[3], 1] new_scaffolds[remaining_name] = [remaining_bin] except ValueError: continue return new_scaffolds
[docs]def is_block(bin_list): """Check if a bin list has exclusively consecutive bin ids. """ id_set = set((my_bin[1] for my_bin in bin_list)) start_id, end_id = min(id_set), max(id_set) return id_set == set(range(start_id, end_id + 1))
[docs]def main(): parser = argparse.ArgumentParser( description="Process 3C bin " "formalized scaffolds." ) parser.add_argument("-m", "--mode", help="Process mode", required=True) parser.add_argument( "-i", "--input", type=str, help="Input info_frags.txt to process", required=True, ) parser.add_argument( "-f", "--fasta", type=str, help="Reference FASTA file to generate " "new genome with info_frags.txt", ) parser.add_argument("-o", "--output", help="Output file to generate") parser.add_argument( "-c", "--criterion", type=str, help="Block criterion stringency" ) parser.add_argument( "-s", "--min-scaffold-size", type=int, help="Minimum scaffold size in bins", default=DEFAULT_MIN_SCAFFOLD_SIZE, ) parser.add_argument( "-j", "--junction", type=str, help="Junction sequence", default="" ) args = parser.parse_args() info_frags = args.input min_size = args.min_scaffold_size scaffolds = { name: scaffold for (name, scaffold) in parse_info_frags(info_frags).items() if len(scaffold) > min_size } if args.mode == "fasta": init_fasta = args.fasta output_file = args.output junction = args.junction if init_fasta is None: print( "Error! An initial FASTA file must be provided to write " "the bins into sequences." ) write_fasta( init_fasta=init_fasta, info_frags=info_frags, junction=junction, output=output_file, ) elif "singleton" in args.mode: output_file = args.output new_scaffolds = remove_spurious_insertions(scaffolds) write_info_frags(new_scaffolds, output=output_file) elif args.mode == "inversion": output_file = args.output or DEFAULT_NEW_INFO_FRAGS_NAME criterion = args.criterion or DEFAULT_CRITERION new_scaffolds = correct_spurious_inversions( scaffolds=scaffolds, criterion=criterion ) write_info_frags(new_scaffolds, output=output_file) elif args.mode == "inversion2": output_file = args.output or DEFAULT_NEW_INFO_FRAGS_NAME criterion = args.criterion or DEFAULT_CRITERION_2 new_scaffolds = reorient_consecutive_blocks( scaffolds=scaffolds, mode=criterion ) write_info_frags(new_scaffolds, output=output_file) elif "rearrange" in args.mode: output_file = args.output or DEFAULT_NEW_INFO_FRAGS_NAME new_scaffolds = rearrange_intra_scaffolds(scaffolds=scaffolds) write_info_frags(new_scaffolds, output=output_file) elif "reincorporation" in args.mode: init_fasta = args.fasta output_file = args.output or DEFAULT_NEW_INFO_FRAGS_NAME if init_fasta is None: print( "Error! An initial FASTA file must be provided" " for bin reincorporation." ) removed = find_lost_dna(init_fasta=init_fasta, scaffolds=scaffolds) new_scaffolds = integrate_lost_dna( scaffolds=scaffolds, lost_dna_positions=removed ) write_info_frags(new_scaffolds, output=output_file) elif "polishing" in args.mode: init_fasta = args.fasta output_file = args.output arranged_scaffolds = rearrange_intra_scaffolds(scaffolds=scaffolds) reoriented_scaffolds = reorient_consecutive_blocks(arranged_scaffolds) removed = find_lost_dna( init_fasta=init_fasta, scaffolds=reoriented_scaffolds ) new_scaffolds = integrate_lost_dna( scaffolds=reoriented_scaffolds, lost_dna_positions=removed ) write_info_frags(new_scaffolds, output=DEFAULT_NEW_INFO_FRAGS_NAME) write_fasta( init_fasta=init_fasta, info_frags=DEFAULT_NEW_INFO_FRAGS_NAME, output=output_file, ) elif args.mode == "plot": plot_info_frags(scaffolds) else: print( "Wrong mode. Available modes are: " "fasta, singletons, inversions, inversion2, rearrange, " "reincorporations, polishing, plot" )
if __name__ == "__main__": main()