Coverage for PanACoTA/subcommands/prepare.py: 99%
146 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
1#!/usr/bin/env python3
2# coding: utf-8
4# ###############################################################################
5# This file is part of PanACOTA. #
6# #
7# Authors: Amandine Perrin #
8# Copyright © 2018-2020 Institut Pasteur (Paris). #
9# See the COPYRIGHT file for details. #
10# #
11# PanACOTA is a software providing tools for large scale bacterial comparative #
12# genomics. From a set of complete and/or draft genomes, you can: #
13# - Do a quality control of your strains, to eliminate poor quality #
14# genomes, which would not give any information for the comparative study #
15# - Uniformly annotate all genomes #
16# - Do a Pan-genome #
17# - Do a Core or Persistent genome #
18# - Align all Core/Persistent families #
19# - Infer a phylogenetic tree from the Core/Persistent families #
20# #
21# PanACOTA is free software: you can redistribute it and/or modify it under the #
22# terms of the Affero GNU General Public License as published by the Free #
23# Software Foundation, either version 3 of the License, or (at your option) #
24# any later version. #
25# #
26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY #
27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License #
29# for more details. #
30# #
31# You should have received a copy of the Affero GNU General Public License #
32# along with PanACOTA (COPYING file). #
33# If not, see <https://www.gnu.org/licenses/>. #
34# ###############################################################################
36"""
37Subcommand to prepare a dataset:
39- Download all genomes of a given species from refseq
40- Filter them with L90 and number of contigs thresholds
41- Remove too close/far genomes using Mash
44@author Amandine PERRIN
45August 2019
46"""
47import os
48import sys
49import logging
50from termcolor import colored
52from PanACoTA import __version__ as version
53from PanACoTA import utils
54from PanACoTA.prepare_module import download_genomes_func as dgf
55from PanACoTA.prepare_module import filter_genomes as fg
58def main_from_parse(arguments):
59 """
60 Call main function from the arguments given by parser
62 Parameters
63 ----------
64 arguments : argparse.Namespace
65 result of argparse parsing of all arguments in command line
67 """
68 cmd = "PanACoTA " + ' '.join(arguments.argv)
69 main(cmd, arguments.ncbi_species_name, arguments.ncbi_species_taxid, arguments.ncbi_taxid, arguments.strains,
70 arguments.levels, arguments.ncbi_section, arguments.outdir, arguments.tmp_dir, arguments.parallel, arguments.norefseq,
71 arguments.db_dir, arguments.only_mash,
72 arguments.info_file, arguments.l90, arguments.nbcont, arguments.cutn, arguments.min_dist,
73 arguments.max_dist, arguments.verbose, arguments.quiet)
76def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, ncbi_strains, levels, ncbi_section,
77 outdir, tmp_dir, threads, norefseq, db_dir,
78 only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, verbose, quiet):
79 """
80 Main method, constructing the draft dataset for the given species
82 verbosity:
83 - defaut 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more, .log.err contains warning and more
84 - 1: same as 0 + WARNING in stderr
85 - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details
86 - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything from info to debug
89 Parameters
90 ----------
91 cmd : str
92 command line used to launch this program
93 ncbi_species_name : str
94 name of species to download, as given by NCBI
95 ncbi_species_taxid : int
96 species taxid given in NCBI
97 ncbi_taxid : int
98 NCBI taxid (sub-species)
99 ncbi_strains : str
100 specific strains to download
101 levels: str
102 Level of assembly to download. Choice between 'all', 'complete', 'chromosome',
103 'scaffold', 'contig'. Default is 'all'
104 outdir : str
105 path to output directory (where created database will be saved).
106 tmp_dir : str
107 Path to directory where tmp files are saved (sequences split at each row of 5 'N')
108 threads : int
109 max number of threads to use
110 norefseq : bool
111 True if user does not want to download again the database
112 db_dir : str
113 Name of the folder where already downloaded fasta files are saved.
114 only_mash : bool
115 True if user user already has the database and quality of each genome (L90, #contigs etc.)
116 info_file : str
117 File containing information on QC if it was already ran before (columns to_annotate,
118 gsize, nb_conts and L90).
119 l90 : int
120 Max L90 allowed to keep a genome
121 nbcont : int
122 Max number of contigs allowed to keep a genome
123 cutn : int
124 cut at each when there are 'cutn' N in a row. Don't cut if equal to 0
125 min_dist : int
126 lower limit of distance between 2 genomes to keep them
127 max_dist : int
128 upper limit of distance between 2 genomes to keep them (default is 0.06)
129 verbose : int
130 verbosity:
131 - defaut 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more,
132 .log.err contains warning and more
133 - 1: same as 0 + WARNING in stderr
134 - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details
135 - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything
136 from info to debug
137 quiet : bool
138 True if nothing must be sent to stdout/stderr, False otherwise
139 """
141 # get species name in NCBI format
142 # -> will be used to name output directory
143 # -> will be used to download summary file if given species corresponds to NCBI name
144 if ncbi_species_name:
145 species_linked = "_".join(ncbi_species_name.split())
146 species_linked = "_".join(species_linked.split("/"))
148 # if species name not given by user, use species taxID (if given) to name output directory
149 elif ncbi_species_taxid:
150 species_linked = str(ncbi_species_taxid)
151 # if species name not species taxid by user, use taxID (if given) to name output directory
152 elif ncbi_taxid:
153 species_linked = str(ncbi_taxid)
154 # If no species nor taxID, get specific strain names
155 elif ncbi_strains:
156 if os.path.isfile(ncbi_strains):
157 species_linked = os.path.basename(ncbi_strains)
158 species_linked = os.path.splitext(species_linked)[0]
159 else:
160 species_linked = "_".join(ncbi_strains.split())
161 species_linked = "-".join(species_linked.split("/"))
162 species_linked = "_and_".join(species_linked.split(","))
163 # if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA
164 else:
165 species_linked = "NA"
166 # Default outdir is species name if given, or species taxID
167 if not outdir:
168 outdir = species_linked
169 # Default tmp_dir is outdir/tmp_files
170 if not tmp_dir:
171 tmp_dir = os.path.join(outdir, "tmp_files")
172 # directory that will be created by ncbi_genome_download
173 ncbidir = os.path.join(outdir, ncbi_section, "bacteria")
174 os.makedirs(outdir, exist_ok=True)
175 os.makedirs(tmp_dir, exist_ok=True)
177 # Initialize logger
178 # set level of logger: level is the minimum level that will be considered.
179 if verbose <= 1:
180 level = logging.INFO
181 # for verbose = 2, ignore only debug
182 if verbose >= 2 and verbose < 15:
183 level = utils.detail_lvl() # int corresponding to detail level
184 # for verbose >= 15, write everything
185 if verbose >= 15:
186 level = logging.DEBUG
187 logfile_base = os.path.join(outdir, "PanACoTA_prepare_{}").format(species_linked)
188 logfile_base, logger = utils.init_logger(logfile_base, level, 'prepare', log_details=True,
189 verbose=verbose, quiet=quiet)
191 # Message on what will be done (cmd, cores used)
192 logger.info(f'PanACoTA version {version}')
193 logger.info("Command used\n \t > " + cmd)
194 message = f"'PanACoTA prepare' will run on {threads} "
195 message += f"cores" if threads>1 else "core"
196 logger.info(message)
198 # Start prepare step
199 # Run more than only mash filter (!only_mash):
200 # - start from QC and mash (norefseq)
201 # - start from genome download (!norefseq))
202 if not only_mash:
203 # Not only mash, so a new info file will be created. If the user still gave an info
204 # file (he will be warned that it will be ignored), rename it with '.bak'
205 # to avoid erasing it
206 if info_file and os.path.isfile(info_file):
207 os.rename(info_file, info_file + ".back")
209 # 'norefseq = True" : Do not download genomes, just do QC and mash filter on given genomes
210 # -> if not, error and exit
211 if norefseq:
212 logger.warning(f'You asked to skip {ncbi_section} downloads.')
214 # -> if db_dir given, watch for sequences there. If does not exist, error and exit
215 # (user gave a directory (even if it does not exist), so we won't look for
216 # the sequences in other folders)
217 if db_dir:
218 if not os.path.exists(db_dir):
219 logger.error(f"Database folder {db_dir} supposed to contain fasta "
220 "sequences does not "
221 "exist. Please give a valid folder, or leave the default "
222 "directory (no '-d' option).")
223 sys.exit(1)
224 # -> If user did not give db_dir, genomes could be in
225 # outdir/Database_init/<genome_name>.fna
226 else:
227 db_dir = os.path.join(outdir, "Database_init")
228 # If it does not exist, check if default compressed files folder exists.
229 if not os.path.exists(db_dir):
230 logger.warning(f"Database folder {db_dir} supposed to contain fasta "
231 "sequences does not "
232 "exist. We will check if the download folder (with compressed "
233 "sequences) exists.")
234 # -> if not in database_init, genomes must be in
235 # outdir/refeq/bacteria/<genome_name>.fna.gz. In that case,
236 # uncompress and add them to Database_init
237 if not os.path.exists(ncbidir):
238 logger.error(f"Folder {ncbidir} does not exist. You do not have any "
239 "genome to analyse. Possible reasons:\n"
240 "- if you want to rerun analysis in the same folder as "
241 "sequences were downloaded (my_outdir/Database_init or "
242 f"my_outdir/{ncbi_section}), make sure you have '-o my_outdir' "
243 "option\n"
244 "- if you want to rerun analysis and save them in a new "
245 "output folder called 'new_outdir', make sure you have "
246 "'-o new_outdir' option, "
247 "and you specified where the uncompressed sequences to "
248 "use are ('-d sequence_database_path'). ")
249 sys.exit(1)
250 # add genomes from refseq/bacteria folder to Database_init
251 nb_gen, _ = dgf.to_database(outdir, ncbi_section)
252 # No sequence: Do all steps -> download, QC, mash filter
253 else:
254 # Download all genomes of the given taxID
255 db_dir, nb_gen = dgf.download_from_ncbi(species_linked, ncbi_section, ncbi_species_name, ncbi_species_taxid,
256 ncbi_taxid, ncbi_strains, levels, outdir, threads)
257 logger.info(f"{nb_gen} {ncbi_section} genome(s) downloaded")
259 # Now that genomes are downloaded and uncompressed, check their quality to remove bad ones
260 genomes = fg.check_quality(species_linked, db_dir, tmp_dir, l90, nbcont, cutn)
262 # Do only mash filter. Genomes must be already downloaded, and there must be a file with
263 # all information on these genomes (L90 etc.)
264 else:
265 logger.warning('You asked to run only mash steps.')
266 if not os.path.exists(info_file): # info-file missing -> error and exit
267 logger.error(f"Your info file {info_file} does not exist. Please provide the "
268 "right name/path, or remove the '--mash-only option to rerun "
269 "quality control.")
270 sys.exit(1)
271 logger.info(("You want to run only mash steps. Getting information "
272 "from {}").format(info_file))
273 genomes = utils.read_genomes_info(info_file, species_linked, )
275 # Run Mash
276 # genomes : {genome_file: [genome_name, orig_name, path_to_seq_to_annotate, size, nbcont, l90]}
277 # sorted_genome : [genome_file] ordered by L90/nbcont (keys of genomes)
278 sorted_genomes = fg.sort_genomes_minhash(genomes, l90, nbcont)
280 # Write discarded genomes to a file -> orig_name, to_annotate, gsize, nb_conts, L90
281 discQC = f"by-L90_nbcont-{species_linked}.txt"
282 utils.write_genomes_info(genomes, sorted_genomes, discQC, outdir)
284 # Remove genomes not corresponding to mash filters
285 removed = fg.iterative_mash(sorted_genomes, genomes, outdir, species_linked,
286 min_dist, max_dist, threads, quiet)
287 # Write list of genomes kept, and list of genomes discarded by mash step
288 info_file = fg.write_outputfiles(genomes, sorted_genomes, removed, outdir, species_linked,
289 min_dist, max_dist)
290 logger.info("End")
291 return info_file
294def build_parser(parser):
295 """
296 Method to create a parser for command-line options
298 Parameters
299 ----------
300 parser : argparse.ArgumentParser
301 parser to configure in order to extract command-line arguments
302 """
303 import argparse
304 from PanACoTA import utils_argparse
306 general = parser.add_argument_group('General arguments')
307 general.add_argument("-T", dest="ncbi_species_taxid", default="",
308 help=("Species taxid to download, corresponding to the "
309 "'species taxid' provided by the NCBI. "
310 "This will download all sequences of this species and all its sub-species."
311 "A comma-separated list of species taxids can also be provided. "
312 "(Ex: -T 573 for Klebsiella pneumoniae)")
313 )
314 general.add_argument("-t", dest="ncbi_taxid", default="",
315 help=("Taxid to download. "
316 "This can be the taxid of a sub-species, or of a specific strain. "
317 "A taxid of a subspecies will download all strains in this subspecies "
318 "EXCEPT the ones which have a specific taxid."
319 "A comma-separated list of taxids can also be provided."
320 "Ex: '-t 72407' will download all 'general' K. pneumoniae subsp. pneumoniae strains, "
321 "and '-t 1123862' will download the strain K. pneumoniae subsp. pneumoniae Kp13 "
322 "(not included in -t 72407, as it is a strain of the subspecies with a specific taxid).")
323 )
324 general.add_argument("-S", dest="strains", default="",
325 help=("List of strains to download. "
326 "A comma-separated list of strain names is possible, "
327 "as well as a path to a filename containing one name per line."
328 "Ex: '-S SB2390, IA565' for Klebsiella pneumoniae SB2390 and Klebsiella pneumoniae IA565 strains"
329 "Ex: '-S path/to/list.txt' path to file with strain names, one per line.")
330 )
331 general.add_argument("-g", dest="ncbi_species_name", default="",
332 help=("Species to download, corresponding to the "
333 "'organism name' provided by the NCBI. Give name between "
334 "quotes (for example \"escherichia coli\")")
335 )
336 general.add_argument("-s", dest="ncbi_section", default="refseq", choices = ["refseq", "genbank"],
337 help=("NCBI section to download: all genbank, or only refseq (default)")
338 )
339 general.add_argument("-l", "--assembly_level", dest="levels", default="",
340 help=("Assembly levels of genomes to download (default: all). "
341 "Possible levels are: 'all', 'complete', 'chromosome', "
342 "'scaffold', 'contig'."
343 "You can also provide a comma-separated list of assembly "
344 "levels. For ex: 'complete,chromosome'")
345 )
346 general.add_argument("-o", dest="outdir",
347 help=("Give the path to the directory where you want to save the "
348 "downloaded database. In the given directory, it will create "
349 "a folder 'Database_init' containing all fasta "
350 "files that will be sent to the control procedure, as well as "
351 "a folder 'refseq' with all original compressed files "
352 "downloaded from refseq. By default, this output dir name is the "
353 "ncbi_species name if given, or ncbi_species_taxid or ncbi_taxid otherwise.")
354 )
355 general.add_argument("--tmp", dest="tmp_dir",
356 help=("Specify where the temporary files (sequence split by rows "
357 "of 'N', sequence with new contig names etc.) must be saved. "
358 "By default, it will be saved in your "
359 "out_dir/tmp_files.")
360 )
361 general.add_argument("--cutn", dest="cutn", type=utils_argparse.positive_int, default=5,
362 help=("By default, each genome will be cut into new contigs when "
363 "at least 5 'N' in a row are found in its sequence. "
364 "If you don't want to "
365 "cut genomes into new contigs when there are rows of 'N', "
366 "put 0 to this option. If you want to cut from a different number "
367 "of 'N' in a row, put this value to this option.")
368 )
369 general.add_argument("--l90", dest="l90", type=int, default=100,
370 help="Maximum value of L90 allowed to keep a genome. Default is 100.")
371 general.add_argument("--nbcont", dest="nbcont", type=utils_argparse.cont_num, default=999,
372 help=("Maximum number of contigs allowed to keep a genome. "
373 "Default is 999."))
374 general.add_argument("--min_dist", dest="min_dist", default=1e-4,
375 type=utils_argparse.mash_dist,
376 help="By default, genomes whose distance to the reference is not "
377 "between 1e-4 and 0.06 are discarded. You can specify your own "
378 "lower limit (instead of 1e-4) with this option.")
379 general.add_argument("--max_dist", dest="max_dist", default=0.06,
380 type=utils_argparse.mash_dist,
381 help="By default, genomes whose distance to the reference is not "
382 "between 1e-4 and 0.06 are discarded. You can specify your own "
383 "lower limit (instead of 0.06) with this option.")
384 general.add_argument("-p", "--threads", dest="parallel", type=utils_argparse.thread_num,
385 default=1, help=("Run 'N' downloads in parallel (default=1). Put 0 if "
386 "you want to use all cores of your computer."))
388 optional = parser.add_argument_group('Alternatives')
389 optional.add_argument("--norefseq", dest="norefseq", action="store_true",
390 help=("If you already downloaded refseq genomes and do not want to "
391 "check them, add this option to directly go to the next steps:"
392 "quality control (L90, number of contigs...) and mash filter. "
393 "Don't forget to specify the db_dir (-d option) where you "
394 "already have your genome sequences."))
395 optional.add_argument("-d", dest="db_dir",
396 help=("If your already downloaded sequences are not in the default "
397 "directory (outdir/Database_init), you can specify here the "
398 "path to those fasta files."))
399 optional.add_argument('-M', '--only-mash', dest="only_mash", action="store_true",
400 help=("Add this option if you already downloaded complete and refseq "
401 "genomes, and ran quality control (you have, a file "
402 "containing all genome names, as well as their genome size, "
403 "number of contigs and L90 values). "
404 "It will then get information on genomes quality from this "
405 "file, and run mash steps."))
406 optional.add_argument("--info", dest="info_file",
407 help=("If you already ran the quality control, specify from which "
408 "file PanACoTA can read this information, in order to proceed "
409 "to the mash step. This file must contain at "
410 "least 4 columns, tab separated, with the following headers: "
411 "'to_annotate', 'gsize', 'nb_conts', 'L90'. Any other column "
412 "will be ignored."))
414 helper = parser.add_argument_group('Others')
415 helper.add_argument("-v", "--verbose", dest="verbose", action="count", default=0,
416 help="Increase verbosity in stdout/stderr and log files.")
417 helper.add_argument("-q", "--quiet", dest="quiet", action="store_true", default=False,
418 help=("Do not display anything to stdout/stderr. log files will "
419 "still be created."))
420 helper.add_argument("-h", "--help", dest="help", action="help",
421 help="show this help message and exit")
424def parse(parser, argu):
425 """
426 Parse arguments given to parser
428 Parameters
429 ----------
430 parser : argparse.ArgumentParser
431 the parser used
432 argu : [str]
433 command-line given by user, to parse using parser
435 Returns
436 -------
437 argparse.Namespace
438 Parsed arguments
439 """
440 import argparse
442 args = parser.parse_args(argu)
443 return check_args(parser, args)
446def check_args(parser, args):
447 """
448 Check that arguments given to parser are as expected.
450 Parameters
451 ----------
452 parser : argparse.ArgumentParser
453 The parser used to parse command-line
454 args : argparse.Namespace
455 Parsed arguments
457 Returns
458 -------
459 argparse.Namespace or None
460 The arguments parsed, updated according to some rules. Exit program
461 with error message if error occurs with arguments given.
463 """
464 # Message if user kept default thresholds for L90 and nbcont. Just to warn him, to be sure
465 # it was on purpose
466 def thresholds_message(l90, nbcont):
467 return (" !! Your genomes will be filtered, and only the ones with 'L90' <= {} "
468 "and 'number of contigs' < {} will be kept. If you want to change those "
469 "thresholds, use '--l90' and '--nbcont' "
470 "options.".format(args.l90, args.nbcont))
472 # ERRORS
474 # We don't want to run only mash, nor only quality control, but don't give a NCBI taxID.
475 # -> Give at least 1!
476 if (not args.only_mash and not args.norefseq and
477 not args.ncbi_species_taxid and not args.ncbi_species_name and
478 not args.ncbi_taxid and not args.strains):
479 parser.error("As you did not put the '--norefseq' nor the '-M' option, it means that "
480 "you want to download refseq (or genbank) genomes. But you did not provide "
481 "any information, so PanACoTA cannot guess which species you want to "
482 "download. Specify NCBI_taxid (-t), and/or NCBI species taxid (-T) "
483 "and/or NCBI_species (-g) and/or NCBI_strain (-S) to download, or add one of "
484 "the 2 options (--norefseq or -M) if you want to skip the 'download step'.")
486 # If norefseq, give output directory
487 # - folder containing Database_init, with all sequences
488 # - or new folder where you want to put the new results
489 if args.norefseq and not args.outdir:
490 parser.error("You must provide an output directory, where your results will be saved.")
492 # If user wants only mash steps, check that he gave info file, and outdir
493 if args.only_mash and not args.info_file:
494 parser.error("If you want to run only Mash filtering steps, please give the "
495 "info file with the required information (see '--info' option)")
496 if args.only_mash and not args.outdir:
497 parser.error("If you want to run only Mash filtering steps, please give the "
498 "output directory where you want to save your results (see '-o' option)")
500 # Cannot be verbose and quiet at the same time
501 if int(args.verbose) > 0 and args.quiet:
502 parser.error("Choose between a verbose output (-v) or a quiet output (-q)."
503 " You cannot have both.")
505 # min_dist must be higher than max_dist
506 if float(args.min_dist) >= float(args.max_dist):
507 parser.error(f"min_dist ({args.min_dist}) cannot be higher "
508 f"than max_dist ({args.max_dist})")
510 # Check that levels, if given, are among possible ones
511 possible = ["all", "complete", "chromosome", "scaffold", "contig"]
512 if args.levels:
513 for level in args.levels.split(","):
514 if level not in possible:
515 parser.error("Please choose between available assembly levels: 'all', 'complete', "
516 "'chromosome', 'scaffold', 'contig'. If several levels, provide a "
517 f"comma-separated list. Invalid value: '{args.levels}'")
519 # WARNINGS
520 # User did not specify a species name
521 if not args.ncbi_species_name and not args.outdir:
522 if args.ncbi_species_taxid:
523 print(colored("WARNING: you did not provide a species name ('-g species' option) "
524 "nor an output directory ('-o outdir'). "
525 "All files will be downloaded in a folder called with the NCBI species "
526 f"taxid {args.ncbi_species_taxid} instead of the species name.", "yellow"))
527 elif args.strains:
528 print(colored("WARNING: you did not provide a species name ('-g species' option) "
529 "nor a species taxid ('-T spetaxid') nor an output directory ('-o outdir'). "
530 "All files will be downloaded in a folder called with the specified strains "
531 f"names {args.strains} instead of the species name.", "yellow"))
532 else:
533 print(colored("WARNING: you did not provide a species name ('-g species' option) "
534 "nor a species taxid ('-T spetaxid') nor an output directory ('-o outdir'). "
535 "All files will be downloaded in a folder called with the NCBI "
536 f"taxid {args.ncbi_taxid}.", "yellow"))
537 # If user wants to cut genomes, warn him to check that it is on purpose (because default is cut at each 5'N')
538 if args.cutn == 5:
539 message = (" !! Your genomes will be split when sequence contains at "
540 "least {}'N' in a row. If you want to change this threshold, use "
541 "'--cutn n' option (n=0 if you do not want to cut)").format(args.cutn)
542 print(colored(message, "yellow"))
544 # Warn user about selection of genomes thresholds
545 if args.l90 == 100 or args.nbcont == 999:
546 print(colored(thresholds_message(args.l90, args.nbcont), "yellow"))
548 # Warn if user gave info file, but does not ask to run only Mash -> info file will be ignored
549 if (args.info_file and not args.only_mash) or (args.info_file and not args.norefseq):
550 message = (" !! You gave an info file (--info option), but did not ask to run only Mash "
551 "step (-M option). Your info file will be ignored (and renamed with '.back' "
552 "at the end), and another one will "
553 "be created with the new calculated values.")
554 print(colored(message, "yellow"))
555 return args
559if __name__ == '__main__':
560 import argparse
561 from textwrap import dedent
562 header = '''
563 ___ _____ ___ _____ _____
564 ( _`\ ( _ )( _`\ (_ _)( _ )
565 | |_) ) _ _ ___ | (_) || ( (_) _ | | | (_) |
566 | ,__/'/'_` )/' _ `\| _ || | _ /'_`\ | | | _ |
567 | | ( (_| || ( ) || | | || (_( )( (_) )| | | | | |
568 (_) `\__,_)(_) (_)(_) (_)(____/'`\___/'(_) (_) (_)
571 Large scale comparative genomics tools
573 -------------------------------------------
574 '''
575 my_parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
576 description=dedent(header), add_help=False)
577 build_parser(my_parser)
578 OPTIONS = parse(my_parser, sys.argv[1:])
579 main_from_parse(OPTIONS)