Coverage for PanACoTA/subcommands/prepare.py: 99%

1#!/usr/bin/env python3

2# coding: utf-8

4# ###############################################################################

5# This file is part of PanACOTA. #

6# #

7# Authors: Amandine Perrin #

9# See the COPYRIGHT file for details. #

10# #

11# PanACOTA is a software providing tools for large scale bacterial comparative #

12# genomics. From a set of complete and/or draft genomes, you can: #

13# - Do a quality control of your strains, to eliminate poor quality #

14# genomes, which would not give any information for the comparative study #

15# - Uniformly annotate all genomes #

16# - Do a Pan-genome #

17# - Do a Core or Persistent genome #

18# - Align all Core/Persistent families #

19# - Infer a phylogenetic tree from the Core/Persistent families #

20# #

21# PanACOTA is free software: you can redistribute it and/or modify it under the #

22# terms of the Affero GNU General Public License as published by the Free #

23# Software Foundation, either version 3 of the License, or (at your option) #

24# any later version. #

25# #

26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY #

27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #

28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License #

29# for more details. #

30# #

31# You should have received a copy of the Affero GNU General Public License #

32# along with PanACOTA (COPYING file). #

33# If not, see <https://www.gnu.org/licenses/>. #

34# ###############################################################################

36"""

37Subcommand to prepare a dataset:

39- Download all genomes of a given species from refseq

40- Filter them with L90 and number of contigs thresholds

41- Remove too close/far genomes using Mash

44@author Amandine PERRIN

45August 2019

46"""

47import os

48import sys

49import logging

50from termcolor import colored

52from PanACoTA import __version__ as version

53from PanACoTA import utils

54from PanACoTA.prepare_module import download_genomes_func as dgf

55from PanACoTA.prepare_module import filter_genomes as fg

58def main_from_parse(arguments):

59 """

60 Call main function from the arguments given by parser

62 Parameters

63 ----------

64 arguments : argparse.Namespace

65 result of argparse parsing of all arguments in command line

67 """

68 cmd = "PanACoTA " + ' '.join(arguments.argv)

69 main(cmd, arguments.ncbi_species_name, arguments.ncbi_species_taxid, arguments.ncbi_taxid, arguments.strains,

70 arguments.levels, arguments.ncbi_section, arguments.outdir, arguments.tmp_dir, arguments.parallel, arguments.norefseq,

71 arguments.db_dir, arguments.only_mash,

72 arguments.info_file, arguments.l90, arguments.nbcont, arguments.cutn, arguments.min_dist,

73 arguments.max_dist, arguments.verbose, arguments.quiet)

76def main(cmd, ncbi_species_name, ncbi_species_taxid, ncbi_taxid, ncbi_strains, levels, ncbi_section,

77 outdir, tmp_dir, threads, norefseq, db_dir,

78 only_mash, info_file, l90, nbcont, cutn, min_dist, max_dist, verbose, quiet):

79 """

80 Main method, constructing the draft dataset for the given species

82 verbosity:

83 - defaut 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more, .log.err contains warning and more

84 - 1: same as 0 + WARNING in stderr

85 - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details

86 - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything from info to debug

89 Parameters

90 ----------

91 cmd : str

92 command line used to launch this program

93 ncbi_species_name : str

94 name of species to download, as given by NCBI

95 ncbi_species_taxid : int

96 species taxid given in NCBI

97 ncbi_taxid : int

98 NCBI taxid (sub-species)

99 ncbi_strains : str

100 specific strains to download

101 levels: str

102 Level of assembly to download. Choice between 'all', 'complete', 'chromosome',

103 'scaffold', 'contig'. Default is 'all'

104 outdir : str

105 path to output directory (where created database will be saved).

106 tmp_dir : str

107 Path to directory where tmp files are saved (sequences split at each row of 5 'N')

108 threads : int

109 max number of threads to use

110 norefseq : bool

111 True if user does not want to download again the database

112 db_dir : str

113 Name of the folder where already downloaded fasta files are saved.

114 only_mash : bool

115 True if user user already has the database and quality of each genome (L90, #contigs etc.)

116 info_file : str

117 File containing information on QC if it was already ran before (columns to_annotate,

118 gsize, nb_conts and L90).

119 l90 : int

120 Max L90 allowed to keep a genome

121 nbcont : int

122 Max number of contigs allowed to keep a genome

123 cutn : int

124 cut at each when there are 'cutn' N in a row. Don't cut if equal to 0

125 min_dist : int

126 lower limit of distance between 2 genomes to keep them

127 max_dist : int

128 upper limit of distance between 2 genomes to keep them (default is 0.06)

129 verbose : int

130 verbosity:

131 - defaut 0 : stdout contains INFO, stderr contains ERROR, .log contains INFO and more,

132 .log.err contains warning and more

133 - 1: same as 0 + WARNING in stderr

134 - 2: same as 1 + DETAILS in stdout + DETAILS in .log.details

135 - >=15: same as 2 + Add DEBUG in stdout + create .log.debug with everything

136 from info to debug

137 quiet : bool

138 True if nothing must be sent to stdout/stderr, False otherwise

139 """

140

141 # get species name in NCBI format

142 # -> will be used to name output directory

143 # -> will be used to download summary file if given species corresponds to NCBI name

144 if ncbi_species_name:

145 species_linked = "_".join(ncbi_species_name.split())

146 species_linked = "_".join(species_linked.split("/"))

147

148 # if species name not given by user, use species taxID (if given) to name output directory

149 elif ncbi_species_taxid:

150 species_linked = str(ncbi_species_taxid)

151 # if species name not species taxid by user, use taxID (if given) to name output directory

152 elif ncbi_taxid:

153 species_linked = str(ncbi_taxid)

154 # If no species nor taxID, get specific strain names

155 elif ncbi_strains:

156 if os.path.isfile(ncbi_strains):

157 species_linked = os.path.basename(ncbi_strains)

158 species_linked = os.path.splitext(species_linked)[0]

159 else:

160 species_linked = "_".join(ncbi_strains.split())

161 species_linked = "-".join(species_linked.split("/"))

162 species_linked = "_and_".join(species_linked.split(","))

163 # if neither speName, speID, taxID nor strainName given (--norefseq, mashonly), name is NA

164 else:

165 species_linked = "NA"

166 # Default outdir is species name if given, or species taxID

167 if not outdir:

168 outdir = species_linked

169 # Default tmp_dir is outdir/tmp_files

170 if not tmp_dir:

171 tmp_dir = os.path.join(outdir, "tmp_files")

172 # directory that will be created by ncbi_genome_download

173 ncbidir = os.path.join(outdir, ncbi_section, "bacteria")

174 os.makedirs(outdir, exist_ok=True)

175 os.makedirs(tmp_dir, exist_ok=True)

176

177 # Initialize logger

178 # set level of logger: level is the minimum level that will be considered.

179 if verbose <= 1:

180 level = logging.INFO

181 # for verbose = 2, ignore only debug

182 if verbose >= 2 and verbose < 15:

183 level = utils.detail_lvl() # int corresponding to detail level

184 # for verbose >= 15, write everything

185 if verbose >= 15:

186 level = logging.DEBUG

187 logfile_base = os.path.join(outdir, "PanACoTA_prepare_{}").format(species_linked)

188 logfile_base, logger = utils.init_logger(logfile_base, level, 'prepare', log_details=True,

189 verbose=verbose, quiet=quiet)

190

191 # Message on what will be done (cmd, cores used)

192 logger.info(f'PanACoTA version {version}')

193 logger.info("Command used\n \t > " + cmd)

194 message = f"'PanACoTA prepare' will run on {threads} "

195 message += f"cores" if threads>1 else "core"

196 logger.info(message)

197

198 # Start prepare step

199 # Run more than only mash filter (!only_mash):

200 # - start from QC and mash (norefseq)

201 # - start from genome download (!norefseq))

202 if not only_mash:

203 # Not only mash, so a new info file will be created. If the user still gave an info

204 # file (he will be warned that it will be ignored), rename it with '.bak'

205 # to avoid erasing it

206 if info_file and os.path.isfile(info_file):

207 os.rename(info_file, info_file + ".back")

208

209 # 'norefseq = True" : Do not download genomes, just do QC and mash filter on given genomes

210 # -> if not, error and exit

211 if norefseq:

212 logger.warning(f'You asked to skip {ncbi_section} downloads.')

213

214 # -> if db_dir given, watch for sequences there. If does not exist, error and exit

215 # (user gave a directory (even if it does not exist), so we won't look for

216 # the sequences in other folders)

217 if db_dir:

218 if not os.path.exists(db_dir):

219 logger.error(f"Database folder {db_dir} supposed to contain fasta "

220 "sequences does not "

221 "exist. Please give a valid folder, or leave the default "

222 "directory (no '-d' option).")

223 sys.exit(1)

224 # -> If user did not give db_dir, genomes could be in

225 # outdir/Database_init/<genome_name>.fna

226 else:

227 db_dir = os.path.join(outdir, "Database_init")

228 # If it does not exist, check if default compressed files folder exists.

229 if not os.path.exists(db_dir):

230 logger.warning(f"Database folder {db_dir} supposed to contain fasta "

231 "sequences does not "

232 "exist. We will check if the download folder (with compressed "

233 "sequences) exists.")

234 # -> if not in database_init, genomes must be in

235 # outdir/refeq/bacteria/<genome_name>.fna.gz. In that case,

236 # uncompress and add them to Database_init

237 if not os.path.exists(ncbidir):

238 logger.error(f"Folder {ncbidir} does not exist. You do not have any "

239 "genome to analyse. Possible reasons:\n"

240 "- if you want to rerun analysis in the same folder as "

241 "sequences were downloaded (my_outdir/Database_init or "

242 f"my_outdir/{ncbi_section}), make sure you have '-o my_outdir' "

243 "option\n"

244 "- if you want to rerun analysis and save them in a new "

245 "output folder called 'new_outdir', make sure you have "

246 "'-o new_outdir' option, "

247 "and you specified where the uncompressed sequences to "

248 "use are ('-d sequence_database_path'). ")

249 sys.exit(1)

250 # add genomes from refseq/bacteria folder to Database_init

251 nb_gen, _ = dgf.to_database(outdir, ncbi_section)

252 # No sequence: Do all steps -> download, QC, mash filter

253 else:

254 # Download all genomes of the given taxID

255 db_dir, nb_gen = dgf.download_from_ncbi(species_linked, ncbi_section, ncbi_species_name, ncbi_species_taxid,

256 ncbi_taxid, ncbi_strains, levels, outdir, threads)

257 logger.info(f"{nb_gen} {ncbi_section} genome(s) downloaded")

258

259 # Now that genomes are downloaded and uncompressed, check their quality to remove bad ones

260 genomes = fg.check_quality(species_linked, db_dir, tmp_dir, l90, nbcont, cutn)

261

262 # Do only mash filter. Genomes must be already downloaded, and there must be a file with

263 # all information on these genomes (L90 etc.)

264 else:

265 logger.warning('You asked to run only mash steps.')

266 if not os.path.exists(info_file): # info-file missing -> error and exit

267 logger.error(f"Your info file {info_file} does not exist. Please provide the "

268 "right name/path, or remove the '--mash-only option to rerun "

269 "quality control.")

270 sys.exit(1)

271 logger.info(("You want to run only mash steps. Getting information "

272 "from {}").format(info_file))

273 genomes = utils.read_genomes_info(info_file, species_linked, )

274

275 # Run Mash

276 # genomes : {genome_file: [genome_name, orig_name, path_to_seq_to_annotate, size, nbcont, l90]}

277 # sorted_genome : [genome_file] ordered by L90/nbcont (keys of genomes)

278 sorted_genomes = fg.sort_genomes_minhash(genomes, l90, nbcont)

279

280 # Write discarded genomes to a file -> orig_name, to_annotate, gsize, nb_conts, L90

281 discQC = f"by-L90_nbcont-{species_linked}.txt"

282 utils.write_genomes_info(genomes, sorted_genomes, discQC, outdir)

283

284 # Remove genomes not corresponding to mash filters

285 removed = fg.iterative_mash(sorted_genomes, genomes, outdir, species_linked,

286 min_dist, max_dist, threads, quiet)

287 # Write list of genomes kept, and list of genomes discarded by mash step

288 info_file = fg.write_outputfiles(genomes, sorted_genomes, removed, outdir, species_linked,

289 min_dist, max_dist)

290 logger.info("End")

291 return info_file

292

293

294def build_parser(parser):

295 """

296 Method to create a parser for command-line options

297

298 Parameters

299 ----------

300 parser : argparse.ArgumentParser

301 parser to configure in order to extract command-line arguments

302 """

303 import argparse

304 from PanACoTA import utils_argparse

305

306 general = parser.add_argument_group('General arguments')

307 general.add_argument("-T", dest="ncbi_species_taxid", default="",

308 help=("Species taxid to download, corresponding to the "

309 "'species taxid' provided by the NCBI. "

310 "This will download all sequences of this species and all its sub-species."

311 "A comma-separated list of species taxids can also be provided. "

312 "(Ex: -T 573 for Klebsiella pneumoniae)")

313 )

314 general.add_argument("-t", dest="ncbi_taxid", default="",

315 help=("Taxid to download. "

316 "This can be the taxid of a sub-species, or of a specific strain. "

317 "A taxid of a subspecies will download all strains in this subspecies "

318 "EXCEPT the ones which have a specific taxid."

319 "A comma-separated list of taxids can also be provided."

320 "Ex: '-t 72407' will download all 'general' K. pneumoniae subsp. pneumoniae strains, "

321 "and '-t 1123862' will download the strain K. pneumoniae subsp. pneumoniae Kp13 "

322 "(not included in -t 72407, as it is a strain of the subspecies with a specific taxid).")

323 )

324 general.add_argument("-S", dest="strains", default="",

325 help=("List of strains to download. "

326 "A comma-separated list of strain names is possible, "

327 "as well as a path to a filename containing one name per line."

328 "Ex: '-S SB2390, IA565' for Klebsiella pneumoniae SB2390 and Klebsiella pneumoniae IA565 strains"

329 "Ex: '-S path/to/list.txt' path to file with strain names, one per line.")

330 )

331 general.add_argument("-g", dest="ncbi_species_name", default="",

332 help=("Species to download, corresponding to the "

333 "'organism name' provided by the NCBI. Give name between "

334 "quotes (for example \"escherichia coli\")")

335 )

336 general.add_argument("-s", dest="ncbi_section", default="refseq", choices = ["refseq", "genbank"],

337 help=("NCBI section to download: all genbank, or only refseq (default)")

338 )

339 general.add_argument("-l", "--assembly_level", dest="levels", default="",

340 help=("Assembly levels of genomes to download (default: all). "

341 "Possible levels are: 'all', 'complete', 'chromosome', "

342 "'scaffold', 'contig'."

343 "You can also provide a comma-separated list of assembly "

344 "levels. For ex: 'complete,chromosome'")

345 )

346 general.add_argument("-o", dest="outdir",

347 help=("Give the path to the directory where you want to save the "

348 "downloaded database. In the given directory, it will create "

349 "a folder 'Database_init' containing all fasta "

350 "files that will be sent to the control procedure, as well as "

351 "a folder 'refseq' with all original compressed files "

352 "downloaded from refseq. By default, this output dir name is the "

353 "ncbi_species name if given, or ncbi_species_taxid or ncbi_taxid otherwise.")

354 )

355 general.add_argument("--tmp", dest="tmp_dir",

356 help=("Specify where the temporary files (sequence split by rows "

357 "of 'N', sequence with new contig names etc.) must be saved. "

358 "By default, it will be saved in your "

359 "out_dir/tmp_files.")

360 )

361 general.add_argument("--cutn", dest="cutn", type=utils_argparse.positive_int, default=5,

362 help=("By default, each genome will be cut into new contigs when "

363 "at least 5 'N' in a row are found in its sequence. "

364 "If you don't want to "

365 "cut genomes into new contigs when there are rows of 'N', "

366 "put 0 to this option. If you want to cut from a different number "

367 "of 'N' in a row, put this value to this option.")

368 )

369 general.add_argument("--l90", dest="l90", type=int, default=100,

370 help="Maximum value of L90 allowed to keep a genome. Default is 100.")

371 general.add_argument("--nbcont", dest="nbcont", type=utils_argparse.cont_num, default=999,

372 help=("Maximum number of contigs allowed to keep a genome. "

373 "Default is 999."))

374 general.add_argument("--min_dist", dest="min_dist", default=1e-4,

375 type=utils_argparse.mash_dist,

376 help="By default, genomes whose distance to the reference is not "

377 "between 1e-4 and 0.06 are discarded. You can specify your own "

378 "lower limit (instead of 1e-4) with this option.")

379 general.add_argument("--max_dist", dest="max_dist", default=0.06,

380 type=utils_argparse.mash_dist,

381 help="By default, genomes whose distance to the reference is not "

382 "between 1e-4 and 0.06 are discarded. You can specify your own "

383 "lower limit (instead of 0.06) with this option.")

384 general.add_argument("-p", "--threads", dest="parallel", type=utils_argparse.thread_num,

385 default=1, help=("Run 'N' downloads in parallel (default=1). Put 0 if "

386 "you want to use all cores of your computer."))

387

388 optional = parser.add_argument_group('Alternatives')

389 optional.add_argument("--norefseq", dest="norefseq", action="store_true",

390 help=("If you already downloaded refseq genomes and do not want to "

391 "check them, add this option to directly go to the next steps:"

392 "quality control (L90, number of contigs...) and mash filter. "

393 "Don't forget to specify the db_dir (-d option) where you "

394 "already have your genome sequences."))

395 optional.add_argument("-d", dest="db_dir",

396 help=("If your already downloaded sequences are not in the default "

397 "directory (outdir/Database_init), you can specify here the "

398 "path to those fasta files."))

399 optional.add_argument('-M', '--only-mash', dest="only_mash", action="store_true",

400 help=("Add this option if you already downloaded complete and refseq "

401 "genomes, and ran quality control (you have, a file "

402 "containing all genome names, as well as their genome size, "

403 "number of contigs and L90 values). "

404 "It will then get information on genomes quality from this "

405 "file, and run mash steps."))

406 optional.add_argument("--info", dest="info_file",

407 help=("If you already ran the quality control, specify from which "

408 "file PanACoTA can read this information, in order to proceed "

409 "to the mash step. This file must contain at "

410 "least 4 columns, tab separated, with the following headers: "

411 "'to_annotate', 'gsize', 'nb_conts', 'L90'. Any other column "

412 "will be ignored."))

413

414 helper = parser.add_argument_group('Others')

415 helper.add_argument("-v", "--verbose", dest="verbose", action="count", default=0,

416 help="Increase verbosity in stdout/stderr and log files.")

417 helper.add_argument("-q", "--quiet", dest="quiet", action="store_true", default=False,

418 help=("Do not display anything to stdout/stderr. log files will "

419 "still be created."))

420 helper.add_argument("-h", "--help", dest="help", action="help",

421 help="show this help message and exit")

422

423

424def parse(parser, argu):

425 """

426 Parse arguments given to parser

427

428 Parameters

429 ----------

430 parser : argparse.ArgumentParser

431 the parser used

432 argu : [str]

433 command-line given by user, to parse using parser

434

435 Returns

436 -------

437 argparse.Namespace

438 Parsed arguments

439 """

440 import argparse

441

442 args = parser.parse_args(argu)

443 return check_args(parser, args)

444

445

446def check_args(parser, args):

447 """

448 Check that arguments given to parser are as expected.

449

450 Parameters

451 ----------

452 parser : argparse.ArgumentParser

453 The parser used to parse command-line

454 args : argparse.Namespace

455 Parsed arguments

456

457 Returns

458 -------

459 argparse.Namespace or None

460 The arguments parsed, updated according to some rules. Exit program

461 with error message if error occurs with arguments given.

462

463 """

464 # Message if user kept default thresholds for L90 and nbcont. Just to warn him, to be sure

465 # it was on purpose

466 def thresholds_message(l90, nbcont):

467 return (" !! Your genomes will be filtered, and only the ones with 'L90' <= {} "

468 "and 'number of contigs' < {} will be kept. If you want to change those "

469 "thresholds, use '--l90' and '--nbcont' "

470 "options.".format(args.l90, args.nbcont))

471

472 # ERRORS

473

474 # We don't want to run only mash, nor only quality control, but don't give a NCBI taxID.

475 # -> Give at least 1!

476 if (not args.only_mash and not args.norefseq and

477 not args.ncbi_species_taxid and not args.ncbi_species_name and

478 not args.ncbi_taxid and not args.strains):

479 parser.error("As you did not put the '--norefseq' nor the '-M' option, it means that "

480 "you want to download refseq (or genbank) genomes. But you did not provide "

481 "any information, so PanACoTA cannot guess which species you want to "

482 "download. Specify NCBI_taxid (-t), and/or NCBI species taxid (-T) "

483 "and/or NCBI_species (-g) and/or NCBI_strain (-S) to download, or add one of "

484 "the 2 options (--norefseq or -M) if you want to skip the 'download step'.")

485

486 # If norefseq, give output directory

487 # - folder containing Database_init, with all sequences

488 # - or new folder where you want to put the new results

489 if args.norefseq and not args.outdir:

490 parser.error("You must provide an output directory, where your results will be saved.")

491

492 # If user wants only mash steps, check that he gave info file, and outdir

493 if args.only_mash and not args.info_file:

494 parser.error("If you want to run only Mash filtering steps, please give the "

495 "info file with the required information (see '--info' option)")

496 if args.only_mash and not args.outdir:

497 parser.error("If you want to run only Mash filtering steps, please give the "

498 "output directory where you want to save your results (see '-o' option)")

499

500 # Cannot be verbose and quiet at the same time

501 if int(args.verbose) > 0 and args.quiet:

502 parser.error("Choose between a verbose output (-v) or a quiet output (-q)."

503 " You cannot have both.")

504

505 # min_dist must be higher than max_dist

506 if float(args.min_dist) >= float(args.max_dist):

507 parser.error(f"min_dist ({args.min_dist}) cannot be higher "

508 f"than max_dist ({args.max_dist})")

509

510 # Check that levels, if given, are among possible ones

511 possible = ["all", "complete", "chromosome", "scaffold", "contig"]

512 if args.levels:

513 for level in args.levels.split(","):

514 if level not in possible:

515 parser.error("Please choose between available assembly levels: 'all', 'complete', "

516 "'chromosome', 'scaffold', 'contig'. If several levels, provide a "

517 f"comma-separated list. Invalid value: '{args.levels}'")

518

519 # WARNINGS

520 # User did not specify a species name

521 if not args.ncbi_species_name and not args.outdir:

522 if args.ncbi_species_taxid:

523 print(colored("WARNING: you did not provide a species name ('-g species' option) "

524 "nor an output directory ('-o outdir'). "

525 "All files will be downloaded in a folder called with the NCBI species "

526 f"taxid {args.ncbi_species_taxid} instead of the species name.", "yellow"))

527 elif args.strains:

528 print(colored("WARNING: you did not provide a species name ('-g species' option) "

529 "nor a species taxid ('-T spetaxid') nor an output directory ('-o outdir'). "

530 "All files will be downloaded in a folder called with the specified strains "

531 f"names {args.strains} instead of the species name.", "yellow"))

532 else:

533 print(colored("WARNING: you did not provide a species name ('-g species' option) "

534 "nor a species taxid ('-T spetaxid') nor an output directory ('-o outdir'). "

535 "All files will be downloaded in a folder called with the NCBI "

536 f"taxid {args.ncbi_taxid}.", "yellow"))

537 # If user wants to cut genomes, warn him to check that it is on purpose (because default is cut at each 5'N')

538 if args.cutn == 5:

539 message = (" !! Your genomes will be split when sequence contains at "

540 "least {}'N' in a row. If you want to change this threshold, use "

541 "'--cutn n' option (n=0 if you do not want to cut)").format(args.cutn)

542 print(colored(message, "yellow"))

543

544 # Warn user about selection of genomes thresholds

545 if args.l90 == 100 or args.nbcont == 999:

546 print(colored(thresholds_message(args.l90, args.nbcont), "yellow"))

547

548 # Warn if user gave info file, but does not ask to run only Mash -> info file will be ignored

549 if (args.info_file and not args.only_mash) or (args.info_file and not args.norefseq):

550 message = (" !! You gave an info file (--info option), but did not ask to run only Mash "

551 "step (-M option). Your info file will be ignored (and renamed with '.back' "

552 "at the end), and another one will "

553 "be created with the new calculated values.")

554 print(colored(message, "yellow"))

555 return args

556

557

558

559if __name__ == '__main__':

560 import argparse

561 from textwrap import dedent

562 header = '''

563 ___ _____ ___ _____ _____

564 ( _`\ ( _ )( _`\ (_ _)( _ )

565 | |_) ) _ _ ___ | (_) || ( (_) _ | | | (_) |

566 | ,__/'/'_` )/' _ `\| _ || | _ /'_`\ | | | _ |

567 | | ( (_| || ( ) || | | || (_( )( (_) )| | | | | |

568 (_) `\__,_)(_) (_)(_) (_)(____/'`\___/'(_) (_) (_)

569

570

571 Large scale comparative genomics tools

572

573 -------------------------------------------

574 '''

575 my_parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,

576 description=dedent(header), add_help=False)

577 build_parser(my_parser)

578 OPTIONS = parse(my_parser, sys.argv[1:])

579 main_from_parse(OPTIONS)