Coverage for PanACoTA/subcommands/tree.py: 100%
106 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
1#!/usr/bin/env python3
2# coding: utf-8
4# ###############################################################################
5# This file is part of PanACOTA. #
6# #
7# Authors: Amandine Perrin #
8# Copyright © 2018-2020 Institut Pasteur (Paris). #
9# See the COPYRIGHT file for details. #
10# #
11# PanACOTA is a software providing tools for large scale bacterial comparative #
12# genomics. From a set of complete and/or draft genomes, you can: #
13# - Do a quality control of your strains, to eliminate poor quality #
14# genomes, which would not give any information for the comparative study #
15# - Uniformly annotate all genomes #
16# - Do a Pan-genome #
17# - Do a Core or Persistent genome #
18# - Align all Core/Persistent families #
19# - Infer a phylogenetic tree from the Core/Persistent families #
20# #
21# PanACOTA is free software: you can redistribute it and/or modify it under the #
22# terms of the Affero GNU General Public License as published by the Free #
23# Software Foundation, either version 3 of the License, or (at your option) #
24# any later version. #
25# #
26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY #
27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License #
29# for more details. #
30# #
31# You should have received a copy of the Affero GNU General Public License #
32# along with PanACOTA (COPYING file). #
33# If not, see <https://www.gnu.org/licenses/>. #
34# ###############################################################################
36"""
37tree is a subcommand of PanACoTA
39@author gem
40June 2017
41"""
42import sys
def main_from_parse(args):
    """
    Unpack the parsed command-line arguments and forward them to ``main``.

    Parameters
    ----------
    args : argparse.Namespace
        result of argparse parsing of all arguments in command line
    """
    # Rebuild the command line as typed by the user, so that it can be logged
    command_line = f"PanACoTA {' '.join(args.argv)}"
    main(
        command_line,
        args.alignment,
        args.outdir,
        args.soft,
        args.model,
        args.threads,
        boot=args.boot,
        write_boot=args.write_boot,
        write_mat=args.write_mat,
        memory=args.memory,
        fast=args.fast,
        verbose=args.verbose,
        quiet=args.quiet,
    )
def main(cmd, align, outdir, soft, model, threads, boot=False, write_boot=False, write_mat=False,
         memory=False, fast=False, verbose=0, quiet=False):
    """
    Infer a phylogenetic tree from an alignment file, with the given software.

    Checks that the executable needed by the chosen software is installed,
    creates the output directory if needed, initializes the 'tree' logger and
    then delegates the inference to the ``run_tree`` function of the matching
    ``PanACoTA.tree_module`` submodule.

    Parameters
    ----------
    cmd: str
        command used to launch tree module (only written to the log)
    align: str
        Path to file containing alignments of persistent families grouped by genome
    outdir: str
        Path to the output directory. Created if it does not exist yet. The log
        file base name is located in it, and it is forwarded to ``run_tree``.
    soft: str
        Soft to use to infer the phylogenetic tree: 1 of fasttree, fastme, quicktree,
        iqtree2 or iqtree. With 'iqtree2', if the 'iqtree2' executable is not found
        but 'iqtree' is, 'iqtree' is used instead.
    model: str or None
        DNA substitution model chosen by user, None if quicktree used
    threads: int
        Maximum number of threads to use
    boot: int or None
        Number of bootstraps to compute. None if no bootstrap asked
    write_boot: bool
        True if all bootstrap pseudo-trees must be saved into a file, False otherwise
    write_mat: bool
        True if distance matrix must be saved, false otherwise
    memory: str
        Maximal RAM usage in GB | MB | % - Only for iqtree
    fast: boolean
        use -fast option with IQtree
    verbose : int
        verbosity:
        - default 0 : stdout contains INFO, stderr contains ERROR.
        - 1: stdout contains INFO, stderr contains WARNING and ERROR
        - 2: stdout contains (DEBUG), DETAIL and INFO, stderr contains WARNING and ERROR
        - >=15: Add DEBUG in stdout
    quiet: bool
        True if nothing must be sent to stdout/stderr, False otherwise

    Notes
    -----
    Exits the program with status 1 (after printing a message) if the required
    executable is not installed, or if ``soft`` is not a supported software.
    """
    # import needed packages
    import logging
    import os
    from PanACoTA import utils
    from PanACoTA import __version__ as version

    # Select the module wrapping the chosen software. Modules are imported lazily,
    # only once the corresponding executable has been found.
    if soft == "fasttree":
        # test if fasttree is installed and in the path
        if not utils.check_installed("FastTreeMP"):  # pragma: no cover
            print("FastTreeMP is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import fasttree_func as tree
    elif soft == "fastme":
        # test if fastME is installed and in the path
        if not utils.check_installed("fastme"):  # pragma: no cover
            print("fastme is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import fastme_func as tree
    elif soft == "quicktree":
        # test if quicktree is installed and in the path
        if not utils.check_installed("quicktree"):  # pragma: no cover
            print("quicktree is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import quicktree_func as tree
    elif soft == "iqtree2":
        # by default, iqtree2 (not iqtree).
        # So, if user did not specify, it means iqtree2. But if 'iqtree2' command
        # does not exist, use iqtree command instead.
        # test if iqtree2 is installed and in the path
        if not utils.check_installed("iqtree2"):  # pragma: no cover
            if not utils.check_installed("iqtree"):
                print("IQtree2 is not installed. 'PanACoTA tree' cannot run.")
                sys.exit(1)
            else:
                soft = "iqtree"
        from PanACoTA.tree_module import iqtree_func as tree
    elif soft == "iqtree":
        # user specifically asked for iqtree (version 1)
        if not utils.check_installed("iqtree"):  # pragma: no cover
            print("IQtree is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import iqtree_func as tree
    else:
        # Fail early with a clear message, instead of crashing later with an
        # AttributeError because no tree module was selected.
        print(f"'{soft}' is not a supported software. 'PanACoTA tree' cannot run.")
        sys.exit(1)

    # If outdir does not already exist, create it (no error if it appears meanwhile)
    os.makedirs(outdir, exist_ok=True)
    # name logfile, add timestamp if already existing
    logfile_base = os.path.join(outdir, "PanACoTA-tree-" + soft)
    # level is the minimum level that will be considered.
    if verbose <= 1:
        # for verbose = 0 or 1, ignore details and debug, start from info
        level = logging.INFO
    elif verbose < 15:
        # for 2 <= verbose < 15, ignore only debug
        level = 15  # int corresponding to detail level
    else:
        # for verbose >= 15, write everything
        level = logging.DEBUG

    utils.init_logger(logfile_base, level, 'tree', verbose=verbose, quiet=quiet, log_details=True)
    logger = logging.getLogger("tree")
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)
    tree.run_tree(align, boot, outdir, quiet, threads, model=model, wb=write_boot,
                  mem=memory, s=soft, f=fast, matrix=write_mat)

    logger.info("END")
def build_parser(parser):
    """
    Configure the given parser with all the options of the 'tree' subcommand.

    Arguments are added in four groups: required arguments (alignment file and
    output directory), choice of the software, optional arguments, and others
    (verbosity, quiet mode, help).

    Parameters
    ----------
    parser : argparse.ArgumentParser
        parser to configure in order to extract command-line arguments
    """
    from PanACoTA import utils_argparse

    # Create command-line parser for all options and arguments to give
    required = parser.add_argument_group('Required arguments')
    required.add_argument("-a", dest="alignment", required=True,
                          help=("Alignment file in multi-fasta: each header will be a "
                                "leaf of the inferred tree."))
    required.add_argument("-o", dest="outdir", required=True,
                          help=("Directory where tree results will be saved. "))

    # Choose with which soft inferring phylogenetic tree
    softparse = parser.add_argument_group('Choose soft to use (default is IQtree2)')
    softs = ["fasttree", "fastme", "quicktree", "iqtree", "iqtree2"]
    softparse.add_argument("-s", "--soft", dest="soft", choices=softs, default="iqtree2",
                           help=("Choose with which software you want to infer the "
                                 "phylogenetic tree. Default is IQtree2 "
                                 "(versions 2.x of IQtree). If you want version 1.x of "
                                 "IQtree, use '-s iqtree'"))

    optional = parser.add_argument_group('Optional arguments')
    optional.add_argument("-b", "--boot", dest="boot", type=int,
                          help=("Indicate how many bootstraps you want to compute. By "
                                "default, no bootstrap is calculated. For IQtree, it "
                                "will use ultrafast bootstrap (>=1000)."))

    optional.add_argument("--threads", dest="threads", default=1, type=utils_argparse.thread_num,
                          help=("add this option if you want to parallelize on several threads. "
                                "Indicate on how many threads you want to parallelize. "
                                "By default, it uses 1 thread. Put 0 if you want to use "
                                "all threads of your computer. Not available with quicktree."))
    # The FastME default announced here must match the one applied by check_args
    # when no model is given ("T", i.e. TN93).
    optional.add_argument("-m", "--model", dest="model",
                          help=("Choose your DNA substitution model.\n"
                                "Default for FastTree and IQtree: GTR. Default for FastME: TN93.\n"
                                "For FastTree, the choices are 'GTR' and 'JC'.\n"
                                "For FastME, choices are: 'p-distance' "
                                "(or 'p'), 'RY symmetric' (or 'Y'), 'RY' (or 'R'), "
                                "'JC69' (or 'J'), 'K2P' (or 'K'), 'F81' (or '1'), "
                                "'F84' (or '4'), 'TN93' (or 'T'), 'LogDet' (or 'L').\n"
                                "For IQtree, choices are HKY, JC, F81, K2P, K3P, K81uf,"
                                " TNef, TIM, TIMef, TVM, TVMef, SYM, GTR, TEST. TEST to run standard model selection."))
    optional.add_argument("-B", dest="write_boot", action="store_true",
                          help=("Add this option if you want to write all bootstrap "
                                "pseudo-trees. Only available with FastME and IQtree."))
    optional.add_argument("-M", dest="write_mat", action="store_true",
                          help=("Add this option if you want to write the distance matrix. "
                                "Only available with FastME."))
    optional.add_argument("--mem", dest="memory",
                          help=("Maximal RAM usage in GB | MB. Only available with iqtree."))
    optional.add_argument("-fast", dest="fast", action="store_true",
                          help=("Use -fast option with iqtree."))

    # The help option is declared explicitly here (the parser built in this
    # file's __main__ section is created with add_help=False).
    helper = parser.add_argument_group('Others')
    helper.add_argument("-v", "--verbose", dest="verbose", action="count", default=0,
                        help="Increase verbosity in stdout/stderr.")
    helper.add_argument("-q", "--quiet", dest="quiet", action="store_true", default=False,
                        help=("Do not display anything to stdout/stderr. log files will "
                              "still be created."))
    helper.add_argument("-h", "--help", dest="help", action="help",
                        help="show this help message and exit")
def check_args(parser, args):
    """
    Check that arguments given to parser are as expected.

    Rejects option combinations which are not supported by the chosen software
    (through ``parser.error``), then normalizes ``args.model`` to the value
    associated with the chosen software, or to that software's default model
    when none was given.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser used to parse command-line
    args : argparse.Namespace
        Parsed arguments. Attributes read: soft, threads, model, memory, boot,
        write_boot, write_mat, fast.

    Returns
    -------
    argparse.Namespace
        The arguments parsed, with ``model`` updated according to the chosen
        software. ``parser.error`` is called (error message and program exit)
        if an error occurs with the arguments given.
    """
    # Model names (keys) and the value kept in args.model (values).
    # 'RY symmetric' is the spelling announced in the -m help; the historical
    # 'RY-symetric' key is kept so that existing command lines still work.
    models_fastme = {"p-distance": "p", "RY symmetric": "Y", "RY-symetric": "Y", "RY": "R",
                     "JC69": "J", "K2P": "K", "F81": "1", "F84": "4",
                     "TN93": "T", "LogDet": "L"}
    models_fasttree = {"GTR": "-gtr", "JC": ""}
    # For IQtree, the model name is kept as is
    models_iqtree = {mod: mod for mod in ("HKY", "JC", "F81", "K2P", "K3P", "K81uf",
                                          "TNef", "TIM", "TIMef", "TVM", "TVMef",
                                          "SYM", "GTR", "TEST")}

    def check_model(models, choice):
        """
        Return the value associated with 'choice' in 'models'. 'choice' can be
        either a model name (key) or directly the associated value. Call
        parser.error if it is neither of them.
        """
        if choice in models:
            return models[choice]
        if choice in models.values():
            return choice
        mmsg = ("{} is not an available model for {}. Please choose an available DNA model "
                "(see -h for more details)").format(choice, args.soft)
        parser.error(mmsg)

    # Both versions of IQtree accept the same options
    is_iqtree = args.soft in ("iqtree", "iqtree2")

    if args.soft == "quicktree" and args.threads != 1:
        msg = ("You cannot run quicktree with multiple threads. Choose another software, "
               "or remove the --threads option.")
        parser.error(msg)

    if args.soft == "quicktree" and args.model:
        msg = "Quicktree only runs the NJ algorithm. You cannot choose a DNA substitution model."
        parser.error(msg)

    # Memory option only available with iqtree
    if not is_iqtree and args.memory:
        msg = "'--mem' option is only available for IQtree."
        parser.error(msg)

    # If bootstraps are asked with iqtree, check the number is >= 1000
    if is_iqtree and args.boot and int(args.boot) < 1000:
        msg = "With IQtree, number of replicates for bootstraps must be >= 1000."
        parser.error(msg)

    # Write bootstrap option only available for fastme and iqtree
    if not is_iqtree and args.soft != "fastme" and args.write_boot:
        msg = "'-B' option is only available with FastME and IQtree."
        parser.error(msg)

    # Write distance matrix option only available for fastme
    if args.write_mat and args.soft != "fastme":
        msg = "'-M' option is only available with FastME."
        parser.error(msg)

    # Fast option only available for iqtree, and without any bootstrap option
    if args.fast and (not is_iqtree or args.boot or args.write_boot):
        msg = ("-fast option is available only for IQtree, and not compatible "
               "with '-B' and '--boot' options (bootstraps).")
        parser.error(msg)

    # Check model name is valid for the chosen soft, or apply the soft's default.
    # Nothing to do for quicktree: giving it a model was rejected above.
    if args.soft == "fastme":
        if args.model:
            args.model = check_model(models_fastme, args.model)
        else:
            args.model = "T"
    elif args.soft == "fasttree":
        if args.model:
            args.model = check_model(models_fasttree, args.model)
        else:
            args.model = "-gtr"
    elif is_iqtree:
        if args.model:
            args.model = check_model(models_iqtree, args.model)
        else:
            args.model = "GTR"
    return args
def parse(parser, argu):
    """
    Parse the given command-line with 'parser', then validate the result.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        the parser used
    argu : [str]
        command-line given by user, to parse using parser

    Returns
    -------
    argparse.Namespace
        Parsed arguments, as returned by check_args
    """
    # Parsing and validation are chained: check_args receives the raw namespace
    return check_args(parser, parser.parse_args(argu))
if __name__ == '__main__':
    import argparse

    # add_help is disabled because build_parser declares its own -h/--help option
    description = "Infer phylogenetic tree based on core/persistent genome"
    cli_parser = argparse.ArgumentParser(description=description, add_help=False)
    build_parser(cli_parser)
    # Parse and check everything given after the script name, then run
    main_from_parse(parse(cli_parser, sys.argv[1:]))