Coverage for PanACoTA/subcommands/tree.py: 100%

106 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-20 14:37 +0000

1#!/usr/bin/env python3 

2# coding: utf-8 

3 

4# ############################################################################### 

5# This file is part of PanACOTA. # 

6# # 

7# Authors: Amandine Perrin # 

8# Copyright © 2018-2020 Institut Pasteur (Paris). # 

9# See the COPYRIGHT file for details. # 

10# # 

11# PanACOTA is a software providing tools for large scale bacterial comparative # 

12# genomics. From a set of complete and/or draft genomes, you can: # 

13# - Do a quality control of your strains, to eliminate poor quality # 

14# genomes, which would not give any information for the comparative study # 

15# - Uniformly annotate all genomes # 

16# - Do a Pan-genome # 

17# - Do a Core or Persistent genome # 

18# - Align all Core/Persistent families # 

19# - Infer a phylogenetic tree from the Core/Persistent families # 

20# # 

21# PanACOTA is free software: you can redistribute it and/or modify it under the # 

22# terms of the Affero GNU General Public License as published by the Free # 

23# Software Foundation, either version 3 of the License, or (at your option) # 

24# any later version. # 

25# # 

26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY # 

27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # 

28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License # 

29# for more details. # 

30# # 

31# You should have received a copy of the Affero GNU General Public License # 

32# along with PanACOTA (COPYING file). # 

33# If not, see <https://www.gnu.org/licenses/>. # 

34# ############################################################################### 

35 

36""" 

37tree is a subcommand of PanACoTA 

38 

39@author gem 

40June 2017 

41""" 

42import sys 

43 

44 

def main_from_parse(args):
    """
    Unpack the parsed command-line options and forward them to ``main``.

    Parameters
    ----------
    args : argparse.Namespace
        result of argparse parsing of all arguments in command line. It must
        carry ``argv`` (iterable of str) plus every option read below.
    """
    # Program name followed by the space-joined stored arguments
    # (a trailing space is kept when ``args.argv`` is empty).
    launched = "PanACoTA {}".format(" ".join(args.argv))
    main(
        launched,
        args.alignment,
        args.outdir,
        args.soft,
        args.model,
        args.threads,
        boot=args.boot,
        write_boot=args.write_boot,
        write_mat=args.write_mat,
        memory=args.memory,
        fast=args.fast,
        verbose=args.verbose,
        quiet=args.quiet,
    )

57 

58 

def main(cmd, align, outdir, soft, model, threads, boot=False, write_boot=False, write_mat=False,
         memory=False, fast=False, verbose=0, quiet=False):
    """
    Infer a phylogenetic tree from an alignment file, with the given software.

    Parameters
    ----------
    cmd : str
        command used to launch tree module (written to the log)
    align : str
        Path to file containing alignments of persistent families grouped by genome
    outdir : str
        Path to the output directory. It is created if it does not exist, the log
        file is written inside it, and it is handed over to ``run_tree``.
    soft : str
        Soft to use to infer the phylogenetic tree: one of fasttree, fastme,
        quicktree, iqtree2 or iqtree. With "iqtree2", if the ``iqtree2`` command is
        not found but ``iqtree`` is, "iqtree" is used instead.
    model : str or None
        DNA substitution model chosen by user, None if quicktree used
    threads : int
        Maximum number of threads to use
    boot : int or None
        Number of bootstraps to compute. Falsy (None/False) if no bootstrap asked
    write_boot : bool
        True if all bootstrap pseudo-trees must be saved into a file, False otherwise
    write_mat : bool
        True if distance matrix must be saved, False otherwise
    memory : str
        Maximal RAM usage in GB | MB | % - Only for iqtree
    fast : bool
        use -fast option with IQtree
    verbose : int
        verbosity:
        - default 0 : stdout contains INFO, stderr contains ERROR.
        - 1: stdout contains INFO, stderr contains WARNING and ERROR
        - 2: stdout contains (DEBUG), DETAIL and INFO, stderr contains WARNING and ERROR
        - >=15: Add DEBUG in stdout
    quiet : bool
        True if nothing must be sent to stdout/stderr, False otherwise

    Notes
    -----
    The program exits with status 1 (``sys.exit(1)``) when ``soft`` is not a
    supported name, or when the executable required by ``soft`` is not installed.
    """
    # Refuse unknown software names before doing anything else. Otherwise no tree
    # module would be selected and the run would only fail at the very end (after
    # creating the output directory and the log file) with an AttributeError.
    supported_softs = ("fasttree", "fastme", "quicktree", "iqtree2", "iqtree")
    if soft not in supported_softs:
        print(f"'{soft}' is not a supported software (choose among "
              f"{', '.join(supported_softs)}). 'PanACoTA tree' cannot run.")
        sys.exit(1)

    # import needed packages
    import logging
    import os
    from PanACoTA import utils
    from PanACoTA import __version__ as version

    if soft == "fasttree":
        # test if fasttree is installed and in the path
        if not utils.check_installed("FastTreeMP"):  # pragma: no cover
            print("FastTreeMP is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import fasttree_func as tree
    elif soft == "fastme":
        # test if fastME is installed and in the path
        if not utils.check_installed("fastme"):  # pragma: no cover
            print("fastme is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import fastme_func as tree
    elif soft == "quicktree":
        # test if quicktree is installed and in the path
        if not utils.check_installed("quicktree"):  # pragma: no cover
            print("quicktree is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import quicktree_func as tree
    elif soft == "iqtree2":
        # by default, iqtree2 (not iqtree).
        # So, if user did not specify, it means iqtree2. But if 'iqtree2' command
        # does not exist, use iqtree command instead.
        if not utils.check_installed("iqtree2"):  # pragma: no cover
            if not utils.check_installed("iqtree"):
                print("IQtree2 is not installed. 'PanACoTA tree' cannot run.")
                sys.exit(1)
            # Fall back on version 1: from here on, the log file name and the
            # 's' argument of run_tree both refer to "iqtree".
            soft = "iqtree"
        from PanACoTA.tree_module import iqtree_func as tree
    else:
        # soft == "iqtree": user specifically asked for iqtree (version 1)
        if not utils.check_installed("iqtree"):  # pragma: no cover
            print("IQtree is not installed. 'PanACoTA tree' cannot run.")
            sys.exit(1)
        from PanACoTA.tree_module import iqtree_func as tree

    # Create outdir if it does not already exist
    os.makedirs(outdir, exist_ok=True)
    # base name of the logfile
    logfile_base = os.path.join(outdir, "PanACoTA-tree-" + soft)
    # level is the minimum level that will be considered.
    if verbose <= 1:
        # for verbose = 0 or 1, ignore details and debug, start from info
        level = logging.INFO
    elif verbose < 15:
        # for 2 <= verbose < 15, ignore only debug
        level = 15  # int corresponding to detail level
    else:
        # for verbose >= 15, write everything
        level = logging.DEBUG

    utils.init_logger(logfile_base, level, 'tree', verbose=verbose, quiet=quiet, log_details=True)
    logger = logging.getLogger("tree")
    logger.info(f'PanACoTA version {version}')
    logger.info("Command used\n \t > " + cmd)
    tree.run_tree(align, boot, outdir, quiet, threads, model=model, wb=write_boot,
                  mem=memory, s=soft, f=fast, matrix=write_mat)

    logger.info("END")

164 

165 

def build_parser(parser):
    """
    Declare on ``parser`` all command-line options of the tree subcommand.

    Arguments are split in four groups: required arguments (alignment file and
    output directory), choice of the software, optional arguments and 'Others'
    (verbosity, quiet mode, help).

    Parameters
    ----------
    parser : argparse.ArgumentParser
        parser to configure in order to extract command-line arguments. A
        ``-h/--help`` option is added here, so the parser is expected to be
        created with ``add_help=False``.
    """
    from PanACoTA import utils_argparse

    # Create command-line parser for all options and arguments to give
    required = parser.add_argument_group('Required arguments')
    required.add_argument("-a", dest="alignment", required=True,
                          help=("Alignment file in multi-fasta: each header will be a "
                                "leaf of the inferred tree."))
    required.add_argument("-o", dest="outdir", required=True,
                          help=("Directory where tree results will be saved. "))

    # Choose with which soft inferring phylogenetic tree
    softparse = parser.add_argument_group('Choose soft to use (default is IQtree2)')
    softs = ["fasttree", "fastme", "quicktree", "iqtree", "iqtree2"]
    softparse.add_argument("-s", "--soft", dest="soft", choices=softs, default="iqtree2",
                           help=("Choose with which software you want to infer the "
                                 "phylogenetic tree. Default is IQtree2 "
                                 "(versions 2.x of IQtree). If you want version 1.x of "
                                 "IQtree, use '-s iqtree'"))

    optional = parser.add_argument_group('Optional arguments')
    optional.add_argument("-b", "--boot", dest="boot", type=int,
                          help=("Indicate how many bootstraps you want to compute. By "
                                "default, no bootstrap is calculated. For IQtree, it "
                                "will use ultrafast bootstrap (>=1000)."))

    optional.add_argument("--threads", dest="threads", default=1, type=utils_argparse.thread_num,
                          help=("add this option if you want to parallelize on several threads. "
                                "Indicate on how many threads you want to parallelize. "
                                "By default, it uses 1 thread. Put 0 if you want to use "
                                "all threads of your computer. Not available with quicktree."))
    # The FastME default announced here must match the one applied by check_args
    # when no model is given ("T", i.e. TN93).
    optional.add_argument("-m", "--model", dest="model",
                          help=("Choose your DNA substitution model.\n"
                                "Default for FastTree and IQtree: GTR. Default for FastME: TN93.\n"
                                "For FastTree, the choices are 'GTR' and 'JC'.\n"
                                "For FastME, choices are: 'p-distance' "
                                "(or 'p'), 'RY symmetric' (or 'Y'), 'RY' (or 'R'), "
                                "'JC69' (or 'J'), 'K2P' (or 'K'), 'F81' (or '1'), "
                                "'F84' (or '4'), 'TN93' (or 'T'), 'LogDet' (or 'L').\n"
                                "For IQtree, choices are HKY, JC, F81, K2P, K3P, K81uf,"
                                " TNef, TIM, TIMef, TVM, TVMef, SYM, GTR, TEST. TEST to run standard model selection."))
    optional.add_argument("-B", dest="write_boot", action="store_true",
                          help=("Add this option if you want to write all bootstrap "
                                "pseudo-trees. Only available with FastME and IQtree."))
    optional.add_argument("-M", dest="write_mat", action="store_true",
                          help=("Add this option if you want to write the distance matrix. "
                                "Only available with FastME."))
    optional.add_argument("--mem", dest="memory",
                          help=("Maximal RAM usage in GB | MB. Only available with iqtree."))
    optional.add_argument("-fast", dest="fast", action="store_true",
                          help=("Use -fast option with iqtree."))

    helper = parser.add_argument_group('Others')
    helper.add_argument("-v", "--verbose", dest="verbose", action="count", default=0,
                        help="Increase verbosity in stdout/stderr.")
    helper.add_argument("-q", "--quiet", dest="quiet", action="store_true", default=False,
                        help=("Do not display anything to stdout/stderr. log files will "
                              "still be created."))
    helper.add_argument("-h", "--help", dest="help", action="help",
                        help="show this help message and exit")

236 

237 

def check_args(parser, args):
    """
    Check that arguments given to parser are as expected.

    Incompatible combinations of options are reported through ``parser.error``
    (which prints the message and exits). When the combination is valid,
    ``args.model`` is normalised to the value stored for the chosen software:
    a FastME one-letter code, a FastTree flag ('-gtr' or ''), or an IQtree model
    name. When no model is given, the default is "T" for fastme, "-gtr" for
    fasttree and "GTR" for iqtree/iqtree2. ``args.model`` is left untouched for
    quicktree.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser used to parse command-line
    args : argparse.Namespace
        Parsed arguments. Must provide ``soft``, ``model``, ``threads``,
        ``memory``, ``boot``, ``write_boot``, ``write_mat`` and ``fast``.

    Returns
    -------
    argparse.Namespace or None
        The arguments parsed, updated according to some rules. Exit program
        with error message if error occurs with arguments given.
    """
    # FastME: long model name -> one-letter code. "RY symmetric" is the name shown
    # in the help message; "RY-symetric" (sic) is kept so that command lines
    # written for previous versions still work.
    models_fastme = {"p-distance": "p",
                     "RY symmetric": "Y", "RY-symmetric": "Y", "RY-symetric": "Y",
                     "RY": "R", "JC69": "J", "K2P": "K", "F81": "1", "F84": "4",
                     "TN93": "T", "LogDet": "L"}
    models_fasttree = {"GTR": "-gtr", "JC": ""}
    # IQtree model names are used as they are
    models_iqtree = {mod: mod for mod in ("HKY", "JC", "F81", "K2P", "K3P", "K81uf",
                                          "TNef", "TIM", "TIMef", "TVM", "TVMef",
                                          "SYM", "GTR", "TEST")}

    def check_model(models, choice):
        """
        Return the normalised form of ``choice``: the value associated with it when
        it is a known model name, or ``choice`` itself when it already is one of the
        normalised values. Otherwise, end the program through ``parser.error``.
        """
        if choice in models:
            return models[choice]
        if choice in models.values():
            return choice
        mmsg = ("{} is not an available model for {}. Please choose an available DNA model "
                "(see -h for more details)").format(choice, args.soft)
        parser.error(mmsg)

    with_iqtree = args.soft in ("iqtree", "iqtree2")

    if args.soft == "quicktree" and args.threads != 1:
        msg = ("You cannot run quicktree with multiple threads. Choose another software, "
               "or remove the --threads option.")
        parser.error(msg)

    if args.soft == "quicktree" and args.model:
        msg = "Quicktree only runs the NJ algorithm. You cannot choose a DNA substitution model."
        parser.error(msg)

    # Memory option only available with iqtree
    if not with_iqtree and args.memory:
        msg = "'--mem' option is only available for IQtree."
        parser.error(msg)

    # If bootstraps are asked with iqtree, check the number is >= 1000
    if with_iqtree and args.boot and int(args.boot) < 1000:
        msg = "With IQtree, number of replicates for bootstraps must be >= 1000."
        parser.error(msg)

    # Write bootstrap option only available for fastme and iqtree
    if args.write_boot and not with_iqtree and args.soft != "fastme":
        msg = "'-B' option is only available with FastME and IQtree."
        parser.error(msg)

    # Write distance matrix option only available for fastme
    if args.write_mat and args.soft != "fastme":
        msg = "'-M' option is only available with FastME."
        parser.error(msg)

    # Fast option only available for iqtree, and without any bootstrap option
    if args.fast and (not with_iqtree or args.boot or args.write_boot):
        msg = ("-fast option is available only for IQtree, and not compatible "
               "with '-B' and '--boot' options (bootstraps).")
        parser.error(msg)

    # Check model name is valid for the chosen soft, or apply the default one.
    # soft -> (known models, model used when the user did not give any)
    models_by_soft = {"fastme": (models_fastme, "T"),
                      "fasttree": (models_fasttree, "-gtr"),
                      "iqtree": (models_iqtree, "GTR"),
                      "iqtree2": (models_iqtree, "GTR")}
    if args.soft in models_by_soft:
        known_models, default_model = models_by_soft[args.soft]
        if args.model:
            args.model = check_model(known_models, args.model)
        else:
            args.model = default_model
    return args

326 

327 

def parse(parser, argu):
    """
    Run ``parser`` on the given command-line, then validate the result.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        the parser used
    argu : [str]
        command-line given by user, to parse using parser

    Returns
    -------
    argparse.Namespace
        Parsed arguments, as returned by ``check_args``
    """
    return check_args(parser, parser.parse_args(argu))

346 

347 

if __name__ == '__main__':
    import argparse

    # Stand-alone use of the subcommand: build its own parser (help option is
    # added by build_parser, hence add_help=False), parse the command line and run.
    description = "Infer phylogenetic tree based on core/persistent genome"
    myparser = argparse.ArgumentParser(description=description, add_help=False)
    build_parser(myparser)
    OPTIONS = parse(myparser, sys.argv[1:])
    main_from_parse(OPTIONS)