Coverage for PanACoTA/tree_module/quicktree_func.py: 100%

34 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-20 14:37 +0000

1#!/usr/bin/env python3 

2# coding: utf-8 

3 

4# ############################################################################### 

5# This file is part of PanACOTA. # 

6# # 

7# Authors: Amandine Perrin # 

8# Copyright © 2018-2020 Institut Pasteur (Paris). # 

9# See the COPYRIGHT file for details. # 

10# # 

11# PanACOTA is a software providing tools for large scale bacterial comparative # 

12# genomics. From a set of complete and/or draft genomes, you can: # 

13# - Do a quality control of your strains, to eliminate poor quality # 

14# genomes, which would not give any information for the comparative study # 

15# - Uniformly annotate all genomes # 

16# - Do a Pan-genome # 

17# - Do a Core or Persistent genome # 

18# - Align all Core/Persistent families # 

19# - Infer a phylogenetic tree from the Core/Persistent families # 

20# # 

21# PanACOTA is free software: you can redistribute it and/or modify it under the # 

22# terms of the Affero GNU General Public License as published by the Free # 

23# Software Foundation, either version 3 of the License, or (at your option) # 

24# any later version. # 

25# # 

26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY # 

27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # 

28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License # 

29# for more details. # 

30# # 

31# You should have received a copy of the Affero GNU General Public License # 

32# along with PanACOTA (COPYING file). # 

33# If not, see <https://www.gnu.org/licenses/>. # 

34# ############################################################################### 

35 

36""" 

37Functions to infer a phylogenetic tree with quicktree 

38 

39@author gem 

40June 2017 

41""" 

42 

43from Bio import AlignIO 

44import os 

45import logging 

46 

47from PanACoTA import utils 

48 

49logger = logging.getLogger("tree.quicktree") 

50 

51 

def run_tree(alignfile, boot, outdir, *args, **kwargs):
    """
    Infer a tree with quicktree from a fasta alignment file.

    The alignment is first converted to Stockholm format (the converted file is
    named after the input file, with a '.stockholm' suffix, and written inside
    ``outdir``), and quicktree is then run on the converted file.

    Parameters
    ----------
    alignfile: str
        Path to file containing alignments of persistent families grouped by genome
    boot: int or None
        Number of bootstraps to compute. None if no bootstrap asked
    outdir: str
        Directory in which the Stockholm file is written. It is also handed to
        'run_quicktree' as its output location.
    args: tuple
        Not used here. Accepted so that the signature stays compatible with the
        'run_tree' function of other softs like fastME and fastTree, which require
        more arguments (DNA substitution model, number of threads, etc.)
    kwargs: dict
        Not used here. Accepted for the same compatibility reason as ``args``.
    """
    # Stockholm file lives in outdir and keeps the name of the input alignment
    stockholm_path = os.path.join(outdir, os.path.basename(alignfile) + ".stockholm")
    convert2stockholm(alignfile, stockholm_path)
    run_quicktree(stockholm_path, boot, outdir)

77 

78 

def convert2stockholm(infile, outfile):
    """
    Write a Stockholm-format copy of a fasta alignment.

    Quicktree needs its input in Stockholm format, whereas the alignment given
    here is in fasta. If ``outfile`` already exists, no conversion is done: the
    existing file is left untouched and a warning is logged.

    Parameters
    ----------
    infile: str
        Path to file containing alignments in fasta
    outfile: str
        Path to file which will contain the alignments converted to Stockholm format
    """
    if not os.path.isfile(outfile):
        logger.info("Converting fasta alignment to stockholm format.")
        with open(infile, 'r') as fasta_in:
            with open(outfile, 'w') as stockholm_out:
                parsed = AlignIO.parse(fasta_in, "fasta")
                AlignIO.write(parsed, stockholm_out, "stockholm")
        return

    # Output already present: reuse it rather than converting again
    logger.info("Stockholm alignment file already existing.")
    reuse_msg = ("The Stockholm alignment file {} already exists. The program "
                 "will use it instead of re-converting {}.")
    logger.warning(reuse_msg.format(outfile, infile))

100 

101 

def run_quicktree(alignfile, boot, outdir):
    """
    Run quicktree on the given alignment.

    The command's standard output is redirected to
    '<outdir>/<alignment name>.quicktree_tree.nwk' and its standard error to
    '<outdir>/<alignment name>.quicktree.log'. Both files are closed once
    'utils.run_cmd' returns or raises.

    Parameters
    ----------
    alignfile: str
        Path to file containing alignments of persistent families grouped by genome,
        in Stockholm format
    boot: int or None
        Number of bootstraps to compute. None (or 0) if no bootstrap asked: in that
        case no '-boot' option is added to the command line
    outdir: str
        Directory in which the tree file and the log file are created
    """
    logger.info("Running Quicktree...")

    # Get bootstrap information (empty when no bootstrap is asked)
    bootinfo = f"-boot {boot}" if boot else ""

    # Get output filename and logfile name
    align_name = os.path.basename(alignfile)
    logfile = os.path.join(outdir, align_name + ".quicktree.log")
    treefile = os.path.join(outdir, align_name + ".quicktree_tree.nwk")
    cmd = f"quicktree -in a -out t {bootinfo} {alignfile}"
    error = (f"Problem while running quicktree. See log file ({logfile}) for "
             "more information.")

    # Context managers guarantee both handles are closed, even if run_cmd raises
    # or exits. NOTE(review): presumably utils.run_cmd waits for the command to
    # finish before returning -- confirm against utils.
    with open(treefile, "w") as outfile, open(logfile, "w") as logfilef:
        logger.details(cmd)
        utils.run_cmd(cmd, error, stdout=outfile, eof=True, logger=logger, stderr=logfilef)