Coverage for PanACoTA/tree_module/fastme_func.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-20 14:37 +0000

1#!/usr/bin/env python3 

2# coding: utf-8 

3 

4# ############################################################################### 

5# This file is part of PanACOTA. # 

6# # 

7# Authors: Amandine Perrin # 

8# Copyright © 2018-2020 Institut Pasteur (Paris). # 

9# See the COPYRIGHT file for details. # 

10# # 

11# PanACOTA is a software providing tools for large scale bacterial comparative # 

12# genomics. From a set of complete and/or draft genomes, you can: # 

13# - Do a quality control of your strains, to eliminate poor quality # 

14# genomes, which would not give any information for the comparative study # 

15# - Uniformly annotate all genomes # 

16# - Do a Pan-genome # 

17# - Do a Core or Persistent genome # 

18# - Align all Core/Persistent families # 

19# - Infer a phylogenetic tree from the Core/Persistent families # 

20# # 

21# PanACOTA is free software: you can redistribute it and/or modify it under the # 

22# terms of the Affero GNU General Public License as published by the Free # 

23# Software Foundation, either version 3 of the License, or (at your option) # 

24# any later version. # 

25# # 

26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY # 

27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # 

28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License # 

29# for more details. # 

30# # 

31# You should have received a copy of the Affero GNU General Public License # 

32# along with PanACOTA (COPYING file). # 

33# If not, see <https://www.gnu.org/licenses/>. # 

34# ############################################################################### 

35 

36""" 

37Functions to infer a phylogenetic tree with fastME 

38 

39@author gem 

40June 2017 

41""" 

42 

43from Bio import AlignIO 

44import os 

45import logging 

46 

47from PanACoTA import utils 

48 

49logger = logging.getLogger("tree.fastme") 

50 

51def run_tree(alignfile, boot, outdir, quiet, threads, **kwargs): 

52 """ 

53 Run fastme for the given alignment file and options 

54 

55 Parameters 

56 ---------- 

57 alignfile: str 

58 Path to file containing alignments of persistent families grouped by genome 

59 boot: int or None 

60 Number of bootstraps to compute. None if no bootstrap asked 

61 outdir: str 

62 output directory to save all results 

63 quiet: bool 

64 True if nothing must be printed to stderr/stdout, False otherwise 

65 threads: int 

66 Maximum number of threads to use 

67 kwargs["model"]: str 

68 DNA substitution model chosen by user 

69 kwargs["matrix"]: bool 

70 True if matrix file must be written, False otherwise 

71 kwargs["wb"]: bool 

72 True if all bootstrap pseudo-trees must be saved into a file, False otherwise 

73 """ 

74 model = kwargs["model"] 

75 write_boot = kwargs["wb"] 

76 write_matrix = kwargs["matrix"] 

77 align_name = os.path.basename(alignfile) 

78 align_phylip = os.path.join(outdir, align_name + ".phylip") 

79 convert2phylip(alignfile, align_phylip) 

80 run_fastme(align_phylip, boot, write_boot, write_matrix, threads, model, outdir, quiet) 

81 

82 

83def convert2phylip(infile, outfile): 

84 """ 

85 Input alignment is in fasta format. Input of fastME must be in Phylip-relaxed format. 

86 Convert it here. 

87 

88 Parameters 

89 ---------- 

90 infile: str 

91 Path to file in fasta format 

92 outfile: str 

93 Path to file to generate, in Phylip-relaxed format 

94 """ 

95 if os.path.isfile(outfile): 

96 logger.info("Phylip alignment file already existing.") 

97 logger.warning(("The Phylip alignment file {} already exists. The program " 

98 "will use it instead of re-converting {}.").format(outfile, infile)) 

99 return 

100 logger.info("Converting fasta alignment to PHYLIP-relaxed format.") 

101 with open(infile, 'r') as input_handle, open(outfile, 'w') as output_handle: 

102 alignments = AlignIO.parse(input_handle, "fasta") 

103 AlignIO.write(alignments, output_handle, "phylip-relaxed") 

104 

105 

106def run_fastme(alignfile, boot, write_boot, write_matrix, threads, model, outdir, quiet): 

107 """ 

108 Run fastME on the given alignment. 

109 

110 Parameters 

111 ---------- 

112 alignfile: str 

113 Path to file containing alignments of persistent families grouped by genome 

114 boot: int or None 

115 Number of bootstraps to compute. None if no bootstrap asked 

116 write_boot: bool 

117 True if all bootstrap pseudo-trees must be saved into a file, False otherwise 

118 threads: int 

119 Maximum number of threads to use 

120 model: str or None 

121 DNA substitution model chosen by user. None if default one 

122 outdir: str 

123 output directory to save all results 

124 quiet: bool 

125 True if nothing must be printed to stderr/stdout, False otherwise 

126 """ 

127 logger.info("Running FastME...") 

128 bootinfo = "" 

129 threadinfo = "" 

130 outboot = "" 

131 outmat = "" 

132 

133 # Get bootstrap information 

134 if boot: 

135 bootinfo = "-b {}".format(boot) 

136 # Get threads information 

137 if threads: 

138 threadinfo = "-T {}".format(threads) 

139 # Get output filename 

140 align_name = os.path.basename(alignfile) 

141 logfile = os.path.join(outdir, align_name + ".fastme.log") 

142 treefile = os.path.join(outdir, align_name + ".fastme_tree.nwk") 

143 # If bootstrap pseudo-trees must be written, define the filename here 

144 if write_boot: 

145 outboot = "-B " + os.path.join(outdir, align_name + ".fastme_bootstraps.nwk") 

146 if write_matrix: 

147 outmat = "-O " + os.path.join(outdir, align_name + ".fastme_dist-mat.txt") 

148 # Put default model if not given 

149 if not model: 

150 model = "T" 

151 cmd = (f"fastme -i {alignfile} -d {model} -n B -s {threadinfo} {bootinfo} " 

152 f"-o {treefile} -I {logfile} {outboot} {outmat}") 

153 logger.details(cmd) 

154 if quiet: 

155 fnull = open(os.devnull, 'w') 

156 else: 

157 fnull = None 

158 error = ("Problem while running FastME. See log file ({}) for " 

159 "more information.").format(logfile) 

160 utils.run_cmd(cmd, error, stdout=fnull, eof=True, logger=logger, stderr=fnull)