Coverage for PanACoTA/tree_module/fastme_func.py: 100%
49 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
1#!/usr/bin/env python3
2# coding: utf-8
4# ###############################################################################
5# This file is part of PanACOTA. #
6# #
7# Authors: Amandine Perrin #
8# Copyright © 2018-2020 Institut Pasteur (Paris). #
9# See the COPYRIGHT file for details. #
10# #
11# PanACOTA is a software providing tools for large scale bacterial comparative #
12# genomics. From a set of complete and/or draft genomes, you can: #
13# - Do a quality control of your strains, to eliminate poor quality #
14# genomes, which would not give any information for the comparative study #
15# - Uniformly annotate all genomes #
16# - Do a Pan-genome #
17# - Do a Core or Persistent genome #
18# - Align all Core/Persistent families #
19# - Infer a phylogenetic tree from the Core/Persistent families #
20# #
21# PanACOTA is free software: you can redistribute it and/or modify it under the #
22# terms of the Affero GNU General Public License as published by the Free #
23# Software Foundation, either version 3 of the License, or (at your option) #
24# any later version. #
25# #
26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY #
27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License #
29# for more details. #
30# #
31# You should have received a copy of the Affero GNU General Public License #
32# along with PanACOTA (COPYING file). #
33# If not, see <https://www.gnu.org/licenses/>. #
34# ###############################################################################
36"""
37Functions to infer a phylogenetic tree with fastME
39@author gem
40June 2017
41"""
43from Bio import AlignIO
44import os
45import logging
47from PanACoTA import utils
49logger = logging.getLogger("tree.fastme")
51def run_tree(alignfile, boot, outdir, quiet, threads, **kwargs):
52 """
53 Run fastme for the given alignment file and options
55 Parameters
56 ----------
57 alignfile: str
58 Path to file containing alignments of persistent families grouped by genome
59 boot: int or None
60 Number of bootstraps to compute. None if no bootstrap asked
61 outdir: str
62 output directory to save all results
63 quiet: bool
64 True if nothing must be printed to stderr/stdout, False otherwise
65 threads: int
66 Maximum number of threads to use
67 kwargs["model"]: str
68 DNA substitution model chosen by user
69 kwargs["matrix"]: bool
70 True if matrix file must be written, False otherwise
71 kwargs["wb"]: bool
72 True if all bootstrap pseudo-trees must be saved into a file, False otherwise
73 """
74 model = kwargs["model"]
75 write_boot = kwargs["wb"]
76 write_matrix = kwargs["matrix"]
77 align_name = os.path.basename(alignfile)
78 align_phylip = os.path.join(outdir, align_name + ".phylip")
79 convert2phylip(alignfile, align_phylip)
80 run_fastme(align_phylip, boot, write_boot, write_matrix, threads, model, outdir, quiet)
83def convert2phylip(infile, outfile):
84 """
85 Input alignment is in fasta format. Input of fastME must be in Phylip-relaxed format.
86 Convert it here.
88 Parameters
89 ----------
90 infile: str
91 Path to file in fasta format
92 outfile: str
93 Path to file to generate, in Phylip-relaxed format
94 """
95 if os.path.isfile(outfile):
96 logger.info("Phylip alignment file already existing.")
97 logger.warning(("The Phylip alignment file {} already exists. The program "
98 "will use it instead of re-converting {}.").format(outfile, infile))
99 return
100 logger.info("Converting fasta alignment to PHYLIP-relaxed format.")
101 with open(infile, 'r') as input_handle, open(outfile, 'w') as output_handle:
102 alignments = AlignIO.parse(input_handle, "fasta")
103 AlignIO.write(alignments, output_handle, "phylip-relaxed")
106def run_fastme(alignfile, boot, write_boot, write_matrix, threads, model, outdir, quiet):
107 """
108 Run fastME on the given alignment.
110 Parameters
111 ----------
112 alignfile: str
113 Path to file containing alignments of persistent families grouped by genome
114 boot: int or None
115 Number of bootstraps to compute. None if no bootstrap asked
116 write_boot: bool
117 True if all bootstrap pseudo-trees must be saved into a file, False otherwise
118 threads: int
119 Maximum number of threads to use
120 model: str or None
121 DNA substitution model chosen by user. None if default one
122 outdir: str
123 output directory to save all results
124 quiet: bool
125 True if nothing must be printed to stderr/stdout, False otherwise
126 """
127 logger.info("Running FastME...")
128 bootinfo = ""
129 threadinfo = ""
130 outboot = ""
131 outmat = ""
133 # Get bootstrap information
134 if boot:
135 bootinfo = "-b {}".format(boot)
136 # Get threads information
137 if threads:
138 threadinfo = "-T {}".format(threads)
139 # Get output filename
140 align_name = os.path.basename(alignfile)
141 logfile = os.path.join(outdir, align_name + ".fastme.log")
142 treefile = os.path.join(outdir, align_name + ".fastme_tree.nwk")
143 # If bootstrap pseudo-trees must be written, define the filename here
144 if write_boot:
145 outboot = "-B " + os.path.join(outdir, align_name + ".fastme_bootstraps.nwk")
146 if write_matrix:
147 outmat = "-O " + os.path.join(outdir, align_name + ".fastme_dist-mat.txt")
148 # Put default model if not given
149 if not model:
150 model = "T"
151 cmd = (f"fastme -i {alignfile} -d {model} -n B -s {threadinfo} {bootinfo} "
152 f"-o {treefile} -I {logfile} {outboot} {outmat}")
153 logger.details(cmd)
154 if quiet:
155 fnull = open(os.devnull, 'w')
156 else:
157 fnull = None
158 error = ("Problem while running FastME. See log file ({}) for "
159 "more information.").format(logfile)
160 utils.run_cmd(cmd, error, stdout=fnull, eof=True, logger=logger, stderr=fnull)