Coverage for PanACoTA/tree_module/quicktree_func.py: 100%
34 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-20 14:37 +0000
1#!/usr/bin/env python3
2# coding: utf-8
4# ###############################################################################
5# This file is part of PanACOTA. #
6# #
7# Authors: Amandine Perrin #
8# Copyright © 2018-2020 Institut Pasteur (Paris). #
9# See the COPYRIGHT file for details. #
10# #
11# PanACOTA is a software providing tools for large scale bacterial comparative #
12# genomics. From a set of complete and/or draft genomes, you can: #
13# - Do a quality control of your strains, to eliminate poor quality #
14# genomes, which would not give any information for the comparative study #
15# - Uniformly annotate all genomes #
16# - Do a Pan-genome #
17# - Do a Core or Persistent genome #
18# - Align all Core/Persistent families #
19# - Infer a phylogenetic tree from the Core/Persistent families #
20# #
21# PanACOTA is free software: you can redistribute it and/or modify it under the #
22# terms of the Affero GNU General Public License as published by the Free #
23# Software Foundation, either version 3 of the License, or (at your option) #
24# any later version. #
25# #
26# PanACOTA is distributed in the hope that it will be useful, but WITHOUT ANY #
27# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
28# FOR A PARTICULAR PURPOSE. See the Affero GNU General Public License #
29# for more details. #
30# #
31# You should have received a copy of the Affero GNU General Public License #
32# along with PanACOTA (COPYING file). #
33# If not, see <https://www.gnu.org/licenses/>. #
34# ###############################################################################
36"""
37Functions to infer a phylogenetic tree with quicktree
39@author gem
40June 2017
41"""
43from Bio import AlignIO
44import os
45import logging
47from PanACoTA import utils
49logger = logging.getLogger("tree.quicktree")
def run_tree(alignfile, boot, outdir, *args, **kwargs):
    """
    Infer a tree with quicktree from a fasta alignment.

    The alignment is first converted to Stockholm format (file written in 'outdir',
    named after the alignment file with a '.stockholm' suffix), then quicktree is run
    on the converted file.

    Parameters
    ----------
    alignfile: str
        Path to file containing alignments of persistent families grouped by genome
    boot: int or None
        Number of bootstraps to compute. None if no bootstrap asked
    outdir: str
        Directory in which the Stockholm alignment is written. It is also forwarded
        to 'run_quicktree' as its output directory
    args: tuple
        Ignored. Only accepted so that the signature is compatible with the 'run_tree'
        function of other softs like fastME and fastTree, which require more arguments
        (DNA substitution model, number of threads to use, etc.)
    kwargs: dict
        Ignored. Accepted for the same compatibility reason as 'args'
    """
    # Stockholm file sits in outdir and keeps the name of the original alignment
    stockholm_path = os.path.join(outdir, f"{os.path.basename(alignfile)}.stockholm")
    convert2stockholm(alignfile, stockholm_path)
    run_quicktree(stockholm_path, boot, outdir)
def convert2stockholm(infile, outfile):
    """
    Input alignment is in fasta format. Input of quicktree must be in stockholm format.
    Convert it here.

    If 'outfile' already exists, nothing is converted: a warning is logged and the
    existing file is kept as is.

    If the conversion fails, the (empty or partial) output file is removed before the
    error is re-raised. Otherwise, the next run would find it and reuse it instead of
    converting again.

    Parameters
    ----------
    infile: str
        Path to file containing alignments in fasta
    outfile: str
        Path to file which will contain the alignments converted to Stockholm format
    """
    if os.path.isfile(outfile):
        logger.info("Stockholm alignment file already existing.")
        logger.warning(("The Stockholm alignment file {} already exists. The program "
                        "will use it instead of re-converting {}.").format(outfile, infile))
        return
    logger.info("Converting fasta alignment to stockholm format.")
    try:
        with open(infile, 'r') as input_handle, open(outfile, 'w') as output_handle:
            alignments = AlignIO.parse(input_handle, "fasta")
            AlignIO.write(alignments, output_handle, "stockholm")
    except BaseException:
        # 'outfile' did not exist when we entered (checked above), so whatever is there
        # now is an incomplete result of this call: do not leave it for a later run.
        if os.path.isfile(outfile):
            os.remove(outfile)
        raise
def run_quicktree(alignfile, boot, outdir):
    """
    Run quicktree on the given alignment.

    Two files are created in 'outdir', both named after the alignment file:

    - '<alignment name>.quicktree_tree.nwk': receives the standard output of the command
    - '<alignment name>.quicktree.log': receives the standard error of the command

    Parameters
    ----------
    alignfile: str
        Path to file containing alignments of persistent families grouped by genome,
        in Stockholm format
    boot: int or None
        Number of bootstraps to compute. None (or 0) if no bootstrap asked
    outdir: str
        Path to the directory in which the tree file and the log file are created
    """
    logger.info("Running Quicktree...")

    # Get bootstrap information (empty when no bootstrap is asked)
    bootinfo = f"-boot {boot}" if boot else ""

    # Get output filename and logfile name
    align_name = os.path.basename(alignfile)
    logfile = os.path.join(outdir, align_name + ".quicktree.log")
    treefile = os.path.join(outdir, align_name + ".quicktree_tree.nwk")

    cmd = f"quicktree -in a -out t {bootinfo} {alignfile}"
    error = (f"Problem while running quicktree. See log file ({logfile}) for "
             "more information.")

    # Open both files in a 'with' block so that they are flushed and closed whatever
    # happens in run_cmd (normal return, exception or exit).
    with open(treefile, "w") as outfile, open(logfile, "w") as logfilef:
        logger.details(cmd)
        utils.run_cmd(cmd, error, stdout=outfile, eof=True, logger=logger, stderr=logfilef)