Source code for bioconvert.nexus2fasta

###########################################################################
# Bioconvert is a project to facilitate the interconversion               #
# of life science data from one format to another.                        #
#                                                                         #
# Copyright © 2018-2022  Institut Pasteur, Paris and CNRS.                #
#                                                                         #
# bioconvert is free software: you can redistribute it and/or modify      #
# it under the terms of the GNU General Public License as published by    #
# the Free Software Foundation, either version 3 of the License, or       #
# (at your option) any later version.                                     #
#                                                                         #
# bioconvert is distributed in the hope that it will be useful,           #
# but WITHOUT ANY WARRANTY; without even the implied warranty of          #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           #
# GNU General Public License for more details.                            #
#                                                                         #
# You should have received a copy of the GNU General Public License       #
# along with this program (COPYING file).                                 #
# If not, see <http://www.gnu.org/licenses/>.                             #
#                                                                         #
# Repository: https://github.com/bioconvert/bioconvert                    #
# Documentation: http://bioconvert.readthedocs.io                         #
###########################################################################

"""Convert :term:`NEXUS` to :term:`FASTA` format"""

import colorlog

from bioconvert import ConvBase
from bioconvert.core.decorators import compressor, requires

_log = colorlog.getLogger(__name__)


__all__ = ["NEXUS2FASTA"]


[docs]class NEXUS2FASTA(ConvBase): """ Converts a sequence alignment from :term:`NEXUS` format to :term:`FASTA` format. """ #: Default value _default_method = "biopython" def __init__(self, infile, outfile=None, alphabet=None, *args, **kwargs): """.. rubric:: constructor :param str infile: input :term:`NEXUS` file. :param str outfile: (optional) output :term:`FASTA` file """ super().__init__(infile, outfile) self.alphabet = alphabet
[docs] @requires("goalign") @compressor def _method_goalign(self, *args, **kwargs): """Convert :term:`NEXUS` interleaved file in :term:`FASTA` format using goalign tool. `goalign documentation <https://github.com/fredericlemoine/goalign>`_ .. warning:: the sequential format is not supported """ self.install_tool("goalign") cmd = "goalign reformat fasta x -i {infile} -o {outfile} -x".format(infile=self.infile, outfile=self.outfile) self.execute(cmd)
[docs] @requires(python_library="biopython") @compressor def _method_biopython(self, *args, **kwargs): """Convert :term:`NEXUS` interleaved or sequential file in :term:`FASTA` format using biopython. The FASTA output file will be an aligned FASTA file. `Bio.AlignIO <https://biopython.org/docs/1.76/api/Bio.AlignIO.html>`_ For instance: We have a Nexus input file that look like :: #NEXUS [TITLE: Test file] begin data; dimensions ntax=3 nchar=123; format interleave datatype=DNA missing=N gap=-; matrix read3 -AT--------CCCGCTCGATGGGCCTCATTGCGTCCACTAGTTGATCTT read2 -----------------------GGAAGCCCACGCCACGGTCTTGATACG read4 ---------------------AGGGATGAACGATGCTCGCAGTTGATGCT read3 CTGGAGTAT---T----TAGGAAAGCAAGTAAACTCCTTGTACAAATAAA read2 AATTTTTCTAATGGCTATCCCTACATAACCTAACCGGGCATGTAATGTGT read4 CAGAAGTGCCATTGCGGTAGAAACAAATGTTCCCAGATTGTTGACTGATA read3 GATCTTA-----GATGGGCAT-- read2 CACCGTTGTTTCGACGTAAAGAG read4 AGTAGGACCTCAGTCGTGACT-- ; end; begin assumptions; options deftype=unord; end; the output file will look like :: >read3 -AT--------CCCGCTCGATGGGCCTCATTGCGTCCACTAGTTGATCTTCTGGAGTAT- --T----TAGGAAAGCAAGTAAACTCCTTGTACAAATAAAGATCTTA-----GATGGGCA T-- >read2 -----------------------GGAAGCCCACGCCACGGTCTTGATACGAATTTTTCTA ATGGCTATCCCTACATAACCTAACCGGGCATGTAATGTGTCACCGTTGTTTCGACGTAAA GAG >read4 ---------------------AGGGATGAACGATGCTCGCAGTTGATGCTCAGAAGTGCC ATTGCGGTAGAAACAAATGTTCCCAGATTGTTGACTGATAAGTAGGACCTCAGTCGTGAC T-- and not :: >read3 ATCCCGCTCGATGGGCCTCATTGCGTCCACTAGTTGATCTTCTGGAGTATTTAGGAAAGC AAGTAAACTCCTTGTACAAATAAAGATCTTAGATGGGCAT >read2 GGAAGCCCACGCCACGGTCTTGATACGAATTTTTCTAATGGCTATCCCTACATAACCTAA CCGGGCATGTAATGTGTCACCGTTGTTTCGACGTAAAGAG >read4 AGGGATGAACGATGCTCGCAGTTGATGCTCAGAAGTGCCATTGCGGTAGAAACAAATGTT CCCAGATTGTTGACTGATAAGTAGGACCTCAGTCGTGACT""" from Bio import AlignIO with open(self.outfile, "w") as output_handle: alignments = list(AlignIO.parse(self.infile, "nexus")) AlignIO.write(alignments, output_handle, "fasta")
[docs] @requires("squizz") @compressor def _method_squizz(self, *args, **kwargs): """Convert :term:`NEXUS` sequential or interleave file in :term:`FASTA` format using squizz tool. command used:: squizz -c FASTA infile > outfile """ cmd = "squizz -c FASTA {infile} > {outfile}".format(infile=self.infile, outfile=self.outfile) self.execute(cmd)