Source code for bioconvert.genbank2fasta

###########################################################################
# Bioconvert is a project to facilitate the interconversion               #
# of life science data from one format to another.                        #
#                                                                         #
# Copyright © 2018-2022  Institut Pasteur, Paris and CNRS.                #
#                                                                         #
# bioconvert is free software: you can redistribute it and/or modify      #
# it under the terms of the GNU General Public License as published by    #
# the Free Software Foundation, either version 3 of the License, or       #
# (at your option) any later version.                                     #
#                                                                         #
# bioconvert is distributed in the hope that it will be useful,           #
# but WITHOUT ANY WARRANTY; without even the implied warranty of          #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           #
# GNU General Public License for more details.                            #
#                                                                         #
# You should have received a copy of the GNU General Public License       #
# along with this program (COPYING file).                                 #
# If not, see <http://www.gnu.org/licenses/>.                             #
#                                                                         #
# Repository: https://github.com/bioconvert/bioconvert                    #
# Documentation: http://bioconvert.readthedocs.io                         #
###########################################################################
"""Convert :term:`GENBANK` to :term:`FASTA` format"""

from bioconvert import ConvBase
from bioconvert.core.decorators import compressor, requires, requires_nothing
from bioconvert.io.genbank import Genbank

__all__ = ["GENBANK2FASTA"]


class GENBANK2FASTA(ConvBase):
    """Convert :term:`GENBANK` file to :term:`FASTA` file

    Methods are based on biopython [BIOPYTHON]_, squizz [SQUIZZ] and our own
    Bioconvert implementation.

    """

    #: Default value
    _default_method = "biopython"

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input GENBANK file
        :param str outfile: output FASTA filename
        """
        super().__init__(infile, outfile, *args, **kargs)

    # squizz works as well but keeps lower cases while biopython uses upper
    # cases
    @requires("squizz")
    @compressor
    def _method_squizz(self, *args, **kwargs):
        """Convert using the external ``squizz`` executable.

        Header is less informative than the one obtained with biopython.
        """
        # Local import: keeps this method self-contained without touching the
        # module-level import block.
        import shlex

        # The command relies on a shell redirection (``>``), so it is
        # interpreted by a shell; quote both paths so that names containing
        # spaces or shell metacharacters are passed through verbatim.
        cmd = "squizz -f genbank -c fasta {} > {} ".format(
            shlex.quote(str(self.infile)), shlex.quote(str(self.outfile))
        )
        self.execute(cmd)

    @requires(python_library="biopython")
    @compressor
    def _method_biopython(self, *args, **kwargs):
        """For this method we use the biopython package Bio.SeqIO.

        `Bio.SeqIO Documentation <https://biopython.org/docs/1.76/api/Bio.SeqIO.html>`_"""
        from Bio import SeqIO

        SeqIO.convert(self.infile, "genbank", self.outfile, "fasta")

    @requires_nothing
    @compressor
    def _method_python(self, *args, **kwargs):
        """Internal method.

        Iterates over the entries produced by :class:`Genbank` and writes one
        FASTA record per entry that has an ``ORIGIN`` field. The header is the
        ``VERSION`` id (falling back to the ``LOCUS`` id) followed by the
        ``DEFINITION`` text when present. Entries without ``ORIGIN`` are
        skipped and reported on standard output.
        """
        reader = Genbank(self.infile)

        with open(self.outfile, "w") as writer:
            for idx, entry in enumerate(reader.read()):
                if "ORIGIN" not in entry:
                    # No sequence to emit for this entry: report it and go on.
                    print(
                        "Impossible to create a sequence for the entry number {}. Sequence not found after the keyword ORIGIN".format(
                            idx
                        )
                    )
                    continue

                # Prefer the versioned accession; fall back to the locus name.
                if "VERSION" in entry:
                    identifier = entry["VERSION"]["id"]
                else:
                    identifier = entry["LOCUS"]["id"]
                description = entry["DEFINITION"] if "DEFINITION" in entry else ""

                writer.write(
                    ">{} {}\n{}\n".format(identifier, description, entry["ORIGIN"])
                )