Source code for bioconvert.cram2fasta
###########################################################################
# Bioconvert is a project to facilitate the interconversion #
# of life science data from one format to another. #
# #
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
# #
# bioconvert is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# bioconvert is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program (COPYING file). #
# If not, see <http://www.gnu.org/licenses/>. #
# #
# Repository: https://github.com/bioconvert/bioconvert #
# Documentation: http://bioconvert.readthedocs.io #
###########################################################################
"""Convert :term:`CRAM` file to :term:`FASTA` format"""
import os
import subprocess
import colorlog
from bioconvert import ConvBase
from bioconvert.core.decorators import requires
from bioconvert.core.utils import get_extension
logger = colorlog.getLogger(__name__)
class CRAM2FASTA(ConvBase):
    """Convert :term:`CRAM` file to :term:`FASTA` file

    Methods available are based on samtools [SAMTOOLS]_.

    Single-end input is written to the requested output file. For paired
    input two files are written instead, named after the output file with
    ``_1`` and ``_2`` inserted before the FASTA extension. When the output
    filename ends with ``.gz`` or ``.bz2`` the produced file(s) are compressed
    afterwards with ``gzip`` or ``pbzip2`` respectively.
    """

    #: Default value
    _default_method = "samtools"
    # NOTE(review): presumably read by ConvBase to advertise thread support;
    # the samtools method below never passes a thread count -- confirm.
    _threading = True

    def __init__(self, infile, outfile, *args, **kargs):
        """.. rubric:: constructor

        :param str infile: input CRAM file
        :param str outfile: output FASTA filename
        """
        super().__init__(infile, outfile, *args, **kargs)

    def _count_paired_reads(self):
        """Return the output of ``samtools view -c -f 1`` for the input file.

        The command is given as an argument list (no shell, no whitespace
        splitting) so input paths containing spaces are passed intact.

        :return: stripped standard output of the command; ``"0"`` is what the
            caller treats as "not paired". The string is empty when samtools
            printed nothing, e.g. because it failed.
        :rtype: str
        """
        probe = subprocess.run(
            ["samtools", "view", "-c", "-f", "1", self.infile],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
        if probe.returncode != 0:
            # Keep the historical fall-through (anything but "0" is handled
            # as paired) but stop hiding the reason the probe failed.
            logger.warning(
                "samtools view failed on %s (exit code %s): %s",
                self.infile,
                probe.returncode,
                probe.stderr.strip(),
            )
        return probe.stdout.strip()

    @requires("samtools")
    def _method_samtools(self, *args, **kwargs):
        """Do the conversion :term:`CRAM` -> :term:`FASTA` using samtools

        `SAMtools documentation <http://www.htslib.org/doc/samtools.html>`_

        .. note:: fasta are on one line
        """
        # Function-scope import: only this method needs shlex.
        import shlex

        quote = shlex.quote
        # The commands below are shell strings (the single-end one relies on
        # ``>`` redirection), so every path is quoted before interpolation.
        infile = quote(self.infile)

        # Test if input CRAM file is paired
        single_end = self._count_paired_reads() == "0"

        # Pick the compressor from the last extension of the output name.
        # FIXME: this compression code may be factorised ?
        compress_ext = os.path.splitext(self.outfile)[1]
        compresscmd = {".gz": "gzip -f", ".bz2": "pbzip2 -f"}.get(compress_ext)
        output_ext = get_extension(self.outfile, remove_compression=True)

        # Path samtools writes to before any compression. Only the trailing
        # compression suffix is removed, so dots elsewhere in the path
        # (directories, sample names) are preserved.
        if compresscmd is None:
            uncompressed = self.outfile
        else:
            uncompressed = os.path.splitext(self.outfile)[0]

        if single_end:
            # gzip/pbzip2 append their own suffix, so compressing this file
            # yields exactly self.outfile.
            targets = [uncompressed]
            self.execute("samtools fasta {} > {}".format(infile, quote(uncompressed)))
        else:
            # One file per mate: <base>_1.<ext> and <base>_2.<ext>
            outbasename = os.path.splitext(uncompressed)[0]
            targets = [
                "{}_{}.{}".format(outbasename, mate, output_ext) for mate in (1, 2)
            ]
            self.execute(
                "samtools fasta -1 {} -2 {} -n {}".format(
                    quote(targets[0]), quote(targets[1]), infile
                )
            )

        if compresscmd is not None:
            for target in targets:
                self.execute("{} {}".format(compresscmd, quote(target)))