Source code for bioconvert.sra2fastq

###########################################################################
# Bioconvert is a project to facilitate the interconversion               #
# of life science data from one format to another.                        #
#                                                                         #
# Copyright © 2018-2022  Institut Pasteur, Paris and CNRS.                #
#                                                                         #
# bioconvert is free software: you can redistribute it and/or modify      #
# it under the terms of the GNU General Public License as published by    #
# the Free Software Foundation, either version 3 of the License, or       #
# (at your option) any later version.                                     #
#                                                                         #
# bioconvert is distributed in the hope that it will be useful,           #
# but WITHOUT ANY WARRANTY; without even the implied warranty of          #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           #
# GNU General Public License for more details.                            #
#                                                                         #
# You should have received a copy of the GNU General Public License       #
# along with this program (COPYING file).                                 #
# If not, see <http://www.gnu.org/licenses/>.                             #
#                                                                         #
# Repository: https://github.com/bioconvert/bioconvert                    #
# Documentation: http://bioconvert.readthedocs.io                         #
###########################################################################
"""Convert :term:`SRA` format to :term:`FASTQ` format"""

import os
import shutil
import subprocess
import tempfile

from bioconvert import ConvBase
from bioconvert.core.decorators import requires


class SRA2FASTQ(ConvBase):
    """Download FASTQ from SRA archive

    ::

        bioconvert sra2fastq ERR043367

    This may take some time since the files are downloaded from SRA website.

    """

    #: Default value
    _default_method = "fastq_dump"

    # If test: will take only the first 10 reads from the sra file
    def __init__(self, infile, outfile, test=False):
        """.. rubric:: constructor

        https://edwards.flinders.edu.au/fastq-dump/

        library used: sra-toolkit

        :param str infile: path to a local SRA file; if no such file exists,
            its basename (without extension) is used as an SRA identifier.
        :param str outfile: path of the FASTQ file to create; a ``.gz``
            extension requests gzip-compressed output.
        :param bool test: if True, only the first 10 reads are retrieved.
        """
        super().__init__(infile, outfile)
        self.test = test

    @requires("fastq-dump")
    def _method_fastq_dump(self, *args, **kwargs):
        """Uses Sratoolkit (fastq-dump) to convert a sra file to fastq

        `Fastq-dump documentation <https://edwards.flinders.edu.au/fastq-dump/>`_

        fastq-dump writes into a temporary directory and the resulting
        file(s) are then moved to their final location. For paired data two
        files are created, ``<base>_1.fastq[.gz]`` and ``<base>_2.fastq[.gz]``,
        where ``<base>`` is the output filename stripped of its
        ``.fastq``/``.gz`` extensions; for single-end data the result is
        moved to the output filename itself.

        The temporary directory is removed even if one of the commands fails.
        """
        inname = os.path.split(os.path.splitext(self.infile)[0])[1]
        outbasename, ext = os.path.splitext(self.outfile)

        compresscmd = ""
        gzext = ""
        if ext == ".gz":
            compresscmd = "--gzip"
            gzext = ".gz"
            # Strip the second extension as well (e.g. ".fastq" in ".fastq.gz")
            outbasename = os.path.splitext(outbasename)[0]

        infile = self.infile
        # If the file does not exist locally, we take the basename
        # it should correspond to a SRA ID
        if not os.path.isfile(infile):
            infile = inname

        # If in test mode, we retrieve only 10 reads from sra
        testcmd = "-X 10" if self.test else ""

        # The context manager guarantees the directory is cleaned up on
        # success and on failure alike.
        with tempfile.TemporaryDirectory() as tmpdir:
            if self.isPairedSRA(infile):
                cmd = "fastq-dump {} {} --split-files -O {} {}".format(testcmd, compresscmd, tmpdir, infile)
                self.execute(cmd)
                # One output file per mate
                for mate in ("1", "2"):
                    cmd = "mv {}/{}_{}.fastq{} {}_{}.fastq{}".format(
                        tmpdir, inname, mate, gzext, outbasename, mate, gzext
                    )
                    self.execute(cmd)
            else:
                cmd = "fastq-dump {} {} -O {} {}".format(testcmd, compresscmd, tmpdir, infile)
                self.execute(cmd)
                cmd = "mv {}/{}.fastq{} {}".format(tmpdir, inname, gzext, self.outfile)
                self.execute(cmd)

    def isPairedSRA(self, filename):
        """Tell whether an SRA entry contains paired reads.

        fastq-dump is run on *filename* with ``-X 1 -Z --split-spot`` and the
        number of newlines in its captured output is counted: 4 lines (one
        FASTQ record) means single-end, 8 lines (two records) means paired.

        :param str filename: local SRA file or SRA identifier.
        :return: True if the data are paired, False otherwise.
        :raises Exception: if fastq-dump exits with an error or its output
            has an unexpected number of lines.
        """
        try:
            contents = subprocess.check_output(["fastq-dump", "-X", "1", "-Z", "--split-spot", filename])
        except subprocess.CalledProcessError as err:
            raise Exception("Error running fastq-dump on {}".format(filename)) from err

        nlines = contents.count(b"\n")
        if nlines == 4:
            return False
        if nlines == 8:
            return True
        raise Exception("Unexpected output from fastq-dump on {}".format(filename))