# Source code for bioconvert.core.extensions

###########################################################################
# Bioconvert is a project to facilitate the interconversion               #
# of life science data from one format to another.                        #
#                                                                         #
# Copyright © 2018-2022  Institut Pasteur, Paris and CNRS.                #
#                                                                         #
# bioconvert is free software: you can redistribute it and/or modify      #
# it under the terms of the GNU General Public License as published by    #
# the Free Software Foundation, either version 3 of the License, or       #
# (at your option) any later version.                                     #
#                                                                         #
# bioconvert is distributed in the hope that it will be useful,           #
# but WITHOUT ANY WARRANTY; without even the implied warranty of          #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           #
# GNU General Public License for more details.                            #
#                                                                         #
# You should have received a copy of the GNU General Public License       #
# along with this program (COPYING file).                                 #
# If not, see <http://www.gnu.org/licenses/>.                             #
#                                                                         #
# Repository: https://github.com/bioconvert/bioconvert                    #
# Documentation: http://bioconvert.readthedocs.io                         #
###########################################################################
"""List of formats and associated extensions"""


class AttrDict(dict):
    """Dictionary whose items are also reachable as attributes.

    Copy from easydev package.

    Keyword arguments given to the constructor become the initial items.
    Values that are plain dictionaries (``dict``, ``OrderedDict`` or
    ``AttrDict``) are converted recursively into :class:`AttrDict`, so
    nested content can be reached with chained attribute access::

        >>> d = AttrDict(a=1, b={"c": 2})
        >>> d.a
        1
        >>> d.b.c
        2
    """

    def __init__(self, **kwargs):
        dict.__init__(self)
        # Using the mapping itself as the instance ``__dict__`` is what makes
        # ``obj.key`` and ``obj["key"]`` read and write the same storage.
        self.__dict__ = self
        self.update(kwargs)

    def update(self, content):
        """Insert the items of *content*, converting nested dictionaries.

        Unlike :meth:`dict.update`, this method takes exactly one
        positional argument, which must be a dictionary.

        :param dict content: a valid dictionary (``dict`` or any subclass,
            e.g. ``OrderedDict`` or ``AttrDict``)
        :raises TypeError: if *content* is not a dictionary
        """
        from collections import OrderedDict

        if not isinstance(content, dict):
            raise TypeError(
                "content must be a dict, OrderedDict or AttrDict, got "
                + type(content).__name__
            )

        for key, value in content.items():
            # Only plain mappings are converted; specialised dict subclasses
            # are stored untouched so they keep their own behaviour.
            if type(value) in (dict, OrderedDict, AttrDict):
                # Build the nested instance through update() rather than
                # AttrDict(**value): keyword expansion fails on keys that
                # are not strings.
                nested = AttrDict()
                nested.update(value)
                self[key] = nested
            else:
                self[key] = value


# Formats can be of type (see the per-entry comments below)
# - sequence
# - alignment
# - annotation
# - assembly
# - binary
# - compression
# - coverage
# - database
# - phylo
# - variant

#: List of formats and their extensions included in Bioconvert
# Maps each format name (key) to the list of file extensions (without the
# leading dot) associated with that format. The trailing comment on an
# entry, when present, gives the format's category.
extensions = {
    "abi": ["abi", "ab1"],  # sequence
    "agp": ["agp"],  # assembly
    "bam": ["bam"],  # alignment
    "bcf": ["bcf"],  # variant
    "bed": ["bed"],  # database
    "bedgraph": ["bedgraph", "bg"],  # database
    "bigwig": ["bigwig", "bw"],  # database
    "bigbed": ["bb", "bigbed"],
    "bz2": ["bz2"],  # compression
    "bplink": ["bplink"],
    "cdao": ["cdao"],  # phylo
    "cram": ["cram"],  # alignment
    "clustal": ["clustal", "aln", "clw"],  # phylo
    "cov": ["cov"],  # coverage (chrom name,  pos, depth)
    "csv": ["csv"],  # database
    "dsrc": ["dsrc"],  # compression
    "embl": ["embl"],  # annotation/sequence
    "ena": ["ena"],
    "faa": ["faa", "mpfa", "aa"],  # fasta multiple amino acid
    "fast5": ["fast5"],
    "fasta": ["fasta", "fa", "fst"],  # sequence
    "fastq": ["fastq", "fq"],  # sequence
    "genbank": ["genbank", "gbk", "gb"],  # annotation/sequence
    "gfa": ["gfa"],  # assembly
    "gff2": ["gff"],
    "gff3": ["gff3"],  # annotation
    "gtf": ["gtf"],  # annotation
    "gz": ["gz"],
    "json": ["json"],  # database
    "maf": ["maf"],  # !! this is MIRA format, not mutation alignment format
    "newick": ["newick", "nw", "nhx", "nwk"],  # phylo
    "nexus": ["nexus", "nx", "nex", "nxs"],  # phylo
    "ods": ["ods"],  # database
    "paf": ["paf"],  # assembly
    "pdb": ["pdb"],
    "phylip": ["phy", "ph", "phylip"],  # phylo
    "phyloxml": ["phyloxml", "xml"],  # phylo
    "plink": ["plink"],
    "pod5": ["pod5"],
    "qual": ["qual"],  # sequence
    "sam": ["sam"],  # alignment
    "scf": ["scf"],  # alignment
    "sra": ["sra"],  # sra format
    "stockholm": ["sto", "sth", "stk", "stockholm"],  # alignment
    "twobit": ["2bit"],  # sequence
    "tsv": ["tsv"],  # database
    "vcf": ["vcf"],  # variant
    # NOTE(review): the "wig" extension is listed under both "wiggle" and
    # "wig" -- confirm that the duplication is intended.
    "wiggle": ["wig", "wiggle"],
    "wig": ["wig"],
    "xls": ["xls"],  # database
    "xlsx": ["xlsx"],  # database
    "xmfa": ["xmfa"],
    "yaml": ["yaml", "YAML"],  # database
}

# Wrap the table in an AttrDict so that each entry can also be read as an
# attribute (``extensions.fasta``) in addition to ``extensions["fasta"]``.
extensions = AttrDict(**extensions)