###########################################################################
# Bioconvert is a project to facilitate the interconversion #
# of life science data from one format to another. #
# #
# Copyright © 2018-2022 Institut Pasteur, Paris and CNRS. #
# #
# bioconvert is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# bioconvert is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program (COPYING file). #
# If not, see <http://www.gnu.org/licenses/>. #
# #
# Repository: https://github.com/bioconvert/bioconvert #
# Documentation: http://bioconvert.readthedocs.io #
###########################################################################
"""Network tools to manipulate the graph of conversion"""
from os import environ
from bioconvert.core.registry import Registry
from bioconvert.core.utils import TempFile
import colorlog
# Module-level logger obtained from colorlog and named after this module.
_log = colorlog.getLogger(__name__)
# Public names exported by a star-import of this module.
__all__ = ["create_graph", "get_conversions_wrapped", "create_graph_for_cytoscape"]
def _color_nodes_by_degree(dg):
    """Set each node's ``fillcolor`` from its degree (white < 5 <= yellow < 10 <= orange < 20 <= red)."""
    # NOTE(review): this also recolours the small "and" junction nodes, which
    # overrides the black fillcolor they are created with -- confirm intent.
    for name in dg.nodes():
        degree = dg.degree(name)
        if degree < 5:
            fillcolor = "white"
        elif degree < 10:
            fillcolor = "yellow"
        elif degree < 20:
            fillcolor = "orange"
        else:
            fillcolor = "red"
        dg.get_node(name).attr["fillcolor"] = fillcolor


def _add_format_clusters(dg):
    """Group a fixed list of formats into labelled, coloured ``cluster_*`` subgraphs of *dg*."""
    # Each row: (subgraph name, attributes passed to ``subgraph()``,
    #            attributes set through ``graph_attr``, member node names).
    clusters = [
        (
            "cluster_sequencing",
            {"shape": "circle"},
            {"fillcolor": "#A569BD", "label": "Sequencing"},
            [
                "FASTQ",
                "FASTA",
                "SRA",
                "FAA",
                "AGP",
                "TWOBIT",
                "ABI",
                "QUAL",
                "FASTA_QUAL_and_FASTQ",
                "FASTQ_and_FASTA_QUAL",
                "FASTA_and_FASTA_AGP",
            ],
        ),
        (
            "cluster_alignment",
            {},
            {"shape": "circle", "fillcolor": "#D2B4DE", "label": "Alignment"},
            ["SAM", "BAM", "CRAM", "PAF", "MAF"],
        ),
        (
            "cluster_phylo",
            {},
            {"shape": "box", "fillcolor": "#BB8FCE", "label": "Phylogeny"},
            ["NEXUS", "PHYLOXML", "CLUSTAL", "NEWICK", "PHYLIP", "STOCKHOLM"],
        ),
        (
            "cluster_variant",
            {},
            {"shape": "box", "fillcolor": "#F4ECF7", "label": "Variant"},
            ["VCF", "BCF", "PLINK", "BPLINK"],
        ),
        (
            "cluster_annotation",
            {},
            {"shape": "box", "fillcolor": "#D2B4DE", "label": "Annotation"},
            ["GENBANK", "EMBL", "GFF3", "GFF2"],
        ),
        (
            "cluster_comp",
            {},
            {"shape": "box", "fillcolor": "#E9DAEF", "label": "Compression"},
            ["GZ", "DSRC", "BZ2"],
        ),
        (
            "cluster_cov",
            {},
            {"shape": "box", "fillcolor": "#E9DAEF", "label": "Coverage"},
            ["COV", "BED", "BEDGRAPH", "WIG", "BIGWIG", "WIGGLE", "BIGBED"],
        ),
        (
            "cluster_ass",
            {},
            {"shape": "box", "fillcolor": "#BB8FCE", "label": "Assembly"},
            ["GFA", "SCF"],
        ),
    ]
    for cluster_name, subgraph_attrs, graph_attrs, members in clusters:
        with dg.subgraph(name=cluster_name, **subgraph_attrs) as c:
            c.graph_attr.update(style="filled", color="lightgrey", **graph_attrs)
            c.add_nodes_from(members)


def _create_graph_with_pygraphviz(rr, filename, layout, color_for_disabled_converter, include_subgraph):
    """Build, lay out and draw the conversion graph with pygraphviz; return the ``AGraph``."""
    # local import because optional for bioconvert
    _log.info("Switching to pygraphviz")
    from pygraphviz import AGraph

    dg = AGraph(directed=True)
    url = "https://bioconvert.readthedocs.io/en/main/formats.html#{}"

    for a, b, s in rr.get_all_conversions():
        edge_color = "black" if s else color_for_disabled_converter
        if len(a) == 1 and len(b) == 1:
            # Plain one-to-one conversion: one rectangle per format, one edge.
            for fmt in (a[0], b[0]):
                dg.add_node(
                    fmt,
                    shape="rectangle",
                    style="filled",
                    url=url.format(fmt.upper()),
                )
            dg.add_edge(a[0], b[0], alpha=0.5, color=edge_color, minlen=1)
        else:
            # Several inputs and/or outputs: route every edge through a small
            # intermediate junction node named after all the formats involved.
            and_node = "_".join(a) + "_and_" + "_".join(b)
            dg.add_node(
                and_node,
                label="",
                fillcolor="black",
                width=0.1,
                height=0.1,
                # was ``styled=``, which is not a graphviz attribute
                style="filled",
                fixedsize=True,
                shape="circle",
            )
            for this in a:
                dg.add_edge(this, and_node, color=edge_color)
            for this in b:
                dg.add_edge(and_node, this, color=edge_color)

    _color_nodes_by_degree(dg)
    if include_subgraph:
        _add_format_clusters(dg)

    dg.layout(layout)
    dg.draw(filename)
    # The dot source is always written to this fixed name in the current directory.
    dg.write("conversion.dot")
    return dg


def create_graph(
    filename, layout="dot", use_singularity=False, color_for_disabled_converter="red", include_subgraph=False
):
    """Draw the graph of all conversions known to the registry into *filename*.

    pygraphviz is tried first. If *filename* ends in ``.dot``, if
    *use_singularity* is True, or if the pygraphviz attempt raises for any
    reason (including pygraphviz not being installed), a plain dot
    description is written to a temporary file and rendered by calling the
    ``dot`` executable, optionally through a singularity container.

    :param filename: should end in .png or .svg or .dot
        If extension is .dot, the pygraphviz step is skipped and the graph is
        produced by the plain ``dot`` fallback, without annotations.
        This is useful if you have issues installing graphviz.
        If so, under Linux you could use our singularity container
        see github.com/cokelaer/graphviz4all
    :param layout: layout program name handed to pygraphviz
        (ignored by the fallback, which always calls ``dot``).
    :param use_singularity: if True, skip pygraphviz and run ``dot`` through
        the downloaded ``graphviz.simg`` singularity image. If the image
        cannot be obtained, or when the ``READTHEDOCS`` environment variable
        equals ``"True"``, the local ``dot`` executable is used instead.
    :param color_for_disabled_converter: edge colour for conversions whose
        status flag is false (pygraphviz only).
    :param include_subgraph: if True, group a fixed list of formats into
        coloured clusters (pygraphviz only).
    :return: the pygraphviz ``AGraph`` when pygraphviz was used, otherwise None.
    """
    rr = Registry()

    if not (filename.endswith(".dot") or use_singularity is True):
        try:
            return _create_graph_with_pygraphviz(
                rr, filename, layout, color_for_disabled_converter, include_subgraph
            )
        except Exception as err:
            # Deliberate best-effort: any pygraphviz problem (missing package,
            # layout or drawing failure) falls through to the dot fallback.
            _log.warning("pygraphviz rendering failed, falling back to dot: %s", err)

    _log.info("Switching to local dot and singularity")

    conversions = list(rr.get_all_conversions())
    # Only input formats are declared explicitly; dot creates any node that
    # is merely referenced by an edge. Sorted for a reproducible dot file.
    nodes = {fmt for inputs, _outputs, _status in conversions for fmt in inputs}
    dot_parts = ['\nstrict digraph{\nnode [label="\\N"];\n']
    dot_parts.extend('"{}";\n'.format(node) for node in sorted(nodes))
    # NOTE(review): only the first input and first output of each conversion
    # are drawn here, so multi-format conversions are simplified.
    dot_parts.extend('"{}" -> "{}";\n'.format(a[0], b[0]) for a, b, _status in conversions)
    dot_parts.append("}\n")
    dot = "".join(dot_parts)

    from bioconvert import shell

    dotfile = TempFile(suffix=".dot")
    with open(dotfile.name, "w") as fout:
        fout.write(dot)

    # Command prefix placed in front of "dot"; empty means the local executable.
    dotpath = ""
    if use_singularity:
        from bioconvert.core.downloader import download_singularity_image

        try:
            singfile = download_singularity_image(
                "graphviz.simg",
                "shub://cokelaer/graphviz4all:v1",
                "4288088d91c848e5e3a327282a1ab3d1",
            )
        except Exception:
            print(
                "Warning ! Singularity must be installed if you want to you used it ! Switching to local graphviz executable if available"
            )
        else:
            dotpath = "singularity run {} ".format(singfile)

    # On ReadTheDocs the local dot executable is always used.
    on_rtd = environ.get("READTHEDOCS", None) == "True"
    if on_rtd:
        dotpath = ""

    # The output format given to dot is the file extension; a filename
    # without any "." raises IndexError here.
    ext = filename.rsplit(".", 1)[1]
    cmd = "{}dot -T{} {} -o {}".format(dotpath, ext, dotfile.name, filename)
    try:
        shell(cmd)
    except Exception:
        # Last resort: run the very same command through os.system.
        import os

        os.system(cmd)
def get_conversions_wrapped(registry, all_conversions=False):
    """Yield ``(input, output, status)`` triplets from *registry*.

    :param registry: object providing ``get_all_conversions()`` (iterable of
        3-item entries) and ``get_conversions()`` (iterable of 2-item entries).
    :param all_conversions: if true, forward the triplets of
        ``get_all_conversions()`` unchanged; otherwise take the pairs of
        ``get_conversions()`` and attach ``True`` as their status.
    """
    if not all_conversions:
        yield from ((source, target, True) for source, target in registry.get_conversions())
        return
    yield from ((source, target, status) for source, target, status in registry.get_all_conversions())
def create_graph_for_cytoscape(all_converter=False):
    """Build a nested dict describing the conversion graph as nodes and edges.

    :param all_converter: use all converters or only the ones
        available in the current installation
    :return: a dict of the form
        ``{"data": {"selected": False}, "elements": {"nodes": [...], "edges": [...]}}``.
        Each node is ``{"data": {"id": "n<k>", "name": <format>}}`` and each
        edge is ``{"data": {"id": "e<k>", "source": <node id>, "target": <node id>}}``,
        where ``<k>`` is the creation rank of the node or edge.
    """
    from bioconvert.core.registry import Registry

    registry = Registry()
    graph_nodes = []
    graph_edges = []
    graph = {
        "data": {
            "selected": False,
        },
        "elements": {
            "nodes": graph_nodes,
            "edges": graph_edges,
        },
    }
    # Maps a format to the node already created for it.
    nodes = {}

    def get_or_create(fmt):
        """Return the node registered for *fmt*, creating it on first use."""
        try:
            return nodes[fmt]
        except KeyError:
            # Only a missing key means "not created yet"; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            ret = {
                "data": {
                    "id": "n" + str(len(nodes)),
                    "name": fmt,
                }
            }
            nodes[fmt] = ret
            graph_nodes.append(ret)
            return ret

    for i, o, _ in get_conversions_wrapped(registry, all_converter):
        i_as_node = get_or_create(i)
        o_as_node = get_or_create(o)
        graph_edges.append(
            {
                "data": {
                    "id": "e" + str(len(graph_edges)),
                    "source": i_as_node["data"]["id"],
                    "target": o_as_node["data"]["id"],
                }
            }
        )
    return graph