"""This module contains a Sphinx_ extension for documenting YMP stages and
Snakemake_ rules.

The `SnakemakeDomain` (name **sm**) provides the following directives:

.. rst:directive:: .. sm:rule:: name

   Describes a `Snakemake rule <snakefiles-rules>`

.. rst:directive:: .. sm:stage:: name

   Describes a `YMP Stage <Stage>`

Both directives accept an optional ``source`` parameter. If given, a
link to the source code of the stage or rule definition will be added.
The format of the string passed is ``filename:line``. Referenced
Snakefiles will be highlighted with pygments and added to the
documentation when building HTML.

The extension also provides an autodoc-like directive:

.. rst:directive:: .. autosnake:: filename

   Generates documentation from Snakefile ``filename``.

.. _Sphinx: http://sphinx-doc.org
.. _Snakemake: http://snakemake.readthedocs.io
"""
import os
from textwrap import dedent, indent
from typing import List, Optional
from docutils import nodes
from docutils.parsers import rst
from docutils.statemachine import StringList
from snakemake.rules import Rule
from sphinx import addnodes
from sphinx.application import Sphinx
from sphinx.directives import ObjectDescription
from sphinx.domains import Domain, ObjType
from sphinx.environment import BuildEnvironment
from sphinx.environment.collectors import EnvironmentCollector
from sphinx.roles import XRefRole
from sphinx.util import logging, ws_re
from sphinx.util.nodes import make_refnode
import ymp
from ymp.snakemake import ExpandableWorkflow
from ymp.snakemakelexer import SnakemakeLexer
from ymp.stage import Stage
try:
    # ``logging`` is the module imported from ``sphinx.util`` above.
    logger = logging.getLogger(__name__)
except AttributeError:
    # Fall back to normal logging
    import logging as _logging
    logger = _logging.getLogger(__name__)


#: str: Path in which YMP package is located
BASEPATH = os.path.dirname(os.path.dirname(ymp.__file__))
def relpath(path: str) -> str:
    """Express *path* relative to the directory stored in ``BASEPATH``

    Args:
      path: absolute path

    Returns:
      path relative to BASEPATH
    """
    relative = os.path.relpath(path, start=BASEPATH)
    return relative
class YmpObjectDescription(ObjectDescription):
    """
    Base class for RSt directives in SnakemakeDomain

    Since this inherits from Sphinx' ObjectDescription, content
    generated by the directive will always be inside an addnodes.desc.

    Args:
      source: Specify source position as ``file:line`` to create link
    """
    #: str: Annotation placed in front of the object name in the signature
    typename = "[object name]"

    option_spec = {
        # source link (<filename>:<lineno>)
        'source': rst.directives.unchanged
    }

    def handle_signature(self, sig: str, signode: addnodes.desc) -> str:
        """
        Parse rule signature *sig* into RST nodes and append them
        to *signode*.

        The return value identifies the object and is passed to
        :meth:`add_target_and_index()` unchanged

        Args:
          sig: Signature string (i.e. string passed after directive)
          signode: Node created for object signature

        Returns:
          Normalized signature (white space removed)
        """
        signode += addnodes.desc_annotation(self.typename, self.typename+" ")
        signode += addnodes.desc_name(sig, sig)

        if 'source' in self.options:
            self.add_source_link(signode)

        sigid = ws_re.sub('', sig)
        return sigid

    def add_source_link(self, signode: addnodes.desc) -> None:
        """
        Add link to source code to *signode*

        The ``source`` option is expected as ``filename:line``. The file
        name is also recorded in ``env._snakefiles``.

        Raises:
          ValueError: if the ``source`` option contains no ``:``
        """
        # Split at the last colon only: the line number never contains
        # one, but the file name may (e.g. a drive letter).
        filename, lineno = self.options['source'].rsplit(':', 1)

        if not hasattr(self.env, '_snakefiles'):
            self.env._snakefiles = set()
        self.env._snakefiles.add(filename)

        onlynode = addnodes.only(expr='html')  # show only in html
        onlynode += nodes.reference(
            '',
            refuri='_snakefiles/{}.html#line-{}'.format(filename, lineno)
        )
        onlynode[0] += nodes.inline('', '[source]',
                                    classes=['viewcode-link'])
        signode += onlynode

    def add_target_and_index(self, name: str, sig: str,
                             signode: addnodes.desc) -> None:
        """
        Add cross-reference IDs and entries to ``self.indexnode``

        Args:
          name: Object identifier as returned by :meth:`handle_signature`
          sig: Signature string as written in the document
          signode: Node created for object signature
        """
        targetname = "-".join((self.objtype, name))
        if targetname not in self.state.document.ids:
            signode['names'].append(targetname)
            signode['ids'].append(targetname)
            signode['first'] = (not self.names)
            self.state.document.note_explicit_target(signode)

            objects = self.env.domaindata[self.domain]['objects']
            key = (self.objtype, name)
            if key in objects:
                # ``env.warn`` is gone from current Sphinx; report through
                # the module logger instead. The position of *this*
                # directive goes into ``location``; the message names the
                # document holding the earlier description.
                # NOTE(review): ``location`` is understood by Sphinx'
                # logger only, not by the stdlib fallback logger.
                logger.warning(
                    'duplicate description of %s %s, other instance in %s',
                    self.objtype, name,
                    self.env.doc2path(objects[key][0]),
                    location=(self.env.docname, self.lineno))
            objects[key] = (self.env.docname, targetname)

        # register rule in index
        indextext = self.get_index_text(self.objtype, name)
        if indextext:
            self.indexnode['entries'].append((
                'single',
                indextext,
                targetname,
                '',
                None))

    def get_index_text(self, typename: str, name: str) -> str:
        """Formats object for entry into index"""
        return "{} ({})".format(name, typename)
class SnakemakeRule(YmpObjectDescription):
    """
    Implementation of the ``sm:rule::`` directive, which describes a
    Snakemake rule
    """
    # annotation shown in front of the rule name
    typename = "rule"
class YmpStage(YmpObjectDescription):
    """
    Implementation of the ``sm:stage::`` directive, which describes a
    YMP stage
    """
    # annotation shown in front of the stage name
    typename = "stage"
class SnakemakeDomain(Domain):
    """Sphinx domain ``sm`` for Snakemake rules and YMP stages"""
    name = "sm"
    label = "Snakemake"

    # ObjType(name, *roles, **attrs)
    object_types = {
        'rule': ObjType('rule', 'rule'),
        'stage': ObjType('stage', 'stage'),
    }

    directives = {
        'rule': SnakemakeRule,
        'stage': YmpStage,
    }

    roles = {
        'rule': XRefRole(),
        'stage': XRefRole(),
    }

    initial_data = {
        'objects': {},  #: (type, name) -> docname, labelid
    }

    data_version = 0

    def clear_doc(self, docname: str):
        """Forget all objects that were registered from ``docname``"""
        if 'objects' not in self.data:
            return
        registry = self.data['objects']
        # collect first, delete afterwards: the dict must not change
        # size while it is being iterated
        stale = []
        for key, (owner, _) in registry.items():
            if owner == docname:
                stale.append(key)
        for key in stale:
            del registry[key]

    def resolve_xref(self, env: BuildEnvironment, fromdocname: str,
                     builder, typ, target, node, contnode):
        """Resolve a cross-reference to a registered rule or stage

        Returns the reference node for the first object type of role
        ``typ`` under which ``target`` is registered; returns ``None``
        if there is no such object.
        """
        objects = self.data['objects']
        for objtype in self.objtypes_for_role(typ):
            entry = objects.get((objtype, target))
            if entry is None:
                continue
            docname, labelid = entry[0], entry[1]
            title = target + ' ' + objtype
            return make_refnode(builder, fromdocname, docname, labelid,
                                contnode, title)
        return None

    def get_objects(self):
        """Iterate over all registered objects

        Yields tuples ``(name, dispname, type, docname, anchor,
        searchprio)``; the display name equals the name and the
        priority is always 1.
        """
        for key, location in self.data['objects'].items():
            typ, name = key
            docname, ref = location
            yield name, name, typ, docname, ref, 1
class AutoSnakefileDirective(rst.Directive):
    """Implements RSt directive ``.. autosnake:: filename``

    The directive extracts docstrings from rules in snakefile and
    auto-generates documentation.
    """

    #: bool: This rule does not accept content
    has_content = False

    #: int: This rule needs one argument (the filename)
    required_arguments = 1

    #: str: Template for generated Rule RSt
    tpl_rule = ".. sm:rule:: {name}"

    #: str: Template for generated Stage RSt
    tpl_stage = ".. sm:stage:: {name}"

    #: str: Template option source
    # Indented by three columns, the same offset parse_doc() is given for
    # the directive content (``idt+3``). With a smaller indent docutils
    # would strip less from the block and the content would keep leading
    # spaces.
    tpl_source = "   :source: {filename}:{lineno}"

    def run(self):
        """Entry point

        Loads the Snakefile named by the directive argument and returns
        the nodes generated from it.
        """
        snakefile = self.arguments[0]

        #: BuildEnvironment: Sphinx build environment
        self.env: BuildEnvironment = self.state.document.settings.env

        #: ExpandableWorkflow: Ymp Workflow object
        self.workflow = self.load_workflow(snakefile)

        return self._generate_nodes()

    def load_workflow(self, file_path: str) -> ExpandableWorkflow:
        """Load the Snakefile"""
        workflow = ExpandableWorkflow(snakefile=file_path)
        workflow.include(file_path)
        return workflow

    def parse_doc(self, doc: str, source: str, idt: int = 0) -> StringList:
        """Convert doc string to StringList

        Args:
          doc: Documentation text
          source: Source filename
          idt: Result indentation in characters (default 0)

        Returns:
          StringList of re-indented documentation wrapped in newlines
        """
        doc = dedent(doc or "").strip("\n")
        doc = indent(doc, " " * idt)
        doclines = [''] + doc.splitlines() + ['']
        return StringList(doclines, source)

    def _format_head(self, template: str, name: str, filename: str,
                     lineno, idt: int) -> List[str]:
        """Build the directive header lines shared by rules and stages

        Args:
          template: Directive template (``tpl_rule`` or ``tpl_stage``)
          name: Name of the rule or stage
          filename: File in which the object was defined
          lineno: Line number reported by the object; if falsy, no
            ``:source:`` option is emitted
          idt: Result indentation in characters

        Returns:
          Indented header lines
        """
        head = template.format(name=name)
        if lineno:
            head += "\n"
            head += self.tpl_source.format(
                filename=relpath(filename),
                # translate through the workflow's line map for that file
                lineno=self.workflow.linemaps[filename][lineno],
            )
        return indent(head, " " * idt).splitlines()

    def parse_rule(self, rule: Rule, idt: int = 0) -> StringList:
        """Convert Rule to StringList

        Args:
          rule: Rule object
          idt: Result indentation in characters (default 0)

        Returns:
          StringList containing formatted Rule documentation
        """
        headlines = self._format_head(self.tpl_rule, rule.name,
                                      rule.snakefile, rule.lineno, idt)
        doc = self.parse_doc(rule.docstring, rule.snakefile, idt+3)
        return StringList(headlines, rule.snakefile) + doc

    def parse_stage(self, stage: Stage, idt: int = 0) -> StringList:
        """Convert Stage to StringList

        The rules of the stage are appended, sorted by name and
        indented three further columns so they nest inside the stage.

        Args:
          stage: Stage object
          idt: Result indentation in characters (default 0)

        Returns:
          StringList containing formatted Stage documentation
        """
        headlines = self._format_head(self.tpl_stage, stage.name,
                                      stage.filename, stage.lineno, idt)
        doc = self.parse_doc(stage.docstring, stage.filename, idt+3)
        res = StringList(headlines, stage.filename) + doc

        for rule in sorted(stage.rules, key=lambda s: s.name):
            res.extend(self.parse_rule(rule, idt+3))

        return res

    def _generate_nodes(self) -> List[nodes.Node]:
        """Generate Sphinx nodes from parsed snakefile"""
        node = nodes.paragraph('')
        result = StringList()

        # generate stages (de-duplicated, ordered by name)
        stages = sorted(set(Stage.get_registry().values()),
                        key=lambda x: x.name)
        for stage in stages:
            result.extend(self.parse_stage(stage))

        # generate nodes for rules not registered with stages
        for rule in self.workflow.rules:
            if not getattr(rule, "ymp_stage", False):
                result.extend(self.parse_rule(rule))

        self.state.nested_parse(result, 0, node)
        return [node]
def collect_pages(app: Sphinx):
    """Add Snakefiles to documentation (in HTML mode)

    For every file name recorded in ``app.env._snakefiles`` a page with
    the highlighted source is generated, followed by an index page
    linking to all pages that could be generated. Files that cannot be
    read are logged and skipped.

    Args:
      app: Sphinx application object

    Yields:
      Tuples ``(pagename, context, templatename)``
    """
    from html import escape

    if not hasattr(app.env, '_snakefiles'):
        return

    highlight_block = app.builder.highlighter.highlight_block
    index_page = '_snakefiles/index'
    entries = []

    # Sets have no stable iteration order; sort so that the pages and
    # the index come out the same on every build.
    for snakefile in sorted(app.env._snakefiles):
        try:
            with open(os.path.join(BASEPATH, snakefile), 'r',
                      encoding='utf-8') as f:
                code = f.read()
        except IOError:
            logger.error("failed to open %s", snakefile)
            continue
        highlighted = highlight_block(code, 'snakemake', lineanchors="line")
        pagename = os.path.join('_snakefiles', snakefile)
        context = {
            'title': snakefile,
            # the body is inserted as raw HTML, so escape the file name
            'body': '<h1>Snakefile "{}"</h1>'.format(escape(snakefile)) +
                    highlighted
        }
        yield (pagename, context, 'page.html')
        entries.append('<li><a href="{}">{}</a></li>\n'.format(
            escape(app.builder.get_relative_uri(index_page, pagename)),
            escape(snakefile)))

    html = ['\n']
    if entries:
        html.append('<ul>\n')
        html.extend(entries)
        html.append('</ul>\n')

    context = {
        'title': ('Overview: Snakemake rule files'),
        'body': '<h1>All Snakemake rule files</h1>' +
                ''.join(html)
    }
    yield (index_page, context, 'page.html')
class DomainTocTreeCollector(EnvironmentCollector):
    """Add Sphinx Domain entries to the TOC"""

    # override
    def clear_doc(self, app: Sphinx,
                  env: BuildEnvironment, docname: str) -> None:
        """Clear data from environment

        If we have cached data in environment for document ``docname``,
        we should clear it here.
        """

    # override
    def merge_other(self, app: Sphinx, env: BuildEnvironment,
                    docnames: List[str], other: BuildEnvironment) -> None:
        """Merge with results from parallel processes

        Called if Sphinx is processing documents in parallel. We
        should merge this from ``other`` into ``env`` for all ``docnames``.
        """

    # override
    def process_doc(self, app: Sphinx, doctree: nodes.Node) -> None:
        """Process ``doctree``

        This is called by ``read-doctree``, so after the doctree has been
        loaded. The signal is processed in registered first order,
        so we are called after built-in extensions, such as the
        :mod:`sphinx.environment.collectors.toctree` extension building
        the TOC.
        """
        # FIXME: handle duplicate entries
        for node in self.select_doc_nodes(doctree):
            tocnode = self.select_toc_location(app, node)
            if not tocnode:
                # nowhere to attach the entry (e.g. empty TOC)
                continue
            heading = self.make_heading(node)
            self.toc_insert(app.env.docname, tocnode, node, heading)

    def select_doc_nodes(self, doctree: nodes.Node) -> List[nodes.Node]:
        """Select the nodes for which entries in the TOC are desired

        This is a separate method so that it might be overridden by
        subclasses wanting to add other types of nodes to the TOC.
        """
        return doctree.traverse(addnodes.desc)

    def select_toc_location(self, app: Sphinx,
                            node: nodes.Node) -> Optional[nodes.Node]:
        """Select location in TOC where ``node`` should be referenced

        Walks from ``node`` towards the document root and returns the
        TOC entry of the first ancestor found in the TOC. If there is
        none, the first child of the document's TOC is used.

        Returns:
          TOC node to attach to, or ``None`` if the document's TOC has
          no entries at all.
        """
        current = node
        while current is not None:
            tocnode = self.locate_in_toc(app, current)
            if tocnode:
                return tocnode
            current = current.parent

        toc = app.env.tocs[app.env.docname]
        # The TOC has no children when the document has no entries;
        # indexing it would raise IndexError.
        if not len(toc):
            return None
        return toc[0]

    def locate_in_toc(self, app: Sphinx,
                      node: nodes.Node) -> Optional[nodes.Node]:
        """Find the TOC entry referencing ``node``

        Returns:
          The grandparent of the TOC reference whose ``anchorname``
          equals ``#`` + the id obtained from :meth:`get_ref`, or
          ``None`` if there is no such reference.
        """
        toc = app.env.tocs[app.env.docname]
        ref = self.get_ref(node)
        for refnode in toc.traverse(nodes.reference):
            anchor = refnode.get('anchorname')
            if not anchor or anchor[0] != "#":
                continue
            if anchor[1:] == ref:
                return refnode.parent.parent
        return None

    def get_ref(self, node: nodes.Node) -> Optional[str]:
        """Get the id used to reference ``node``

        Starting at ``node`` and moving up through its parents, returns
        the first id found. A node without ids whose first child has
        ids is given a derived id (``<child id>-tocentry``) on the way.

        Returns:
          The id string, or ``None`` if no node on the way up has one.
        """
        while node is not None:
            if not node.get('ids'):
                # In Sphinx domain descriptions, the ID is in the
                # first child node, the desc_signature.
                # We can take that, but to make the JS handling
                # the TOC sidebar work properly, we need it to
                # be in the hierarchy. So create an ID for the
                # desc node.
                first = node[0] if len(node) else None
                # Only elements carry ids; the first child may also be
                # missing or a Text node.
                if isinstance(first, nodes.Element) and first.get('ids'):
                    node['ids'] = [first['ids'][0] + '-tocentry']
            if node.get('ids'):
                return node['ids'][0]
            node = node.parent
        return None

    def make_heading(self, node: nodes.Node) -> List[nodes.Node]:
        """Create the nodes displayed for ``node`` in the TOC

        Returns:
          Copies of the ``desc_name`` nodes found in the first child
          of ``node``.
        """
        # Copy the nodes: they get appended to a reference in the TOC,
        # which would otherwise re-point the ``parent`` of nodes that
        # still belong to the doctree.
        return [name.deepcopy()
                for name in node[0].traverse(addnodes.desc_name)]

    def toc_insert(self, docname: str, tocnode: nodes.Node, node: nodes.Node,
                   heading: List[nodes.Node]) -> None:
        """Append a TOC entry for ``node`` below ``tocnode``

        The entry is added to the first bullet list among the children
        of ``tocnode``; if there is none, a new bullet list is created.

        Args:
          docname: Name of the document the entry links to
          tocnode: TOC node below which the entry is inserted
          node: Document node to be referenced
          heading: Nodes making up the text of the entry
        """
        for child in tocnode.children:
            if isinstance(child, nodes.bullet_list):
                blist = child
                break
        else:
            blist = nodes.bullet_list('')
            tocnode += blist

        reference = nodes.reference(
            '', '', internal=True, refuri=docname,
            anchorname="#" + self.get_ref(node), *heading)
        para = addnodes.compact_paragraph('', '', reference)
        item = nodes.list_item('', para)
        # FIXME: find correct location
        blist.append(item)
def setup(app: Sphinx):
    """Register the extension with Sphinx

    Registers the ``snakemake`` lexer, the Snakemake domain, the
    ``autosnake`` directive, the TOC collector and the hook that
    generates the Snakefile source pages.

    Args:
      app: Sphinx application object
    """
    import sphinx

    # Sphinx 2.1 changed add_lexer() to take the lexer class; passing an
    # instance no longer works from Sphinx 4.0 on. Older releases expect
    # an instance.
    if sphinx.version_info >= (2, 1):
        lexer = SnakemakeLexer
    else:
        lexer = SnakemakeLexer()
    app.add_lexer('snakemake', lexer)

    app.add_domain(SnakemakeDomain)
    app.add_directive('autosnake', AutoSnakefileDirective)
    app.add_env_collector(DomainTocTreeCollector)
    app.connect('html-collect-pages', collect_pages)