Add a pandoc filter for man pages written in javascript, to transform man pages. ihse-manpages-branch
author ihse
Wed, 17 Oct 2018 11:41:33 +0200
branch ihse-manpages-branch
changeset 56977 9221e7c3bf36
parent 56974 8139ac807fc9
child 57000 03413eb7301f
Add a pandoc filter for man pages written in javascript, to transform man pages.
make/Docs.gmk
make/common/ProcessMarkdown.gmk
make/scripts/pandoc-manpage-filter.js
make/scripts/pandoc-manpage-filter.sh.template
--- a/make/Docs.gmk	Tue Oct 16 12:02:45 2018 +0200
+++ b/make/Docs.gmk	Wed Oct 17 11:41:33 2018 +0200
@@ -28,6 +28,7 @@
 include MakeBase.gmk
 include Modules.gmk
 include ProcessMarkdown.gmk
+include TextFileProcessing.gmk
 include ZipArchive.gmk
 include $(TOPDIR)/make/ToolsJdk.gmk
 include $(TOPDIR)/make/ModuleTools.gmk
@@ -558,6 +559,25 @@
   )
 
   ifeq ($(ENABLE_MAN_PAGES), true)
+    PANDOC_FILTER := $(SUPPORT_OUTPUTDIR)/docs/pandoc-manpage-filter
+    PANDOC_FILTER_SETUP := $(SUPPORT_OUTPUTDIR)/docs/_pandoc_filter_setup.marker
+
+    # Generate the wrapper script that runs the pandoc filter (which is written
+    # in javascript) from its template, filling in the @@...@@ placeholders.
+    $(eval $(call SetupTextFileProcessing, CREATE_PANDOC_FILTER, \
+        SOURCE_FILES := $(TOPDIR)/make/scripts/pandoc-manpage-filter.sh.template, \
+        OUTPUT_FILE := $(PANDOC_FILTER), \
+        REPLACEMENTS := \
+            @@BOOT_JDK@@ => $(BOOT_JDK) ; \
+            @@TOPDIR@@ => $(TOPDIR) ; \
+            @@JJS_FLAGS@@ => $(addprefix -J, $(JAVA_FLAGS_SMALL)), \
+    ))
+
+    # Make the generated script executable; the marker file records that it is.
+    $(PANDOC_FILTER_SETUP): $(CREATE_PANDOC_FILTER)
+	$(CHMOD) a+rx $(PANDOC_FILTER)
+	$(TOUCH) $@
+
     # We assume all man pages should reside in section 1
 
     # Generate man pages from markdown
@@ -571,6 +591,8 @@
               FILES := $(filter %.md, $(call CacheFind, $d)), \
               DEST := $(DOCS_OUTPUTDIR)/man/man1, \
               FORMAT := man, \
+              EXTRA_DEPS := $(PANDOC_FILTER_SETUP), \
+              FILTER := $(PANDOC_FILTER), \
           )) \
         ) \
         $(eval MAN_PAGE_TARGETS += $($($m_$d_NAME))) \
--- a/make/common/ProcessMarkdown.gmk	Tue Oct 16 12:02:45 2018 +0200
+++ b/make/common/ProcessMarkdown.gmk	Wed Oct 17 11:41:33 2018 +0200
@@ -55,13 +55,18 @@
       $1_$2_CSS_OPTION := --css '$$($1_$2_CSS)'
     endif
   endif
+
   $1_$2_OPTIONS := $$(shell $$(GREP) _pandoc-options_: $$($1_SRC)/$2 | $$(CUT) -d : -f 2-)
 
+  ifneq ($$($1_FILTER), )
+    $1_$2_OPTIONS += --filter $$($1_FILTER)
+  endif
+
   $1_$2_VARDEPS := $$($1_OPTIONS) $$($1_$2_OPTIONS) $$($1_CSS)
   $1_$2_VARDEPS_FILE := $$(call DependOnVariable, $1_$2_VARDEPS, \
       $$(SUPPORT_OUTPUTDIR)/markdown/$$($1_$2_MARKER).vardeps)
 
-$$($1_$2_OUTPUT_FILE): $$($1_$2_INPUT_FILE) $$($1_$2_VARDEPS_FILE)
+$$($1_$2_OUTPUT_FILE): $$($1_$2_INPUT_FILE) $$($1_$2_VARDEPS_FILE) $$($1_EXTRA_DEPS)
 	$$(call LogInfo, Converting $2 to $$($1_FORMAT))
 	$$(call MakeDir, $$($1_$2_TARGET_DIR) $$(SUPPORT_OUTPUTDIR)/markdown)
         ifneq ($$($1_PRE_PROCESS), )
@@ -102,6 +107,8 @@
 #   FORMAT   : The target format (defaults to html5)
 #   FILE_EXT : The file extension to replace .md with (defaults to .html)
 #   OPTIONS  : Additional options to pandoc
+#   EXTRA_DEPS : Additional dependencies to add to each pandoc call
+#   FILTER   : Optional pandoc filter command
 #
 SetupProcessMarkdown = $(NamedParamsMacroTemplate)
 define SetupProcessMarkdownBody
@@ -125,20 +132,21 @@
 
   ifeq ($$($1_FORMAT), man)
     # Use preprocessing to inject the current version.
-    # Also mis-use preprocessing for stuff that would be better handled as a
-    # pandoc filter: raise ## headings to #, convert those headings to upper
-    # case, and remove internal links.
-    $1_PRE_PROCESS := $(SED) -E -e 's/@@VERSION_SHORT@@/$(VERSION_SHORT)/g' \
-        -e '/^\\\#\\\# /y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-        -e 's/^\\\#\\\# /\\\# /' -e 's/(\\\#[a-z-]+)/ /g'
+    $1_PRE_PROCESS := $(SED) -e 's/@@VERSION_SHORT@@/$(VERSION_SHORT)/g'
 
     # Pandoc's default behavior is to convert `code` to \f[C], which do not
     # show up in normal man page rendering (but shows correctly when generating
-    # html). Normally, we could fix this by a pandoc filter, but pandoc prior
+    # html). The norm for man pages is to show code as bold, so this should be
+    # transformed. However, even if we fix this in our pandoc filter, pandoc prior
     # to version 2.0 cannot properly produced nested formatting in man pages
     # (see https://github.com/jgm/pandoc/issues/3568).
-    # As a workaround, use post-processing with sed.
-    $1_POST_PROCESS := $(SED) -e 's/\\f\[C\]/\\f\[CB\]/g'
+    # As a workaround, use post-processing with sed instead.
+    #
+    # As of pandoc 2.3, the termination of formatting is still broken
+    # (see https://github.com/jgm/pandoc/issues/4973). We need to replace
+    # \f[] with \f[R].
+    $1_POST_PROCESS := $(SED) -e 's/\\f\[C\]/\\f\[CB\]/g' \
+        -e 's/\\f\[\]/\\f\[R\]/g'
 
     # If no file extension is specified, default to '.1'.
     ifeq ($$($1_FILE_EXT), )
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/scripts/pandoc-manpage-filter.js	Wed Oct 17 11:41:33 2018 +0200
@@ -0,0 +1,134 @@
+//
+// Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+//
+// Recursively rebuild a tree of pandoc format objects. For every array
+// element that carries a 't' tag, callback(type, content) is invoked; a
+// truthy return value is used in place of that element.
+//
+// Inspired by the walk method in
+// https://github.com/jgm/pandocfilters/blob/master/pandocfilters.py
+//
+function traverse(obj, callback) {
+    // Anything that is not an object or array is returned untouched
+    if (obj !== Object(obj)) {
+        return obj;
+    }
+
+    // Plain objects are copied key by key; the callback is not consulted
+    if (!Array.isArray(obj)) {
+        var copy = {};
+        Object.keys(obj).forEach(function(key) {
+            copy[key] = traverse(obj[key], callback);
+        });
+        return copy;
+    }
+
+    // Arrays are where tagged elements get offered to the callback
+    var result = [];
+    obj.forEach(function(elem) {
+        var replacement = null;
+        if (elem === Object(elem) && elem.t) {
+            replacement = callback(elem.t, elem.c || []);
+        }
+
+        // No replacement keeps the element, an array is spliced in, and a
+        // single object takes the element's place; each is then traversed.
+        var items = Array.isArray(replacement) ? replacement : [ replacement || elem ];
+        items.forEach(function(item) {
+            result.push(traverse(item, callback));
+        });
+    });
+    return result;
+}
+
+//
+// Factories for pandoc format objects ('t' is the type tag, 'c' the content)
+//
+function Space() {
+    return { t: 'Space', c: [] };
+}
+
+function Str(value) {
+    return { t: 'Str', c: value };
+}
+
+function Emph(value) {
+    return { t: 'Emph', c: value };
+}
+
+function Header(value) {
+    return { t: 'Header', c: value };
+}
+
+//
+// Callback for traverse(): replaces a Str element by an upper case copy
+// and leaves every other element type alone (by returning undefined).
+//
+function uppercase(type, value) {
+    var is_text = (type === 'Str');
+    return is_text ? Str(value.toUpperCase()) : undefined;
+}
+
+//
+// Main callback for traverse(): given an element's type tag and content,
+// returns a replacement element (or array of elements), or nothing to keep it.
+//
+function manpage_filter(type, value) {
+    // Headers move up one level (never above 1); the level is updated in
+    // place, so headers that are not replaced below still carry the new
+    // level. New level 1 headers have their Str texts upper-cased.
+    if (type === 'Header') {
+        value[0] = Math.max(1, value[0] - 1);
+        if (value[0] === 1) {
+            return Header(traverse(value, uppercase));
+        }
+    }
+
+    // Links show their text in italics. Internal ('#...') targets are
+    // dropped; other targets follow in brackets. charAt is used since
+    // String.prototype.startsWith is not part of ES5.
+    if (type === 'Link') {
+        var target = value[2][0];
+        var text = Emph(value[1]);
+        return target.charAt(0) === '#' ? text : [ text, Space(), Str('[' + target + ']') ];
+    }
+}
+
+//
+// Main function: read pandoc's JSON AST via readLine(), rewrite it, print it.
+//
+function main() {
+    // readLine() is expected to yield null at end of input (TODO confirm
+    // for the jjs in use); comparing against null keeps an empty line from
+    // ending the loop early. 'line' is declared to avoid an implicit global.
+    var input = "", line;
+    while ((line = readLine()) != null) {
+        input = input.concat(line);
+    }
+    var transformed_json = traverse(JSON.parse(input), manpage_filter);
+    print(JSON.stringify(transformed_json));
+}
+
+// ... and execute it
+main();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/scripts/pandoc-manpage-filter.sh.template	Wed Oct 17 11:41:33 2018 +0200
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+# Wrapper that runs the javascript pandoc filter with Nashorn's jjs. Paths are
+# quoted so that a boot JDK or source root containing spaces still works.
+"@@BOOT_JDK@@/bin/jjs" @@JJS_FLAGS@@ -scripting \
+    "@@TOPDIR@@/make/scripts/pandoc-manpage-filter.js"