Browse Source

element: Add citation support

* lisp/org-element.el (org-element-citation-key-re):
(org-element-citation-prefix-re): New variables.
(org-element--set-regexps): Set `org-element--object-regexp' so it
finds citations.
(org-element-all-objects): Add citation and citation-reference
objects.
(org-element-recursive-objects): Add citation object.
(org-element-object-restrictions): Add citation and citation-reference
to restrictions.
(org-element-secondary-value-alist): citation and citation references
can hold secondary strings.
(org-element-citation-parser):
(org-element-citation-interpreter):
(org-element-citation-reference-parser):
(org-element-citation-reference-interpreter): New functions.
(org-element--object-lex): Parse citations and citations references.
* testing/lisp/test-org-element.el (test-org-element/citation-parser):
(test-org-element/citation-reference-parser):
(test-org-element/citation-interpreter): New tests.

This patch adds support for [cite:@key], [cite:pre @key post]
[cite:global prefix; pre @key1 post; pre @key2 post; global suffix]
objects along with their [cite/style: ...] counterparts.
Nicolas Goaziou 4 years ago
parent
commit
fed07be5b8
2 changed files with 338 additions and 20 deletions
  1. 170 20
      lisp/org-element.el
  2. 168 0
      testing/lisp/test-org-element.el

+ 170 - 20
lisp/org-element.el

@@ -117,6 +117,19 @@
 ;; `org-element-update-syntax' builds proper syntax regexps according
 ;; to current setup.
 
+(defconst org-element-citation-key-re
+  (rx "@" (group (one-or-more (any word "-.:?!`'/*@+|(){}<>&_^$#%&~"))))
+  "Regexp matching a citation key.
+Key is located in match group 1.")
+
+(defconst org-element-citation-prefix-re
+  (rx "[cite"
+      (opt "/" (group (one-or-more (any "/_-" alnum)))) ;style
+      ":"
+      (zero-or-more (any "\t\n ")))
+  "Regexp matching a citation prefix.
+Style, if any, is located in match group 1.")
+
 (defvar org-element-paragraph-separate nil
   "Regexp to separate paragraphs in an Org buffer.
 In the case of lines starting with \"#\" and \":\", this regexp
@@ -182,15 +195,17 @@ specially in `org-element--object-lex'.")
 				      (nth 2 org-emphasis-regexp-components)))
 		      ;; Plain links.
 		      (concat "\\<" link-types ":")
-		      ;; Objects starting with "[": regular link,
+		      ;; Objects starting with "[": citations,
 		      ;; footnote reference, statistics cookie,
-		      ;; timestamp (inactive).
-		      (concat "\\[\\(?:"
-			      "fn:" "\\|"
-			      "\\[" "\\|"
-			      "[0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}" "\\|"
-			      "[0-9]*\\(?:%\\|/[0-9]*\\)\\]"
-			      "\\)")
+		      ;; timestamp (inactive) and regular link.
+		      (format "\\[\\(?:%s\\)"
+			      (mapconcat
+			       #'identity
+			       (list "cite[:/]"
+				     "fn:"
+				     "\\(?:[0-9]\\|\\(?:%\\|/[0-9]*\\)\\]\\)"
+				     "\\[")
+			       "\\|"))
 		      ;; Objects starting with "@": export snippets.
 		      "@@"
 		      ;; Objects starting with "{": macro.
@@ -234,15 +249,15 @@ specially in `org-element--object-lex'.")
   "List of recursive element types aka Greater Elements.")
 
 (defconst org-element-all-objects
-  '(bold code entity export-snippet footnote-reference inline-babel-call
-	 inline-src-block italic line-break latex-fragment link macro
-	 radio-target statistics-cookie strike-through subscript superscript
-	 table-cell target timestamp underline verbatim)
+  '(bold citation citation-reference code entity export-snippet
+	 footnote-reference inline-babel-call inline-src-block italic line-break
+	 latex-fragment link macro radio-target statistics-cookie strike-through
+	 subscript superscript table-cell target timestamp underline verbatim)
   "Complete list of object types.")
 
 (defconst org-element-recursive-objects
-  '(bold footnote-reference italic link subscript radio-target strike-through
-	 superscript table-cell underline)
+  '(bold citation footnote-reference italic link subscript radio-target
+	 strike-through superscript table-cell underline)
   "List of recursive object types.")
 
 (defconst org-element-object-containers
@@ -331,9 +346,12 @@ Don't modify it, set `org-element-affiliated-keywords' instead.")
 (defconst org-element-object-restrictions
   (let* ((minimal-set '(bold code entity italic latex-fragment strike-through
 			     subscript superscript underline verbatim))
-	 (standard-set (remq 'table-cell org-element-all-objects))
+	 (standard-set
+	  (remq 'citation-reference (remq 'table-cell org-element-all-objects)))
 	 (standard-set-no-line-break (remq 'line-break standard-set)))
     `((bold ,@standard-set)
+      (citation citation-reference)
+      (citation-reference ,@minimal-set)
       (footnote-reference ,@standard-set)
       (headline ,@standard-set-no-line-break)
       (inlinetask ,@standard-set-no-line-break)
@@ -370,9 +388,11 @@ This alist also applies to secondary string.  For example, an
 still has an entry since one of its properties (`:title') does.")
 
 (defconst org-element-secondary-value-alist
-  '((headline :title)
+  '((citation :prefix :suffix)
+    (headline :title)
     (inlinetask :title)
-    (item :tag))
+    (item :tag)
+    (citation-reference :prefix :suffix))
   "Alist between element types and locations of secondary values.")
 
 (defconst org-element--pair-round-table
@@ -2753,6 +2773,129 @@ CONTENTS is the contents of the object."
   (format "*%s*" contents))
 
 
+;;;; Citation
+
+(defun org-element-citation-parser ()
+  "Parse citation object at point, if any.
+
+When at a citation object, return a list whose car is `citation'
+and cdr is a plist with `:style', `:prefix', `:suffix', `:begin',
+`:end', `:contents-begin', `:contents-end', and `:post-blank'
+keywords.  Otherwise, return nil.
+
+Assume point is at the beginning of the citation."
+  (when (looking-at org-element-citation-prefix-re)
+    (let* ((begin (point))
+	   (style (and (match-end 1)
+		       (match-string-no-properties 1)))
+	   ;; Ignore blanks between cite type and prefix or key.
+	   (start (match-end 0))
+	   (closing (with-syntax-table org-element--pair-square-table
+		      (ignore-errors (scan-lists begin 1 0)))))
+      (save-excursion
+	(when (and closing
+		   (re-search-forward org-element-citation-key-re closing t))
+	  ;; Find prefix, if any.
+	  (let ((first-key-end (match-end 0))
+		(types (org-element-restriction 'citation-reference))
+                (cite
+		 (list 'citation
+		       (list :style style
+			     :begin begin
+			     :post-blank (progn
+					   (goto-char closing)
+					   (skip-chars-forward " \t"))
+			     :end (point)))))
+	    ;; `:contents-begin' depends on the presence of
+	    ;; a non-empty common prefix.
+	    (goto-char first-key-end)
+	    (if (not (search-backward ";" start t))
+		(org-element-put-property cite :contents-begin start)
+	      (when (< start (point))
+		(org-element-put-property
+                 cite :prefix
+                 (org-element--parse-objects start (point) nil types cite)))
+	      (forward-char)
+	      (org-element-put-property cite :contents-begin (point)))
+	    ;; `:contents-end' depends on the presence of a non-empty
+	    ;; common suffix.
+	    (goto-char (1- closing))
+	    (skip-chars-backward " \r\t\n")
+	    (let ((end (point)))
+	      (if (or (not (search-backward ";" first-key-end t))
+		      (re-search-forward org-element-citation-key-re end t))
+		  (org-element-put-property cite :contents-end end)
+                (forward-char)
+		(when (< (point) end)
+		  (org-element-put-property
+                   cite :suffix
+                   (org-element--parse-objects (point) end nil types cite)))
+		(org-element-put-property cite :contents-end (point))))
+	    cite))))))
+
+(defun org-element-citation-interpreter (citation contents)
+  "Interpret CITATION object as Org syntax.
+CONTENTS is the contents of the object, as a string."
+  (let ((prefix (org-element-property :prefix citation))
+        (suffix (org-element-property :suffix citation))
+        (style (org-element-property :style citation)))
+    (concat "[cite"
+            (and style (concat "/" style))
+            ":"
+            (and prefix (concat (org-element-interpret-data prefix) ";"))
+            (if suffix
+                (concat contents (org-element-interpret-data suffix))
+              ;; Remove spurious semicolon.
+              (substring contents nil -1))
+            "]")))
+
+
+;;;; Citation Reference
+
+(defun org-element-citation-reference-parser ()
+  "Parse citation reference object at point, if any.
+
+When at a reference, return a list whose car is
+`citation-reference', and cdr is a plist with `:key',
+`:prefix', `:suffix', `:begin', `:end', and `:post-blank' keywords.
+
+Assume point is at the beginning of the reference."
+  (save-excursion
+    (let ((begin (point)))
+      (when (re-search-forward org-element-citation-key-re nil t)
+        (let* ((key (match-string-no-properties 1))
+	       (key-start (match-beginning 0))
+	       (key-end (match-end 0))
+	       (separator (search-forward ";" nil t))
+               (end (or separator (point-max)))
+               (suffix-end (if separator (1- end) end))
+               (types (org-element-restriction 'citation-reference))
+	       (reference
+	        (list 'citation-reference
+		      (list :key key
+			    :begin begin
+			    :end end
+			    :post-blank 0))))
+	  (when (< begin key-start)
+	    (org-element-put-property
+	     reference :prefix
+             (org-element--parse-objects begin key-start nil types reference)))
+	  (when (< key-end suffix-end)
+	    (org-element-put-property
+	     reference :suffix
+             (org-element--parse-objects key-end suffix-end nil types reference)))
+	  reference)))))
+
+(defun org-element-citation-reference-interpreter (citation-reference _)
+  "Interpret CITATION-REFERENCE object as Org syntax."
+  (concat (org-element-interpret-data
+           (org-element-property :prefix citation-reference))
+	  "@" (org-element-property :key citation-reference)
+	  (org-element-interpret-data
+           (org-element-property :suffix citation-reference))
+          ";"))
+
+
 ;;;; Code
 
 (defun org-element-code-parser ()
@@ -4437,7 +4580,11 @@ Elements are accumulated into ACC."
 RESTRICTION is a list of object types, as symbols, that should be
 looked after.  This function assumes that the buffer is narrowed
 to an appropriate container (e.g., a paragraph)."
-  (if (memq 'table-cell restriction) (org-element-table-cell-parser)
+  (cond
+   ((memq 'table-cell restriction) (org-element-table-cell-parser))
+   ((memq 'citation-reference restriction)
+    (org-element-citation-reference-parser))
+   (t
     (let* ((start (point))
 	   (limit
 	    ;; Object regexp sometimes needs to have a peek at
@@ -4525,6 +4672,9 @@ to an appropriate container (e.g., a paragraph)."
 			 ((and ?f
 			       (guard (memq 'footnote-reference restriction)))
 			  (org-element-footnote-reference-parser))
+			 ((and ?c
+			       (guard (memq 'citation restriction)))
+			  (org-element-citation-parser))
 			 ((and (or ?% ?/)
 			       (guard (memq 'statistics-cookie restriction)))
 			  (org-element-statistics-cookie-parser))
@@ -4539,8 +4689,8 @@ to an appropriate container (e.g., a paragraph)."
 	    (or (eobp) (forward-char))))
 	(cond (found)
 	      (limit (forward-char -1)
-		     (org-element-link-parser)) ;radio link
-	      (t nil))))))
+		     (org-element-link-parser))	;radio link
+	      (t nil)))))))
 
 (defun org-element--parse-objects (beg end acc restriction &optional parent)
   "Parse objects between BEG and END and return recursive structure.

+ 168 - 0
testing/lisp/test-org-element.el

@@ -519,6 +519,144 @@ Some other text
      (= (org-element-property :end (org-element-at-point)) (point-max)))))
 
 
+;;;; Citation
+
+(ert-deftest test-org-element/citation-parser ()
+  "Test `citation' parser"
+  ;; Parse citations.  They must contain at least a bare key.
+  (should
+   (eq 'citation
+       (org-test-with-temp-text "[cite:@key]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (eq 'citation
+       (org-test-with-temp-text "[cite:-@key]"
+	 (org-element-type (org-element-context)))))
+  (should-not
+   (eq 'citation
+       (org-test-with-temp-text "[cite:text]"
+	 (org-element-type (org-element-context)))))
+  ;; Citation may contain a style.
+  (should
+   (eq 'citation
+       (org-test-with-temp-text "[cite/style:@key]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (equal "style"
+	  (org-test-with-temp-text "[cite/style:@key]"
+	    (org-element-property :style (org-element-context)))))
+  ;; Handle multi citations separated with semi-columns.
+  (should
+   (eq 'citation
+       (org-test-with-temp-text "[cite:@a;@b;@c]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (equal '("a" "b" "c")
+	  (org-test-with-temp-text "[cite:@a;@b;@c]"
+	    (org-element-map (org-element-parse-buffer) 'citation-reference
+	      (lambda (r) (org-element-property :key r))))))
+  (should
+   (eq 'citation
+       (org-test-with-temp-text "[cite:@a;-@b]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (equal '("a" "b")
+	  (org-test-with-temp-text "[cite:@a;-@b]"
+	    (org-element-map (org-element-parse-buffer) 'citation-reference
+	      (lambda (r) (org-element-property :key r))))))
+  ;; Multi citations accept `:prefix' and `:suffix' properties.
+  (should
+   (equal '("common-prefix")
+	  (org-test-with-temp-text "[cite:common-prefix;@a]"
+	    (org-element-property :prefix (org-element-context)))))
+  (should
+   (equal '("common-suffix")
+	  (org-test-with-temp-text "[cite:@a;common-suffix]"
+	    (org-element-property :suffix (org-element-context)))))
+  ;; White spaces right after "cite" tags are ignored. So are white
+  ;; spaces at the end of the citation.
+  (should
+   (equal '("common-prefix ")
+	  (org-test-with-temp-text "[cite: common-prefix ;@a]"
+	    (org-element-property :prefix (org-element-context)))))
+  (should
+   (equal '(" common-suffix")
+	  (org-test-with-temp-text "[cite: @a; common-suffix ]"
+	    (org-element-property :suffix (org-element-context))))))
+
+
+;;;; Citation Reference
+
+(ert-deftest test-org-element/citation-reference-parser ()
+  "Test `citation' reference parser."
+  ;; Parse bare keys.
+  (should
+   (eq 'citation-reference
+       (org-test-with-temp-text "[cite:<point>@key]"
+	 (org-element-type (org-element-context)))))
+  ;; Bare keys can contain any word character, and some punctuation,
+  ;; but not semicolon, square brackets, and space.
+  (should
+   (equal "_key"
+	  (org-test-with-temp-text "[cite:@_k<point>ey]"
+	    (org-element-property :key (org-element-context)))))
+  (should
+   (eq 'citation-reference
+       (org-test-with-temp-text "[cite:<point>@a]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (eq 'citation-reference
+       (org-test-with-temp-text "[cite:<point>@ö]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (eq 'citation-reference
+       (org-test-with-temp-text "[cite:<point>@_]"
+	 (org-element-type (org-element-context)))))
+  (should
+   (equal "a:.#$%&-+?<>~/1"
+	  (org-test-with-temp-text "[cite:<point>@a:.#$%&-+?<>~/1]"
+	    (org-element-property :key (org-element-context)))))
+  (should-not
+   (eq 'citation-reference
+       (org-test-with-temp-text "[cite:<point>@;]"
+	 (org-element-type (org-element-context)))))
+  (should-not
+   (equal "key"
+	  (org-test-with-temp-text "[cite:<point>@[]]"
+	    (org-element-property :key (org-element-context)))))
+  ;; References in citations accept optional `:prefix' and `:suffix'
+  ;; properties.
+  (should
+   (equal '("pre ")
+	  (org-test-with-temp-text "[cite:pre <point>@key]"
+	    (org-element-property :prefix (org-element-context)))))
+  (should
+   (equal '(" post")
+	  (org-test-with-temp-text "[cite:<point>@key post]"
+	    (org-element-property :suffix (org-element-context)))))
+  ;; White spaces between "cite" tag and prefix are ignored.
+  (should
+   (equal '("pre ")
+	  (org-test-with-temp-text "[cite: pre <point>@key]"
+	    (org-element-property :prefix (org-element-context)))))
+  ;; Semicolons do not belong to prefix or suffix.
+  (should
+   (equal '("pre ")
+	  (org-test-with-temp-text "[cite:@key1;pre <point>@key2]"
+	    (org-element-property :prefix (org-element-context)))))
+  (should
+   (equal '(" post")
+	  (org-test-with-temp-text "[cite:@key1 <point>post;@key2]"
+	    (org-element-property :suffix (org-element-context)))))
+  (should
+   (equal '("pre ")
+	  (org-test-with-temp-text "[cite:global prefix;pre<point> @key1]"
+	    (org-element-property :prefix (org-element-context)))))
+  (should
+   (equal '(" post")
+	  (org-test-with-temp-text "[cite:@key1 <point>post; global suffix]"
+	    (org-element-property :suffix (org-element-context))))))
+
 ;;;; Clock
 
 (ert-deftest test-org-element/clock-parser ()
@@ -3124,6 +3262,36 @@ DEADLINE: <2012-03-29 thu.> SCHEDULED: <2012-03-29 thu.> CLOSED: [2012-03-29 thu
   "Test bold interpreter."
   (should (equal (org-test-parse-and-interpret "*text*") "*text*\n")))
 
+(ert-deftest test-org-element/citation-interpreter ()
+  "Test citation interpreter."
+  (should
+   (equal "[cite:@key]\n"
+	  (org-test-parse-and-interpret "[cite:@key]")))
+  (should
+   (equal "[cite:-@key]\n"
+	  (org-test-parse-and-interpret "[cite:-@key]")))
+  (should
+   (equal "[cite/style:@key]\n"
+	  (org-test-parse-and-interpret "[cite/style:@key]")))
+  (should
+   (equal "[cite:pre @key]\n"
+	  (org-test-parse-and-interpret "[cite:pre @key]")))
+  (should
+   (equal "[cite:@key post]\n"
+	  (org-test-parse-and-interpret "[cite:@key post]")))
+  (should
+   (equal "[cite:@a ;b]\n"
+	  (org-test-parse-and-interpret "[cite: @a ;b]")))
+  (should
+   (equal "[cite:@a;@b;@c]\n"
+	  (org-test-parse-and-interpret "[cite:@a;@b;@c]")))
+  (should
+   (equal "[cite:common-pre ; @a]\n"
+	  (org-test-parse-and-interpret "[cite:common-pre ; @a]")))
+  (should
+   (equal "[cite:@a ; common-post]\n"
+	  (org-test-parse-and-interpret "[cite:@a ; common-post]"))))
+
 (ert-deftest test-org-element/code-interpreter ()
   "Test code interpreter."
   (should (equal (org-test-parse-and-interpret "~text~") "~text~\n")))