浏览代码

org-element: Fix paragraph parsing

* lisp/org-element.el (org-element--set-regexps): Refine
  `org-element-paragraph-separate'.
(org-element-paragraph-parser): Fix paragraph parsing when followed by
a dual keyword or an ill-formed latex environment.

* testing/lisp/test-org-element.el (test-org-element/paragraph-parser):
  Add tests.

Reported-by: Jacob Gerlach <jacobgerlach@gmail.com>
<http://permalink.gmane.org/gmane.emacs.orgmode/97256>
Nicolas Goaziou 10 年之前
父节点
当前提交
eb77fed33f
共有 2 个文件被更改，包括 66 次插入和 66 次删除
  1. 45 64
      lisp/org-element.el
  2. 21 2
      testing/lisp/test-org-element.el

+ 45 - 64
lisp/org-element.el

@@ -159,16 +159,20 @@ specially in `org-element--object-lex'.")
 		"$" "\\|"
 		;; Tables (any type).
 		"\\(?:|\\|\\+-[-+]\\)" "\\|"
-		;; Blocks (any type), Babel calls and keywords.  This
-		;; is only an indication and need some thorough check.
-		"#\\(?:[+ ]\\|$\\)" "\\|"
-		;; Drawers (any type) and fixed-width areas.  This is
-		;; also only an indication.
-		":" "\\|"
+		;; Comments, keyword-like or block-like constructs.
+		;; Blocks and keywords with dual values need to be
+		;; double-checked.
+		"#\\(?: \\|$\\|\\+\\(?:"
+		"BEGIN_\\S-+" "\\|"
+		"\\S-+\\(?:\\[.*\\]\\)?:\\(?: \\|$\\)\\)\\)"
+		"\\|"
+		;; Drawers (any type) and fixed-width areas.  Drawers
+		;; need to be double-checked.
+		":\\(?: \\|$\\|[-_[:word:]]+:[ \t]*$\\)" "\\|"
 		;; Horizontal rules.
 		"-\\{5,\\}[ \t]*$" "\\|"
 		;; LaTeX environments.
-		"\\\\begin{\\([A-Za-z0-9]+\\*?\\)}" "\\|"
+		"\\\\begin{\\([A-Za-z0-9*]+\\)}" "\\|"
 		;; Clock lines.
 		(regexp-quote org-clock-string) "\\|"
 		;; Lists.
@@ -2233,65 +2237,42 @@ Assume point is at the beginning of the paragraph."
 	   (before-blank
 	    (let ((case-fold-search t))
 	      (end-of-line)
-	      (if (not (re-search-forward
-			org-element-paragraph-separate limit 'm))
-		  limit
-		;; A matching `org-element-paragraph-separate' is not
-		;; necessarily the end of the paragraph.  In
-		;; particular, lines starting with # or : as a first
-		;; non-space character are ambiguous.  We have to
-		;; check if they are valid Org syntax (e.g., not an
-		;; incomplete keyword).
-		(beginning-of-line)
-		(while (not
-			(or
-			 ;; There's no ambiguity for other symbols or
-			 ;; empty lines: stop here.
-			 (looking-at "[ \t]*\\(?:[^:#]\\|$\\)")
-			 ;; Stop at valid fixed-width areas.
-			 (looking-at "[ \t]*:\\(?: \\|$\\)")
-			 ;; Stop at drawers.
-			 (and (looking-at org-drawer-regexp)
-			      (save-excursion
-				(re-search-forward
-				 "^[ \t]*:END:[ \t]*$" limit t)))
-			 ;; Stop at valid comments.
-			 (looking-at "[ \t]*#\\(?: \\|$\\)")
-			 ;; Stop at valid dynamic blocks.
-			 (and (looking-at org-dblock-start-re)
-			      (save-excursion
-				(re-search-forward
-				 "^[ \t]*#\\+END:?[ \t]*$" limit t)))
-			 ;; Stop at valid blocks.
-			 (and (looking-at "[ \t]*#\\+BEGIN_\\(\\S-+\\)")
-			      (save-excursion
-				(re-search-forward
-				 (format "^[ \t]*#\\+END_%s[ \t]*$"
-					 (regexp-quote
-					  (org-match-string-no-properties 1)))
-				 limit t)))
-			 ;; Stop at valid latex environments.
-			 (and (looking-at org-element--latex-begin-environment)
-			      (save-excursion
-				(re-search-forward
-				 (format org-element--latex-end-environment
-					 (regexp-quote
-					  (org-match-string-no-properties 1)))
-				 limit t)))
-			 ;; Stop at valid keywords.
-			 (looking-at "[ \t]*#\\+\\S-+:")
-			 ;; Skip everything else.
-			 (not
-			  (progn
-			    (end-of-line)
-			    (re-search-forward org-element-paragraph-separate
-					       limit 'm)))))
-		  (beginning-of-line)))
+	      ;; A matching `org-element-paragraph-separate' is not
+	      ;; necessarily the end of the paragraph.  In particular,
+	      ;; drawers, blocks or LaTeX environments opening lines
+	      ;; must be closed.  Moreover keywords with a secondary
+	      ;; value must belong to "dual keywords".
+	      (while (not
+		      (cond
+		       ((not (and (re-search-forward
+				   org-element-paragraph-separate limit 'move)
+				  (progn (beginning-of-line) t))))
+		       ((looking-at org-drawer-regexp)
+			(save-excursion
+			  (re-search-forward "^[ \t]*:END:[ \t]*$" limit t)))
+		       ((looking-at "[ \t]*#\\+BEGIN_\\(\\S-+\\)")
+			(save-excursion
+			  (re-search-forward
+			   (format "^[ \t]*#\\+END_%s[ \t]*$"
+				   (regexp-quote (match-string 1)))
+			   limit t)))
+		       ((looking-at org-element--latex-begin-environment)
+			(save-excursion
+			  (re-search-forward
+			   (format org-element--latex-end-environment
+				   (regexp-quote (match-string 1)))
+			   limit t)))
+		       ((looking-at "[ \t]*#\\+\\(\\S-+\\)\\[.*\\]:")
+			(member-ignore-case (match-string 1)
+					    org-element-dual-keywords))
+		       ;; Everything else is unambiguous.
+		       (t)))
+		(end-of-line))
 	      (if (= (point) limit) limit
 		(goto-char (line-beginning-position)))))
-	   (contents-end (progn (skip-chars-backward " \r\t\n" contents-begin)
-				(forward-line)
-				(point)))
+	   (contents-end (save-excursion
+			   (skip-chars-backward " \r\t\n" contents-begin)
+			   (line-beginning-position 2)))
 	   (end (progn (skip-chars-forward " \r\t\n" limit)
 		       (if (eobp) (point) (line-beginning-position)))))
       (list 'paragraph

+ 21 - 2
testing/lisp/test-org-element.el

@@ -1744,10 +1744,29 @@ e^{i\\pi}+1=0
      (let ((elem (org-element-at-point)))
        (and (eq (org-element-type elem) 'paragraph)
 	    (= (point-max) (org-element-property :end elem))))))
+  (should
+   (org-test-with-temp-text "Paragraph\n\begin{equation}"
+     (let ((elem (org-element-at-point)))
+       (and (eq (org-element-type elem) 'paragraph)
+	    (= (point-max) (org-element-property :end elem))))))
+  ;; Stop at affiliated keywords.
+  (should
+   (org-test-with-temp-text "Paragraph\n#+NAME: test\n| table |"
+     (let ((elem (org-element-at-point)))
+       (and (eq (org-element-type elem) 'paragraph)
+	    (not (org-element-property :name elem))
+	    (= (org-element-property :end elem) (line-beginning-position 2))))))
+  (should
+   (org-test-with-temp-text
+       "Paragraph\n#+CAPTION[with short caption]: test\n| table |"
+     (let ((elem (org-element-at-point)))
+       (and (eq (org-element-type elem) 'paragraph)
+	    (not (org-element-property :name elem))
+	    (= (org-element-property :end elem) (line-beginning-position 2))))))
   ;; Do not steal affiliated keywords from container.
   (should
-   (org-test-with-temp-text "#+ATTR_LATEX: test\n- item 1"
-     (let ((elem (progn (search-forward "item") (org-element-at-point))))
+   (org-test-with-temp-text "#+ATTR_LATEX: test\n- item<point> 1"
+     (let ((elem (org-element-at-point)))
        (and (eq (org-element-type elem) 'paragraph)
 	    (not (org-element-property :attr_latex elem))
 	    (/= (org-element-property :begin elem) 1)))))