Przeglądaj źródła

Refactor unescaping functions

* org.el (org-link-unescape): Simpler algorithm for replacing percent
escapes.
(org-link-unescape-compound): Use cond statements instead of nested
if, convert hex string with string-to-number, save match data.
(org-link-unescape-single-byte-sequence): Use mapconcat and
string-to-number for unescaping single byte sequence.
David Maus 14 lat temu
rodzic
commit
7b58cccddd
1 zmienionych plików z 39 dodań i 63 usunięć
  1. 39 63
      lisp/org.el

+ 39 - 63
lisp/org.el

@@ -8642,77 +8642,53 @@ If optional argument MERGE is set, merge TABLE into
 (defun org-link-unescape (str)
   "Unhex hexified unicode strings as returned from the JavaScript function
 encodeURIComponent. E.g. `%C3%B6' is the german Umlaut `ö'."
-  (setq str (or str ""))
-  (let ((tmp "")
-	(case-fold-search t))
-    (while (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str)
-      (let* ((start (match-beginning 0))
-	     (end (match-end 0))
-	     (hex (match-string 0 str))
-	     (replacement (org-link-unescape-compound (upcase hex))))
-	(setq tmp (concat tmp (substring str 0 start) replacement))
-	(setq str (substring str end))))
-    (setq tmp (concat tmp str))
-    tmp))
+  (unless (or (null str) (string= "" str)) ; nil or "" has nothing to decode
+    (let ((pos 0) (case-fold-search t) unhexed)
+      (while (setq pos (string-match "\\(%[0-9a-f][0-9a-f]\\)+" str pos))
+	(setq unhexed (org-link-unescape-compound (match-string 0 str)))
+	(setq str (replace-match unhexed t t str))
+	(setq pos (+ pos (length unhexed)))))) ; resume after the inserted text
+  (or str "")) ; a nil STR still yields "", as in the code being replaced
 
 (defun org-link-unescape-compound (hex)
   "Unhexify unicode hex-chars. E.g. `%C3%B6' is the German Umlaut `ö'.
 Note: this function also decodes single byte encodings like
 `%E1' (\"á\") if not followed by another `%[A-F0-9]{2}' group."
-  (let* ((bytes (remove "" (split-string hex "%")))
-	 (ret "")
-	 (eat 0)
-	 (sum 0))
-    (while bytes
-      (let* ((b (pop bytes))
-	     (a (elt b 0))
-	     (b (elt b 1))
-	     (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-	     (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0)))
-	     (val (+ (lsh c1 4) c2))
-	     (shift
-	      (if (= 0 eat) ;; new byte
-		  (if (>= val 252) 6
-		    (if (>= val 248) 5
-		      (if (>= val 240) 4
-			(if (>= val 224) 3
-			  (if (>= val 192) 2 0)))))
-		6))
-	     (xor
-	      (if (= 0 eat) ;; new byte
-		  (if (>= val 252) 252
-		    (if (>= val 248) 248
-		      (if (>= val 240) 240
-			(if (>= val 224) 224
-			  (if (>= val 192) 192 0)))))
-		128)))
-	(if (>= val 192) (setq eat shift))
-	(setq val (logxor val xor))
-	(setq sum (+ (lsh sum shift) val))
-	(if (> eat 0) (setq eat (- eat 1)))
-	(cond
-	 ((= 0 eat)                         ;multi byte
-	  (setq ret (concat ret (org-char-to-string sum)))
-	  (setq sum 0))
-	 ((not bytes)                       ; single byte(s)
-	  (setq ret (org-link-unescape-single-byte-sequence hex))))
-	)) ;; end (while bytes
-    ret ))
+  (save-match-data ; `org-link-unescape' calls `replace-match' after this returns
+    (let* ((bytes (cdr (split-string hex "%"))) ; cdr skips the piece before the first "%" (assumes HEX starts with "%")
+	   (ret "")
+	   (eat 0)
+	   (sum 0))
+      (while bytes
+	(let* ((val (string-to-number (pop bytes) 16)) ; next hex group as an integer
+	       (shift-xor ; (SHIFT . XOR) pair chosen for this byte
+		(if (= 0 eat) ; EAT is 0: no bytes of a sequence are pending
+		    (cond
+		     ((>= val 252) (cons 6 252))
+		     ((>= val 248) (cons 5 248))
+		     ((>= val 240) (cons 4 240))
+		     ((>= val 224) (cons 3 224))
+		     ((>= val 192) (cons 2 192))
+		     (t (cons 0 0)))
+		  (cons 6 128)))) ; bytes are still pending: shift 6, xor 128
+	  (if (>= val 192) (setq eat (car shift-xor)))
+	  (setq val (logxor val (cdr shift-xor))) ; xor VAL with the XOR value
+	  (setq sum (+ (lsh sum (car shift-xor)) val)) ; shift SUM left, then add VAL
+	  (if (> eat 0) (setq eat (- eat 1)))
+	  (cond
+	   ((= 0 eat)			;multi byte
+	    (setq ret (concat ret (org-char-to-string sum)))
+	    (setq sum 0))
+	   ((not bytes)			; single byte(s)
+	    (setq ret (org-link-unescape-single-byte-sequence hex))))
+	  )) ;; end (while bytes
+      ret )))
 
 (defun org-link-unescape-single-byte-sequence (hex)
   "Unhexify hex-encoded single byte character sequences."
-  (let ((bytes (remove "" (split-string hex "%")))
-	(ret ""))
-    (while bytes
-      (let* ((b (pop bytes))
-	     (a (elt b 0))
-	     (b (elt b 1))
-	     (c1 (if (> a ?9) (+ 10 (- a ?A)) (- a ?0)))
-	     (c2 (if (> b ?9) (+ 10 (- b ?A)) (- b ?0))))
-	(setq ret
-	      (concat ret (char-to-string
-			   (+ (lsh c1 4) c2))))))
-    ret))
+  (mapconcat (lambda (byte) ; BYTE is one piece of HEX between "%" separators
+	       (char-to-string (string-to-number byte 16))) ; parse as base 16, make a one-char string
+	     (cdr (split-string hex "%")) "")) ; cdr skips the piece before the first "%" (assumes HEX starts with "%")
 
 (defun org-xor (a b)
   "Exclusive or."