org-feed.el 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. ;;; org-feed.el --- Add RSS feed items to Org files
  2. ;;
  3. ;; Copyright (C) 2009 Free Software Foundation, Inc.
  4. ;;
  5. ;; Author: Carsten Dominik <carsten at orgmode dot org>
  6. ;; Keywords: outlines, hypermedia, calendar, wp
  7. ;; Homepage: http://orgmode.org
  8. ;; Version: 6.24trans
  9. ;;
  10. ;; This file is part of GNU Emacs.
  11. ;;
  12. ;; GNU Emacs is free software: you can redistribute it and/or modify
  13. ;; it under the terms of the GNU General Public License as published by
  14. ;; the Free Software Foundation, either version 3 of the License, or
  15. ;; (at your option) any later version.
  16. ;; GNU Emacs is distributed in the hope that it will be useful,
  17. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. ;; GNU General Public License for more details.
  20. ;; You should have received a copy of the GNU General Public License
  21. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  22. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  23. ;;
  24. ;;; Commentary:
  25. ;; This library allows to create entries in an Org-mode file from
  26. ;; RSS feeds.
  27. ;;
  28. ;; Selecting feeds and target locations
  29. ;; -----------------------------------
  30. ;;
  31. ;; This module is configured through a single variable, `org-feed-alist'.
  32. ;; Here is an example, using a notes/tasks feed from reQall.com.
  33. ;;
  34. ;; (setq org-feed-alist
  35. ;; '(("ReQall"
  36. ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
  37. ;; "~/org/feeds.org" "ReQall Entries")
  38. ;;
  39. ;; With this setup, the command `M-x org-feed-update-all' will
  40. ;; collect new entries in the feed at the given URL and create
  41. ;; entries as subheadings under the "ReQall Entries" heading in the
  42. ;; file "~/org.feeds.org".
  43. ;; In addition to these standard arguments, additional keyword-value
  44. ;; pairs are possible. For example, here we turn entries with
  45. ;; "<category>Task</category>" into TODO entries by adding the
  46. ;; keyword to the title, usinf the `:filter' argument:
  47. ;;
  48. ;; (setq org-feed-alist
  49. ;; '(("ReQall"
  50. ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
  51. ;; "~/org/feeds.org" "ReQall Entries"
  52. ;; :filter my-reqall-filter)))
  53. ;;
  54. ;; (defun my-reqall-filter (e)
  55. ;; (when (equal (plist-get e :category) "Task")
  56. ;; (setq e (plist-put e :title
  57. ;; (concat "TODO " (plist-get e :title)))))
  58. ;; e)
  59. ;;
  60. ;; A `:template' entry in the alist would override the template
  61. ;; in `org-feed-default-template' for the construction of the outline
  62. ;; node to be inserted. Another possibility would be for the filter
  63. ;; function to create the Org node for the feed item, by adding the
  64. ;; formatted entry as a `:formatted-for-org' property:
  65. ;;
  66. ;; (defun my-reqall-filter (e)
  67. ;; (setq e (plist-put
  68. ;; e :formatted-for-org
  69. ;; (format "* %s\n%s"
  70. ;; (plist-get e :title)
  71. ;; (plist-get e :description))))
  72. ;; e)
  73. ;;
  74. ;; The filter function may also decide that certain feed items
  75. ;; should be ignored, by returning nil instead of the entry.
  76. ;;
  77. ;;
  78. ;; Keeping track of old GUIDs
  79. ;; --------------------------
  80. ;;
  81. ;; Since Org allows you to delete, archive, or move outline nodes,
  82. ;; org-feed.el needs to keep track of GUIDs in the feed it has
  83. ;; already processed. It does so by listing them in a special
  84. ;; drawer, FEEDGUIDS, under the heading that received the input of
  85. ;; the feed. You should add FEEDGUIDS to your list of drawers
  86. ;; in the files that receive feed input:
  87. ;;
  88. ;; #+DRAWERS: PROPERTIES LOGBOOK FEEDGUIDS
  89. ;;
  90. ;; Acknowledgements
  91. ;; ----------------
  92. ;;
  93. ;; org-feed.el is based on ideas by Brad Bozarth who implemented a
  94. ;; similar mechanism using shell and awk scripts, and who in this
  95. ;; way made me for the first time look into an RSS feed, showing
  96. ;; how simple this really was. Because I wanted to include a
  97. ;; solution into Org with as few dependencies as possible, I
  98. ;; reimplemented his ideas in Emacs Lisp.
  99. ;;; Code:
  100. (require 'org)
  101. (declare-function url-retrieve-synchronously "url" (url))
  102. (defgroup org-feed nil
  103. "Options concerning RSS feeds as inputs for Org files."
  104. :tag "Org ID"
  105. :group 'org)
  106. ;;;###autoload
  107. (defcustom org-feed-alist nil
  108. "Alist specifying RSS feeds that should create inputs for Org.
  109. Each entry in this list specified an RSS feed tat should be queried
  110. to create inbox items in Org. Each entry is a list with the following items:
  111. name a custom name for this feed
  112. URL the Feed URL
  113. file the target Org file where entries should be listed
  114. headline the headline under which entries should be listed
  115. Additional argumetns can be given using keyword-value pairs:
  116. :template template-string
  117. The template to create an Org node from a feed item
  118. :filter filter-function
  119. A function to filter entries before Org nodes are
  120. created from them.
  121. If no template is given, the one in `org-feed-default-template' is used.
  122. See the docstring of that variable for information on the syntax of this
  123. template. If creating the node required more logic than a template can
  124. provide, this task can be delegated to the filter function.
  125. The filter function gets as a argument a property list describing the item.
  126. That list has a property for each field, for example `:title' for the
  127. `<title>' field and `:pubDate' for the publication date. In addition,
  128. it contains the following properties:
  129. `:item-full-text' the full text in the <item> tag
  130. `:guid-permalink' t when the guid property is a permalink
  131. The filter function can modify the existing fields before an item
  132. is constructed using the template. Or it can construct the node directly,
  133. by adding a `:formatted-for-org' property that specifies the complete
  134. outline node that should be added.
  135. The filter should return the modified entry property list. It may also
  136. return nil to indicate that this entry should not be added to the Org file
  137. at all."
  138. :group 'org-feed
  139. :type '(repeat
  140. (list :value ("" "http://" "" "")
  141. (string :tag "Name")
  142. (string :tag "Feed URL")
  143. (file :tag "File for inbox")
  144. (string :tag "Headline for inbox")
  145. (repeat :inline t
  146. (choice
  147. (list :inline t :tag "Template"
  148. (const :template) (string :tag "Template"))
  149. (list :inline t :tag "Filter"
  150. (const :filter) (symbol :tag "Filter Function")))))))
  151. (defcustom org-feed-default-template "* %h\n %U\n %description\n %a\n"
  152. "Template for the Org node created from RSS feed items.
  153. This is just the default, each feed can specify its own.
  154. Any fields from the feed item can be interpolated into the template with
  155. %name, for example %title, %description, %pubDate etc. In addition, the
  156. following special escapes are valid as well:
  157. %h the title, or the first line of the description
  158. %t the date as a stamp, either from <pubDate> (if present), or
  159. the current date.
  160. %T date and time
  161. %u,%U like %t,%T, but inactive time stamps
  162. %a A link, from <guid> if that is a permalink, else from <link>"
  163. :group 'org-feed
  164. :type '(string :tag "Template"))
  165. (defcustom org-feed-save-after-adding t
  166. "Non-nil means, save buffer after adding new feed items."
  167. :group 'org-feed
  168. :type 'boolean)
  169. (defcustom org-feed-retrieve-method 'url-retrieve-synchronously
  170. "The method to be used to retrieve a feed URL.
  171. This can be `curl' or `wget' to call these external programs, or it can be
  172. an Emacs Lisp function that will return a buffer containing the content
  173. of the file pointed to by the URL."
  174. :group 'org-feed
  175. :type '(choice
  176. (const :tag "Internally with url.el" url-retrieve-synchronously)
  177. (const :tag "Externally with curl" curl)
  178. (const :tag "Externally with wget" wget)
  179. (function :tag "Function")))
  180. (defcustom org-feed-assume-stable t
  181. "Non-nil means, assume feeds to be stable.
  182. A stable feed is one which only adds and removes items, but never removes
  183. an item with a given GUID and then later adds it back in. So if the feed
  184. is stable, this means we can simple remember the GUIDs present in the feed
  185. at any given time, as the ones we have seen and precessed. So we can
  186. forget GUIDs that used to be in the feed but no longer are.
  187. Thus, for stable feeds, we only need to remember a limited number of GUIDs.
  188. For unstable ones, we need to remember all GUIDs we have ever seen, which
  189. can be a very long list indeed."
  190. :group 'org-feed
  191. :type 'boolean)
  192. (defcustom org-feed-before-adding-hook nil
  193. "Hook that is run before adding new feed items to a file.
  194. You might want to commit the file in its current state to version control,
  195. for example."
  196. :group 'org-feed
  197. :type 'hook)
  198. (defcustom org-feed-after-adding-hook nil
  199. "Hook that is run after new items have been added to a file.
  200. Depending on `org-feed-save-after-adding', the buffer will already
  201. have been saved."
  202. :group 'org-feed
  203. :type 'hook)
  204. (defvar org-feed-buffer "*Org feed*"
  205. "The buffer used to retrieve a feed.")
  206. ;;;###autoload
  207. (defun org-feed-update-all ()
  208. "Get inbox items from all feeds in `org-feed-alist'."
  209. (interactive)
  210. (let ((nfeeds (length org-feed-alist))
  211. (nnew (apply '+ (mapcar 'org-feed-update org-feed-alist))))
  212. (message "%s from %d %s"
  213. (cond ((= nnew 0) "No new entries")
  214. ((= nnew 1) "1 new entry")
  215. (t (format "%d new entries" nnew)))
  216. nfeeds
  217. (if (= nfeeds 1) "feed" "feeds"))))
  218. ;;;###autoload
  219. (defun org-feed-update (feed)
  220. "Get inbox items from FEED.
  221. FEED can be a string with an association in `org-feed-alist', or
  222. it can be a list structured like an entry in `org-feed-alist'."
  223. (interactive (list (org-completing-read "Feed name: " org-feed-alist)
  224. current-prefix-arg))
  225. (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  226. (unless feed
  227. (error "No such feed in `org-feed-alist"))
  228. (let ((feed-name (car feed))
  229. (feed-url (nth 1 feed))
  230. (feed-file (nth 2 feed))
  231. (feed-headline (nth 3 feed))
  232. (feed-filter (nth 1 (memq :filter feed)))
  233. (feed-template (or (nth 1 (memq :template feed))
  234. org-feed-default-template))
  235. feed-buffer feed-pos
  236. entries entries2 old-guids current-guids new new-selected e)
  237. (setq feed-buffer (org-feed-get-feed feed-url))
  238. (unless (and feed-buffer (bufferp feed-buffer))
  239. (error "Cannot get feed %s" feed-name))
  240. (setq entries (org-feed-parse-feed feed-buffer)
  241. entries2 entries)
  242. (ignore-errors (kill-buffer feed-buffer))
  243. (save-excursion
  244. (save-window-excursion
  245. (setq feed-pos (org-feed-goto-inbox-internal feed-file feed-headline))
  246. (setq old-guids (org-feed-get-old-guids feed-pos))
  247. (while (setq e (pop entries2))
  248. (unless (member (plist-get e :guid) old-guids)
  249. (push (org-feed-parse-entry e) new)))
  250. (if (not new)
  251. (progn (message "No new items in feed %s" feed-name) 0)
  252. ;; Format the new entries
  253. (run-hooks 'org-feed-before-adding-hook)
  254. (setq new-selected new)
  255. (when feed-filter
  256. (setq new-selected (mapcar feed-filter new-selected)))
  257. (setq new-selected
  258. (delq nil
  259. (mapcar
  260. (lambda (e) (org-feed-format-entry e feed-template))
  261. new-selected)))
  262. ;; Insert the new items
  263. (apply 'org-feed-add-items feed-pos new-selected)
  264. ;; Update the list of seen GUIDs in a drawer
  265. (if org-feed-assume-stable
  266. (apply 'org-feed-add-guids feed-pos 'replace entries)
  267. (apply 'org-feed-add-guids feed-pos nil new))
  268. (goto-char feed-pos)
  269. (show-children)
  270. (when org-feed-save-after-adding
  271. (save-buffer))
  272. (message "Added %d new item%s from feed %s to file %s, heading %s"
  273. (length new) (if (> (length new) 1) "s" "")
  274. feed-name
  275. (file-name-nondirectory feed-file) feed-headline)
  276. (run-hooks 'org-feed-after-adding-hook)
  277. (length new))))))
  278. ;;;###autoload
  279. (defun org-feed-goto-inbox (feed)
  280. "Go to the inbox that captures feed FEED."
  281. (interactive
  282. (list (if (= (length org-feed-alist) 1)
  283. (car org-feed-alist)
  284. (org-completing-read "Feed name: " org-feed-alist))))
  285. (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  286. (unless feed
  287. (error "No such feed in `org-feed-alist"))
  288. (org-feed-goto-inbox (nth 2 feed) (nth 3 feed)))
  289. (defun org-feed-goto-inbox-internal (file heading)
  290. "Find or create HEADING in FILE.
  291. Switch to that buffer, and return the position of that headline."
  292. (find-file file)
  293. (widen)
  294. (goto-char (point-min))
  295. (if (re-search-forward
  296. (concat "^\\*+[ \t]+" heading "[ \t]*\\(:.*?:[ \t]*\\)?$")
  297. nil t)
  298. (goto-char (match-beginning 0))
  299. (goto-char (point-max))
  300. (insert "\n\n* " heading "\n\n")
  301. (org-back-to-heading t))
  302. (point))
  303. (defun org-feed-get-old-guids (pos)
  304. "Get the list of old GUIDs from the entry at POS.
  305. This will find the FEEDGUIDS drawer and extract the IDs."
  306. (save-excursion
  307. (goto-char pos)
  308. (let ((end (save-excursion (org-end-of-subtree t t))))
  309. (if (re-search-forward
  310. "^[ \t]*:FEEDGUIDS:[ \t]*\n\\([^\000]*?\\)\n[ \t]*:END:"
  311. end t)
  312. (org-split-string (org-trim (org-match-string-no-properties 1))
  313. "[ \t]*\n[ \t]*")
  314. nil))))
  315. (defun org-feed-add-guids (pos replace &rest entries)
  316. "Add GUIDs for headline at POS.
  317. When REPLACE is non-nil, replace all GUIDs by the new ones."
  318. (save-excursion
  319. (goto-char pos)
  320. (let ((end (save-excursion (org-end-of-subtree t t)))
  321. guid)
  322. (if (re-search-forward "^[ \t]*:FEEDGUIDS:[ \t]*\n" end t)
  323. (progn
  324. (goto-char (match-end 0))
  325. (when replace
  326. (delete-region (point)
  327. (save-excursion
  328. (and (re-search-forward "^[ \t]*:END:" nil t)
  329. (match-beginning 0))))))
  330. (outline-next-heading)
  331. (insert " :FEEDGUIDS:\n :END:\n")
  332. (beginning-of-line 0))
  333. (while entries
  334. (when (setq guid (plist-get (pop entries) :guid))
  335. (insert " " guid "\n"))))))
  336. (defun org-feed-add-items (pos &rest entries)
  337. "Add the formatted items to the headline as POS."
  338. (let (entry level)
  339. (save-excursion
  340. (goto-char pos)
  341. (unless (looking-at org-complex-heading-regexp)
  342. (error "Wrong position"))
  343. (setq level (org-get-valid-level (length (match-string 1)) 1))
  344. (org-end-of-subtree t t)
  345. (skip-chars-backward " \t\n")
  346. (beginning-of-line 2)
  347. (setq pos (point))
  348. (while (setq entry (pop entries))
  349. (org-paste-subtree level (plist-get entry :formatted-for-org) 'yank))
  350. (org-mark-ring-push pos))))
  351. (defun org-feed-format-entry (entry template)
  352. "Format ENTRY so that it can be inserted into an Org file.
  353. ENTRY is a property list. This function adds a `:formatted-for-org' property
  354. and returns the full property list.
  355. If that property is already present, nothing changes."
  356. (unless (or (not entry) ; not an entry at all
  357. (plist-get entry :formatted-for-org)) ; already formatted
  358. (let (dlines fmt tmp indent)
  359. (setq dlines (org-split-string (or (plist-get entry :description) "???")
  360. "\n")
  361. v-h (or (plist-get entry :title) (car dlines) "???")
  362. time (or (if (plist-get entry :pubDate)
  363. (org-read-date t t (plist-get entry :pubDate)))
  364. (current-time))
  365. v-t (format-time-string (org-time-stamp-format nil nil) time)
  366. v-T (format-time-string (org-time-stamp-format t nil) time)
  367. v-u (format-time-string (org-time-stamp-format nil t) time)
  368. v-U (format-time-string (org-time-stamp-format t t) time)
  369. v-a (if (setq tmp (or (and (plist-get entry :guid-permalink)
  370. (plist-get entry :guid))
  371. (plist-get entry :link)))
  372. (concat "[[" tmp "]]\n")
  373. ""))
  374. (with-temp-buffer
  375. (insert template)
  376. (debug)
  377. (goto-char (point-min))
  378. (while (re-search-forward "%\\([a-zA-Z]+\\)" nil t)
  379. (setq name (match-string 1))
  380. (cond
  381. ((member name '("h" "t" "T" "u" "U" "a"))
  382. (replace-match (symbol-value (intern (concat "v-" name))) t t))
  383. ((setq tmp (plist-get entry (intern (concat ":" name))))
  384. (save-excursion
  385. (save-match-data
  386. (beginning-of-line 1)
  387. (when (looking-at (concat "^\\([ \t]*\\)%" name "[ \t]*$"))
  388. (setq tmp (org-feed-make-indented-block
  389. tmp (org-get-indentation))))))
  390. (replace-match tmp t t))
  391. t))
  392. (setq entry (plist-put entry :formatted-for-org (buffer-string))))))
  393. entry)
  394. (defun org-feed-make-indented-block (s n)
  395. "Add indentaton of N spaces to a multiline string S."
  396. (if (not (string-match "\n" s))
  397. s
  398. (mapconcat 'identity
  399. (org-split-string s "\n")
  400. (concat "\n" (make-string n ?\ )))))
  401. (defun org-feed-get-feed (url)
  402. "Get the RSS feed file at URL and return the buffer."
  403. (cond
  404. ((eq org-feed-retrieve-method 'url-retrieve-synchronously)
  405. (url-retrieve-synchronously url))
  406. ((eq org-feed-retrieve-method 'curl)
  407. (ignore-errors (kill-buffer org-feed-buffer))
  408. (call-process "curl" nil org-feed-buffer nil url)
  409. org-feed-buffer)
  410. ((eq org-feed-retrieve-method 'wget)
  411. (ignore-errors (kill-buffer org-feed-buffer))
  412. (call-process "curl" nil org-feed-buffer nil "-q" "-O" "-" url)
  413. org-feed-buffer)
  414. ((functionp org-feed-retrieve-method)
  415. (funcall org-feed-retrieve-method url))))
  416. (defun org-feed-parse-feed (buffer)
  417. "Parse BUFFER for RS feed entries.
  418. Returns a list of entries, with each entry a property list,
  419. containing the properties `:guid' and `:item-full-text'."
  420. (let (entries beg end item guid entry)
  421. (with-current-buffer buffer
  422. (widen)
  423. (goto-char (point-min))
  424. (while (re-search-forward "<item>" nil t)
  425. (setq beg (point)
  426. end (and (re-search-forward "</item>" nil t)
  427. (match-beginning 0)))
  428. (setq item (buffer-substring beg end)
  429. guid (if (string-match "<guid\\>.*?>\\(.*?\\)</guid>" item)
  430. (org-match-string-no-properties 1 item)))
  431. (setq entry (list :guid guid :item-full-text item))
  432. (push entry entries)
  433. (widen)
  434. (goto-char end))
  435. (nreverse entries))))
  436. (defun org-feed-parse-entry (entry)
  437. "Parse the `:item-full-text' field for xml tags and create new properties."
  438. (with-temp-buffer
  439. (insert (plist-get entry :item-full-text))
  440. (goto-char (point-min))
  441. (while (re-search-forward "<\\([a-zA-Z]+\\>\\).*?>\\([^\000]*?\\)</\\1>"
  442. nil t)
  443. (setq entry (plist-put entry
  444. (intern (concat ":" (match-string 1)))
  445. (match-string 2))))
  446. (goto-char (point-min))
  447. (unless (re-search-forward "isPermaLink[ \t]*=[ \t]*\"false\"" nil t)
  448. (setq entry (plist-put entry :guid-permalink t))))
  449. entry)
  450. (provide 'org-feed)
  451. ;;; org-feed.el ends here
  452. ;; arch-tag: 0929b557-9bc4-47f4-9633-30a12dbb5ae2