org-feed.el 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. ;;; org-feed.el --- Add RSS feed items to Org files
  2. ;;
  3. ;; Copyright (C) 2009 Free Software Foundation, Inc.
  4. ;;
  5. ;; Author: Carsten Dominik <carsten at orgmode dot org>
  6. ;; Keywords: outlines, hypermedia, calendar, wp
  7. ;; Homepage: http://orgmode.org
  8. ;; Version: 6.24trans
  9. ;;
  10. ;; This file is part of GNU Emacs.
  11. ;;
  12. ;; GNU Emacs is free software: you can redistribute it and/or modify
  13. ;; it under the terms of the GNU General Public License as published by
  14. ;; the Free Software Foundation, either version 3 of the License, or
  15. ;; (at your option) any later version.
  16. ;; GNU Emacs is distributed in the hope that it will be useful,
  17. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. ;; GNU General Public License for more details.
  20. ;; You should have received a copy of the GNU General Public License
  21. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  22. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  23. ;;
  24. ;;; Commentary:
  25. ;; This library allows to create entries in an Org-mode file from
  26. ;; RSS feeds.
  27. ;;
  28. ;; Selecting feeds and target locations
  29. ;; -----------------------------------
  30. ;;
  31. ;; This module is configured through a single variable, `org-feed-alist'.
  32. ;; Here is an example, using a notes/tasks feed from reQall.com.
  33. ;;
  34. ;; (setq org-feed-alist
  35. ;; '(("ReQall"
  36. ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
  37. ;; "~/org/feeds.org" "ReQall Entries" nil)
  38. ;;
  39. ;; With this setup, the command `M-x org-feed-update-all' will
  40. ;; collect new entries in the feed at the given URL and create
  41. ;; entries as subheading under the "ReQall Entries" heading in the
  42. ;; file "~/org.feeds.org". The final entry in this list can be
  43. ;; a filter function to further process the parsed information. For
  44. ;; example, here we turn entries with "<category>Task</category>"
  45. ;; into TODO entries by adding the keyword to the title:
  46. ;;
  47. ;; (setq org-feed-alist
  48. ;; '(("ReQall"
  49. ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
  50. ;; "~/org/feeds.org" "ReQall Entries"
  51. ;; my-raquall-filter)))
  52. ;;
  53. ;; (defun my-requall-filter (e)
  54. ;; (when (equal (plist-get e :category) "Task")
  55. ;; (setq e (plist-put e :title
  56. ;; (concat "TODO " (plist-get e :title)))))
  57. ;; e)
  58. ;;
  59. ;; The filter function may also decide that certain feed items
  60. ;; should be ignored, by returning nil instead of the entry.
  61. ;;
  62. ;; See the docstring of `org-feed-alist' for more details.
  63. ;;
  64. ;; Keeping track of old GUIDs
  65. ;; --------------------------
  66. ;;
  67. ;; Since Org allows you to delete, archive, or move outline nodes,
  68. ;; org-feed needs to keep track of all GUIDs in the feed it has
  69. ;; already processed. It does so by listing them in a special
  70. ;; drawer, FEEDGUIDS, under the heading that received the input of
  71. ;; te feed. You should add FEEDGUIDS to your list of drawers
  72. ;; in the files that receive feed input:
  73. ;;
  74. ;; #+DRAWERS: PROPERTIES LOGBOOK FEEDGUIDS
  75. ;;
  76. ;; Acknowledgements
  77. ;; ----------------
  78. ;;
  79. ;; It is based on ideas by Brad Bozarth who implemented it using
  80. ;; shell and awk scripts, and who in this way made me for the first
  81. ;; time look into an RSS feed, showing me how simple this really
  82. ;; was.
  83. (require 'org)
  84. (declare-function url-retrieve-synchronously "url" (url))
  85. (defgroup org-feed nil
  86. "Options concerning RSS feeds as inputs for Org files."
  87. :tag "Org ID"
  88. :group 'org)
  89. ;;;###autoload
  90. (defcustom org-feed-alist nil
  91. "Alist specifying RSS feeds that should create inputs for Org.
  92. Each entry in this list specified an RSS feed tat should be queried
  93. to create inbox items in Org. Each entry is a list with the following items:
  94. name a custom name for this feed
  95. URL the Feed URL
  96. file the target Org file where entries should be listed
  97. headline the headline under which entries should be listed
  98. filter a filter function to modify the property list before
  99. an Org entry is created from it.
  100. The filter function gets as a argument a property list describing the item.
  101. That list has a property for each field, for example `:title' for the
  102. `<title>' field and `:pubDate' for the publication date. In addition,
  103. it contains the following properties:
  104. `:item-full-text' the full text in the <item> tag.
  105. `:guid-permalink' t when the guid property is a permalink
  106. The filter function can modify the existing fields before an item
  107. is constructed from the `:title', `:pubDate', `:link', `:guid', and
  108. `:description' fields. For more control, the filter can construct
  109. the Org item itself, by adding a `:formatted-for-org' property that
  110. specifies the complete outline node that should be added.
  111. If the filter returns nil for some entries, these will be marked as seen
  112. but *not* inserted into the inbox."
  113. :group 'org-feed
  114. :type '(repeat
  115. (list :value ("" "http://" "" "" nil)
  116. (string :tag "Name")
  117. (string :tag "Feed URL")
  118. (file :tag "File for inbox")
  119. (string :tag "Headline for inbox")
  120. (symbol :tag "Filter Function"))))
  121. (defcustom org-feed-save-after-adding t
  122. "Non-nil means, save buffer after adding new feed items."
  123. :group 'org-feed
  124. :type 'boolean)
  125. (defcustom org-feed-retrieve-method 'url-retrieve-synchronously
  126. "The method to be used to retrieve a feed URL.
  127. This can be `curl' or `wget' to call these external programs, or it can be
  128. an Emacs Lisp function that will return a buffer containing the content
  129. of the file pointed to by the URL."
  130. :group 'org-feed
  131. :type '(choice
  132. (const :tag "Internally with url.el" url-retrieve-synchronously)
  133. (const :tag "Externally with curl" curl)
  134. (const :tag "Externally with wget" wget)
  135. (function :tag "Function")))
  136. (defcustom org-feed-after-adding-hook nil
  137. "Hook that is run after new items have been added to a file.
  138. Depending on `org-feed-save-after-adding', the buffer will already
  139. have been saved."
  140. :group 'org-feed
  141. :type 'hook)
  142. (defvar org-feed-buffer "*Org feed*"
  143. "The buffer used to retrieve a feed.")
  144. (defun org-feed-goto-inbox (file heading)
  145. "Find or create HEADING in FILE.
  146. Switch to that buffer, and return the position of that headline."
  147. (find-file file)
  148. (widen)
  149. (goto-char (point-min))
  150. (if (re-search-forward
  151. (concat "^\\*+[ \t]+" heading "[ \t]*\\(:.*?:[ \t]*\\)?$")
  152. nil t)
  153. (goto-char (match-beginning 0))
  154. (goto-char (point-max))
  155. (insert "\n\n* " heading "\n\n")
  156. (org-back-to-heading t))
  157. (point))
  158. (defun org-feed-get-old-guids (pos)
  159. "Get the list of old GUIDs from the entry at POS.
  160. This will find the FEEDGUIDS drawer and extract the IDs."
  161. (save-excursion
  162. (goto-char pos)
  163. (let ((end (save-excursion (org-end-of-subtree t t))))
  164. (if (re-search-forward
  165. "^[ \t]*:FEEDGUIDS:[ \t]*\n\\([^\000]*?\\)\n[ \t]*:END:"
  166. end t)
  167. (org-split-string (org-trim (org-match-string-no-properties 1))
  168. "[ \t]*\n[ \t]*")
  169. nil))))
  170. (defun org-feed-add-guids (pos &rest entries)
  171. "Add GUIDs to the headline at POS."
  172. (save-excursion
  173. (goto-char pos)
  174. (let ((end (save-excursion (org-end-of-subtree t t)))
  175. guid)
  176. (if (re-search-forward "^[ \t]*:FEEDGUIDS:[ \t]*\n" end t)
  177. (goto-char (match-end 0))
  178. (outline-next-heading)
  179. (insert " :FEEDGUIDS:\n :END:\n")
  180. (beginning-of-line 0))
  181. (while entries
  182. (when (setq guid (plist-get (pop entries) :guid))
  183. (insert " " guid "\n"))))))
  184. (defun org-feed-add-items (pos &rest entries)
  185. "Add the formatted items to the headline as POS."
  186. (let (entry level)
  187. (save-excursion
  188. (goto-char pos)
  189. (unless (looking-at org-complex-heading-regexp)
  190. (error "Wrong position"))
  191. (setq level (org-get-valid-level (length (match-string 1)) 1))
  192. (org-end-of-subtree t t)
  193. (skip-chars-backward " \t\n")
  194. (beginning-of-line 2)
  195. (setq pos (point))
  196. (while (setq entry (pop entries))
  197. (org-paste-subtree level (plist-get entry :formatted-for-org) 'yank))
  198. (org-mark-ring-push pos))))
  199. (defun org-feed-format (entry)
  200. "Format ENTRY so that it can be inserted into an Org file.
  201. ENTRY is a property list. This function adds a `:formatted-for-org' property
  202. and returns the full property list.
  203. If that property is already present, nothing changes."
  204. (unless (or (not entry) ; not an entry at all
  205. (plist-get entry :formatted-for-org)) ; already formatted
  206. (let (lines fmt tmp indent)
  207. (setq lines (org-split-string (or (plist-get entry :description) "???")
  208. "\n")
  209. indent " ")
  210. (setq fmt
  211. (concat
  212. "* " (or (plist-get entry :title) (car lines)) "\n"
  213. (if (setq tmp (plist-get entry :pubDate))
  214. (concat
  215. " ["
  216. (substring
  217. (format-time-string (cdr org-time-stamp-formats)
  218. (org-read-date t t tmp))
  219. 1 -1)
  220. "]\n"))
  221. (concat " :PROPERTIES:\n :FEED-GUID: "
  222. (plist-get entry :guid)
  223. "\n :END:\n")
  224. (mapconcat (lambda (x) (concat indent x)) lines "\n") "\n"
  225. (if (setq tmp (or (and (plist-get entry :guid-permalink)
  226. (plist-get entry :guid))
  227. (plist-get entry :link)))
  228. (concat " [[" tmp "]]\n")))
  229. entry (plist-put entry :formatted-for-org fmt))))
  230. entry)
  231. (defun org-feed-get-feed (url)
  232. "Get the RSS feed file at URL and return the buffer."
  233. (cond
  234. ((eq org-feed-retrieve-method 'url-retrieve-synchronously)
  235. (url-retrieve-synchronously url))
  236. ((eq org-feed-retrieve-method 'curl)
  237. (ignore-errors (kill-buffer org-feed-buffer))
  238. (call-process "curl" nil org-feed-buffer nil url)
  239. org-feed-buffer)
  240. ((eq org-feed-retrieve-method 'wget)
  241. (ignore-errors (kill-buffer org-feed-buffer))
  242. (call-process "curl" nil org-feed-buffer nil "-q" "-O" "-" url)
  243. org-feed-buffer)
  244. ((functionp org-feed-retrieve-method)
  245. (funcall org-feed-retrieve-method url))))
  246. (defun org-feed-parse-feed (buffer)
  247. "Parse BUFFER for RS feed entries.
  248. Returns a list of entries, with each entry a property list,
  249. containing the properties `:guid' and `:item-full-text'."
  250. (let (entries beg end item guid entry)
  251. (with-current-buffer buffer
  252. (widen)
  253. (goto-char (point-min))
  254. (while (re-search-forward "<item>" nil t)
  255. (setq beg (point)
  256. end (and (re-search-forward "</item>" nil t)
  257. (match-beginning 0)))
  258. (setq item (buffer-substring beg end)
  259. guid (if (string-match "<guid\\>.*?>\\(.*?\\)</guid>" item)
  260. (org-match-string-no-properties 1 item)))
  261. (setq entry (list :guid guid :item-full-text item))
  262. (push entry entries)
  263. (widen)
  264. (goto-char end))
  265. (nreverse entries))))
  266. (defun org-feed-parse-entry (entry)
  267. "Parse the `:item-full-text' field for xml tags and create new properties."
  268. (with-temp-buffer
  269. (insert (plist-get entry :item-full-text))
  270. (goto-char (point-min))
  271. (while (re-search-forward "<\\([a-zA-Z]+\\>\\).*?>\\([^\000]*?\\)</\\1>"
  272. nil t)
  273. (setq entry (plist-put entry
  274. (intern (concat ":" (match-string 1)))
  275. (match-string 2))))
  276. (goto-char (point-min))
  277. (unless (re-search-forward "isPermaLink[ \t]*=[ \t]*\"false\"" nil t)
  278. (setq entry (plist-put entry :guid-permalink t))))
  279. entry)
  280. ;;;###autoload
  281. (defun org-feed-update (feed)
  282. "Get inbox items from FEED.
  283. FEED can be a string with an association in `org-feed-alist', or
  284. it can be a list structured like an entry in `org-feed-alist'."
  285. (interactive (list (org-completing-read "Feed name: " org-feed-alist)))
  286. (if (stringp feed) (setq feed (assoc feed org-feed-alist)))
  287. (unless feed
  288. (error "No such feed in `org-feed-alist"))
  289. (let ((feed-name (car feed))
  290. (feed-url (nth 1 feed))
  291. (feed-file (nth 2 feed))
  292. (feed-headline (nth 3 feed))
  293. (feed-formatter (nth 4 feed))
  294. feed-buffer feed-pos
  295. entries old-guids new new-selected e)
  296. (setq feed-buffer (org-feed-get-feed feed-url))
  297. (unless (and feed-buffer (bufferp feed-buffer))
  298. (error "Cannot get feed %s" feed-name))
  299. (setq entries (org-feed-parse-feed feed-buffer))
  300. (ignore-errors (kill-buffer feed-buffer))
  301. (save-excursion
  302. (save-window-excursion
  303. (setq feed-pos (org-feed-goto-inbox feed-file feed-headline))
  304. (setq old-guids (org-feed-get-old-guids feed-pos))
  305. (while (setq e (pop entries))
  306. (unless (member (plist-get e :guid) old-guids)
  307. (push (org-feed-parse-entry e) new)))
  308. (if (not new)
  309. (message "No new items in feed %s" feed-name)
  310. ;; Format the new entries
  311. (setq new-selected new)
  312. (when feed-formatter
  313. (setq new-selected (mapcar feed-formatter new-selected)))
  314. (setq new-selected (mapcar 'org-feed-format new-selected))
  315. (setq new-selected (delq nil new-selected))
  316. ;; Insert them
  317. (apply 'org-feed-add-items feed-pos new-selected)
  318. (apply 'org-feed-add-guids feed-pos new)
  319. (goto-char feed-pos)
  320. (show-children)
  321. (when org-feed-save-after-adding
  322. (save-buffer))
  323. (message "Added %d new item%s from feed %s to file %s, heading %s"
  324. (length new) (if (> (length new) 1) "s" "")
  325. feed-name
  326. (file-name-nondirectory feed-file) feed-headline)
  327. (run-hooks 'org-feed-after-adding-hook))))))
  328. ;;;###autoload
  329. (defun org-feed-update-all ()
  330. "Get inbox items from all feeds in `org-feed-alist'."
  331. (interactive)
  332. (mapc 'org-feed-update org-feed-alist))
  333. (provide 'org-feed)
  334. ;;; org-feed.el ends here
  335. ;; arch-tag: 0929b557-9bc4-47f4-9633-30a12dbb5ae2