12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722 |
- ;;; org-feed.el --- Add RSS feed items to Org files -*- lexical-binding: t; -*-
- ;;
- ;; Copyright (C) 2009-2022 Free Software Foundation, Inc.
- ;;
- ;; Author: Carsten Dominik <carsten.dominik@gmail.com>
- ;; Keywords: outlines, hypermedia, calendar, wp
- ;; URL: https://orgmode.org
- ;;
- ;; This file is part of GNU Emacs.
- ;;
- ;; GNU Emacs is free software: you can redistribute it and/or modify
- ;; it under the terms of the GNU General Public License as published by
- ;; the Free Software Foundation, either version 3 of the License, or
- ;; (at your option) any later version.
- ;; GNU Emacs is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;; GNU General Public License for more details.
- ;; You should have received a copy of the GNU General Public License
- ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;
- ;;; Commentary:
- ;;
- ;; This module allows entries to be created and changed in an Org mode
- ;; file triggered by items in an RSS feed. The basic functionality
- ;; is geared toward simply adding new items found in a feed as
- ;; outline nodes to an Org file. Using hooks, arbitrary actions can
- ;; be triggered for new or changed items.
- ;;
- ;; Selecting feeds and target locations
- ;; ------------------------------------
- ;;
- ;; This module is configured through a single variable, `org-feed-alist'.
- ;; Here is an example, using a notes/tasks feed from reQall.com.
- ;;
- ;; (setq org-feed-alist
- ;; '(("ReQall"
- ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
- ;; "~/org/feeds.org" "ReQall Entries")))
- ;;
- ;; With this setup, the command `M-x org-feed-update-all' will
- ;; collect new entries in the feed at the given URL and create
- ;; entries as subheadings under the "ReQall Entries" heading in the
- ;; file "~/org/feeds.org". Each feed should normally have its own
- ;; heading - however see the `:drawer' parameter.
- ;;
- ;; Besides these standard elements that need to be specified for each
- ;; feed, keyword-value pairs can set additional options. For example,
- ;; to de-select transitional entries with a title containing
- ;;
- ;; "reQall is typing what you said",
- ;;
- ;; you could use the `:filter' argument:
- ;;
- ;; (setq org-feed-alist
- ;; '(("ReQall"
- ;; "http://www.reqall.com/user/feeds/rss/a1b2c3....."
- ;; "~/org/feeds.org" "ReQall Entries"
- ;; :filter my-reqall-filter)))
- ;;
- ;; (defun my-reqall-filter (e)
- ;; (if (string-match "reQall is typing what you said"
- ;; (plist-get e :title))
- ;; nil
- ;; e))
- ;;
- ;; See the docstring for `org-feed-alist' for more details.
- ;;
- ;;
- ;; Keeping track of previously added entries
- ;; -----------------------------------------
- ;;
- ;; Since Org allows you to delete, archive, or move outline nodes,
- ;; org-feed.el needs to keep track of which feed items have been handled
- ;; before, so that they will not be handled again. For this, org-feed.el
- ;; stores information in a special drawer, FEEDSTATUS, under the heading
- ;; that received the input of the feed.
- ;;
- ;;
- ;; Acknowledgments
- ;; ---------------
- ;;
- ;; org-feed.el is based on ideas by Brad Bozarth who implemented a
- ;; similar mechanism using shell and awk scripts.
- ;;; Code:
- (require 'org-macs)
- (org-assert-version)
- (require 'org)
- (require 'sha1)
- (declare-function url-retrieve-synchronously "url"
- (url &optional silent inhibit-cookies timeout))
- (declare-function xml-node-children "xml" (node))
- (declare-function xml-get-children "xml" (node child-name))
- (declare-function xml-get-attribute "xml" (node attribute))
- (declare-function xml-get-attribute-or-nil "xml" (node attribute))
- (declare-function xml-substitute-special "xml" (string))
- (declare-function org-capture-escaped-% "org-capture" ())
- (declare-function org-capture-expand-embedded-elisp "org-capture" (&optional mark))
- (declare-function org-capture-inside-embedded-elisp-p "org-capture" ())
;; Customization group collecting every user option of this library.
(defgroup org-feed nil
  "Settings for using RSS feeds as a source of entries in Org files."
  :group 'org
  :tag "Org Feed")
(defcustom org-feed-alist nil
  "Alist specifying RSS feeds that should create inputs for Org.
Each entry in this list specifies an RSS feed that should be queried
to create inbox items in Org.  Each entry is a list with the following items:

name         a custom name for this feed
URL          the Feed URL
file         the target Org file where entries should be listed, when
             nil the target becomes the current buffer (may be an
             indirect buffer) each time the feed update is invoked
headline     the headline under which entries should be listed

Additional arguments can be given using keyword-value pairs.  Many of these
specify functions that receive one or a list of \"entries\" as their single
argument.  An entry is a property list that describes a feed item.  The
property list has properties for each field in the item, for example `:title'
for the `<title>' field and `:pubDate' for the publication date.  In addition,
it contains the following properties:

`:item-full-text'   the full text in the <item> tag
`:guid-permalink'   t when the guid property is a permalink

Here are the keyword-value pairs allowed in `org-feed-alist'.

:drawer drawer-name
     The name of the drawer for storing feed information.  The default is
     \"FEEDSTATUS\".  Using different drawers for different feeds allows
     several feeds to target the same inbox heading.

:filter filter-function
     A function to select interesting entries in the feed.  It gets a single
     entry as parameter.  It should return the entry if it is relevant, or
     nil if it is not.

:template template-string
     The default action on new items in the feed is to add them as children
     under the headline for the feed.  The template describes how the entry
     should be formatted.  If not given, it defaults to
     `org-feed-default-template'.

:formatter formatter-function
     Instead of relying on a template, you may specify a function to format
     the outline node to be inserted as a child.  This function gets passed
     a property list describing a single feed item, and it should return a
     string that is a properly formatted Org outline node of level 1.

:new-handler function
     If adding new items as children to the outline is not what you want
     to do with new items, define a handler function that is called with
     a list of all new items in the feed, each one represented as a property
     list.  The handler should do what needs to be done, and org-feed will
     mark all items given to this handler as \"handled\", i.e. they will not
     be passed to this handler again in future readings of the feed.
     When the handler is called, point will be at the feed headline.

:changed-handler function
     This function gets passed a list of all entries that have been
     handled before, but are now still in the feed and have *changed*
     since last handled (as evidenced by a different sha1 hash).
     When the handler is called, point will be at the feed headline.

:parse-feed function
     This function gets passed a buffer, and should return a list
     of entries, each being a property list containing the
     `:guid' and `:item-full-text' keys.  The default is
     `org-feed-parse-rss-feed'; `org-feed-parse-atom-feed' is an
     alternative.

:parse-entry function
     This function gets passed an entry as returned by the parse-feed
     function, and should return the entry with interesting properties added.
     The default is `org-feed-parse-rss-entry'; `org-feed-parse-atom-entry'
     is an alternative."
  :group 'org-feed
  :type '(repeat
          (list :value ("" "http://" "" "")
                (string :tag "Name")
                (string :tag "Feed URL")
                ;; A nil file is documented above as "use the current
                ;; buffer", so the customization type has to accept it.
                (choice :tag "File for inbox"
                        (file :tag "File")
                        (const :tag "Current buffer" nil))
                (string :tag "Headline for inbox")
                (repeat :inline t
                        (choice
                         ;; `:drawer' is a documented keyword and must be
                         ;; accepted by Customize as well.
                         (list :inline t :tag "Drawer"
                               (const :drawer)
                               (string :tag "Drawer Name"))
                         (list :inline t :tag "Filter"
                               (const :filter)
                               (symbol :tag "Filter Function"))
                         (list :inline t :tag "Template"
                               (const :template)
                               (string :tag "Template"))
                         (list :inline t :tag "Formatter"
                               (const :formatter)
                               (symbol :tag "Formatter Function"))
                         (list :inline t :tag "New items handler"
                               (const :new-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Changed items"
                               (const :changed-handler)
                               (symbol :tag "Handler Function"))
                         (list :inline t :tag "Parse Feed"
                               (const :parse-feed)
                               (symbol :tag "Parse Feed Function"))
                         (list :inline t :tag "Parse Entry"
                               (const :parse-entry)
                               (symbol :tag "Parse Entry Function")))))))
;; Fallback drawer name; `org-feed-update' reads it whenever a feed
;; entry carries no `:drawer' keyword of its own.
(defcustom org-feed-drawer "FEEDSTATUS"
  "Default name of the drawer that stores feed status information.
A feed can override this name with the `:drawer' parameter of its
entry in `org-feed-alist'."
  :type '(string :tag "Drawer Name")
  :group 'org-feed)
;; Used by `org-feed-update' for feeds without a `:template' keyword.
(defcustom org-feed-default-template "\n* %h\n %U\n %description\n %a\n"
  "Default template for the Org node created from an RSS feed item.
Individual feeds may specify their own template instead.

Any field of the feed item can be interpolated with %name, for
example %title, %description or %pubDate.  The following special
escapes are recognized as well:

%h      The title, or the first line of the description
%t      The date as a stamp, either from <pubDate> (if present), or
        the current date
%T      Date and time
%u,%U   Like %t,%T, but inactive time stamps
%a      A link, from <guid> if that is a permalink, else from <link>
%(sexp) Evaluate elisp `(sexp)' and replace with the result, the simple
        %-escapes above can be used as arguments, e.g. %(capitalize \\\"%h\\\")"
  :type '(string :tag "Template")
  :group 'org-feed)
;; Checked by `org-feed-update' once the status drawer has been written.
(defcustom org-feed-save-after-adding t
  "When non-nil, save the target buffer after new feed items were added."
  :type 'boolean
  :group 'org-feed)
;; Dispatched on by `org-feed-get-feed'.
(defcustom org-feed-retrieve-method 'url-retrieve-synchronously
  "How the content behind a feed URL is retrieved.
The symbols `curl' and `wget' select the external program of the
same name.  Any other value must be an Emacs Lisp function that
takes the URL and returns a buffer holding the content of the
file the URL points to."
  :type '(choice
          (const :tag "Internally with url.el" url-retrieve-synchronously)
          (const :tag "Externally with curl" curl)
          (const :tag "Externally with wget" wget)
          (function :tag "Function"))
  :group 'org-feed)
;; User hook; empty by default.
(defcustom org-feed-before-adding-hook nil
  "Hook run before new feed items are added to a file.
A typical use is to commit the current state of the file to
version control first."
  :type 'hook
  :group 'org-feed)
;; User hook; `org-feed-update' runs it as its last step before returning.
(defcustom org-feed-after-adding-hook nil
  "Hook run after new items have been added to a file.
When `org-feed-save-after-adding' is non-nil, the buffer has
already been saved by the time this hook runs."
  :type 'hook
  :group 'org-feed)
;; Only the `curl' and `wget' retrieval methods write into this buffer.
(defvar org-feed-buffer "*Org feed*"
  "Name of the buffer receiving feed data fetched by an external program.")
- ;;;###autoload
(defun org-feed-update-all ()
  "Update every feed listed in `org-feed-alist' and report the outcome.
Each feed is processed with `org-feed-update'.  A feed whose
update signals an error, or returns nil, is counted as
unavailable instead of aborting the whole run.  The echo area
then shows how many new entries were collected from how many
feeds."
  (interactive)
  (let ((feed-count (length org-feed-alist))
        (new-count 0)
        (failed 0))
    (dolist (feed org-feed-alist)
      ;; `ignore-errors' turns a failing feed into a nil result.
      (let ((added (ignore-errors (org-feed-update feed))))
        (if (null added)
            (cl-incf failed)
          (cl-incf new-count added))))
    (let ((summary (cond ((eql new-count 0) "No new entries")
                         ((eql new-count 1) "1 new entry")
                         (t (format "%d new entries" new-count))))
          (noun (if (= feed-count 1) "feed" "feeds"))
          (suffix (if (zerop failed)
                      ""
                    (format " (unavailable feeds: %d)" failed))))
      (message "%s from %d %s%s" summary feed-count noun suffix))))
- ;;;###autoload
(defun org-feed-update (feed &optional retrieve-only)
  "Get inbox items from FEED.
FEED can be a string with an association in `org-feed-alist', or
it can be a list structured like an entry in `org-feed-alist'.

When RETRIEVE-ONLY is non-nil, only fetch the feed and return the
buffer holding the raw data; no Org file is visited or modified.

Otherwise return the number of new items that were handled, which
is 0 when the feed contains neither new nor changed items.

Signal an error when FEED is not a known feed or when its data
cannot be retrieved."
  (interactive (list (org-completing-read "Feed name: " org-feed-alist)))
  (when (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (catch 'exit
    (let ((name (car feed))
          (url (nth 1 feed))
          ;; A nil file means: use the file of the current buffer (or of
          ;; its base buffer when the current one is indirect).
          (file (or (nth 2 feed) (buffer-file-name (or (buffer-base-buffer)
                                                       (current-buffer)))))
          (headline (nth 3 feed))
          (filter (nth 1 (memq :filter feed)))
          (formatter (nth 1 (memq :formatter feed)))
          (new-handler (nth 1 (memq :new-handler feed)))
          (changed-handler (nth 1 (memq :changed-handler feed)))
          (template (or (nth 1 (memq :template feed))
                        org-feed-default-template))
          (drawer (or (nth 1 (memq :drawer feed))
                      org-feed-drawer))
          (parse-feed (or (nth 1 (memq :parse-feed feed))
                          'org-feed-parse-rss-feed))
          (parse-entry (or (nth 1 (memq :parse-entry feed))
                           'org-feed-parse-rss-entry))
          feed-buffer inbox-pos new-formatted
          entries old-status status new changed guid-alist)
      (setq feed-buffer (org-feed-get-feed url))
      (unless (and feed-buffer (bufferp (get-buffer feed-buffer)))
        (error "Cannot get feed %s" name))
      (when retrieve-only
        (throw 'exit feed-buffer))
      (setq entries (funcall parse-feed feed-buffer))
      (ignore-errors (kill-buffer feed-buffer))
      (save-excursion
        (save-window-excursion
          (setq inbox-pos (org-feed-goto-inbox-internal file headline))
          (setq old-status (org-feed-read-previous-status inbox-pos drawer))
          ;; Add the "handled" status to the appropriate entries
          (setq entries (mapcar (lambda (e)
                                  (plist-put e :handled
                                             (nth 1 (assoc
                                                     (plist-get e :guid)
                                                     old-status))))
                                entries))
          ;; Find out which entries are new and which are changed
          (dolist (e entries)
            (if (not (plist-get e :handled))
                (push e new)
              (let ((old-hash (nth 2 (assoc (plist-get e :guid) old-status))))
                (when (and old-hash
                           (not (string= (sha1 (plist-get e :item-full-text))
                                         old-hash)))
                  (push e changed)))))
          ;; Parse the relevant entries fully
          (setq new (mapcar parse-entry new)
                changed (mapcar parse-entry changed))
          ;; Run the filter.  Each list is filtered on its own; the
          ;; changed items must not be replaced by the filtered new ones.
          (when filter
            (setq new (delq nil (mapcar filter new))
                  changed (delq nil (mapcar filter changed))))
          (when (not (or new changed))
            (message "No new items in feed %s" name)
            (throw 'exit 0))
          ;; Get alist based on guid, to look up entries
          (setq guid-alist
                (append
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) new)
                 (mapcar (lambda (e) (list (plist-get e :guid) e)) changed)))
          ;; Construct the new status
          (setq status
                (mapcar
                 (lambda (e)
                   (let ((guid (plist-get e :guid)))
                     (list guid
                           ;; things count as handled if we handle them now,
                           ;; or if they were handled previously
                           (if (assoc guid guid-alist) t (plist-get e :handled))
                           ;; A hash, to detect changes
                           (sha1 (plist-get e :item-full-text)))))
                 entries))
          ;; Give the user a chance to act (e.g. commit the file) before
          ;; anything is modified, as `org-feed-before-adding-hook' promises.
          (run-hooks 'org-feed-before-adding-hook)
          ;; Handle new items in the feed
          (when new
            (if new-handler
                (progn
                  (goto-char inbox-pos)
                  (funcall new-handler new))
              ;; No custom handler, do the default adding
              ;; Format the new entries into an alist with GUIDs in the car
              (setq new-formatted
                    (mapcar
                     (lambda (e) (org-feed-format-entry e template formatter))
                     new)))
            ;; Insert the new items
            (org-feed-add-items inbox-pos new-formatted))
          ;; Handle changed items in the feed
          (when (and changed-handler changed)
            (goto-char inbox-pos)
            (funcall changed-handler changed))
          ;; Write the new status
          ;; We do this only now, in case something goes wrong above, so
          ;; that we do not end up with a status that does not reflect
          ;; which items truly have been handled
          (org-feed-write-status inbox-pos drawer status)
          ;; Normalize the visibility of the inbox tree
          (goto-char inbox-pos)
          (org-fold-subtree t)
          (org-fold-show-children)
          ;; Hooks and messages
          (when org-feed-save-after-adding (save-buffer))
          (message "Added %d new item%s from feed %s to file %s, heading %s"
                   (length new) (if (= (length new) 1) "" "s")
                   name
                   (file-name-nondirectory file) headline)
          (run-hooks 'org-feed-after-adding-hook)
          (length new))))))
- ;;;###autoload
(defun org-feed-goto-inbox (feed)
  "Go to the inbox that captures the feed named FEED.
FEED is either the name of a feed in `org-feed-alist' or a list
structured like an entry of that variable.  When the feed does
not name a target file, the file of the current buffer (or of its
base buffer) is used, exactly as `org-feed-update' does.

Return the position of the inbox headline.  Signal an error when
FEED is unknown or when no target file can be determined."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (when (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (let ((file (or (nth 2 feed)
                  ;; A nil file designates the current buffer.
                  (buffer-file-name (or (buffer-base-buffer)
                                        (current-buffer))))))
    (unless file
      (error "Feed %s has no target file and the current buffer visits none"
             (car feed)))
    (org-feed-goto-inbox-internal file (nth 3 feed))))
- ;;;###autoload
(defun org-feed-show-raw-feed (feed)
  "Show the raw, unparsed data of FEED in the selected window.
FEED is either the name of a feed in `org-feed-alist' or a list
structured like an entry of that variable.  The data is fetched
with `org-feed-update' in retrieve-only mode, so no Org file is
modified.  Point is left at the beginning of the feed buffer.

Signal an error when FEED is unknown."
  (interactive
   (list (if (= (length org-feed-alist) 1)
             (car org-feed-alist)
           (org-completing-read "Feed name: " org-feed-alist))))
  (when (stringp feed) (setq feed (assoc feed org-feed-alist)))
  (unless feed
    (error "No such feed in `org-feed-alist'"))
  (pop-to-buffer-same-window
   (org-feed-update feed 'retrieve-only))
  (goto-char (point-min)))
(defun org-feed-goto-inbox-internal (file heading)
  "Find or create HEADING in FILE.
Visit FILE, widen the buffer and look for a headline (of any
level, optionally followed by tags) whose text is HEADING.  When
there is none, append HEADING as a new top-level headline at the
end of the buffer.

Switch to that buffer, and return the position of that headline."
  (find-file file)
  (widen)
  (goto-char (point-min))
  (if (re-search-forward
       ;; HEADING is literal text, not a regexp: quote it so that titles
       ;; such as "C++" or "News [daily]" are found instead of being
       ;; appended again on every run.
       (concat "^\\*+[ \t]+" (regexp-quote heading)
               "[ \t]*\\(:.*?:[ \t]*\\)?$")
       nil t)
      (goto-char (match-beginning 0))
    (goto-char (point-max))
    (insert "\n\n* " heading "\n\n")
    (org-back-to-heading t))
  (point))
(defun org-feed-read-previous-status (pos drawer)
  "Get the alist of old GUIDs from the entry at POS.
Search the subtree starting at POS for the drawer named DRAWER
and return its content as read by the Lisp reader.  Return nil
when the drawer does not exist or holds only whitespace."
  (save-excursion
    (goto-char pos)
    (let ((end (save-excursion (org-end-of-subtree t t))))
      (when (re-search-forward
             ;; DRAWER is a literal name; quote it for use in the regexp.
             (concat "^[ \t]*:" (regexp-quote drawer)
                     ":[ \t]*\n\\([^\000]*?\\)\n[ \t]*:END:")
             end t)
        (let ((text (match-string 1)))
          ;; `read' signals `end-of-file' on blank input, so treat a
          ;; blank drawer like a missing one.
          (unless (string-match-p "\\`[ \t\n]*\\'" text)
            (read text)))))))
(defun org-feed-write-status (pos drawer status)
  "Write the feed STATUS to DRAWER in entry at POS.
When the subtree at POS already contains a drawer named DRAWER,
replace its content with the pretty-printed STATUS.  Otherwise
create the drawer right before the next headline and fill it.

Signal an error when the drawer exists but has no :END: line
inside the subtree; the buffer is left untouched in that case."
  (save-excursion
    (goto-char pos)
    (let ((end (save-excursion (org-end-of-subtree t t))))
      (if (re-search-forward
           ;; DRAWER is a literal name; quote it for use in the regexp.
           (concat "^[ \t]*:" (regexp-quote drawer) ":[ \t]*\n")
           end t)
          (let ((content-start (match-end 0))
                ;; Stay inside the subtree: an unbounded search could
                ;; find the :END: of a later entry and delete everything
                ;; in between.
                (content-end (save-excursion
                               (and (re-search-forward "^[ \t]*:END:" end t)
                                    (match-beginning 0)))))
            (unless content-end
              (error "Drawer %s has no closing :END: line" drawer))
            (goto-char content-start)
            (delete-region content-start content-end))
        (outline-next-heading)
        (insert " :" drawer ":\n :END:\n")
        ;; Move back to the start of the :END: line just inserted.
        (beginning-of-line 0))
      (insert (pp-to-string status)))))
(defun org-feed-add-items (pos entries)
  "Insert the formatted items ENTRIES below the headline at POS.
ENTRIES is a list of strings, each one an Org subtree.  They are
pasted after the last non-blank line of the subtree at POS, one
level below its headline.  The insertion position is pushed onto
the Org mark ring.  Signal an error when POS is not at a headline."
  (save-excursion
    (goto-char pos)
    (unless (looking-at org-complex-heading-regexp)
      (error "Wrong position"))
    (let ((child-level (org-get-valid-level (length (match-string 1)) 1))
          item)
      ;; Go to the line following the last non-blank line of the subtree.
      (org-end-of-subtree t t)
      (skip-chars-backward " \t\n")
      (beginning-of-line 2)
      (setq pos (point))
      ;; Like the list it consumes, the loop ends at the first nil item.
      (while (setq item (pop entries))
        (org-paste-subtree child-level item 'yank))
      (org-mark-ring-push pos))))
(defun org-feed-format-entry (entry template formatter)
  "Format ENTRY so that it can be inserted into an Org file.
ENTRY is a property list describing a single feed item.

When FORMATTER is non-nil, call it with ENTRY and return its
value.  Otherwise expand TEMPLATE and return the resulting
string.  See `org-feed-default-template' for the escapes that
are understood.  A %name escape standing alone on its line is
replaced by the property value with every continuation line
indented like the first one."
  (require 'org-capture)
  (if formatter (funcall formatter entry)
    (let* ((dlines
            (org-split-string (or (plist-get entry :description) "???")
                              "\n"))
           (time (or (if (plist-get entry :pubDate)
                         (org-read-date t t (plist-get entry :pubDate)))
                     (current-time)))
           (v-h (or (plist-get entry :title) (car dlines) "???"))
           (v-t (format-time-string (org-time-stamp-format nil nil) time))
           (v-T (format-time-string (org-time-stamp-format t nil) time))
           (v-u (format-time-string (org-time-stamp-format nil t) time))
           (v-U (format-time-string (org-time-stamp-format t t) time))
           (v-a (let ((tmp (or (and (plist-get entry :guid-permalink)
                                    (plist-get entry :guid))
                               (plist-get entry :link))))
                  (if tmp (format "[[%s]]\n" tmp) ""))))
      (with-temp-buffer
        (insert template)
        (goto-char (point-min))
        ;; Mark %() embedded elisp for later evaluation.
        (org-capture-expand-embedded-elisp 'mark)
        ;; Simple %-escapes.  `org-capture-escaped-%' may modify
        ;; buffer and cripple match-data.  Use markers instead.
        (while (re-search-forward "%\\([a-zA-Z]+\\)" nil t)
          (let ((key (match-string 1))
                (beg (copy-marker (match-beginning 0)))
                (end (copy-marker (match-end 0))))
            (unless (org-capture-escaped-%)
              ;; Compute the replacement while the placeholder is still
              ;; in the buffer: the "alone on its line" test below has
              ;; to see it.
              (let ((replacement
                     (pcase key
                       ("h" v-h)
                       ("t" v-t)
                       ("T" v-T)
                       ("u" v-u)
                       ("U" v-U)
                       ("a" v-a)
                       (name
                        (let ((v (plist-get entry (intern (concat ":" name)))))
                          (save-excursion
                            (save-match-data
                              (goto-char beg)
                              (beginning-of-line)
                              ;; Only a string can be re-indented.
                              (if (and (stringp v)
                                       (looking-at
                                        (concat "^\\([ \t]*\\)%" name
                                                "[ \t]*$")))
                                  (org-feed-make-indented-block
                                   v (current-indentation))
                                v))))))))
                (delete-region beg end)
                (goto-char beg)
                (when replacement
                  (insert
                   ;; Escape string delimiters within embedded lisp.
                   (if (org-capture-inside-embedded-elisp-p)
                       (replace-regexp-in-string "\"" "\\\\\"" replacement)
                     replacement)))))
            ;; Release the markers whether or not the escape was expanded.
            (set-marker beg nil)
            (set-marker end nil)))
        ;; %() embedded elisp
        (org-capture-expand-embedded-elisp)
        (decode-coding-string
         (buffer-string) (detect-coding-region (point-min) (point-max) t))))))
(defun org-feed-make-indented-block (s n)
  "Return string S with N spaces inserted after each line break.
A string without any newline is returned unchanged.  The first
line never receives extra indentation."
  (if (string-match "\n" s)
      (let ((separator (concat "\n" (make-string n ?\s))))
        (mapconcat #'identity (org-split-string s "\n") separator))
    s))
(defun org-feed-skip-http-headers (buffer)
  "Delete the HTTP header section at the top of BUFFER and return BUFFER.
Everything up to and including the first empty line is removed.
The headers are assumed to be present; an error is signaled when
no empty line can be found."
  (with-current-buffer buffer
    (widen)
    (let ((body-start (progn (goto-char (point-min))
                             (search-forward "\n\n"))))
      (delete-region (point-min) body-start)))
  buffer)
(defun org-feed-get-feed (url)
  "Get the RSS feed file at URL and return the buffer.
The retrieval is controlled by `org-feed-retrieve-method'.  With
the built-in url.el method, the HTTP headers are stripped and the
response buffer is returned, or nil when url.el delivers no
buffer.  With `curl' or `wget', the output of the program is
collected in the buffer named by `org-feed-buffer' and that name
is returned.  Any other function is called with URL and its
value is returned.  Return nil for an unsupported method."
  (cond
   ((eq org-feed-retrieve-method 'url-retrieve-synchronously)
    ;; `url-retrieve-synchronously' returns nil when nothing could be
    ;; retrieved; hand that nil to the caller, which reports the
    ;; failure, instead of choking on it here.
    (let ((response (url-retrieve-synchronously url)))
      (and (buffer-live-p response)
           (org-feed-skip-http-headers response))))
   ((eq org-feed-retrieve-method 'curl)
    ;; Start from a fresh buffer; killing a missing one signals an error.
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "curl" nil org-feed-buffer nil "--silent" url)
    org-feed-buffer)
   ((eq org-feed-retrieve-method 'wget)
    (ignore-errors (kill-buffer org-feed-buffer))
    (call-process "wget" nil org-feed-buffer nil "-q" "-O" "-" url)
    org-feed-buffer)
   ((functionp org-feed-retrieve-method)
    (funcall org-feed-retrieve-method url))))
(defun org-feed-parse-rss-feed (buffer)
  "Parse BUFFER for RSS feed entries.
Returns a list of entries, with each entry a property list,
containing the properties `:guid' and `:item-full-text'.  The
entries are listed in the order in which they appear in BUFFER.
`:guid' is nil for an item without a <guid> element.  An item
whose closing </item> tag is missing, as in a truncated
download, ends the scan and is not returned."
  (require 'xml)
  (with-current-buffer buffer
    ;; Bind `case-fold-search' inside BUFFER so that the binding is
    ;; guaranteed to apply to the searches made there.
    (let ((case-fold-search t)
          (searching t)
          entries)
      (widen)
      (goto-char (point-min))
      (while (and searching (re-search-forward "<item\\>.*?>" nil t))
        (let ((beg (point)))
          (if (not (re-search-forward "</item>" nil t))
              ;; Unterminated item: there is nothing sensible to extract.
              (setq searching nil)
            (let* ((end (match-beginning 0))
                   (item (buffer-substring beg end))
                   (guid (when (string-match
                                "<guid\\>.*?>\\([^\000]*?\\)</guid>" item)
                           (xml-substitute-special
                            (match-string-no-properties 1 item)))))
              (push (list :guid guid :item-full-text item) entries)
              (goto-char end)))))
      (nreverse entries))))
(defun org-feed-parse-rss-entry (entry)
  "Add one property per XML element found in ENTRY's `:item-full-text'.
For each element <name>text</name>, the property `:name' is set
to the text with XML entities substituted.  In addition,
`:guid-permalink' is set to t unless the item text contains
isPermaLink=\"false\".  Return the extended property list."
  (require 'xml)
  (let ((element-re "<\\([a-zA-Z]+\\>\\).*?>\\([^\000]*?\\)</\\1>")
        (no-permalink-re "isPermaLink[ \t]*=[ \t]*\"false\""))
    (with-temp-buffer
      (insert (plist-get entry :item-full-text))
      (goto-char (point-min))
      (while (re-search-forward element-re nil t)
        ;; Read both groups before anything can clobber the match data.
        (let* ((key (intern (concat ":" (match-string 1))))
               (value (xml-substitute-special (match-string 2))))
          (setq entry (plist-put entry key value))))
      (goto-char (point-min))
      (when (not (re-search-forward no-permalink-re nil t))
        (setq entry (plist-put entry :guid-permalink t)))))
  entry)
(defun org-feed-parse-atom-feed (buffer)
  "Parse BUFFER for Atom feed entries.
Return a list with one property list per <entry> element, in
document order.  Each property list has `:guid', taken from the
<id> element of the entry, and `:item-full-text'.  The latter
holds the parsed entry printed as a Lisp expression, not the
original XML text."
  (require 'xml)
  (with-current-buffer buffer
    (widen)
    (let ((root (car (xml-parse-region (point-min) (point-max))))
          (result nil))
      (dolist (node (xml-get-children root 'entry))
        (let ((id-node (car (xml-get-children node 'id))))
          (push (list :guid (car (xml-node-children id-node))
                      :item-full-text (prin1-to-string node))
                result)))
      (nreverse result))))
(defun org-feed-parse-atom-entry (entry)
  "Parse the `:item-full-text' as a sexp and create new properties.
ENTRY is a property list as returned by `org-feed-parse-atom-feed'.
The following properties are added:

`:link'         the href attribute of the first <link> element
`:title'        the text of the <title> element, or nil without one
`:description'  the content of the <content> element, if present

A <content> element without a type attribute is treated as plain
text, which is the default in Atom.  Return the extended property
list."
  (require 'xml)
  (let* ((xml (car (read-from-string (plist-get entry :item-full-text))))
         (title (car (xml-node-children
                      (car (xml-get-children xml 'title)))))
         (content (car (xml-get-children xml 'content))))
    ;; Get first <link href='foo'/>.
    (setq entry (plist-put entry :link
                           (xml-get-attribute
                            (car (xml-get-children xml 'link))
                            'href)))
    ;; Add <title/> as :title.  A missing or non-textual title yields nil
    ;; instead of an error.
    (setq entry (plist-put entry :title
                           (and (stringp title)
                                (xml-substitute-special title))))
    (when content
      (let ((type (or (xml-get-attribute-or-nil content 'type) "text"))
            (children (xml-node-children content)))
        (setq entry
              (plist-put
               entry :description
               (cond
                ;; We like plain text.
                ;; TODO: convert HTML to Org markup.
                ((member type '("text" "html"))
                 (let ((text (car children)))
                   ;; Empty content has no text child.
                   (and (stringp text) (xml-substitute-special text))))
                ;; TODO: convert XHTML to Org markup.
                ((string= type "xhtml")
                 (prin1-to-string children))
                (t
                 (format-message "Unknown `%s' content." type)))))))
    entry))
- (provide 'org-feed)
- ;; Local variables:
- ;; generated-autoload-file: "org-loaddefs.el"
- ;; End:
- ;;; org-feed.el ends here
|