ical2org.awk 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. #!/usr/bin/env gawk -f
  2. # awk script for converting an iCal formatted file to a sequence of org-mode headings.
  3. # this may not work in general but seems to work for day and timed events from Google's
  4. # calendar, which is really all I need right now...
  5. #
  6. # usage:
  7. # awk -f THISFILE < icalinputfile.ics > orgmodeentries.org
  8. #
  9. # Note: change org meta information generated below for author and
  10. # email entries!
  11. #
  12. # Caveats:
  13. #
  14. # - date entries with no time specified are assumed to be local time zone;
  15. # same remark for date entries that do have a time but do not end with Z
  16. # e.g.: 20130101T123456 is local and will be kept as 2013-01-01 12:34
  17. # where 20130223T123422Z is UTC and will be corrected appropriately
  18. #
  19. # - UTC times are changed into local times, using the time zone of the
  20. # computer that runs the script; it would be very hard in an awk script
  21. # to respect the time zone of a file belonging to another time zone:
  22. # the offsets will be different as well as the switchover time(s);
  23. # (consider a remote shell to a computer with the file's time zone)
  24. #
  25. # - the UTC conversion entirely relies on the built-in strftime method;
  26. # the author is not responsible for any erroneous conversions nor the
  27. # consequence of such conversions
  28. #
  29. # - does process RRULE recurring events, but does not honour COUNT: rules with a COUNT specifier are output without a repeater
  30. #
  31. # - does not process EXDATE to exclude date(s) from recurring events
  32. #
  33. # Eric S Fraga
  34. # 20100629 - initial version
  35. # 20100708 - added end times to timed events
  36. # - adjust times according to time zone information
  37. # - fixed incorrect transfer for entries with ":" embedded within the text
  38. # - added support for multi-line summary entries (which become headlines)
  39. # 20100709 - incorporated time zone identification
  40. # - fixed processing of continuation lines as Google seems to
  41. # have changed, in the last day, the number of spaces at
  42. # the start of the line for each continuation...
  43. # - remove backslashes used to protect commas in iCal text entries
  44. # no further revision log after this as the file was moved into a git
  45. # repository...
  46. #
  47. # Updated by: Guido Van Hoecke <guivhoATgmailDOTcom>
  48. # Last change: 2013.05.26 14:28:33
  49. #----------------------------------------------------------------------------------
  50. BEGIN {
  51. ### config section
  52. attending_types["UNSET"] = 0;
  53. attending_types["ATTENDING"] = 1;
  54. attending_types["NEEDS_ACTION"] = 2;
  55. attending_types["NOT_ATTENDING"] = 3;
  56. attending_types[0] = "UNSET";
  57. attending_types[1] = "ATTENDING";
  58. attending_types[2] = "NEEDS_ACTION";
  59. attending_types[3] = "NOT_ATTENDING";
  60. # map of UIDS for duplicate checking -- sometimes the same id comes down
  61. # with multiple VEVENTS
  62. UIDS[0];
  63. # map of people attending a given event
  64. people_attending[0];
  65. # maximum age in days for entries to be output: set this to -1 to
  66. # get all entries or to N>0 to only get enties that start or end
  67. # less than N days ago
  68. max_age = 7;
  69. # set to 1 or 0 to yes or not output a header block with TITLE,
  70. # AUTHOR, EMAIL etc...
  71. header = 1;
  72. # set to 1 or 0 to yes or not output the original ical preamble as
  73. # comment
  74. preamble = 1;
  75. # set to 1 to output time and summary as one line starting with
  76. # the time (value 1) or to 0 to output the summary as first line
  77. # and the date and time info as a later line (after the property
  78. # drawer or org complains)
  79. condense = 0;
  80. # set to 1 or 0 to yes or not output the original ical entry as a
  81. # comment (mostly useful for debugging purposes)
  82. original = 1;
  83. # google truncates long subjects with ... which is misleading in
  84. # an org file: it gives the unfortunate impression that an
  85. # expanded entry is still collapsed; value 1 will trim those
  86. # ... and value 0 doesn't touch them
  87. trimdots = 1;
  88. # change this to your name
  89. author = ENVIRON["AUTHOR"] != "" ? ENVIRON["AUTHOR"] : "Marc Sherry"
  90. # and to your email address
  91. emailaddress = ENVIRON["EMAIL"] != "" ? ENVIRON["EMAIL"] : "unknown"
  92. # calendar/category name for display in org-mode
  93. calendarname = ENVIRON["CALENDAR"] != "" ? ENVIRON["CALENDAR"] : "unknown"
  94. # any tags for this calendar (e.g. "WORK" or "PERSONAL")
  95. filetags = ENVIRON["FILETAGS"] != "" ? ENVIRON["FILETAGS"] : "unknown"
  96. # timezone offsets
  97. # TODO: this is stupid
  98. tz_offsets["America/Los_Angeles"] = 0
  99. tz_offsets["America/Chicago"] = 2
  100. ### end config section
  101. # use a colon to separate the type of data line from the actual contents
  102. FS = ":";
  103. # we only need to preserve the original entry lines if either the
  104. # preamble or original options are true
  105. preserve = preamble || original
  106. first = 1; # true until an event has been found
  107. max_age_seconds = max_age*24*60*60
  108. if (header) {
  109. print "#+TITLE: Main Google calendar entries"
  110. print "#+AUTHOR: ", author
  111. print "#+EMAIL: ", emailaddress
  112. print "#+DESCRIPTION: converted using the ical2org awk script"
  113. print "#+CATEGORY: ", calendarname
  114. print "#+STARTUP: hidestars"
  115. print "#+STARTUP: overview"
  116. print "#+FILETAGS: ", filetags
  117. print ""
  118. }
  119. }
  120. # continuation lines (at least from Google) start with a space. If the
  121. # continuation is after a processed field (description, summary, attendee,
  122. # etc.) append the entry to the respective variable
  123. /^[ ]/ {
  124. if (indescription) {
  125. entry = entry gensub("\r", "", "g", gensub("^[ ]", "", 1, $0));
  126. # print "entry continuation: " entry
  127. } else if (insummary) {
  128. summary = summary gensub("\r", "", "g", gensub("^[ ]", "", 1, $0))
  129. # print "summary continuation: " summary
  130. } else if (inattendee) {
  131. attendee = attendee gensub("\r", "", "g", gensub("^[ ]", "", 1, $0))
  132. # print "attendee continuation: " attendee
  133. are_we_going(attendee)
  134. add_attendee(attendee)
  135. } else if (inlocation) {
  136. location = location unescape(gensub("\r", "", "g", $0), 0);
  137. }
  138. if (preserve)
  139. icalentry = icalentry "\n" $0
  140. }
  141. /^BEGIN:VEVENT/ {
  142. # start of an event: initialize global values used for each event
  143. date = "";
  144. entry = ""
  145. headline = ""
  146. icalentry = "" # the full entry for inspection
  147. id = ""
  148. indescription = 0;
  149. insummary = 0
  150. inattendee = 0
  151. inlocation = 0
  152. in_alarm = 0
  153. got_end_date = 0
  154. attending = attending_types["UNSET"];
  155. # http://unix.stackexchange.com/a/147958/129055
  156. intfreq = "" # the interval and frequency for repeating org timestamps
  157. lasttimestamp = -1;
  158. location = ""
  159. rrend = ""
  160. status = ""
  161. summary = ""
  162. attendee = ""
  163. delete people_attending;
  164. # if this is the first event, output the preamble from the iCal file
  165. if (first) {
  166. if(preamble) {
  167. print "* COMMENT original iCal preamble"
  168. print gensub("\r", "", "g", icalentry)
  169. }
  170. if (preserve)
  171. icalentry = ""
  172. first = 0;
  173. }
  174. }
  175. # any line that starts at the left with a non-space character is a new data field
  176. /^BEGIN:VALARM/ {
  177. # alarms have their own UID, DESCRIPTION, etc. We don't want these polluting the real fields
  178. in_alarm = 1
  179. }
  180. /^END:VALARM/ {
  181. in_alarm = 0
  182. }
  183. /^[A-Z]/ {
  184. # we do not copy DTSTAMP lines as they change every time you download
  185. # the iCal format file which leads to a change in the converted
  186. # org file as I output the original input. This change, which is
  187. # really content free, makes a revision control system update the
  188. # repository and confuses.
  189. if (preserve)
  190. if (! index("DTSTAMP", $1))
  191. icalentry = icalentry "\n" $0
  192. # this line terminates the collection of description and summary entries
  193. indescription = 0;
  194. insummary = 0;
  195. inattendee = 0;
  196. }
  197. # this type of entry represents a day entry, not timed, with date stamp YYYYMMDD
  198. /^DTSTART;VALUE=DATE/ {
  199. date = datestring($2);
  200. }
  201. /^DTEND;VALUE=DATE/ {
  202. got_end_date = 1
  203. end_date = datestring($2, 1);
  204. if ( issameday )
  205. end_date = ""
  206. }
  207. # this represents a timed entry with date and time stamp YYYYMMDDTHHMMSS
  208. # we ignore the seconds
  209. /^DTSTART[:;][^V]/ {
  210. tz = "";
  211. match($0, /TZID=([^:]*)/, a)
  212. {
  213. tz = a[1];
  214. }
  215. offset = tz_offsets[tz]
  216. date = datetimestring($2, offset);
  217. # print date;
  218. if (date != "" && got_end_date) {
  219. fix_date_time()
  220. }
  221. }
  222. # and the same for the end date;
  223. /^DTEND[:;][^V]/ {
  224. # NOTE: this doesn't necessarily appear after DTSTART
  225. tz = "";
  226. match($0, /TZID=([^:]*)/, a)
  227. {
  228. tz = a[1];
  229. }
  230. offset = tz_offsets[tz]
  231. end_date = datetimestring($2, offset);
  232. got_end_date = 1
  233. if (date != "" && got_end_date) {
  234. # We got start and end date/time, let's munge as appropriate
  235. fix_date_time()
  236. }
  237. }
  238. # repetition rule
  239. /^RRULE:FREQ=(DAILY|WEEKLY|MONTHLY|YEARLY)/ {
  240. # TODO: handle BYDAY values for events that repeat weekly for multiple days
  241. # (e.g. a "Gym" event)
  242. # get the d, w, m or y value
  243. freq = tolower(gensub(/.*FREQ=(.).*/, "\\1", 1, $0))
  244. # get the interval, and use 1 if none specified
  245. interval = $2 ~ /INTERVAL=/ ? gensub(/.*INTERVAL=([0-9]+);.*/, "\\1", 1, $2) : 1
  246. # get the enddate of the rule and use "" if none specified
  247. rrend = $2 ~ /UNTIL=/ ? datestring(gensub(/.*UNTIL=([0-9]{8}).*/, "\\1", 1, $2)) : ""
  248. rrend_raw = $2 ~ /UNTIL=/ ? gensub(/.*UNTIL=([0-9]{8}).*/, "\\1", 1, $2) : ""
  249. repeat_count = $2 ~ /COUNT=/ ? gensub(/.*COUNT=([0-9]+).*/, "\\1", 1, $2) : ""
  250. # build the repetitor vale as understood by org
  251. intfreq = " +" interval freq
  252. # if the repetition is daily, and there is an end date, drop the repetitor
  253. # as that is the default
  254. if (intfreq == " +1d" && end_date == "" && rrend != "")
  255. intfreq = ""
  256. now = strftime("%Y%m%dT%H%M%SZ")
  257. if (rrend_raw != "" && rrend_raw < now)
  258. intfreq = ""
  259. if (repeat_count != "") # TODO: count repeats correctly
  260. intfreq = ""
  261. }
  262. # The description will be the contents of the entry in org-mode.
  263. # this line may be continued.
  264. /^DESCRIPTION/ {
  265. if (!in_alarm) {
  266. # Setting $1 to "" clears colons from items like "1:1 with Marc", so we
  267. # strip "DESCRIPTION:" off of the front instead
  268. # $1 = "";
  269. entry = entry gensub("\r", "", "g", gensub(/^DESCRIPTION:/, "", 1, $0));
  270. indescription = 1;
  271. }
  272. }
  273. # the summary will be the org heading
  274. /^SUMMARY/ {
  275. # Setting $1 to "" clears colons from items like "1:1 with Marc", so we
  276. # strip "SUMMARY:" off of the front instead
  277. if (!in_alarm) {
  278. summary = gensub("\r", "", "g", gensub(/^SUMMARY:/, "", 1, $0));
  279. # trim trailing dots if requested by config option
  280. if(trimdots && summary ~ /\.\.\.$/)
  281. sub(/\.\.\.$/, "", summary)
  282. insummary = 1;
  283. # print "Summary: " summary
  284. }
  285. }
  286. # the unique ID will be stored as a property of the entry
  287. /^UID/ {
  288. if (!in_alarm) {
  289. id = gensub("\r", "", "g", $2);
  290. }
  291. }
  292. /^LOCATION/ {
  293. location = unescape(gensub("\r", "", "g", $2), 0);
  294. inlocation = 1;
  295. # print "Location: " location
  296. }
  297. /^STATUS/ {
  298. status = gensub("\r", "", "g", $2);
  299. # print "Status: " status
  300. }
  301. /^ATTENDEE/ {
  302. attendee = gensub("\r", "", "g", $0);
  303. inattendee = 1;
  304. # print "Attendee: " attendee
  305. }
  306. # when we reach the end of the event line, we output everything we
  307. # have collected so far, creating a top level org headline with the
  308. # date/time stamp, unique ID property and the contents, if any
  309. /^END:VEVENT/ {
  310. #output event
  311. # print "max_age: " max_age
  312. # print "lasttimestamp: " lasttimestamp
  313. # print "lasttimestamp+max_age_seconds: " lasttimestamp+max_age_seconds
  314. # print "systime(): " systime()
  315. is_duplicate = (id in UIDS);
  316. if(is_duplicate == 0 && (max_age<0 || intfreq != "" || ( lasttimestamp>0 && systime()<lasttimestamp+max_age_seconds )) )
  317. {
  318. if (attending != attending_types["NOT_ATTENDING"]) {
  319. # build org timestamp
  320. if (intfreq != "")
  321. date = date intfreq
  322. # TODO: http://orgmode.org/worg/org-faq.html#org-diary-class
  323. else if (end_date != "")
  324. date = date ">--<" end_date
  325. else if (rrend != "")
  326. date = date ">--<" rrend
  327. # translate \n sequences to actual newlines and unprotect commas (,)
  328. if (condense)
  329. print "* <" date "> " gensub("^[ ]+", "", "", unescape(summary, 0))
  330. else
  331. print "* " gensub("^[ ]+", "", "g", unescape(summary, 0))
  332. print " :PROPERTIES:"
  333. print " :ID: " id
  334. if(length(location))
  335. print " :LOCATION: " location
  336. if(length(status))
  337. print " :STATUS: " status
  338. attending_string = attending_types[attending]
  339. print " :ATTENDING: " attending_string
  340. print " :ATTENDEES: " join_keys(people_attending)
  341. print " :END:"
  342. if (date2 != "")
  343. {
  344. # Fake some logbook entries so we can generate a clock report
  345. print " :LOGBOOK:"
  346. print " CLOCK: [" date1 "]--[" date2 "] => " "0:00"
  347. print " :END"
  348. }
  349. if (!condense)
  350. print "<" date ">"
  351. print ""
  352. if(length(entry)>1)
  353. print gensub("^[ ]+", "", "g", unescape(entry, 1));
  354. # output original entry if requested by 'original' config option
  355. if (original)
  356. print "** COMMENT original iCal entry\n", gensub("\r", "", "g", icalentry)
  357. }
  358. UIDS[id] = 1;
  359. }
  360. }
  361. # Join keys in an array, return a string
  362. function join_keys(input)
  363. {
  364. joined = "";
  365. first_key = 1;
  366. for (key in input)
  367. {
  368. if (first_key != 1)
  369. joined = joined ", "
  370. joined = joined key
  371. first_key = 0;
  372. }
  373. return joined;
  374. }
  375. # unescape commas, newlines, etc. newlines are optionally converted to just
  376. # spaces -- it's good to preserve them in descriptions for e.g. interview
  377. # calendar events, but addresses look better with spaces as more info fits on a
  378. # line
  379. function unescape(input, preserve_newlines)
  380. {
  381. ret = gensub("\\\\,", ",", "g",
  382. gensub("\\\\;", ";", "g", input))
  383. if (preserve_newlines)
  384. ret = gensub("\\\\n", "\n", "g", ret)
  385. else
  386. ret = gensub("\\\\n", " ", "g", ret)
  387. return ret
  388. # return gensub("\\\\,", ",", "g",
  389. # gensub("\\\\n", " ", "g",
  390. # gensub("\\\\;", ";", "g", input)))
  391. }
  392. # function to convert an iCal time string 'yyyymmddThhmmss[Z]' into a
  393. # date time string as used by org, preferably including the short day
  394. # of week: 'yyyy-mm-dd day hh:mm' or 'yyyy-mm-dd hh:mm' if we cannot
  395. # define the day of the week
  396. function datetimestring(input, offset)
  397. {
  398. # print "________"
  399. # print "input : " input
  400. # convert the iCal Date+Time entry to a format that mktime can understand
  401. spec = match(input, "([0-9]{4})([0-9]{2})([0-9]{2})T([0-9]{2})([0-9]{2})([0-9]{2}).*[\r]*", a);
  402. year = a[1]
  403. month = a[2]
  404. day = a[3]
  405. hour = a[4]
  406. min = a[5]
  407. sec = a[6]
  408. # print "spec :" spec
  409. if (offset > 0)
  410. {
  411. hour -= offset
  412. }
  413. # print "input: " input
  414. # print "datetime: " year" "month" "day" "hour" "min" "sec
  415. stamp = mktime(year" "month" "day" "hour" "min" "sec);
  416. lasttimestamp = stamp;
  417. if (stamp <= 0) {
  418. # this is a date before the start of the epoch, so we cannot
  419. # use strftime and will deliver a 'yyyy-mm-dd hh:mm' string
  420. # without day of week; this assumes local time, and does not
  421. # attempt UTC offset correction
  422. spec = gensub("([0-9]{4})([0-9]{2})([0-9]{2})T([0-9]{2})([0-9]{2})([0-9]{2}).*[\r]*", "\\1-\\2-\\3 \\4:\\5", "g", input);
  423. # print "==> spec:" spec;
  424. return spec;
  425. }
  426. if (input ~ /[0-9]{8}T[0-9]{6}Z/ ) {
  427. # this is an utc time;
  428. # we need to correct the timestamp by the utc offset for this time
  429. offset = strftime("%z", stamp)
  430. pm = substr(offset,1,1) 1 # define multiplier +1 or -1
  431. hh = substr(offset,2,2) * 3600 * pm
  432. mm = substr(offset,4,2) * 60 * pm
  433. # adjust the timestamp
  434. stamp = stamp + hh + mm
  435. }
  436. return strftime("%Y-%m-%d %a %H:%M", stamp);
  437. }
  438. # function to convert an iCal date into an org date;
  439. # the optional parameter indicates whether this is an end date;
  440. # for single or multiple whole day events, the end date given by
  441. # iCal is the date of the first day after the event;
  442. # if the optional 'isenddate' parameter is non zero, this function
  443. # tries to reduce the given date by one day
  444. function datestring(input, isenddate)
  445. {
  446. #convert the iCal string to a an mktime input string
  447. spec = gensub("([0-9]{4})([0-9]{2})([0-9]{2}).*[\r]*", "\\1 \\2 \\3 00 00 00", "g", input);
  448. # compute the nr of seconds after or before the epoch
  449. # dates before the epoch will have a negative timestamp
  450. # days after the epoch will have a positive timestamp
  451. stamp = mktime(spec);
  452. if (isenddate) {
  453. # subtract 1 day from the timestamp
  454. # note that this also works for dates before the epoch
  455. stamp = stamp - 86400;
  456. # register whether the end date is same as the start date
  457. issameday = lasttimestamp == stamp
  458. }
  459. # save timestamp to allow for check of max_age
  460. lasttimestamp = stamp
  461. if (stamp < 0) {
  462. # this date is before the epoch;
  463. # the returned datestring will not have the short day of week string
  464. # as strftime does not handle negative times;
  465. # we have to construct the datestring directly from the input
  466. if (isenddate) {
  467. # we really should return the date before the input date, but strftime
  468. # does not work with negative timestamp values; so we can not use it
  469. # to obtain the string representation of the corrected timestamp;
  470. # we have to return the date specified in the iCal input and we
  471. # add time 00:00 to clarify this
  472. return spec = gensub("([0-9]{4})([0-9]{2})([0-9]{2}).*[\r]*", "\\1-\\2-\\3 00:00", "g", input);
  473. } else {
  474. # just generate the desired representation of the input date, without time;
  475. return gensub("([0-9]{4})([0-9]{2})([0-9]{2}).*[\r]*", "\\1-\\2-\\3", "g", input);
  476. }
  477. }
  478. # return the date and day of week
  479. return strftime("%Y-%m-%d %a", stamp);
  480. }
  481. # Add the current attendee's response to a set, so we can list who's going
  482. # and who's declined
  483. function add_attendee(attendee)
  484. {
  485. match(attendee, /CN=([^;]+)/, m)
  486. {
  487. CN = tolower(m[1]);
  488. people_attending[CN] = 1;
  489. }
  490. }
  491. function fix_date_time()
  492. {
  493. if (substr(date,1,10) == substr(end_date,1,10)) {
  494. # timespan within same date, use one date with a time range, but preserve
  495. # original dates for org-clocktable
  496. date1 = date
  497. date2 = end_date
  498. date = date "-" substr(end_date, length(end_date)-4)
  499. end_date = ""
  500. }
  501. }
  502. # Parse the current ATTENDEE line and see if it belongs to us. If so, check if
  503. # we've accepted this calendar invite, and if so, set `attending` to True. It
  504. # may be the case that there are no attendees (e.g. personal calendar items),
  505. # and if that's the case, we'll leave `attending` unset. If there are attendees,
  506. # we'll parse our status out and set `attending` appropriately.
  507. function are_we_going(attendee)
  508. {
  509. if (attending != attending_types["UNSET"])
  510. {
  511. # print "Bailing out early, attending is " attending
  512. return;
  513. }
  514. match(attendee, /CN=([^;]+)/, m)
  515. {
  516. # CN's can optionally be surrounded by quotes (google calendar download
  517. # omits, apple calendar export includes them)
  518. CN = gensub("\"", "", "g", tolower(m[1]));
  519. # TODO: no hardcoding
  520. if (CN == tolower(author) || CN == tolower(emailaddress))
  521. {
  522. # This is us -- did we accept the meeting?
  523. if (attendee ~ /PARTSTAT=ACCEPTED/)
  524. {
  525. attending = attending_types["ATTENDING"];
  526. }
  527. else if (attendee ~ /PARTSTAT=NEEDS-ACTION/)
  528. {
  529. attending = attending_types["NEEDS_ACTION"];
  530. }
  531. else {
  532. attending = attending_types["NOT_ATTENDING"];
  533. }
  534. }
  535. }
  536. # print "are_we_going: " attending
  537. }
  538. # Local Variables:
  539. # time-stamp-line-limit: 1000
  540. # time-stamp-format: "%04y.%02m.%02d %02H:%02M:%02S"
  541. # time-stamp-active: t
  542. # time-stamp-start: "Last change:[ \t]+"
  543. # time-stamp-end: "$"
  544. # End: