;;; html2text.el --- a simple html to plain text converter
-
-;; Copyright (C) 2002 Free Software Foundation, Inc.
+;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
;; Author: Joakim Hove <hove@phys.ntnu.no>
(setq next (nth 2 tmp-list))
(setq index 1)
- (if (not (string-match "=" prev))
- (progn
- (if (not (string= (substring this 0 1) "="))
- (setq attr-list (cons (list prev nil) attr-list))
- )
- )
- )
+ (unless (string-match "=" prev)
+ (unless (string= (substring this 0 1) "=")
+ (setq attr-list (cons (list prev nil) attr-list))))
(while (< index (1- (length tmp-list)))
(if (not (string-match "=" this))
(if (not (or (string= (substring next 0 1) "=")
(string= (substring prev -1) "=")))
- (setq attr-list (cons (list this nil) attr-list))
- )
- )
+ (setq attr-list (cons (list this nil) attr-list))))
(setq index (1+ index))
(setq prev this)
(setq this next)
- (setq next (nth (1+ index) tmp-list))
- )
+ (setq next (nth (1+ index) tmp-list)))
- (if this
- (progn
- (if (not (string-match "=" this))
- (progn
- (if (not (string= (substring prev -1) "="))
- (setq attr-list (cons (list this nil) attr-list))
- )
- )
- )
- )
- )
- attr-list ;; return - value
- )
- )
+ (when this
+ (unless (string-match "=" this)
+ (unless (string= (substring prev -1) "=")
+ (setq attr-list (cons (list this nil) attr-list)))))
+ attr-list)) ;; return - value
+
;;
;; </Functions related to attributes>
(while (< item-nr items)
(setq item-nr (1+ item-nr))
(re-search-forward "<dt>\\([ ]*\\)" (point-max) t)
- (if (match-string 1)
- (kill-region (point) (- (point) (string-width (match-string 1))))
- )
+ (when (match-string 1)
+ (delete-region (point) (- (point) (string-width (match-string 1)))))
(let ((def-p1 (point))
(def-p2 0))
(re-search-forward "\\([ ]*\\)\\(</dt>\\|<dd>\\)" (point-max) t)
(mw2 (string-width (match-string 2)))
(mw (+ mw1 mw2)))
(goto-char (- (point) mw))
- (kill-region (point) (+ (point) mw1))
- (setq def-p2 (point))
- )
- )
+ (delete-region (point) (+ (point) mw1))
+ (setq def-p2 (point))))
(setq def-p2 (- (point) (string-width (match-string 2)))))
- (put-text-property def-p1 def-p2 'face 'bold)
- )
- )
- )
- )
+ (put-text-property def-p1 def-p2 'face 'bold)))))
(defun html2text-delete-tags (p1 p2 p3 p4)
- (kill-region p1 p2)
- (kill-region (- p3 (- p2 p1)) (- p4 (- p2 p1)))
- )
+ (delete-region p1 p2)
+ (delete-region (- p3 (- p2 p1)) (- p4 (- p2 p1))))
(defun html2text-delete-single-tag (p1 p2)
- (kill-region p1 p2)
- )
+ (delete-region p1 p2))
(defun html2text-clean-hr (p1 p2)
(html2text-delete-single-tag p1 p2)
;; surely improve upon this.
(let* ((attr-list (html2text-get-attr p1 p2 "a"))
(href (html2text-attr-value attr-list "href")))
- (kill-region p1 p4)
+ (delete-region p1 p4)
(when href
(goto-char p1)
(insert (substring href 1 -1 ))
(let ((has-br-line)
(refill-start)
(refill-stop))
- (if (re-search-forward "<br>$" p2 t)
- (setq has-br-line t)
- )
- (if has-br-line
- (progn
- (goto-char p1)
- (if (re-search-forward ".+[^<][^b][^r][^>]$" p2 t)
- (progn
- (beginning-of-line)
- (setq refill-start (point))
- (goto-char p2)
- (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t)
- (next-line 1)
- (end-of-line)
- ;; refill-stop should ideally be adjusted to
- ;; accomodate the "<br>" strings which are removed
- ;; between refill-start and refill-stop. Can simply
- ;; be returned from my-replace-string
- (setq refill-stop (+ (point)
- (html2text-replace-string
- "<br>" ""
- refill-start (point))))
- ;; (message "Point = %s refill-stop = %s" (point) refill-stop)
- ;; (sleep-for 4)
- (fill-region refill-start refill-stop)
- )
- )
- )
- )
- )
- (html2text-replace-string "<br>" "" p1 p2)
- )
+ (when (re-search-forward "<br>$" p2 t)
+ (setq has-br-line t))
+ (when has-br-line
+ (goto-char p1)
+ (when (re-search-forward ".+[^<][^b][^r][^>]$" p2 t)
+ (beginning-of-line)
+ (setq refill-start (point))
+ (goto-char p2)
+ (re-search-backward ".+[^<][^b][^r][^>]$" refill-start t)
+ (next-line 1)
+ (end-of-line)
+ ;; refill-stop should ideally be adjusted to
+ ;; accomodate the "<br>" strings which are removed
+ ;; between refill-start and refill-stop. Can simply
+ ;; be returned from my-replace-string
+ (setq refill-stop (+ (point)
+ (html2text-replace-string
+ "<br>" ""
+ refill-start (point))))
+ ;; (message "Point = %s refill-stop = %s" (point) refill-stop)
+ ;; (sleep-for 4)
+ (fill-region refill-start refill-stop))))
+ (html2text-replace-string "<br>" "" p1 p2))
;;
;; This one is interactive ...
;; Removing lonely <br> on a single line, if they are left intact we
;; dont have any paragraphs at all.
(html2text-buffer-head)
- (while (< (point) (point-max))
+ (while (not (eobp))
(let ((p1 (point)))
(forward-paragraph 1)
;;(message "Kaller fix med p1=%s p2=%s " p1 (1- (point))) (sleep-for 5)
(html2text-fix-paragraph p1 (1- (point)))
(goto-char p1)
- (if (< (point) (point-max))
- (forward-paragraph 1))
- )
- )
- )
+ (when (not (eobp))
+ (forward-paragraph 1)))))
;;
;; </Functions to be called to fix up paragraphs>
(interactive)
(dolist (tag tag-list)
(html2text-buffer-head)
- (while (re-search-forward (format "\\(</?%s[^>]*>\\)" tag) (point-max) t)
- (let ((p1 (point)))
- (search-backward "<")
- (kill-region (point) p1)
- )
- )
- )
- )
+ (while (re-search-forward (format "</?%s[^>]*>" tag) (point-max) t)
+ (delete-region (match-beginning 0) (match-end 0)))))
(defun html2text-format-tags ()
"See the variable \"html2text-format-tag-list\" for documentation"
(let ((tag (car tag-and-function))
(function (cdr tag-and-function)))
(html2text-buffer-head)
- (while (re-search-forward (format "\\(<%s\\( [^>]*\\)?>\\)" tag)
+ (while (re-search-forward (format "<%s\\( [^>]*\\)?>" tag)
(point-max) t)
(let ((p1)
(p2 (point))
(p3) (p4)
- (attr (match-string 1)))
+ (attr (match-string 0)))
(search-backward "<" (point-min) t)
(setq p1 (point))
(re-search-forward (format "</%s>" tag) (point-max) t)
(let ((tag (car tag-and-function))
(function (cdr tag-and-function)))
(html2text-buffer-head)
- (while (re-search-forward (format "\\(<%s\\( [^>]*\\)?>\\)" tag)
+ (while (re-search-forward (format "<%s\\( [^>]*\\)?>" tag)
(point-max) t)
(let ((p1)
(p2 (point)))