From 7fc8fa56b5e423d7ca11e722330e68a8f7e84476 Mon Sep 17 00:00:00 2001
From: Silvan Calarco <silvan.calarco@mambasoft.it>
Date: Fri, 5 Jan 2024 23:43:46 +0100
Subject: [PATCH] update to 2020.1.16 switch to python html2text package
 [release 2020.1.16-1mamba;Tue Mar 03 2020]

---
 README.md                   |   2 +-
 html2text-1.3.2a-utf8.patch | 706 ------------------------------------
 html2text.spec              |  48 +--
 3 files changed, 25 insertions(+), 731 deletions(-)
 delete mode 100644 html2text-1.3.2a-utf8.patch
diff --git a/README.md b/README.md
index d711515..170e5e2 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
 # html2text
 
-html2text is a command line utility, written in C++, that converts HTML documents into plain text.
+html2text is a Python script that converts a page of HTML into clean, easy-to-read plain ASCII text. Better yet, that ASCII also happens to be valid Markdown (a text-to-HTML format).
 
diff --git a/html2text-1.3.2a-utf8.patch b/html2text-1.3.2a-utf8.patch
deleted file mode 100644
index 442d187..0000000
--- a/html2text-1.3.2a-utf8.patch
+++ /dev/null
@@ -1,706 +0,0 @@
-diff -r -u -bB html2text-1.3.2a/Area.C html2text-1.3.2a-patched/Area.C
---- html2text-1.3.2a/Area.C	2003-11-23 12:05:29.000000000 +0100
-+++ html2text-1.3.2a-patched/Area.C	2005-05-13 22:19:59.862137688 +0200
-@@ -36,10 +36,13 @@
- #include <iostream>
- 
- #include "Area.h"
-+#include "html.h"
- #include "string.h"
- 
- #define LATIN1_nbsp 160
- 
-+extern int use_encoding;
-+
- /* ------------------------------------------------------------------------- */
- 
- #define malloc_array(type, size)\
-@@ -81,6 +84,27 @@
- 
- /* ------------------------------------------------------------------------- */
- 
-+/*           utf_length() and utf_width()       
-+ *
-+ *     Very simplified algorithm of calculating length of UTF-8
-+ *   string. No check for errors. Counting only ASCII bytes and
-+ *   leading bytes of UTF-8 multibyte sequences. All bytes like
-+ *   10xxxxxx are dropped. If USE_UTF8 is false then returns
-+ *   usual length.               --YS
-+ */
-+
-+unsigned int
-+Line::utf_length(size_type f, size_type t) const
-+{
-+  size_type m = (t < length_ ? t : length_);
-+  size_type r = m - f;
-+  if(USE_UTF8) {
-+      for (int i = f; i < m; i++)
-+        if((cells_[i].character & 0xc0) == 0x80) r--;
-+  }
-+  return r;
-+}
-+
- void
- Line::resize(size_type l)
- {
-@@ -236,6 +260,23 @@
-   return *this;
- }
- 
-+unsigned int
-+Area::utf_width()
-+{
-+  size_type r = width_;
-+  if(USE_UTF8) { r = 0;
-+    for (size_type yy = 0; yy < height_; yy++) {
-+      size_type r1 = 0;
-+      for (int i = width_ - 1; i >= 0; i--) {
-+        if(!r1 && isspace(cells_[yy][i].character)) continue;
-+        if((cells_[yy][i].character & 0xc0) != 0x80) r1++;
-+      }
-+      if(r < r1) r = r1;
-+    }
-+  }
-+  return r;
-+}
-+
- void
- Area::resize(size_type w, size_type h)
- {
-@@ -439,7 +480,7 @@
-       char c = p->character;
-       char a = p->attribute;
- 
--      if (c == (char) LATIN1_nbsp) c = ' ';
-+      if (c == (char) LATIN1_nbsp && !USE_UTF8) c = ' ';
- 
-       if (a == Cell::NONE) {
-         os << c;
-Nur in html2text-1.3.2a-patched/: Area.C.orig.
-diff -r -u -bB html2text-1.3.2a/Area.h html2text-1.3.2a-patched/Area.h
---- html2text-1.3.2a/Area.h	2003-11-23 12:05:29.000000000 +0100
-+++ html2text-1.3.2a-patched/Area.h	2005-05-13 22:19:59.863137536 +0200
-@@ -81,6 +81,8 @@
-   Cell       &operator[](size_type x)       { return cells_[x]; }
-   const Cell *cells() const { return cells_; }
- 
-+  unsigned int utf_length(size_type f, size_type t) const;
-+
-   void resize(size_type l);
-   void enlarge(size_type l) { if (l > length_) resize(l); }
- 
-@@ -134,6 +136,8 @@
-   Cell       *operator[](size_type y)       { return cells_[y]; }
-   const Area &operator>>=(size_type rs);
- 
-+  unsigned int utf_width();
-+
-   void resize(size_type w, size_type h);
-   void enlarge(size_type w, size_type h);
- 
-Nur in html2text-1.3.2a-patched/: Area.h.orig.
-diff -r -u -bB html2text-1.3.2a/format.C html2text-1.3.2a-patched/format.C
---- html2text-1.3.2a/format.C	2003-11-23 12:05:29.000000000 +0100
-+++ html2text-1.3.2a-patched/format.C	2005-05-13 22:19:59.865137232 +0200
-@@ -1210,6 +1210,7 @@
-     }
- 
-     Line::size_type to = from + 1;
-+    int to_from;
- 
-     Line::size_type lbp = (Line::size_type) -1; // "Last break position".
- 
-@@ -1238,18 +1239,20 @@
-         to++;
-       }
- 
--      if (to - from > w && lbp != (Area::size_type) -1) { to = lbp; break; }
-+      if (line.utf_length(from,to) > w && lbp != (Area::size_type) -1) 
-+                    { to = lbp; break; }
-     }
- 
-+    to_from = line.utf_length(from,to);
-     /*
-      * Copy the "from...to" range from the "line" to the bottom of the "res"
-      * Area.
-      */
-     Area::size_type x = 0;
-     Area::size_type len = to - from;
--    if (halign == Area::LEFT || len >= w) { ;                   } else
--    if (halign == Area::CENTER)           { x += (w - len) / 2; } else
--    if (halign == Area::RIGHT)            { x += w - len;       }
-+    if (halign == Area::LEFT || to_from >= w) { ;                   } else
-+    if (halign == Area::CENTER)           { x += (w - to_from) / 2; } else
-+    if (halign == Area::RIGHT)            { x += w - to_from;       }
-     res->insert(line.cells() + from, len, x, res->height());
- 
-     /*
-Nur in html2text-1.3.2a-patched/: format.C.orig.
-diff -r -u -bB html2text-1.3.2a/html2text.C html2text-1.3.2a-patched/html2text.C
---- html2text-1.3.2a/html2text.C	2003-11-23 12:05:29.000000000 +0100
-+++ html2text-1.3.2a-patched/html2text.C	2005-05-13 22:19:59.868136776 +0200
-@@ -148,9 +148,10 @@
-   -o <file>      Redirect output into <file>\n\
-   -nobs          Do not use backspaces for boldface and underlining\n\
-   -ascii         Use plain ASCII for output instead of ISO-8859-1\n\
-+  -utf8          Assume both terminal and input stream are in UTF-8 mode\n\
- ";
- 
--int use_iso8859 = 1;
-+int use_encoding = ISO8859;
- 
- int
- main(int argc, char **argv)
-@@ -199,7 +200,8 @@
-     if (!strcmp(arg, "-width"        )) { width = atoi(argv[++i]);       } else
-     if (!strcmp(arg, "-o"            )) { output_file_name = argv[++i];  } else
-     if (!strcmp(arg, "-nobs"         )) { use_backspaces = false;        } else
--    if (!strcmp(arg, "-ascii"        )) { use_iso8859 = false;           } else
-+    if (!strcmp(arg, "-ascii"        )) { use_encoding = ASCII;          } else
-+    if (!strcmp(arg, "-utf8"         )) { use_encoding = UTF8;           } else
-     {
-       std::cerr
- 	<< "Unrecognized command line option \""
-Nur in html2text-1.3.2a-patched/: html2text.C.orig.
-diff -r -u -bB html2text-1.3.2a/html.h html2text-1.3.2a-patched/html.h
---- html2text-1.3.2a/html.h	2001-10-04 22:03:54.000000000 +0200
-+++ html2text-1.3.2a-patched/html.h	2005-05-13 22:19:59.866137080 +0200
-@@ -61,6 +61,11 @@
- 
- /* ------------------------------------------------------------------------- */
- 
-+enum {ASCII, ISO8859, UTF8};
-+#define USE_ISO8859 (use_encoding == ISO8859)
-+#define USE_ASCII (use_encoding == ASCII)
-+#define USE_UTF8 (use_encoding == UTF8)
-+
- #define LATIN1_nbsp   160
- #define LATIN1_iexcl  161
- #define LATIN1_cent   162
-diff -r -u -bB html2text-1.3.2a/sgml.C html2text-1.3.2a-patched/sgml.C
---- html2text-1.3.2a/sgml.C	2003-11-23 12:09:11.000000000 +0100
-+++ html2text-1.3.2a-patched/sgml.C	2005-05-13 22:19:59.870136472 +0200
-@@ -62,261 +62,280 @@
-   char name[8];
-   int  iso8859code;
-   char *asciistr;
-+  unsigned long unicode;
- } entities[] = {
--  { "AElig",   LATIN1_AElig,  "AE"         },
--  { "AMP",     0,             "&"          },
--  { "Aacute",  LATIN1_Aacute, "A'"         },
--  { "Acirc",   LATIN1_Acirc,  "A^"         },
--  { "Agrave",  LATIN1_Agrave, "A`"         },
--  { "Alpha",   0,             "A"          },
--  { "Aring",   LATIN1_Aring,  "AA"         },
--  { "Atilde",  LATIN1_Atilde, "A~"         },
--  { "Auml",    LATIN1_Auml,   "A\""        },
--  { "Beta",    0,             "B"          },
--  { "Ccedil",  LATIN1_Ccedil, "C,"         },
--  { "Chi",     0,             "H"          },
--  { "Dagger",  0,             "++"         },
--  { "Delta",   0,             "D"          },
--  { "ETH",     LATIN1_ETH,    "D-"         },
--  { "Eacute",  LATIN1_Eacute, "E'"         },
--  { "Ecirc",   LATIN1_Ecirc,  "E^"         },
--  { "Egrave",  LATIN1_Egrave, "E`"         },
--  { "Epsilon", 0,             "E"          },
--  { "Eta",     0,             "E"          },
--  { "Euml",    LATIN1_Euml,   "E\""        },
--  { "GT",      0,             ">"          },
--  { "Gamma",   0,             "G"          },
--  { "Iacute",  LATIN1_Iacute, "I'"         },
--  { "Icirc",   LATIN1_Icirc,  "I^"         },
--  { "Igrave",  LATIN1_Igrave, "I`"         },
--  { "Iota",    0,             "I"          },
--  { "Iuml",    LATIN1_Iuml,   "I\""        },
--  { "Kappa",   0,             "K"          },
--  { "LT",      0,             "<"          },
--  { "Lambda",  0,             "L"          },
--  { "Mu",      0,             "M"          },
--  { "Ntilde",  LATIN1_Ntilde, "N~"         },
--  { "Nu",      0,             "N"          },
--  { "OElig",   0,             "OE"         },
--  { "Oacute",  LATIN1_Oacute, "O'"         },
--  { "Ocirc",   LATIN1_Ocirc,  "O^"         },
--  { "Ograve",  LATIN1_Ograve, "O`"         },
--  { "Omega",   0,             "O"          },
--  { "Omicron", 0,             "O"          },
--  { "Oslash",  LATIN1_Oslash, "O/"         },
--  { "Otilde",  LATIN1_Otilde, "O~"         },
--  { "Ouml",    LATIN1_Ouml,   "O\""        },
--  { "Phi",     0,             "F"          },
--  { "Pi",      0,             "P"          },
--  { "Prime",   0,             "''"         },
--  { "Psi",     0,             "PS"         },
--  { "QUOT",    0,             "\""         },
--  { "Rho",     0,             "R"          },
--  { "Scaron",  0,             "S"          },
--  { "Sigma",   0,             "S"          },
--  { "THORN",   LATIN1_THORN,  "TH"         },
--  { "Tau",     0,             "T"          },
--  { "Theta",   0,             "TH"         },
--  { "Uacute",  LATIN1_Uacute, "U'"         },
--  { "Ucirc",   LATIN1_Ucirc,  "U^"         },
--  { "Ugrave",  LATIN1_Ugrave, "U`"         },
--  { "Upsilon", 0,             "U"          },
--  { "Uuml",    LATIN1_Uuml,   "U\""        },
--  { "Xi",      0,             "X"          },
--  { "Yacute",  LATIN1_Yacute, "Y'"         },
--  { "Yuml",    0,             "Y\""        },
--  { "Zeta",    0,             "Z"          },
--  { "aacute",  LATIN1_aacute, "a'"         },
--  { "acirc",   LATIN1_acirc,  "a^"         },
--  { "acute",   LATIN1_acute,  "'"          },
--  { "aelig",   LATIN1_aelig,  "ae"         },
--  { "agrave",  LATIN1_agrave, "a`"         },
-+  { "AElig",   LATIN1_AElig,  "AE",  0x00c6},
-+  { "AMP",     0,             "&",   0x0026},
-+  { "Aacute",  LATIN1_Aacute, "A'",  0x00c1},
-+  { "Acirc",   LATIN1_Acirc,  "A^",  0x00c2},
-+  { "Agrave",  LATIN1_Agrave, "A`",  0x00c0},
-+  { "Alpha",   0,             "A",   0x0391},
-+  { "Aring",   LATIN1_Aring,  "AA",  0x00c5},
-+  { "Atilde",  LATIN1_Atilde, "A~",  0x00c3},
-+  { "Auml",    LATIN1_Auml,   "A\"", 0x00c4},
-+  { "Beta",    0,             "B",   0x0392},
-+  { "Ccedil",  LATIN1_Ccedil, "C,",  0x00c7},
-+  { "Chi",     0,             "H",   0x03a7},
-+  { "Dagger",  0,             "++",  0x2020},
-+  { "Delta",   0,             "D",   0x0394},
-+  { "ETH",     LATIN1_ETH,    "D-",  0x00d0},
-+  { "Eacute",  LATIN1_Eacute, "E'",  0x00c9},
-+  { "Ecirc",   LATIN1_Ecirc,  "E^",  0x00ca},
-+  { "Egrave",  LATIN1_Egrave, "E`",  0x00c8},
-+  { "Epsilon", 0,             "E",   0x0395},
-+  { "Eta",     0,             "E",   0x0397},
-+  { "Euml",    LATIN1_Euml,   "E\"", 0x00cb},
-+  { "GT",      0,             ">",   0x003e},
-+  { "Gamma",   0,             "G",   0x0393},
-+  { "Iacute",  LATIN1_Iacute, "I'",  0x00cd},
-+  { "Icirc",   LATIN1_Icirc,  "I^",  0x00ce},
-+  { "Igrave",  LATIN1_Igrave, "I`",  0x00cc},
-+  { "Iota",    0,             "I",   0x0399},
-+  { "Iuml",    LATIN1_Iuml,   "I\"", 0x00cf},
-+  { "Kappa",   0,             "K",   0x039a},
-+  { "LT",      0,             "<",   0x003c},
-+  { "Lambda",  0,             "L",   0x039b},
-+  { "Mu",      0,             "M",   0x039c},
-+  { "Ntilde",  LATIN1_Ntilde, "N~",  0x00d1},
-+  { "Nu",      0,             "N",   0x039d},
-+  { "OElig",   0,             "OE",  0x0152},
-+  { "Oacute",  LATIN1_Oacute, "O'",  0x00d3},
-+  { "Ocirc",   LATIN1_Ocirc,  "O^",  0x00d4},
-+  { "Ograve",  LATIN1_Ograve, "O`",  0x00d2},
-+  { "Omega",   0,             "O",   0x03a9},
-+  { "Omicron", 0,             "O",   0x039f},
-+  { "Oslash",  LATIN1_Oslash, "O/",  0x00d8},
-+  { "Otilde",  LATIN1_Otilde, "O~",  0x00d5},
-+  { "Ouml",    LATIN1_Ouml,   "O\"", 0x00d6},
-+  { "Phi",     0,             "F",   0x03a6},
-+  { "Pi",      0,             "P",   0x03a0},
-+  { "Prime",   0,             "''",        },
-+  { "Psi",     0,             "PS",  0x03a8},
-+  { "QUOT",    0,             "\"",        },
-+  { "Rho",     0,             "R",   0x03a1},
-+  { "Scaron",  0,             "S",   0x0161},
-+  { "Sigma",   0,             "S",   0x03a3},
-+  { "THORN",   LATIN1_THORN,  "TH",  0x00de},
-+  { "Tau",     0,             "T",   0x03a4},
-+  { "Theta",   0,             "TH",  0x0398},
-+  { "Uacute",  LATIN1_Uacute, "U'",  0x00da},
-+  { "Ucirc",   LATIN1_Ucirc,  "U^",  0x00db},
-+  { "Ugrave",  LATIN1_Ugrave, "U`",  0x00d9},
-+  { "Upsilon", 0,             "U",   0x03a5},
-+  { "Uuml",    LATIN1_Uuml,   "U\"", 0x00dc},
-+  { "Xi",      0,             "X",   0x039e},
-+  { "Yacute",  LATIN1_Yacute, "Y'",  0x00dd},
-+  { "Yuml",    0,             "Y\"", 0x0178},
-+  { "Zeta",    0,             "Z",   0x0396},
-+  { "aacute",  LATIN1_aacute, "a'",  0x00e1},
-+  { "acirc",   LATIN1_acirc,  "a^",  0x00e2},
-+  { "acute",   LATIN1_acute,  "'",   0x00b4},
-+  { "aelig",   LATIN1_aelig,  "ae",  0x00e6},
-+  { "agrave",  LATIN1_agrave, "a`",  0x00e0},
-   { "alefsym", 0,             "Aleph"      },
--  { "alpha",   0,             "a"          },
-+  { "alpha",   0,             "a",   0x03b1},
-   { "amp",     0,             "&"          },
-   { "and",     0,             "AND"        },
-   { "ang",     0,             "-V"         },
-   { "apos",    0,             "'"          },
--  { "aring",   LATIN1_aring,  "aa"         },
--  { "asymp",   0,             "~="         },
--  { "atilde",  LATIN1_atilde, "a~"         },
--  { "auml",    LATIN1_auml,   "a\""        },
-+  { "aring",   LATIN1_aring,  "aa",  0x00e5},
-+  { "asymp",   0,             "~=",  0x2248},
-+  { "atilde",  LATIN1_atilde, "a~",  0x00e3},
-+  { "auml",    LATIN1_auml,   "a\"", 0x00e5},
-   { "bdquo",   0,             "\""         },
--  { "beta",    0,             "b"          },
--  { "brvbar",  LATIN1_brvbar, "|"          },
--  { "bull",    0,             " o "        },
-+  { "beta",    0,             "b",   0x03b2},
-+  { "brvbar",  LATIN1_brvbar, "|",   0x00a6},
-+  { "bull",    0,             " o ", 0x2022},
-   { "cap",     0,             "(U"         },
--  { "ccedil",  LATIN1_ccedil, "c,"         },
--  { "cedil",   LATIN1_cedil,  ","          },
--  { "cent",    LATIN1_cent,   "-c-"        },
--  { "chi",     0,             "h"          },
--  { "circ",    0,             "^"          },
-+  { "ccedil",  LATIN1_ccedil, "c,",  0x00e7},
-+  { "cedil",   LATIN1_cedil,  ",",   0x00b8},
-+  { "cent",    LATIN1_cent,   "-c-", 0x00a2},
-+  { "chi",     0,             "h",   0x03c7},
-+  { "circ",    0,             "^",   0x005e},
- //  { "clubs",   0,             "[clubs]"    },
-   { "cong",    0,             "?="         },
--  { "copy",    LATIN1_copy,   "(c)"        },
-+  { "copy",    LATIN1_copy,   "(c)", 0x00a9},
-   { "crarr",   0,             "<-'"        },
-   { "cup",     0,             ")U"         },
--  { "curren",  LATIN1_curren, "CUR"        },
-+  { "curren",  LATIN1_curren, "CUR", 0x00a4},
-   { "dArr",    0,             "vv"         },
--  { "dagger",  0,             "+"          },
-+  { "dagger",  0,             "+",   0x2020},
-   { "darr",    0,             "v"          },
--  { "deg",     LATIN1_deg,    "DEG"        },
--  { "delta",   0,             "d"          },
-+  { "deg",     LATIN1_deg,    "DEG", 0x00b0},
-+  { "delta",   0,             "d",   0x03b4},
- //  { "diams",   0,             "[diamonds]" },
--  { "divide",  LATIN1_divide, "/"          },
--  { "eacute",  LATIN1_eacute, "e'"         },
--  { "ecirc",   LATIN1_ecirc,  "e^"         },
--  { "egrave",  LATIN1_egrave, "e`"         },
-+  { "divide",  LATIN1_divide, "/",   0x00f7},
-+  { "eacute",  LATIN1_eacute, "e'",  0x00e9},
-+  { "ecirc",   LATIN1_ecirc,  "e^",  0x00ea},
-+  { "egrave",  LATIN1_egrave, "e`",  0x00e8},
-   { "empty",   0,             "{}"         },
--  { "epsilon", 0,             "e"          },
--  { "equiv",   0,             "=="         },
--  { "eta",     0,             "e"          },
--  { "eth",     LATIN1_eth,    "d-"         },
--  { "euml",    LATIN1_euml,   "e\""        },
--  { "euro",    0,             "EUR"        },
-+  { "epsilon", 0,             "e",   0x03b5},
-+  { "equiv",   0,             "==",  0x2261},
-+  { "eta",     0,             "e",   0x03b7},
-+  { "eth",     LATIN1_eth,    "d-",  0x00f0},
-+  { "euml",    LATIN1_euml,   "e\"", 0x00eb},
-+  { "euro",    0,             "EUR", 0x20ac},
-   { "exist",   0,             "TE"         },
-   { "fnof",    0,             "f"          },
-   { "forall",  0,             "FA"         },
--  { "frac12",  LATIN1_frac12, " 1/2"       },
--  { "frac14",  LATIN1_frac14, " 1/4"       },
--  { "frac34",  LATIN1_frac34, " 3/4"       },
-+  { "frac12",  LATIN1_frac12, " 1/2",0x00bd},
-+  { "frac14",  LATIN1_frac14, " 1/4",0x00bc},
-+  { "frac34",  LATIN1_frac34, " 3/4",0x00be},
-   { "frasl",   0,             "/"          },
--  { "gamma",   0,             "g"          },
--  { "ge",      0,             ">="         },
--  { "gt",      0,             ">"          },
-+  { "gamma",   0,             "g",   0x03b3},
-+  { "ge",      0,             ">=",  0x2265},
-+  { "gt",      0,             ">",   0x003e},
-   { "hArr",    0,             "<=>"        },
-   { "harr",    0,             "<->"        },
- //  { "hearts",  0,             "[hearts]"   },
--  { "hellip",  0,             "..."        },
--  { "iacute",  LATIN1_iacute, "i'"         },
--  { "icirc",   LATIN1_icirc,  "i^"         },
--  { "iexcl",   LATIN1_iexcl,  "!"          },
--  { "igrave",  LATIN1_igrave, "i`"         },
-+  { "hellip",  0,             "...", 0x2026},
-+  { "iacute",  LATIN1_iacute, "i'",  0x00ed},
-+  { "icirc",   LATIN1_icirc,  "i^",  0x00ee},
-+  { "iexcl",   LATIN1_iexcl,  "!",   0x00a1},
-+  { "igrave",  LATIN1_igrave, "i`",  0x00ec},
-   { "image",   0,             "Im"         },
--  { "infin",   0,             "oo"         },
--  { "int",     0,             "INT"        },
--  { "iota",    0,             "i"          },
--  { "iquest",  LATIN1_iquest, "?"          },
-+  { "infin",   0,             "oo",  0x221e},
-+  { "int",     0,             "INT", 0x222b},
-+  { "iota",    0,             "i",   0x03b9},
-+  { "iquest",  LATIN1_iquest, "?",   0x00bf},
-   { "isin",    0,             "(-"         },
--  { "iuml",    LATIN1_iuml,   "i\""        },
--  { "kappa",   0,             "k"          },
-+  { "iuml",    LATIN1_iuml,   "i\"", 0x00ef},
-+  { "kappa",   0,             "k",   0x03ba},
-   { "lArr",    0,             "<="         },
--  { "lambda",  0,             "l"          },
-+  { "lambda",  0,             "l",   0x03bb},
-   { "lang",    0,             "</"         },
-   { "laquo",   LATIN1_laquo,  "<<"         },
--  { "larr",    0,             "<-"         },
-+  { "larr",    0,             "<-",  0x2190},
- //  { "lceil",   0,             "<|"         },
-   { "ldquo",   0,             "\""         },
--  { "le",      0,             "<="         },
-+  { "le",      0,             "<=",  0x2264},
- //  { "lfloor",  0,             "|<"         },
-   { "lowast",  0,             "*"          },
-   { "loz",     0,             "<>"         },
-   { "lsaquo",  0,             "<"          },
-   { "lsquo",   0,             "`"          },
--  { "lt",      0,             "<"          },
--  { "macr",    LATIN1_macr,   "-"          },
-+  { "lt",      0,             "<",   0x003c},
-+  { "macr",    LATIN1_macr,   "-",   0x00af},
-   { "mdash",   0,             "--"         },
--  { "micro",   LATIN1_micro,  "my"         },
--  { "middot",  LATIN1_middot, "."          },
--  { "minus",   0,             "-"          },
--  { "mu",      0,             "m"          },
-+  { "micro",   LATIN1_micro,  "my",  0x00b5},
-+  { "middot",  LATIN1_middot, ".",   0x00b7},
-+  { "minus",   0,             "-",   0x2212},
-+  { "mu",      0,             "m",   0x03bc},
-   { "nabla",   0,             "Nabla"      },
--  { "nbsp",    LATIN1_nbsp,   " "          },
-+  { "nbsp",    LATIN1_nbsp,   " ",   0x00a0},
-   { "ndash",   0,             "-"          },
--  { "ne",      0,             "!="         },
-+  { "ne",      0,             "!=",  0x2260},
-   { "ni",      0,             "-)"         },
-   { "not",     LATIN1_not,    "NOT"        },
-   { "notin",   0,             "!(-"        },
-   { "nsub",    0,             "!(C"        },
--  { "ntilde",  LATIN1_ntilde, "n~"         },
--  { "nu",      0,             "n"          },
--  { "oacute",  LATIN1_oacute, "o'"         },
--  { "ocirc",   LATIN1_ocirc,  "o^"         },
-+  { "ntilde",  LATIN1_ntilde, "n~",  0x00f1},
-+  { "nu",      0,             "n",   0x03bd},
-+  { "oacute",  LATIN1_oacute, "o'",  0x00f3},
-+  { "ocirc",   LATIN1_ocirc,  "o^",  0x00f4},
-   { "oelig",   0,             "oe"         },
--  { "ograve",  LATIN1_ograve, "o`"         },
-+  { "ograve",  LATIN1_ograve, "o`",  0x00f2},
-   { "oline",   LATIN1_macr,   "-"          },
--  { "omega",   0,             "o"          },
--  { "omicron", 0,             "o"          },
-+  { "omega",   0,             "o",   0x03c9},
-+  { "omicron", 0,             "o",   0x03bf},
-   { "oplus",   0,             "(+)"        },
-   { "or",      0,             "OR"         },
--  { "ordf",    LATIN1_ordf,   "-a"         },
--  { "ordm",    LATIN1_ordm,   "-o"         },
--  { "oslash",  LATIN1_oslash, "o/"         },
--  { "otilde",  LATIN1_otilde, "o~"         },
-+  { "ordf",    LATIN1_ordf,   "-a",  0x00aa},
-+  { "ordm",    LATIN1_ordm,   "-o",  0x00ba},
-+  { "oslash",  LATIN1_oslash, "o/",  0x00f8},
-+  { "otilde",  LATIN1_otilde, "o~",  0x00f5},
-   { "otimes",  0,             "(x)"        },
--  { "ouml",    LATIN1_ouml,   "o\""        },
--  { "para",    LATIN1_para,   "P:"         },
--  { "part",    0,             "PART"       },
--  { "permil",  0,             " 0/00"      },
-+  { "ouml",    LATIN1_ouml,   "o\"", 0x00f6},
-+  { "para",    LATIN1_para,   "P:",  0x00b6},
-+  { "part",    0,             "PART",0x2202},
-+  { "permil",  0,             " 0/00",0x2030},
-   { "perp",    0,             "-T"         },
--  { "phi",     0,             "f"          },
--  { "pi",      0,             "p"          },
-+  { "phi",     0,             "f",   0x03c6},
-+  { "pi",      0,             "p",   0x03c0},
-   { "piv",     0,             "Pi"         },
--  { "plusmn",  LATIN1_plusmn, "+/-"        },
--  { "pound",   LATIN1_pound,  "-L-"        },
-+  { "plusmn",  LATIN1_plusmn, "+/-", 0x00b1},
-+  { "pound",   LATIN1_pound,  "-L-", 0x00a3},
-   { "prime",   0,             "'"          },
--  { "prod",    0,             "PROD"       },
-+  { "prod",    0,             "PROD",0x220f},
-   { "prop",    0,             "0("         },
--  { "psi",     0,             "ps"         },
-+  { "psi",     0,             "ps",  0x03c8},
-   { "quot",    0,             "\""         },
-   { "rArr",    0,             "=>"         },
--  { "radic",   0,             "SQRT"       },
-+  { "radic",   0,             "SQRT",0x221a},
-   { "rang",    0,             "/>"         },
-   { "raquo",   LATIN1_raquo,  ">>"         },
--  { "rarr",    0,             "->"         },
-+  { "rarr",    0,             "->",  0x2192},
- //  { "rceil",   0,             ">|"         },
-   { "rdquo",   0,             "\""         },
-   { "real",    0,             "Re"         },
--  { "reg",     LATIN1_reg,    "(R)"        },
-+  { "reg",     LATIN1_reg,    "(R)", 0x00ae},
- //  { "rfloor",  0,             "|>"         },
--  { "rho",     0,             "r"          },
-+  { "rho",     0,             "r",   0x03c1},
-   { "rsaquo",  0,             ">"          },
-   { "rsquo",   0,             "'"          },
-   { "sbquo",   0,             "'"          },
--  { "scaron",  0,             "s"          },
-+  { "scaron",  0,             "s",   0x0161},
-   { "sdot",    0,             "DOT"        },
--  { "sect",    LATIN1_sect,   "S:"         },
-+  { "sect",    LATIN1_sect,   "S:",  0x00a7},
-   { "shy",     LATIN1_shy,    ""           },
--  { "sigma",   0,             "s"          },
--  { "sigmaf",  0,             "s"          },
-+  { "sigma",   0,             "s",   0x03c3},
-+  { "sigmaf",  0,             "s",   0x03c2},
-   { "sim",     0,             "~"          },
- //  { "spades",  0,             "[spades]"   },
-   { "sub",     0,             "(C"         },
-   { "sube",    0,             "(_"         },
--  { "sum",     0,             "SUM"        },
-+  { "sum",     0,             "SUM", 0x2211},
-   { "sup",     0,             ")C"         },
--  { "sup1",    LATIN1_sup1,   "^1"         },
--  { "sup2",    LATIN1_sup2,   "^2"         },
--  { "sup3",    LATIN1_sup3,   "^3"         },
-+  { "sup1",    LATIN1_sup1,   "^1",  0x00b9},
-+  { "sup2",    LATIN1_sup2,   "^2",  0x00b2},
-+  { "sup3",    LATIN1_sup3,   "^3",  0x00b3},
-   { "supe",    0,             ")_"         },
--  { "szlig",   LATIN1_szlig,  "ss"         },
--  { "tau",     0,             "t"          },
-+  { "szlig",   LATIN1_szlig,  "ss",  0x00df},
-+  { "tau",     0,             "t",   0x03c4},
-   { "there4",  0,             ".:"         },
--  { "theta",   0,             "th"         },
--  { "thorn",   LATIN1_thorn,  "th"         },
--  { "tilde",   0,             "~"          },
--  { "times",   LATIN1_times,  "x"          },
--  { "trade",   0,             "[TM]"       },
-+  { "theta",   0,             "th",  0x03b8},
-+  { "thorn",   LATIN1_thorn,  "th",  0x00fe},
-+  { "tilde",   0,             "~",   0x02dc},
-+  { "times",   LATIN1_times,  "x",   0x00d7},
-+  { "trade",   0,             "[TM]",0x2122},
-   { "uArr",    0,             "^^"         },
--  { "uacute",  LATIN1_uacute, "u'"         },
-+  { "uacute",  LATIN1_uacute, "u'",  0x00fa},
-   { "uarr",    0,             "^"          },
--  { "ucirc",   LATIN1_ucirc,  "u^"         },
--  { "ugrave",  LATIN1_ugrave, "u`"         },
--  { "uml",     LATIN1_uml,    "\""         },
--  { "upsilon", 0,             "u"          },
--  { "uuml",    LATIN1_uuml,   "u\""        },
-+  { "ucirc",   LATIN1_ucirc,  "u^",  0x00fb},
-+  { "ugrave",  LATIN1_ugrave, "u`",  0x00f9},
-+  { "uml",     LATIN1_uml,    "\"",  0x00a8},
-+  { "upsilon", 0,             "u",   0x03c5},
-+  { "uuml",    LATIN1_uuml,   "u\"", 0x00fc},
-   { "weierp",  0,             "P"          },
--  { "xi",      0,             "x"          },
--  { "yacute",  LATIN1_yacute, "y'"         },
--  { "yen",     LATIN1_yen,    "YEN"        },
--  { "yuml",    LATIN1_yuml,   "y\""        },
--  { "zeta",    0,             "z"          },
-+  { "xi",      0,             "x",   0x03be},
-+  { "yacute",  LATIN1_yacute, "y'",  0x00fd},
-+  { "yen",     LATIN1_yen,    "YEN", 0x00a5},
-+  { "yuml",    LATIN1_yuml,   "y\"", 0x00ff},
-+  { "zeta",    0,             "z",   0x03b6},
- };
- 
--extern int use_iso8859;
-+extern int use_encoding;
- 
- /* ------------------------------------------------------------------------- */
- 
-+char ubuf[4];
-+
-+char *mkutf(unsigned long x)
-+{
-+  memset(ubuf, 0, 4);
-+  if(x < 128) ubuf[0] = x;
-+  else if(x < 0x800) {
-+     ubuf[0] = (0xc0 | ((x >> 6) & 0x1f));
-+     ubuf[1] = (0x80 | (x & 0x3f));
-+  }
-+  else {
-+     ubuf[0] = (0xe0 | ((x >> 12) & 0x0f));
-+     ubuf[1] = (0x80 | ((x >> 6) & 0x3f));
-+     ubuf[2] = (0x80 | (x & 0x3f));
-+  }
-+  return ubuf;
-+}
-+
- void
- replace_sgml_entities(string *s)
- {
-@@ -330,9 +349,9 @@
-      */
-     while (j < l && s->at(j) != '&') ++j;
-     /*
--     * We could convert high-bit chars to "&#233;" here if use_iso8859
--     * is off, then let them be translated or not.  Is the purpose of
--     * !use_iso8859 to allow SGML entities to be seen, or to strongly
-+     * We could convert high-bit chars to "&#233;" here if USE_ASCII
-+     * is on, then let them be translated or not.  Is the purpose of
-+     * USE_ASCII to allow SGML entities to be seen, or to strongly
-      * filter against high-ASCII chars that might blow up a terminal
-      * that doesn't speak ISO8859?  For the moment, "allow SGML entities
-      * to be seen" -- no filtering here.
-@@ -370,7 +389,11 @@
-           if (!isdigit(c)) break;
-           x = 10 * x + c - '0';
-         }
--        if (use_iso8859 || (x < 128)) {
-+        if (USE_UTF8) {
-+          s->replace(beg, j - beg, mkutf(x));
-+          j = beg + 1;
-+        }
-+        else if (USE_ISO8859 && (x < 256) || USE_ASCII && (x < 128)) {
-         s->replace(beg, j - beg, 1, (char) x);
-         j = beg + 1;
-         } else {
-@@ -408,13 +431,17 @@
-         (int (*)(const void *, const void *)) strcmp
-       );
-       if (entity != NULL) {
--        if (use_iso8859 && entity->iso8859code) {
-+        if (USE_ISO8859 && entity->iso8859code) {
-           s->replace(beg, j - beg, 1, (char) entity->iso8859code);
-           j = beg + 1;
--        } else if (entity->asciistr) {
-+        } else if (USE_ASCII && entity->asciistr) {
-           s->replace(beg, j - beg, entity->asciistr);
-         j = beg + 1;
-         } /* else don't replace it at all, we don't have a translation */
-+        else if(USE_UTF8 && entity->unicode) {
-+        s->replace(beg, j - beg, mkutf(entity->unicode));
-+        j = beg + 1;
-+        }
-       }
-     } else {
-       ;                         /* EXTENSION: Allow literal '&' sometimes. */
-diff -r -u -bB html2text-1.3.2a/table.C html2text-1.3.2a-patched/table.C
---- html2text-1.3.2a/table.C	2002-07-22 13:32:50.000000000 +0200
-+++ html2text-1.3.2a-patched/table.C	2005-05-13 22:19:59.871136320 +0200
-@@ -175,7 +175,7 @@
-           - (*number_of_columns_return - 1) * (column_spacing + 0),
-           Area::LEFT // Yields better results than "p->halign"!
-         ));
--	p->width = tmp.get() ? tmp->width() : 0;
-+	p->width = tmp.get() ? tmp->utf_width() : 0;
-       }
-       p->minimized = false;
- 
-@@ -308,7 +308,7 @@
- 	left_of_column + old_column_width - 1,
- 	Area::LEFT // Yields better results than "lc.halign"!
-       ));
--      w = tmp->width();
-+      w = tmp->utf_width();
-       if (w >= left_of_column + old_column_width) lc.minimized = true;
-     }
-     if (w > left_of_column + new_column_width) {
diff --git a/html2text.spec b/html2text.spec
index c28ff9d..289b555 100644
--- a/html2text.spec
+++ b/html2text.spec
@@ -1,53 +1,53 @@
 Name:          html2text
-Version:       1.3.2a
-Release:       3mamba
-Summary:       html2text is a command line utility, written in C++, that converts HTML documents into plain text.
+Version:       2020.1.16
+Release:       1mamba
+Summary:       Converts a page of HTML into clean, easy-to-read plain ASCII text
 Group:         Graphical Desktop/Applications/Office
 Vendor:        openmamba
 Distribution:  openmamba
-Packager:      Tiziana Ferro <tiziana.ferro@email.it>
+Packager:      Silvan Calarco <silvan.calarco@mambasoft.it>
 URL:           http://www.mbayer.de/html2text
-Source:        http://userpage.fu-berlin.de/%7Embayer/tools/%{name}-%{version}.tar.gz
-Patch0:        %{name}-1.3.2a-utf8.patch
+Source:        https://pypi.debian.net/html2text/html2text-%{version}.tar.gz
+#Source:        http://userpage.fu-berlin.de/%7Embayer/tools/%{name}-%{version}.tar.gz
 License:       GPL
 BuildRoot:     %{_tmppath}/%{name}-%{version}-root
 ## AUTOBUILDREQ-BEGIN
-BuildRequires: glibc-devel
-BuildRequires: libgcc
-BuildRequires: libstdc++6-devel
+BuildRequires: libpython3-devel
 ## AUTOBUILDREQ-END
+Requires:      python3 >= %python_version
 
 %description
-html2text is a command line utility, written in C++, that converts HTML documents into plain text.
+html2text is a Python script that converts a page of HTML into clean, easy-to-read plain ASCII text. Better yet, that ASCII also happens to be valid Markdown (a text-to-HTML format).
 
 %prep
 %setup -q
-%patch0 -p1
 
 %build
-%configure --prefix=/usr
-%make
+CFLAGS="%{optflags}" %{__python3} setup.py build
 
 %install
 [ "%{buildroot}" != / ] && rm -rf "%{buildroot}"
-mkdir -p %{buildroot}%{_bindir}
-install -m 755 html2text %{buildroot}%{_bindir}
-mkdir -p %{buildroot}%{_mandir}/man1
-mkdir -p %{buildroot}%{_mandir}/man5
-install -m 644 html2text.1.gz %{buildroot}%{_mandir}/man1
-install -m 644 html2textrc.5.gz %{buildroot}%{_mandir}/man5
+%{__python3} setup.py install \
+   -O1 --skip-build \
+   --root="%{buildroot}" \
+   --install-headers=%{python3_inc} \
+   --install-lib=%{python3_sitearch} \
+   --record=%{name}.filelist
+
+sed -i "s,.*/man/.*,&.gz," %{name}.filelist
 
 %clean
 [ "%{buildroot}" != / ] && rm -rf "%{buildroot}"
 
-%files
+%files -f %{name}.filelist
 %defattr(-,root,root)
-%doc CHANGES COPYING CREDITS README TODO
-%{_bindir}/html2text
-%{_mandir}/man1/html2text.*
-%{_mandir}/man5/html2textrc.*
+%doc COPYING
 
 %changelog
+* Tue Mar 03 2020 Silvan Calarco <silvan.calarco@mambasoft.it> 2020.1.16-1mamba
+- update to 2020.1.16
+- switch to python html2text package
+
 * Wed Jul 18 2012 Silvan Calarco <silvan.calarco@mambasoft.it> 1.3.2a-3mamba
 - added utf8 patch