Skip to content

Commit fbd342d

Browse files
committed
Use BeautifulSoup with html5lib for better results
1 parent b9c8488 commit fbd342d

File tree

3 files changed

+6
-2
lines changed

3 files changed

+6
-2
lines changed

pyproject.toml

+1
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ warn_unreachable = true
60 | 60 |   [[tool.mypy.overrides]]
61 | 61 |   module = [
62 | 62 |   "apiai.*",
   | 63 | + "bs4.*",
63 | 64 |   "feedparser.*",
64 | 65 |   "gitlint.*",
65 | 66 |   "googleapiclient.*",
+1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,3 @@
  | 1 | + bs4>=4.12.2
1 | 2 |   feedparser>=6.0.10
2 | 3 |   markdownify>=0.11.6

zulip/integrations/rss/rss-bot

+4-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@ from html.parser import HTMLParser
18 | 18 |   from typing import Any, Dict, List, Optional, Tuple
19 | 19 |
20 | 20 |   import feedparser
21 |    | - from markdownify import markdownify
   | 21 | + from bs4 import BeautifulSoup
   | 22 | + from markdownify import MarkdownConverter
22 | 23 |   from typing_extensions import override
23 | 24 |
24 | 25 |   import zulip
@@ -193,7 +194,8 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
193 | 194 |   body = unwrap_text(body)
194 | 195 |
195 | 196 |   def md(html: str) -> str:
196 |     | - return markdownify(html, escape_underscores=False)
    | 197 | + soup: BeautifulSoup = BeautifulSoup(html, "html5lib")
    | 198 | + return MarkdownConverter(escape_underscores=False).convert_soup(soup)
197 | 199 |
198 | 200 |   convert: Callable[[str], str] = strip_tags if opts.strip else md
199 | 201 |   content = f"**[{entry.title}]({entry.link})**\n{convert(body)}\n{entry.link}"

0 commit comments

Comments
 (0)