pyrofork: Fix nested url markdown parsing

* The problem with the current implementation is that any markdown nested inside a URL markdown is not parsed.
For example, when we add bold (**) markdown inside a URL markdown, the URL text is shown as `**text**` instead of being rendered as bold text.

Signed-off-by: wulan17 <wulan17@nusantararom.org>
This commit is contained in:
wulan17 2025-03-02 23:23:22 +07:00
parent 13681302a0
commit 5c9470fd4f
No known key found for this signature in database
GPG key ID: 318CD6CD3A6AC0A5

View file

@ -37,7 +37,7 @@ PRE_DELIM = "```"
BLOCKQUOTE_DELIM = ">" BLOCKQUOTE_DELIM = ">"
BLOCKQUOTE_EXPANDABLE_DELIM = "**>" BLOCKQUOTE_EXPANDABLE_DELIM = "**>"
MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( MARKDOWN_RE = re.compile(r"({d})".format(
d="|".join( d="|".join(
["".join(i) for i in [ ["".join(i) for i in [
[rf"\{j}" for j in i] [rf"\{j}" for j in i]
@ -52,6 +52,7 @@ MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
] ]
]] ]]
))) )))
URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")
OPENING_TAG = "<{}>" OPENING_TAG = "<{}>"
CLOSING_TAG = "</{}>" CLOSING_TAG = "</{}>"
@ -118,7 +119,7 @@ class Markdown:
for i, match in enumerate(re.finditer(MARKDOWN_RE, text)): for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
start, _ = match.span() start, _ = match.span()
delim, is_emoji, text_url, url = match.groups() delim = match.group(1)
full = match.group(0) full = match.group(0)
if delim in FIXED_WIDTH_DELIMS: if delim in FIXED_WIDTH_DELIMS:
@ -127,16 +128,6 @@ class Markdown:
if is_fixed_width and delim not in FIXED_WIDTH_DELIMS: if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
continue continue
if not is_emoji and text_url:
text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
continue
if is_emoji:
emoji = text_url
emoji_id = url.lstrip("tg://emoji?id=")
text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
continue
if delim == BOLD_DELIM: if delim == BOLD_DELIM:
tag = "b" tag = "b"
elif delim == ITALIC_DELIM: elif delim == ITALIC_DELIM:
@ -169,6 +160,21 @@ class Markdown:
text = utils.replace_once(text, delim, tag, start) text = utils.replace_once(text, delim, tag, start)
for i, match in enumerate(re.finditer(URL_RE, text)):
start, _ = match.span()
is_emoji, text_url, url = match.groups()
full = match.group(0)
if not is_emoji and text_url:
text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
continue
if is_emoji:
emoji = text_url
emoji_id = url.lstrip("tg://emoji?id=")
text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
continue
for placeholder, code_section in placeholders.items(): for placeholder, code_section in placeholders.items():
text = text.replace(placeholder, code_section) text = text.replace(placeholder, code_section)