From 984abd200801f1cfae41279cbbe17ece109246a6 Mon Sep 17 00:00:00 2001 From: wulan17 Date: Sun, 2 Mar 2025 23:23:22 +0700 Subject: [PATCH] pyrofork: Fix nested url markdown parsing * The problem with the current implementation is that markdown nested inside a URL markdown is not parsed. For example, if we add bold (**) markdown inside a URL markdown, the URL text is shown as `**text**` instead of being rendered bold. Signed-off-by: wulan17 --- pyrogram/parser/markdown.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index 4c5a5f2e..5c27bae8 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -37,7 +37,7 @@ PRE_DELIM = "```" BLOCKQUOTE_DELIM = ">" BLOCKQUOTE_EXPANDABLE_DELIM = "**>" -MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( +MARKDOWN_RE = re.compile(r"({d})".format( d="|".join( ["".join(i) for i in [ [rf"\{j}" for j in i] @@ -52,6 +52,7 @@ MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format( ] ]] ))) +URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)") OPENING_TAG = "<{}>" CLOSING_TAG = "" @@ -118,7 +119,7 @@ class Markdown: for i, match in enumerate(re.finditer(MARKDOWN_RE, text)): start, _ = match.span() - delim, is_emoji, text_url, url = match.groups() + delim = match.group(1) full = match.group(0) if delim in FIXED_WIDTH_DELIMS: @@ -127,16 +128,6 @@ class Markdown: if is_fixed_width and delim not in FIXED_WIDTH_DELIMS: continue - if not is_emoji and text_url: - text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start) - continue - - if is_emoji: - emoji = text_url - emoji_id = url.lstrip("tg://emoji?id=") - text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start) - continue - if delim == BOLD_DELIM: tag = "b" elif delim == ITALIC_DELIM: @@ -169,6 +160,21 @@ class Markdown: text = utils.replace_once(text, delim, tag, start) + for i, match in 
enumerate(re.finditer(URL_RE, text)): + start, _ = match.span() + is_emoji, text_url, url = match.groups() + full = match.group(0) + + if not is_emoji and text_url: + text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start) + continue + + if is_emoji: + emoji = text_url + emoji_id = url.lstrip("tg://emoji?id=") + text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start) + continue + for placeholder, code_section in placeholders.items(): text = text.replace(placeholder, code_section)