pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method

Signed-off-by: wulan17 <wulan17@nusantararom.org>
pyrofork: Add support for multi-line blockquote in markdown unparser
2025-12-29 12:04:51 +00:00 · 2025-03-03 00:32:37 +07:00 · 2025-03-03 00:32:37 +07:00 · 2025-03-03 00:31:57 +07:00 · 2025-03-03 00:01:20 +07:00
3 changed files with 109 additions and 80 deletions
--- a/pyrogram/methods/chats/get_forum_topics.py
+++ b/pyrogram/methods/chats/get_forum_topics.py
@ -32,7 +32,10 @@ class GetForumTopics:
    async def get_forum_topics(
        self: "pyrogram.Client",
        chat_id: Union[int, str],
-        limit: int = 0
+        limit: int = 0,
        offset_date: int = 0,
        offset_id: int = 0,
        offset_topic: int = 0
    ) -> Optional[AsyncGenerator["types.ForumTopic", None]]:
        """Get one or more topic from a chat.
@ -46,6 +49,15 @@ class GetForumTopics:
            limit (``int``, *optional*):
                Limits the number of topics to be retrieved.
            offset_date (``int``, *optional*):
                Date of the last message of the last found topic.
            offset_id (``int``, *optional*):
                ID of the last message of the last found topic.
            offset_topic (``int``, *optional*):
                ID of the last found topic.
        Returns:
            ``Generator``: On success, a generator yielding :obj:`~pyrogram.types.ForumTopic` objects is returned.
@ -62,7 +74,7 @@ class GetForumTopics:
        peer = await self.resolve_peer(chat_id)
-        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=0, offset_id=0, offset_topic=0, limit=limit)
+        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=offset_date, offset_id=offset_id, offset_topic=offset_topic, limit=limit)
        r = await self.invoke(rpc, sleep_threshold=-1)
--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@ -37,7 +37,7 @@ PRE_DELIM = "```"
 BLOCKQUOTE_DELIM = ">"
 BLOCKQUOTE_EXPANDABLE_DELIM = "**>"
-MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
+MARKDOWN_RE = re.compile(r"({d})".format(
    d="|".join(
        ["".join(i) for i in [
            [rf"\{j}" for j in i]
@ -52,6 +52,7 @@ MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
            ]
        ]]
    )))
 URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")
 OPENING_TAG = "<{}>"
 CLOSING_TAG = "</{}>"
@ -118,7 +119,7 @@ class Markdown:
        for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
            start, _ = match.span()
-            delim, is_emoji, text_url, url = match.groups()
+            delim = match.group(1)
            full = match.group(0)
            if delim in FIXED_WIDTH_DELIMS:
@ -127,16 +128,6 @@ class Markdown:
            if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
                continue
            if not is_emoji and text_url:
                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
                continue
            if is_emoji:
                emoji = text_url
                emoji_id = url.lstrip("tg://emoji?id=")
                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
                continue
            if delim == BOLD_DELIM:
                tag = "b"
            elif delim == ITALIC_DELIM:
@ -169,6 +160,21 @@ class Markdown:
            text = utils.replace_once(text, delim, tag, start)
        for i, match in enumerate(re.finditer(URL_RE, text)):
            start, _ = match.span()
            is_emoji, text_url, url = match.groups()
            full = match.group(0)
            if not is_emoji and text_url:
                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
                continue
            if is_emoji:
                emoji = text_url
                emoji_id = url.lstrip("tg://emoji?id=")
                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
                continue
        for placeholder, code_section in placeholders.items():
            text = text.replace(placeholder, code_section)
@ -176,78 +182,76 @@ class Markdown:
    @staticmethod
    def unparse(text: str, entities: list):
        """
        Performs the reverse operation to .parse(), effectively returning
        markdown-like syntax given a normal text and its MessageEntity's.
        :param text: the text to be reconverted into markdown.
        :param entities: list of MessageEntity's applied to the text.
        :return: a markdown-like text representing the combination of both inputs.
        """
        delimiters = {
            MessageEntityType.BOLD: BOLD_DELIM,
            MessageEntityType.ITALIC: ITALIC_DELIM,
            MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
            MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
            MessageEntityType.CODE: CODE_DELIM,
            MessageEntityType.PRE: PRE_DELIM,
            MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
            MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
            MessageEntityType.SPOILER: SPOILER_DELIM
        }
        text = utils.add_surrogates(text)
-        entities_offsets = []
+        insert_at = []
-
+        for i, entity in enumerate(entities):
-        for entity in entities:
+            s = entity.offset
-            entity_type = entity.type
+            e = entity.offset + entity.length
-            start = entity.offset
+            delimiter = delimiters.get(entity.type, None)
-            end = start + entity.length
+            if delimiter:
-
+                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
-            if entity_type == MessageEntityType.BOLD:
+                    open_delimiter = delimiter
-                start_tag = end_tag = BOLD_DELIM
+                    close_delimiter = delimiter
-            elif entity_type == MessageEntityType.ITALIC:
+                    if entity.type == MessageEntityType.PRE:
-                start_tag = end_tag = ITALIC_DELIM
+                        if entity.language:
-            elif entity_type == MessageEntityType.UNDERLINE:
+                            open_delimiter += entity.language + '\n'
-                start_tag = end_tag = UNDERLINE_DELIM
+                        else:
-            elif entity_type == MessageEntityType.STRIKETHROUGH:
+                            open_delimiter += entity + '\n'
-                start_tag = end_tag = STRIKE_DELIM
+                    insert_at.append((s, i, open_delimiter))
-            elif entity_type == MessageEntityType.CODE:
+                    insert_at.append((e, -i, close_delimiter))
-                start_tag = end_tag = CODE_DELIM
+                else:
-            elif entity_type == MessageEntityType.PRE:
+                    # Handle multiline blockquotes
-                language = getattr(entity, "language", "") or ""
+                    text_subset = text[s:e]
-                start_tag = f"{PRE_DELIM}{language}\n"
+                    lines = text_subset.splitlines()
-                end_tag = f"\n{PRE_DELIM}"
+                    for line_num, line in enumerate(lines):
-            elif entity_type == MessageEntityType.BLOCKQUOTE:
+                        line_start = s + sum(len(l) + 1 for l in lines[:line_num])
                        if entity.collapsed:
-                    start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " "
+                            insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
                        else:
-                    start_tag = BLOCKQUOTE_DELIM + " "
+                            insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
-                end_tag = ""
+                    # No closing delimiter for blockquotes
-                blockquote_text = text[start:end]
+            else:
-                lines = blockquote_text.split("\n")
+                url = None
-                last_length = 0
+                if entity.type == MessageEntityType.TEXT_LINK:
                for line in lines:
                    if len(line) == 0 and last_length == end:
                        continue
                    start_offset = start+last_length
                    last_length = last_length+len(line)
                    end_offset = start_offset+last_length
                    entities_offsets.append((start_tag, start_offset,))
                    entities_offsets.append((end_tag, end_offset,))
                    last_length = last_length+1
                continue
            elif entity_type == MessageEntityType.SPOILER:
                start_tag = end_tag = SPOILER_DELIM
            elif entity_type == MessageEntityType.TEXT_LINK:
                    url = entity.url
-                start_tag = "["
+                elif entity.type == MessageEntityType.TEXT_MENTION:
-                end_tag = f"]({url})"
+                    url = 'tg://user?id={}'.format(entity.user.id)
-            elif entity_type == MessageEntityType.TEXT_MENTION:
+                if url:
-                user = entity.user
+                    insert_at.append((s, i, '['))
-                start_tag = "["
+                    insert_at.append((e, -i, ']({})'.format(url)))
                end_tag = f"](tg://user?id={user.id})"
            elif entity_type == MessageEntityType.CUSTOM_EMOJI:
                emoji_id = entity.custom_emoji_id
                start_tag = "!["
                end_tag = f"](tg://emoji?id={emoji_id})"
            else:
                continue
-            entities_offsets.append((start_tag, start,))
+        insert_at.sort(key=lambda t: (t[0], t[1]))
-            entities_offsets.append((end_tag, end,))
+        while insert_at:
            at, _, what = insert_at.pop()
-        entities_offsets = map(
+            # If we are in the middle of a surrogate nudge the position by -1.
-            lambda x: x[1],
+            # Otherwise we would end up with malformed text and fail to encode.
-            sorted(
+            # For example of bad input: "Hi \ud83d\ude1c"
-                enumerate(entities_offsets),
+            # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
-                key=lambda x: (x[1][1], x[0]),
+            while utils.within_surrogate(text, at):
-                reverse=True
+                at += 1
            )
        )
-        for entity, offset in entities_offsets:
+            text = text[:at] + what + text[at:]
            text = text[:offset] + entity + text[offset:]
        return utils.remove_surrogates(text)
--- a/pyrogram/parser/utils.py
+++ b/pyrogram/parser/utils.py
@ -40,3 +40,16 @@ def remove_surrogates(text):
 def replace_once(source: str, old: str, new: str, start: int):
    """Replace the first occurrence of ``old`` found at or after ``start``.

    The part of ``source`` before ``start`` is kept as is; only the first
    match of ``old`` inside ``source[start:]`` is swapped for ``new``. When
    there is no such match the returned string equals ``source``.
    """
    return source[:start] + source[start:].replace(old, new, 1)
 def within_surrogate(text, index, *, length=None):
    """
    `True` if ``index`` is within a surrogate (before and after it, not at!).
    """
    if length is None:
        length = len(text)
    return (
            1 < index < len(text) and  # in bounds
            '\ud800' <= text[index - 1] <= '\udbff' and  # previous is
            '\ud800' <= text[index] <= '\udfff'  # current is
    )
Author	SHA1	Message	Date
wulan17	4d1ae0b00f	pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method Some checks are pending Build-docs / build (push) Waiting to run Details Pyrofork / build (macos-latest, 3.10) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.11) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.12) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.13) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.9) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.10) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.11) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.12) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.13) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.9) (push) Waiting to run Details Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:32:37 +07:00
wulan17	943a7e0342	pyrofork: Add support for multi-line blockquote in markdown unparser Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:32:37 +07:00
wulan17	4a5af71d25	pyrofork: Adapt markdown unparser from telethon * The problem with current implementation is when we have nested markdown inside a url the markdown order is messed up. for example link with bold text will be unparsed like this [github](https://github.com). Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:31:57 +07:00
wulan17	984abd2008	pyrofork: Fix nested url markdown parsing * The problem with the current implementation is that when we add another markdown inside a url markdown, it will not be parsed. For example, if we add bold (`**`) markdown inside a url markdown, the url text shows as `text**` instead of making the text bold. Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:01:20 +07:00