pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method

Signed-off-by: wulan17 <wulan17@nusantararom.org>
pyrofork: Add support for multi-line blockquote in markdown unparser
2025-12-29 12:04:51 +00:00 · 2025-03-03 00:32:37 +07:00 · 2025-03-03 00:32:37 +07:00 · 2025-03-03 00:31:57 +07:00 · 2025-03-03 00:01:20 +07:00
3 changed files with 109 additions and 80 deletions
--- a/pyrogram/methods/chats/get_forum_topics.py
+++ b/pyrogram/methods/chats/get_forum_topics.py
@ -32,7 +32,10 @@ class GetForumTopics:
    async def get_forum_topics(
        self: "pyrogram.Client",
        chat_id: Union[int, str],
-        limit: int = 0
+        limit: int = 0,
+        offset_date: int = 0,
+        offset_id: int = 0,
+        offset_topic: int = 0
    ) -> Optional[AsyncGenerator["types.ForumTopic", None]]:
        """Get one or more topic from a chat.

@ -46,6 +49,15 @@ class GetForumTopics:
            limit (``int``, *optional*):
                Limits the number of topics to be retrieved.

+            offset_date (``int``, *optional*):
+                Date of the last message of the last found topic.
+
+            offset_id (``int``, *optional*):
+                ID of the last message of the last found topic.
+
+            offset_topic (``int``, *optional*):
+                ID of the last found topic.
+
        Returns:
            ``Generator``: On success, a generator yielding :obj:`~pyrogram.types.ForumTopic` objects is returned.

@ -62,7 +74,7 @@ class GetForumTopics:

        peer = await self.resolve_peer(chat_id)

-        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=0, offset_id=0, offset_topic=0, limit=limit)
+        rpc = raw.functions.channels.GetForumTopics(channel=peer, offset_date=offset_date, offset_id=offset_id, offset_topic=offset_topic, limit=limit)

        r = await self.invoke(rpc, sleep_threshold=-1)

--- a/pyrogram/parser/markdown.py
+++ b/pyrogram/parser/markdown.py
@ -37,7 +37,7 @@ PRE_DELIM = "```"
 BLOCKQUOTE_DELIM = ">"
 BLOCKQUOTE_EXPANDABLE_DELIM = "**>"

-MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
+MARKDOWN_RE = re.compile(r"({d})".format(
    d="|".join(
        ["".join(i) for i in [
            [rf"\{j}" for j in i]
@ -52,6 +52,7 @@ MARKDOWN_RE = re.compile(r"({d})|(!?)\[(.+?)\]\((.+?)\)".format(
            ]
        ]]
    )))
+URL_RE = re.compile(r"(!?)\[(.+?)\]\((.+?)\)")

 OPENING_TAG = "<{}>"
 CLOSING_TAG = "</{}>"
@ -118,7 +119,7 @@ class Markdown:

        for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
            start, _ = match.span()
-            delim, is_emoji, text_url, url = match.groups()
+            delim = match.group(1)
            full = match.group(0)

            if delim in FIXED_WIDTH_DELIMS:
@ -127,16 +128,6 @@ class Markdown:
            if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
                continue

-            if not is_emoji and text_url:
-                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
-                continue
-
-            if is_emoji:
-                emoji = text_url
-                emoji_id = url.lstrip("tg://emoji?id=")
-                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
-                continue
-
            if delim == BOLD_DELIM:
                tag = "b"
            elif delim == ITALIC_DELIM:
@ -169,6 +160,21 @@ class Markdown:

            text = utils.replace_once(text, delim, tag, start)

+        for i, match in enumerate(re.finditer(URL_RE, text)):
+            start, _ = match.span()
+            is_emoji, text_url, url = match.groups()
+            full = match.group(0)
+
+            if not is_emoji and text_url:
+                text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
+                continue
+
+            if is_emoji:
+                emoji = text_url
+                emoji_id = url.lstrip("tg://emoji?id=")
+                text = utils.replace_once(text, full, EMOJI_MARKUP.format(emoji_id, emoji), start)
+                continue
+
        for placeholder, code_section in placeholders.items():
            text = text.replace(placeholder, code_section)

@ -176,78 +182,76 @@ class Markdown:

    @staticmethod
    def unparse(text: str, entities: list):
+        """
+        Performs the reverse operation to .parse(), effectively returning
+        markdown-like syntax given a normal text and its MessageEntity's.
+
+        :param text: the text to be reconverted into markdown.
+        :param entities: list of MessageEntity's applied to the text.
+        :return: a markdown-like text representing the combination of both inputs.
+        """
+        delimiters = {
+            MessageEntityType.BOLD: BOLD_DELIM,
+            MessageEntityType.ITALIC: ITALIC_DELIM,
+            MessageEntityType.UNDERLINE: UNDERLINE_DELIM,
+            MessageEntityType.STRIKETHROUGH: STRIKE_DELIM,
+            MessageEntityType.CODE: CODE_DELIM,
+            MessageEntityType.PRE: PRE_DELIM,
+            MessageEntityType.BLOCKQUOTE: BLOCKQUOTE_DELIM,
+            MessageEntityType.EXPANDABLE_BLOCKQUOTE: BLOCKQUOTE_EXPANDABLE_DELIM,
+            MessageEntityType.SPOILER: SPOILER_DELIM
+        }
+
        text = utils.add_surrogates(text)

-        entities_offsets = []
-
-        for entity in entities:
-            entity_type = entity.type
-            start = entity.offset
-            end = start + entity.length
-
-            if entity_type == MessageEntityType.BOLD:
-                start_tag = end_tag = BOLD_DELIM
-            elif entity_type == MessageEntityType.ITALIC:
-                start_tag = end_tag = ITALIC_DELIM
-            elif entity_type == MessageEntityType.UNDERLINE:
-                start_tag = end_tag = UNDERLINE_DELIM
-            elif entity_type == MessageEntityType.STRIKETHROUGH:
-                start_tag = end_tag = STRIKE_DELIM
-            elif entity_type == MessageEntityType.CODE:
-                start_tag = end_tag = CODE_DELIM
-            elif entity_type == MessageEntityType.PRE:
-                language = getattr(entity, "language", "") or ""
-                start_tag = f"{PRE_DELIM}{language}\n"
-                end_tag = f"\n{PRE_DELIM}"
-            elif entity_type == MessageEntityType.BLOCKQUOTE:
-                if entity.collapsed:
-                    start_tag = BLOCKQUOTE_EXPANDABLE_DELIM + " "
+        insert_at = []
+        for i, entity in enumerate(entities):
+            s = entity.offset
+            e = entity.offset + entity.length
+            delimiter = delimiters.get(entity.type, None)
+            if delimiter:
+                if entity.type != MessageEntityType.BLOCKQUOTE and entity.type != MessageEntityType.EXPANDABLE_BLOCKQUOTE:
+                    open_delimiter = delimiter
+                    close_delimiter = delimiter
+                    if entity.type == MessageEntityType.PRE:
+                        if entity.language:
+                            open_delimiter += entity.language + '\n'
+                        else:
+                            open_delimiter += '\n'
+                    insert_at.append((s, i, open_delimiter))
+                    insert_at.append((e, -i, close_delimiter))
                else:
-                    start_tag = BLOCKQUOTE_DELIM + " "
-                end_tag = ""
-                blockquote_text = text[start:end]
-                lines = blockquote_text.split("\n")
-                last_length = 0
-                for line in lines:
-                    if len(line) == 0 and last_length == end:
-                        continue
-                    start_offset = start+last_length
-                    last_length = last_length+len(line)
-                    end_offset = start_offset+last_length
-                    entities_offsets.append((start_tag, start_offset,))
-                    entities_offsets.append((end_tag, end_offset,))
-                    last_length = last_length+1
-                continue
-            elif entity_type == MessageEntityType.SPOILER:
-                start_tag = end_tag = SPOILER_DELIM
-            elif entity_type == MessageEntityType.TEXT_LINK:
-                url = entity.url
-                start_tag = "["
-                end_tag = f"]({url})"
-            elif entity_type == MessageEntityType.TEXT_MENTION:
-                user = entity.user
-                start_tag = "["
-                end_tag = f"](tg://user?id={user.id})"
-            elif entity_type == MessageEntityType.CUSTOM_EMOJI:
-                emoji_id = entity.custom_emoji_id
-                start_tag = "!["
-                end_tag = f"](tg://emoji?id={emoji_id})"
+                    # Handle multiline blockquotes
+                    text_subset = text[s:e]
+                    lines = text_subset.splitlines()
+                    for line_num, line in enumerate(lines):
+                        line_start = s + sum(len(l) + 1 for l in lines[:line_num])
+                        if entity.collapsed:
+                            insert_at.append((line_start, i, BLOCKQUOTE_EXPANDABLE_DELIM))
+                        else:
+                            insert_at.append((line_start, i, BLOCKQUOTE_DELIM))
+                    # No closing delimiter for blockquotes
            else:
-                continue
+                url = None
+                if entity.type == MessageEntityType.TEXT_LINK:
+                    url = entity.url
+                elif entity.type == MessageEntityType.TEXT_MENTION:
+                    url = 'tg://user?id={}'.format(entity.user.id)
+                if url:
+                    insert_at.append((s, i, '['))
+                    insert_at.append((e, -i, ']({})'.format(url)))

-            entities_offsets.append((start_tag, start,))
-            entities_offsets.append((end_tag, end,))
+        insert_at.sort(key=lambda t: (t[0], t[1]))
+        while insert_at:
+            at, _, what = insert_at.pop()

-        entities_offsets = map(
-            lambda x: x[1],
-            sorted(
-                enumerate(entities_offsets),
-                key=lambda x: (x[1][1], x[0]),
-                reverse=True
-            )
-        )
+            # If we are in the middle of a surrogate pair, nudge the position forward by 1.
+            # Otherwise we would end up with malformed text and fail to encode.
+            # For example of bad input: "Hi \ud83d\ude1c"
+            # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
+            while utils.within_surrogate(text, at):
+                at += 1

-        for entity, offset in entities_offsets:
-            text = text[:offset] + entity + text[offset:]
+            text = text[:at] + what + text[at:]

        return utils.remove_surrogates(text)
--- a/pyrogram/parser/utils.py
+++ b/pyrogram/parser/utils.py
@ -40,3 +40,16 @@ def remove_surrogates(text):

 def replace_once(source: str, old: str, new: str, start: int):
    return source[:start] + source[start:].replace(old, new, 1)
+
+def within_surrogate(text, index, *, length=None):
+    """
+    `True` if ``index`` splits a UTF-16 surrogate pair (high before, low at it).
+    """
+    if length is None:
+        length = len(text)
+
+    return (
+            0 < index < length and  # in bounds; index 1 can already split a pair
+            '\ud800' <= text[index - 1] <= '\udbff' and  # previous is a high surrogate
+            '\udc00' <= text[index] <= '\udfff'  # current is a low surrogate
+    )
Author	SHA1	Message	Date
wulan17	4d1ae0b00f	pyrofork: Add offset_{date,id,topic} parameters to get_forum_topics method Some checks are pending Build-docs / build (push) Waiting to run Details Pyrofork / build (macos-latest, 3.10) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.11) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.12) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.13) (push) Waiting to run Details Pyrofork / build (macos-latest, 3.9) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.10) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.11) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.12) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.13) (push) Waiting to run Details Pyrofork / build (ubuntu-latest, 3.9) (push) Waiting to run Details Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:32:37 +07:00
wulan17	943a7e0342	pyrofork: Add support for multi-line blockquote in markdown unparser Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:32:37 +07:00
wulan17	4a5af71d25	pyrofork: Adapt markdown unparser from telethon * The problem with the current implementation is that when markdown is nested inside a URL, the delimiter order gets messed up. For example, a link with bold text is unparsed like this: `**[github**](https://github.com)`. Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:31:57 +07:00
wulan17	984abd2008	pyrofork: Fix nested url markdown parsing * The problem with the current implementation is that markdown nested inside a URL markdown is not parsed. For example, when bold (`**`) markdown is added inside a URL markdown, the URL text shows as `**text**` instead of being rendered bold. Signed-off-by: wulan17 <wulan17@nusantararom.org>	2025-03-03 00:01:20 +07:00