# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2018 Dan Tès
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.

import re

from pyrogram.api.types import (
    MessageEntityBold as Bold,
    MessageEntityItalic as Italic,
    MessageEntityCode as Code,
    MessageEntityTextUrl as Url,
    MessageEntityPre as Pre,
    InputMessageEntityMentionName as Mention
)
from . import utils


class Markdown:
    """Turn Markdown-like markup into plain text plus message entities.

    Supported markup: fenced ``pre`` blocks, ``[text](url)`` links,
    ``[name](tg://user?id=...)`` mentions and the inline delimiters listed
    in ``INLINE_DELIMITERS``.
    """

    # Inline delimiter -> entity class, called as ``cls(offset, length)``.
    INLINE_DELIMITERS = {
        "**": Bold,
        "__": Italic,
        "`": Code
    }

    # The named groups in the patterns below are the ones parse() reads via
    # match.group(...); renaming one requires updating parse() as well.

    # ``` python
    # for i in range(10):
    #     print(i)
    # ```
    PRE_RE = r"(?P<pre>```(?P<lang>.*)\n(?P<code>(.|\n)*)\n```)"

    # [url](github.com)
    URL_RE = r"(?P<url>(\[(?P<url_text>.+?)\]\((?P<url_path>.+?)\)))"

    # [name](tg://user?id=123456789)
    MENTION_RE = r"(?P<mention>(\[(?P<mention_text>.+?)\]\(tg:\/\/user\?id=(?P<user_id>\d+?)\)))"

    # **bold**
    # __italic__
    # `code`
    INLINE_RE = r"(?P<inline>(?P<start_delimiter>{d})(?P<body>.+?)(?P<end_delimiter>{d}))".format(
        # Longest delimiters first so that no delimiter can be shadowed by a
        # shorter alternative. re.escape() quotes regex metacharacters
        # without relying on an invalid "\{" string escape.
        d="|".join(
            re.escape(delimiter)
            for delimiter in sorted(INLINE_DELIMITERS, key=len, reverse=True)
        )
    )

    # MENTION_RE precedes URL_RE because every mention also looks like a URL.
    MARKDOWN_RE = re.compile("|".join([PRE_RE, MENTION_RE, URL_RE, INLINE_RE]))

    def __init__(self, peers_by_id):
        """Store the peer lookup used to resolve mentions.

        Args:
            peers_by_id: Mapping indexed by integer user id. The value found
                for a mentioned id is passed as the third argument of the
                ``Mention`` entity.
        """
        self.peers_by_id = peers_by_id

    def parse(self, text):
        """Strip the supported markup from *text* and describe it as entities.

        Args:
            text: The message text containing markup.

        Returns:
            A dict with two keys: ``message``, the text with the markup of
            every accepted match removed, and ``entities``, the list of
            entity objects in order of appearance. Entity offsets and
            lengths are measured on the string returned by
            ``utils.add_surrogates`` (presumably UTF-16 code units -- confirm
            against ``utils``).

        Raises:
            KeyError: If a mention refers to a user id that is not present
                in ``peers_by_id``.
        """
        entities = []
        text = utils.add_surrogates(text)

        pieces = []  # Fragments of the stripped output, joined at the end
        cursor = 0  # Index in `text` just past the last accepted match
        removed = 0  # Number of markup characters dropped so far

        for match in self.MARKDOWN_RE.finditer(text):
            # Position of this match in the stripped output.
            start = match.start() - removed

            if match.group("pre"):
                body = match.group("code")
                entity = Pre(start, len(body), match.group("lang").strip())
            elif match.group("url"):
                body = match.group("url_text")
                entity = Url(start, len(body), match.group("url_path"))
            elif match.group("mention"):
                body = match.group("mention_text")
                user_id = int(match.group("user_id"))
                entity = Mention(start, len(body), self.peers_by_id[user_id])
            elif match.group("inline"):
                delimiter = match.group("start_delimiter")

                # The pattern accepts any delimiter on either side; a
                # mismatched pair is not markup and is left untouched.
                if delimiter != match.group("end_delimiter"):
                    continue

                body = match.group("body")
                entity = self.INLINE_DELIMITERS[delimiter](start, len(body))
            else:
                continue

            entities.append(entity)

            # Rewrite only the span that was actually matched. A global
            # str.replace() would also rewrite identical markup sitting in
            # regions that were skipped, corrupting text and later offsets.
            pieces.append(text[cursor:match.start()])
            pieces.append(body)
            cursor = match.end()
            removed += len(match.group()) - len(body)

        pieces.append(text[cursor:])

        return dict(
            message=utils.remove_surrogates("".join(pieces)),
            entities=entities
        )