web/lib/changelog_formatter.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

import re
from cherrypy.lib.tidy import html_break

from web.lib.links import viewcvs_link, \
                          bugzilla_bug_link, \
                          ciavc_link

# We use short variable names!
# pylint: disable-msg=C0103

# Entry header of the form " <year>; Full Name <userid@host>".
# Group 1 is the (optional) full name, group 2 the mailbox local part.
# Any 20xx year is accepted so that entries dated 2020 or later still
# yield the author's name instead of falling through to the fallback.
re_author = re.compile(r' 20\d\d; ([^<]+)? *<([^@> ]+)@[^>]')
# Fallback: the first " userid@" / " <userid@" token anywhere on the line.
re_author2 = re.compile(r' <?([^<@ ]+)@')
def extract_changelog_entry_author(line):
    """From the first line of a changelog entry, extract the author
    name and userid.

    line -- the header line of one changelog entry.

    Returns a tuple (authorname, authorid):
      - header matched re_author: the stripped full name (None when the
        header carries no name before the address) and the stripped
        part of the address before the '@';
      - otherwise: authorname is 'Unknown' and authorid is whatever
        re_author2 can find before an '@', or '' when nothing matches.
    """
    header = re_author.search(line)
    if header is not None:
        authorname = authorid = None
        if header.group(1):
            authorname = header.group(1).strip()
        if header.group(2):
            authorid = header.group(2).strip()
        return (authorname, authorid)

    # No well-formed header: try harder to find a username only.
    authorid = ''
    fallback = re_author2.search(line)
    if fallback is not None and fallback.group(1):
        authorid = fallback.group(1).strip()
    return ('Unknown', authorid)

def _single_pass_re_loop(reo, callback, instr):
    """For every match of the given regex, replace the entire match
    with the string given by callback.

    reo      -- compiled regular expression object.
    callback -- callable taking the match object and returning the
                replacement string (used literally).
    instr    -- the string to process.

    Returns the processed string.

    The substitution is done in one pass by the regex engine, so each
    match is replaced exactly where it was found.  Replacing by text
    (str.replace on the matched substring) would also rewrite other
    places that merely contain the same characters, e.g. the "bug 12"
    inside "bug 123", or text already produced by an earlier callback.
    """
    return reo.sub(callback, instr)

re_email1 = re.compile(r'<([^@ ]+)@gentoo.org>')
re_email2 = re.compile(r'([^@ ]+)@gentoo.org')
re_file = re.compile(r'([\+-]?)(\S+)([:,]|[:,]$)')
# Case is spelled out by hand:
# re.IGNORECASE does not work on Unicode strings in 2.4!
re_bugid = re.compile(r'([Bb][uU][gG]\s+?#?)(\d+)')
# Raw strings: the resulting patterns are unchanged, but "\s" in a plain
# string literal is an invalid escape sequence.
re_url_base = r'(https?://[^\s/)>]+(?:/[\S]+)?)'
re_url = re.compile(r'([\s<(]*)' + re_url_base + r'''([\s>)"']+?.?|$)?''')
re_url_notend = re.compile(r'[\s.)>\'"]+$')

def _bug_markup(m):
    """Wrap the bug number of a re_bugid match in __BUG__ markers,
    keeping the leading "bug #" text as it was."""
    return '%s__BUG__%s__/BUG__' % (m.group(1), m.group(2))

def _url_markup(m):
    """Wrap the URL of a re_url match in __URL__ markers.

    Trailing punctuation/quotes that the URL group swallowed
    (re_url_notend) are moved out of the marker and put in front of
    the suffix instead."""
    prefix = m.group(1) or ''
    url = m.group(2)
    suffix = m.group(3) or ''
    trailing = re_url_notend.search(url)
    if trailing:
        trailing = trailing.group(0)
        suffix = trailing + suffix
        url = url[:-len(trailing)]
    return '%s__URL__%s__/URL__%s' % (prefix, url, suffix)

def _pretty_changelog_pass1(cat, pn, changelog):
    """Changelog prettification, pass1: replace text with markers.

    cat, pn   -- category and package name; only used in the diagnostic
                 printed for an entry whose author id cannot be found.
    changelog -- raw text of one changelog entry.

    Returns (changelog, authorname, authorid).  authorname/authorid
    stay None when no word containing '@gentoo.org' is encountered.

    Processing, line by line:
      - leading lines starting with '*' are left untouched;
      - until both the author and the end of the file list (a line
        ending in ':') have been seen, lines are split on whitespace
        and re-joined with single spaces; the first word containing
        '@gentoo.org' becomes a __CIA_VC__ marker and the words after
        it are run through re_file to get __FILE__ markers;
      - every later line gets __BUG__ and __URL__ markers.
    An empty result is replaced by "No changelog entry available".
    """
    changelog = changelog.strip()
    changelog_lines = changelog.splitlines()
    total = len(changelog_lines)

    # Skip the "*version (date)" lines at the top of the entry.
    i = 0
    while i < total and changelog_lines[i].startswith('*'):
        i += 1

    seen_files = False
    seen_author = False
    authorname = None
    authorid = None
    while i < total:
        oldline = changelog_lines[i]
        if not seen_author or not seen_files:
            newline = []
            for w in oldline.split():
                if not seen_author and '@gentoo.org' in w:
                    (authorname, authorid) = \
                            extract_changelog_entry_author(oldline)
                    if authorid == '':
                        # A single parenthesised argument prints the same
                        # text whether print is a statement or a function.
                        print('Bad changelog entry for %s/%s = "%r"'
                                % (cat, pn, changelog))
                    w = re_email1.sub('__CIA_VC__\\1__/CIA_VC__', w)
                    w = re_email2.sub(' __CIA_VC__\\1__/CIA_VC__', w)
                    seen_author = True
                elif not seen_files and seen_author:
                    w = re_file.sub('\\1__FILE__\\2__/FILE__\\3', w)
                newline.append(w)
            changelog_lines[i] = ' '.join(newline)
        else:
            newline = _single_pass_re_loop(re_bugid, _bug_markup, oldline)
            newline = _single_pass_re_loop(re_url, _url_markup, newline)
            changelog_lines[i] = newline
        # The file list ends on the line that finishes with a colon.
        if oldline.endswith(':'):
            seen_files = True
        i += 1

    # str.strip() returns a new string; the result has to be kept.
    changelog = "\n".join(changelog_lines).strip()
    if not changelog:
        changelog = "No changelog entry available"
    return (changelog, authorname, authorid)

def _pretty_changelog_pass2(changelog):
    """Changelog prettification, pass2: turn the marked-up text into
    HTML by handing it to cherrypy's html_break.

    This is the step meant to take care of any lurking entities as
    well as \\n; the markers inserted by pass1 are expected to come
    through untouched so the later passes can find them.
    (NOTE(review): relies on html_break's behaviour, not visible here.)
    """
    return html_break(changelog)

# Author marker written by pass1, plus one optional trailing space.
re_m_ciavc = re.compile(r'__CIA_VC__(\S+)__/CIA_VC__ ?')
def _pretty_changelog_pass3(changelog):
    """Changelog prettification, pass3: turn every __CIA_VC__ author
    marker into a parenthesised link to that user's CIA.vc page,
    followed by a single space."""

    def author_link(match):
        userid = match.group(1)
        href = ciavc_link(userid)
        return '(<a href="%s">%s</a>) ' % (href, userid)

    return _single_pass_re_loop(re_m_ciavc, author_link, changelog)

# File marker written by pass1.
re_m_file = re.compile(r'__FILE__(\S+)__/FILE__')
def _pretty_changelog_pass4(cat, pn, changelog):
    """Changelog prettification, pass4: turn every __FILE__ marker into
    a Gentoo ViewCVS link.

    The link target is built from the path "<cat>/<pn>/<filename>";
    the link text is the bare filename."""

    def file_link(match):
        fname = match.group(1)
        href = viewcvs_link('%s/%s/%s' % (cat, pn, fname))
        return '<a href="%s">%s</a>' % (href, fname)

    return _single_pass_re_loop(re_m_file, file_link, changelog)

# Bug marker written by pass1; the payload is always numeric.
re_m_bug = re.compile(r'__BUG__(\d+)__/BUG__')
def _pretty_changelog_pass5(changelog):
    """Changelog prettification, pass5: turn every __BUG__ marker into
    a bugzilla link whose text is the bug number."""

    def bug_link(match):
        number = match.group(1)
        # bugzilla_bug_link is given the id as an integer.
        href = bugzilla_bug_link(int(number))
        return '<a href="%s">%s</a>' % (href, number)

    return _single_pass_re_loop(re_m_bug, bug_link, changelog)

# URL marker written by pass1.
re_m_url = re.compile(r'__URL__(\S+)__/URL__')
def _pretty_changelog_pass6(changelog):
    """Changelog prettification, pass6: convert any URL markup to real
    links.  The URL is used both as the link target and the link text.

    pass1 stores the URL inside the marker together with its scheme
    (its URL pattern starts with "http://" or "https://"), so the
    scheme must not be added again: doing so produced targets such as
    "http://https://host/".  "http://" is only prepended when the
    marker content carries neither scheme."""

    def markup(m):
        url = m.group(1)
        if not (url.startswith('http://') or url.startswith('https://')):
            url = 'http://' + url
        return '<a href="%s">%s</a>' % (url, url)

    return _single_pass_re_loop(re_m_url, markup, changelog)


def pretty_changelog(cat, pn, changelog):
    """Render a changelog snippet as HTML by running it through the six
    prettification passes in order:
        1. plain text -> markers (also yields the author details)
        2. entities/newlines -> HTML
        3. author markers -> CIA.vc links
        4. file markers -> ViewCVS links (needs cat and pn)
        5. bug markers -> Bugzilla links
        6. URL markers -> real links

    Returns (html, authorname, authorid), the author fields being the
    ones reported by the first pass."""
    marked, authorname, authorid = _pretty_changelog_pass1(cat, pn, changelog)
    html = _pretty_changelog_pass2(marked)
    html = _pretty_changelog_pass3(html)
    html = _pretty_changelog_pass4(cat, pn, html)
    html = _pretty_changelog_pass5(html)
    html = _pretty_changelog_pass6(html)
    return (html, authorname, authorid)

def optimal_collapse(atom, pnlength, pvlength, ellipsis='^'):
    """Shrink the PN-PV string using well-placed ellipsis characters so
    that the length of the string does not exceed
    pnlength + pvlength + 1 (the 1 being the '-' separator).  Retain
    the maximum amount of information.

    atom     -- object providing .package (PN) and .fullver (PV);
                a fullver of None yields just the package name, with
                no separator.
    pnlength -- length budget for the package name.
    pvlength -- length budget for the version.
    ellipsis -- text marking a truncated part (counted as 1 character).

    The version is shortened first, one character at a time, down to
    pvlength; only then is the package name shortened, down to
    pnlength.  A shortened part always ends with the ellipsis.

    There is no fixed cap on the number of shortening steps (a cap
    would let very long names exceed the limit).  The loop ends as
    soon as the string fits or neither part may be shortened further,
    and a part is never cut below the ellipsis itself.
    """
    maxlength = pnlength + pvlength + 1
    pn = atom.package
    pv = atom.fullver
    sep = '-'
    if pv is None:
        pv = ''
        sep = ''
    npn, npv = pn, pv
    # tlpn/tlpv: current target lengths of the two parts.
    tlpn, tlpv = len(pn), len(pv)
    while len(npn) + len(sep) + len(npv) > maxlength:
        if tlpv > pvlength and tlpv > 1:
            tlpv -= 1
            # '@' is a one-character placeholder for the ellipsis.
            npv = pv[:tlpv - 1] + '@'
        elif tlpn > pnlength and tlpn > 1:
            tlpn -= 1
            npn = pn[:tlpn - 1] + '@'
        else:
            # Nothing left that may be shortened.
            break
    return ("%s%s%s" % (npn, sep, npv)).replace('@', ellipsis)

# vim:ts=4 et ft=python: