aboutsummaryrefslogtreecommitdiff
blob: adba0bdb6cfd581faa5e141ebe135f8be7968152 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
import os
import re
from difflib import SequenceMatcher
from itertools import chain

from lxml import etree
from pkgcore import const as pkgcore_const
from pkgcore.ebuild.atom import MalformedAtom, atom
from pkgcore.restrictions.packages import Conditional
from pkgcore.fetch import fetchable
from snakeoil.osutils import pjoin
from snakeoil.sequences import iflatten_instance
from snakeoil.strings import pluralism

from .. import results, sources
from . import Check


class _MissingXml(results.Error):
    """Base result for a required XML file that is missing.

    Meant to be combined with a scope-specific result class by subclasses.
    """

    def __init__(self, filename, **kwargs):
        # remaining keyword arguments are forwarded to the result base classes
        super().__init__(**kwargs)
        self.filename = filename

    @property
    def desc(self):
        """Message naming the missing file."""
        scope, missing = self._attr, self.filename
        return f"{scope} is missing {missing}"


class _BadlyFormedXml(results.Warning):
    """Base result for an XML file that isn't well formed."""

    def __init__(self, filename, error, **kwargs):
        super().__init__(**kwargs)
        # offending file name and the error text describing the problem
        self.filename, self.error = filename, error

    @property
    def desc(self):
        """Message naming the file together with the recorded error."""
        scope, name, problem = self._attr, self.filename, self.error
        return f"{scope} {name} is not well formed xml: {problem}"


class _InvalidXml(results.Error):
    """Base result for an XML file that fails XML Schema validation."""

    def __init__(self, filename, message, **kwargs):
        super().__init__(**kwargs)
        # offending file name and the validation message to show for it
        self.filename, self.message = filename, message

    @property
    def desc(self):
        """Message naming the file, followed by the validation details."""
        scope, name, details = self._attr, self.filename, self.message
        return f"{scope} {name} violates metadata.xsd:\n{details}"


class _MetadataXmlInvalidPkgRef(results.Error):
    """Base result for a metadata.xml <pkg/> referencing an unknown/invalid package."""

    def __init__(self, filename, pkgtext, **kwargs):
        super().__init__(**kwargs)
        # file containing the reference and the reference text itself
        self.filename, self.pkgtext = filename, pkgtext

    @property
    def desc(self):
        """Message quoting the offending <pkg/> text."""
        ref = repr(self.pkgtext)
        return f"{self._attr} {self.filename} <pkg/> references unknown/invalid package: {ref}"


class _MetadataXmlInvalidCatRef(results.Error):
    """Base result for a metadata.xml <cat/> referencing an unknown/invalid category."""

    def __init__(self, filename, cattext, **kwargs):
        super().__init__(**kwargs)
        # file containing the reference and the reference text itself
        self.filename, self.cattext = filename, cattext

    @property
    def desc(self):
        """Message quoting the offending <cat/> text."""
        ref = repr(self.cattext)
        return f"{self._attr} {self.filename} <cat/> references unknown/invalid category: {ref}"


class MaintainerNeeded(results.PackageResult, results.Warning):
    """Package with missing or invalid maintainer-needed comment in metadata.xml."""

    def __init__(self, filename, needed, **kwargs):
        super().__init__(**kwargs)
        # ``needed`` selects the message: truthy -> "invalid", falsy -> "missing"
        self.filename, self.needed = filename, needed

    @property
    def desc(self):
        """Message stating whether the comment is invalid or missing."""
        return (
            f"{self.filename}: invalid maintainer-needed comment"
            if self.needed
            else f"{self.filename}: missing maintainer-needed comment"
        )


class MaintainerWithoutProxy(results.PackageResult, results.Warning):
    """Package has a proxied maintainer without a proxy.

    Every maintainer of the package uses a non-@gentoo.org e-mail address.
    Most likely the package is maintained by a proxied maintainer while no
    explicit proxy (developer or project) is listed. As a consequence no
    Gentoo developer will be CC-ed on bug reports, and most likely no
    developer oversees the proxied maintainer's activity.
    """

    def __init__(self, filename, maintainers, **kwargs):
        super().__init__(**kwargs)
        self.filename = filename
        # stored as a tuple so any iterable of maintainers may be passed in
        self.maintainers = tuple(maintainers)

    @property
    def desc(self):
        """Message listing the maintainers that lack a proxy."""
        names = ", ".join(self.maintainers)
        plural = pluralism(self.maintainers)
        return f"{self.filename}: proxied maintainer{plural} missing proxy dev/project: {names}"


class ProxyWithoutProxied(results.PackageResult, results.Warning):
    """Package lists a proxy with no proxied maintainers.

    A proxy is explicitly listed for the package although it has no proxied
    maintainers. Most likely the proxied maintainer has been removed while
    the proxy was accidentally left in place.
    """

    def __init__(self, filename, **kwargs):
        # remaining keyword arguments are forwarded to the result base classes
        super().__init__(**kwargs)
        self.filename = filename

    @property
    def desc(self):
        """Message naming the affected file."""
        name = self.filename
        return f"{name}: proxy with no proxied maintainer"


class NonexistentProjectMaintainer(results.PackageResult, results.Warning):
    """Package specifying nonexistent project as a maintainer."""

    def __init__(self, filename, emails, **kwargs):
        super().__init__(**kwargs)
        self.filename = filename
        # stored as a tuple so any iterable of e-mail addresses may be passed in
        self.emails = tuple(emails)

    @property
    def desc(self):
        """Message listing the unknown project e-mail addresses."""
        listed = ", ".join(self.emails)
        plural = pluralism(self.emails)
        return f"{self.filename}: nonexistent project maintainer{plural}: {listed}"


class WrongMaintainerType(results.PackageResult, results.Warning):
    """A person-type maintainer matches an existing project."""

    def __init__(self, filename, emails, **kwargs):
        super().__init__(**kwargs)
        self.filename = filename
        # stored as a tuple so any iterable of e-mail addresses may be passed in
        self.emails = tuple(emails)

    @property
    def desc(self):
        """Message listing the project e-mail addresses typed as persons."""
        listed = ", ".join(self.emails)
        plural = pluralism(self.emails)
        return f'{self.filename}: project maintainer{plural} with type="person": {listed}'


class PkgMissingMetadataXml(_MissingXml, results.PackageResult):
    """Package is missing metadata.xml.

    Used as ``missing_error`` by ``PackageMetadataXmlCheck``; the shared
    parsing code only reports it when the ``gentoo_repo`` option is set.
    """


class CatMissingMetadataXml(_MissingXml, results.CategoryResult):
    """Category is missing metadata.xml.

    Used as ``missing_error`` by ``CategoryMetadataXmlCheck``; the shared
    parsing code only reports it when the ``gentoo_repo`` option is set.
    """


class PkgInvalidXml(_InvalidXml, results.PackageResult):
    """Invalid package metadata.xml.

    Used as ``invalid_error`` by ``PackageMetadataXmlCheck`` when the parsed
    document fails schema validation; the message carries one line per
    schema error with its line, column, type and text.
    """


class CatInvalidXml(_InvalidXml, results.CategoryResult):
    """Invalid category metadata.xml.

    Used as ``invalid_error`` by ``CategoryMetadataXmlCheck`` when the parsed
    document fails schema validation; the message carries one line per
    schema error with its line, column, type and text.
    """


class PkgBadlyFormedXml(_BadlyFormedXml, results.PackageResult):
    """Badly formed package metadata.xml.

    Used as ``misformed_error`` by ``PackageMetadataXmlCheck`` when parsing
    the file raises ``etree.XMLSyntaxError``.
    """


class CatBadlyFormedXml(_BadlyFormedXml, results.CategoryResult):
    """Badly formed category metadata.xml.

    Used as ``misformed_error`` by ``CategoryMetadataXmlCheck`` when parsing
    the file raises ``etree.XMLSyntaxError``.
    """


class PkgMetadataXmlInvalidPkgRef(_MetadataXmlInvalidPkgRef, results.PackageResult):
    """Invalid package reference in package metadata.xml.

    Used as ``pkgref_error`` by ``PackageMetadataXmlCheck`` when the text of
    a <pkg/> element is not a valid atom or matches nothing in the search
    repo.
    """


class CatMetadataXmlInvalidPkgRef(_MetadataXmlInvalidPkgRef, results.CategoryResult):
    """Invalid package reference in category metadata.xml.

    Used as ``pkgref_error`` by ``CategoryMetadataXmlCheck`` when the text of
    a <pkg/> element is not a valid atom or matches nothing in the search
    repo.
    """


class PkgMetadataXmlInvalidCatRef(_MetadataXmlInvalidCatRef, results.PackageResult):
    """Invalid category reference in package metadata.xml.

    Used as ``catref_error`` by ``PackageMetadataXmlCheck`` when the text of
    a <cat/> element is not among the search repo's categories.
    """


class CatMetadataXmlInvalidCatRef(_MetadataXmlInvalidCatRef, results.CategoryResult):
    """Invalid category reference in category metadata.xml.

    Used as ``catref_error`` by ``CategoryMetadataXmlCheck`` when the text of
    a <cat/> element is not among the search repo's categories.
    """


class _MetadataXmlIndentation(results.BaseLinesResult, results.Style):
    """Inconsistent indentation in metadata.xml file.

    Indentation should consist of either tabs only or spaces only; mixing
    the two is reported.
    """

    def __init__(self, filename, **kwargs):
        # the affected lines are passed through ``kwargs`` to the base classes
        super().__init__(**kwargs)
        self.filename = filename

    @property
    def desc(self):
        """Message naming the file and the affected lines."""
        name, where = self.filename, self.lines_str
        return f"{name}: metadata.xml has inconsistent indentation {where}"


class CatMetadataXmlIndentation(_MetadataXmlIndentation, results.CategoryResult):
    """Inconsistent indentation in category metadata.xml file.

    Either all tabs or all spaces should be used, not a mixture of both.

    Used as ``indent_error`` by ``CategoryMetadataXmlCheck``; it lists the
    lines whose leading whitespace contains a character differing from the
    first indentation character seen in the file.
    """


class PkgMetadataXmlIndentation(_MetadataXmlIndentation, results.PackageResult):
    """Inconsistent indentation in package metadata.xml file.

    Either all tabs or all spaces should be used, not a mixture of both.

    Used as ``indent_error`` by ``PackageMetadataXmlCheck``; it lists the
    lines whose leading whitespace contains a character differing from the
    first indentation character seen in the file.
    """


class _MetadataXmlEmptyElement(results.Style):
    """Base result for an empty element in a metadata.xml file."""

    def __init__(self, filename, element, line, **kwargs):
        super().__init__(**kwargs)
        # file name, tag of the empty element and the line it was found on
        self.filename, self.element, self.line = filename, element, line

    @property
    def desc(self):
        """Message naming the empty element and its line number."""
        tag = repr(self.element)
        return f"{self.filename}: empty element {tag} on line {self.line}"


class CatMetadataXmlEmptyElement(_MetadataXmlEmptyElement, results.CategoryResult):
    """Empty element in category metadata.xml file.

    Used as ``empty_element`` by ``CategoryMetadataXmlCheck`` for elements
    that have no children and no non-whitespace text, except ``flag`` and
    ``stabilize-allarches``.
    """


class PkgMetadataXmlEmptyElement(_MetadataXmlEmptyElement, results.PackageResult):
    """Empty element in package metadata.xml file.

    Used as ``empty_element`` by ``PackageMetadataXmlCheck`` for elements
    that have no children and no non-whitespace text, except ``flag`` and
    ``stabilize-allarches``.
    """


class RedundantLongDescription(results.PackageResult, results.Style):
    """Package's longdescription element in metadata.xml and DESCRIPTION are interchangeable.

    The longdescription element is for providing extended information that
    doesn't fit in DESCRIPTION.

    ``PackageMetadataXmlCheck`` also emits this result, with a different
    message, when the longdescription is shorter than 100 characters.
    """

    def __init__(self, msg, **kwargs):
        """Store the ready-made message chosen by the check."""
        super().__init__(**kwargs)
        self.msg = msg

    @property
    def desc(self):
        """Return the stored message unchanged."""
        return self.msg


class InvalidRemoteID(results.PackageResult, results.Warning):
    """Package's remote-id value incorrect for the specified type."""

    def __init__(self, id_type, id_value, expected, **kwargs):
        super().__init__(**kwargs)
        # remote-id type, the rejected value and a template of the expected form
        self.id_type, self.id_value, self.expected = id_type, id_value, expected

    @property
    def desc(self):
        """Message quoting the value, its type and the expected form."""
        value, kind, wanted = map(repr, (self.id_value, self.id_type, self.expected))
        return f"remote-id value {value} invalid for type={kind}, expected: {wanted}"


class InvalidMetadataRestrict(results.PackageResult, results.Error):
    """Invalid package restrictions used in metadata.xml."""

    def __init__(self, restrict: str, msg: str, **kwargs):
        super().__init__(**kwargs)
        # the offending ``restrict`` attribute value and why it was rejected
        self.restrict, self.msg = restrict, msg

    @property
    def desc(self):
        """Message quoting the restriction followed by the reason."""
        quoted, reason = repr(self.restrict), self.msg
        return f"metadata.xml: invalid package restrictions {quoted}: {reason}"


class _XmlBaseCheck(Check):
    """Base class for metadata.xml scans.

    Subclasses supply the result classes to emit (``misformed_error``,
    ``invalid_error``, ``missing_error``, ``catref_error``, ``pkgref_error``,
    ``indent_error``, ``empty_element``) and a ``_get_xml_location()`` method
    returning the path of the XML file to scan for a given package.
    """

    # schema used for validation; resolved per instance in __init__()
    schema = None

    # result classes, overridden by subclasses
    misformed_error = None
    invalid_error = None
    missing_error = None

    def __init__(self, *args):
        super().__init__(*args)
        self.repo_base = self.options.target_repo.location
        # maps <pkg/> reference text to whether it matched in the search repo
        self.pkgref_cache = {}
        # content validation checks to run after parsing XML doc
        self._checks = tuple(getattr(self, x) for x in dir(self) if x.startswith("_check_"))

        # Prefer xsd file from the target repository or its masters, falling
        # back to the file installed with pkgcore.
        for repo in reversed(self.options.target_repo.trees):
            metadata_xsd = pjoin(repo.location, "metadata", "xml-schema", "metadata.xsd")
            if os.path.isfile(metadata_xsd):
                try:
                    self.schema = etree.XMLSchema(etree.parse(metadata_xsd))
                    break
                except (etree.XMLSchemaParseError, etree.XMLSyntaxError):
                    # ignore invalid xsd files, including ones that are not
                    # even well-formed XML
                    pass
        else:
            metadata_xsd = pjoin(pkgcore_const.DATA_PATH, "xml-schema", "metadata.xsd")
            self.schema = etree.XMLSchema(etree.parse(metadata_xsd))

    def _check_doc(self, pkg, loc, doc):
        """Perform additional document structure checks.

        Yields results for empty elements and for <cat/> and <pkg/> elements
        whose text does not refer to a known category or package.
        """
        filename = os.path.basename(loc)

        # Find all root descendant elements that are empty except
        # 'stabilize-allarches' which is allowed to be empty and 'flag' which
        # is caught by MissingLocalUseDesc.
        for el in doc.getroot().iterdescendants():
            # comments and processing instructions are yielded too, but their
            # tag is not a string and they are not elements
            if not isinstance(el.tag, str):
                continue
            if (
                len(el) == 0
                and (el.text is None or not el.text.strip())
                and el.tag not in ("flag", "stabilize-allarches")
            ):
                yield self.empty_element(filename, el.tag, el.sourceline, pkg=pkg)

        for el in doc.findall(".//cat"):
            # el.text is None for elements without leading text, e.g. <cat/>
            c = (el.text or "").strip()
            if c not in self.options.search_repo.categories:
                yield self.catref_error(filename, c, pkg=pkg)

        for el in doc.findall(".//pkg"):
            # el.text is None for elements without leading text, e.g. <pkg/>
            p = (el.text or "").strip()
            if p not in self.pkgref_cache:
                try:
                    a = atom(p)
                    found = self.options.search_repo.has_match(a)
                except MalformedAtom:
                    found = False
                self.pkgref_cache[p] = found

            if not self.pkgref_cache[p]:
                yield self.pkgref_error(filename, p, pkg=pkg)

    def _check_whitespace(self, pkg, loc, doc):
        """Check for indentation consistency.

        The first indentation character seen in the file is taken as the
        reference; every line whose leading whitespace contains a different
        character is reported.
        """
        orig_indent = None
        indents = set()
        # Only leading whitespace is inspected, so decode leniently with a
        # fixed encoding rather than depend on the locale.
        with open(loc, encoding="utf-8", errors="replace") as f:
            for lineno, line in enumerate(f, 1):
                stripped = line.lstrip()
                if not stripped:
                    # whitespace-only lines carry no indentation to compare
                    continue
                for i in line[: len(line) - len(stripped)]:
                    if i != orig_indent:
                        if orig_indent is None:
                            orig_indent = i
                        else:
                            indents.add(lineno)
        if indents:
            yield self.indent_error(os.path.basename(loc), lines=map(str, sorted(indents)), pkg=pkg)

    @staticmethod
    def _format_lxml_errors(error_log):
        """Yield one formatted line per entry of an lxml error log."""
        for x in error_log:
            yield f"line {x.line}, col {x.column}: ({x.type_name}) {x.message}"

    def _parse_xml(self, pkg, loc):
        """Parse and validate the XML file at ``loc``, yielding results.

        Parsing and schema failures each yield a single result and stop;
        only documents passing both are handed to the ``_check_*`` methods.
        """
        filename = os.path.basename(loc)
        try:
            doc = etree.parse(loc)
        except OSError:
            # it's only an error when missing in the main gentoo repo
            if self.options.gentoo_repo:
                yield self.missing_error(filename, pkg=pkg)
            return
        except etree.XMLSyntaxError as e:
            yield self.misformed_error(filename, str(e), pkg=pkg)
            return

        # note: while doc is available, do not pass it here as it may
        # trigger undefined behavior due to incorrect structure
        if self.schema is not None and not self.schema.validate(doc):
            message = "\n".join(self._format_lxml_errors(self.schema.error_log))
            yield self.invalid_error(filename, message, pkg=pkg)
            return

        # run all post parsing/validation checks
        for check in self._checks:
            yield from check(pkg, loc, doc)

    def feed(self, pkgset):
        """Scan the XML file belonging to the first package of ``pkgset``."""
        pkg = pkgset[0]
        loc = self._get_xml_location(pkg)
        yield from self._parse_xml(pkg, loc)


class PackageMetadataXmlCheck(_XmlBaseCheck):
    """Package level metadata.xml scans."""

    _source = sources.PackageRepoSource
    misformed_error = PkgBadlyFormedXml
    invalid_error = PkgInvalidXml
    missing_error = PkgMissingMetadataXml
    catref_error = PkgMetadataXmlInvalidCatRef
    pkgref_error = PkgMetadataXmlInvalidPkgRef
    indent_error = PkgMetadataXmlIndentation
    empty_element = PkgMetadataXmlEmptyElement

    known_results = frozenset(
        [
            PkgBadlyFormedXml,
            PkgInvalidXml,
            PkgMissingMetadataXml,
            PkgMetadataXmlInvalidPkgRef,
            PkgMetadataXmlInvalidCatRef,
            PkgMetadataXmlIndentation,
            PkgMetadataXmlEmptyElement,
            MaintainerNeeded,
            MaintainerWithoutProxy,
            ProxyWithoutProxied,
            RedundantLongDescription,
            NonexistentProjectMaintainer,
            WrongMaintainerType,
            InvalidRemoteID,
            InvalidMetadataRestrict,
        ]
    )

    _one_component_validator_re = re.compile(r"^[^/]+$")
    _two_components_validator_re = re.compile(r"^[^/]+/[^/]+$")
    _gitlab_validator_re = re.compile(r"^([^/]+/)*[^/]+/[^/]+$")

    # remote-id type -> (regex the value must match, template of the expected
    # form shown in InvalidRemoteID)
    remote_id_validators = {
        # {name}-style remotes
        "cpan": (_one_component_validator_re, "{project}"),
        "cpan-module": (_one_component_validator_re, "{module}"),
        "cran": (_one_component_validator_re, "{project}"),
        "ctan": (_one_component_validator_re, "{project}"),
        "google-code": (_one_component_validator_re, "{project}"),
        "osdn": (_one_component_validator_re, "{project}"),
        "pear": (_one_component_validator_re, "{project}"),
        "pecl": (_one_component_validator_re, "{project}"),
        "pypi": (_one_component_validator_re, "{project}"),
        "rubygems": (_one_component_validator_re, "{project}"),
        "sourceforge": (_one_component_validator_re, "{project}"),
        # {name} with a special check for lp: prefix
        "launchpad": (re.compile(r"^(?!lp:)[^/]+$"), "{project}"),
        # {owner}/{name}-style remotes
        "bitbucket": (_two_components_validator_re, "{username}/{project}"),
        "github": (_two_components_validator_re, "{username}/{project}"),
        # gitlab (2+ components)
        "gitlab": (_gitlab_validator_re, "{username}/[{group}/...]{repo}"),
        "heptapod": (_gitlab_validator_re, "{username}/[{group}/...]{repo}"),
        # cpe
        "cpe": (re.compile(r"^cpe:/[aho]:[^:]+:[^:]+$"), "cpe:/[aho]:{vendor}:{product}"),
        # 1+ component + no ".git" suffix
        "gentoo": (re.compile(r"^([^/]+/)*[^/]+(?<!\.git)$"), "[{group}/...]{repo}"),
        # a positive decimal number
        "vim": (re.compile(r"^[1-9]\d*$"), "{script_id}"),
    }

    @staticmethod
    def _maintainer_proxied_key(m):
        """Classify maintainer ``m`` for the proxy checks.

        An explicit ``proxied`` value wins; otherwise the e-mail decides:
        ``proxy-maint@gentoo.org`` is ``"proxy"``, any other ``@gentoo.org``
        address is ``"no"`` and everything else is ``"yes"``.
        """
        if m.proxied is not None:
            return m.proxied
        if m.email == "proxy-maint@gentoo.org":
            return "proxy"
        if m.email.endswith("@gentoo.org"):
            return "no"
        return "yes"

    def _check_maintainers(self, pkg, loc, doc):
        """Validate maintainers in package metadata for the gentoo repo."""
        if self.options.gentoo_repo:
            maintainer_needed = any(
                c.text.strip() == "maintainer-needed" for c in doc.xpath("//comment()")
            )
            if pkg.maintainers:
                # check for invalid maintainer-needed comment
                if maintainer_needed:
                    yield MaintainerNeeded(os.path.basename(loc), maintainer_needed, pkg=pkg)

                # determine proxy maintainer status
                proxied, devs, proxies = [], [], []
                proxy_map = {"yes": proxied, "no": devs, "proxy": proxies}
                for m in pkg.maintainers:
                    proxy_map[self._maintainer_proxied_key(m)].append(m)

                # check proxy maintainers
                if not devs and not proxies:
                    maintainers = sorted(map(str, pkg.maintainers))
                    yield MaintainerWithoutProxy(os.path.basename(loc), maintainers, pkg=pkg)
                elif not proxied and proxies:
                    yield ProxyWithoutProxied(os.path.basename(loc), pkg=pkg)
            elif not maintainer_needed:
                # check for missing maintainer-needed comment
                yield MaintainerNeeded(os.path.basename(loc), maintainer_needed, pkg=pkg)

            # check maintainer validity; skipped when no projects are known
            if projects := set(pkg.repo.projects_xml.projects):
                nonexistent = []
                wrong_maintainers = []
                for m in pkg.maintainers:
                    if m.maint_type == "project" and m.email not in projects:
                        nonexistent.append(m.email)
                    elif m.maint_type == "person" and m.email in projects:
                        wrong_maintainers.append(m.email)
                if nonexistent:
                    yield NonexistentProjectMaintainer(
                        os.path.basename(loc), sorted(nonexistent), pkg=pkg
                    )
                if wrong_maintainers:
                    yield WrongMaintainerType(
                        os.path.basename(loc), sorted(wrong_maintainers), pkg=pkg
                    )

    def _check_longdescription(self, pkg, loc, doc):
        """Flag a longdescription that adds little over DESCRIPTION.

        Reported when the similarity ratio between the two exceeds 0.75 or,
        failing that, when the longdescription is under 100 characters.
        """
        if pkg.longdescription is not None:
            match_ratio = SequenceMatcher(None, pkg.description, pkg.longdescription).ratio()
            if match_ratio > 0.75:
                msg = "metadata.xml longdescription closely matches DESCRIPTION"
                yield RedundantLongDescription(msg, pkg=pkg)
            elif len(pkg.longdescription) < 100:
                msg = "metadata.xml longdescription is too short"
                yield RedundantLongDescription(msg, pkg=pkg)

    def _check_restricts(self, pkg, loc, doc):
        """Validate ``restrict`` attributes of maintainer and use flag elements.

        Each non-empty attribute must parse as an atom, refer to this very
        package and carry no USE-conditionals.
        """
        restricts = (
            c.get("restrict")
            for path in ("maintainer", "use/flag")
            for c in doc.xpath(f"/pkgmetadata/{path}[string(@restrict)]")
        )
        for restrict_str in restricts:
            try:
                restrict = atom(restrict_str, eapi="0")
            except MalformedAtom as exc:
                # results hold plain text, so store the message rather than
                # the exception object
                yield InvalidMetadataRestrict(restrict_str, str(exc), pkg=pkg)
                continue
            if restrict.key != pkg.key:
                yield InvalidMetadataRestrict(
                    restrict_str, "references another package", pkg=pkg
                )
            if restrict.use:
                yield InvalidMetadataRestrict(
                    restrict_str, "USE-conditionals are prohibited", pkg=pkg
                )

    def _check_remote_id(self, pkg, loc, doc):
        """Validate remote-id values against ``remote_id_validators``."""
        for u in pkg.upstreams:
            # empty values are already reported as PkgMetadataXmlEmptyElement
            if not u.name:
                continue
            try:
                validator, expected = self.remote_id_validators[u.type]
            except KeyError:  # pragma: no cover
                # types without a validator are not checked
                continue
            if not validator.match(u.name):
                yield InvalidRemoteID(u.type, u.name, expected, pkg=pkg)

    def _get_xml_location(self, pkg):
        """Return the metadata.xml location for a given package."""
        return pjoin(os.path.dirname(pkg.ebuild.path), "metadata.xml")


class CategoryMetadataXmlCheck(_XmlBaseCheck):
    """Category level metadata.xml scans."""

    _source = (sources.CategoryRepoSource, (), (("source", sources.RawRepoSource),))

    # result classes emitted by the shared scanning code
    misformed_error = CatBadlyFormedXml
    invalid_error = CatInvalidXml
    missing_error = CatMissingMetadataXml
    catref_error = CatMetadataXmlInvalidCatRef
    pkgref_error = CatMetadataXmlInvalidPkgRef
    indent_error = CatMetadataXmlIndentation
    empty_element = CatMetadataXmlEmptyElement

    known_results = frozenset(
        {
            CatBadlyFormedXml,
            CatInvalidXml,
            CatMissingMetadataXml,
            CatMetadataXmlInvalidPkgRef,
            CatMetadataXmlInvalidCatRef,
            CatMetadataXmlIndentation,
            CatMetadataXmlEmptyElement,
        }
    )

    def _get_xml_location(self, pkg):
        """Return the path of metadata.xml for the category ``pkg`` belongs to."""
        category_dir = pjoin(self.repo_base, pkg.category)
        return pjoin(category_dir, "metadata.xml")


class MissingRemoteId(results.PackageResult, results.Info):
    """Missing remote-id which was inferred from ebuilds.

    A remote-id can be suggested from the URIs found in SRC_URI and HOMEPAGE.
    No suggestion is made for a type that ``metadata.xml`` already defines a
    remote-id for. URIs ending with ``.diff`` or ``.patch`` are ignored, as
    they might point to a fork or developer's space, and so are URIs that
    are conditional on USE flags.
    """

    def __init__(self, remote_type: str, value: str, uri: str, **kwarg):
        super().__init__(**kwarg)
        # suggested remote-id type and value, plus the URI they came from
        self.remote_type, self.value, self.uri = remote_type, value, uri

    @property
    def desc(self):
        """Message showing the suggested element and its source URI."""
        suggestion = f'<remote-id type="{self.remote_type}">{self.value}</remote-id>'
        return f"missing {suggestion} (inferred from URI {self.uri!r})"


class MissingRemoteIdCheck(Check):
    """Detect missing remote-ids based on SRC_URI and HOMEPAGE."""

    _source = sources.PackageRepoSource
    known_results = frozenset([MissingRemoteId])

    # shared by the gitlab-style patterns below
    _gitlab_match = r"(?P<value>(\w[^/]*/)*\w[^/]*/\w[^/]*)"

    # (remote-id type, URL pattern); the ``value`` group is the suggested id
    remotes_map = (
        ("bitbucket", r"https://bitbucket.org/(?P<value>[^/]+/[^/]+)"),
        ("freedesktop-gitlab", rf"https://gitlab.freedesktop.org/{_gitlab_match}"),
        ("github", r"https://github.com/(?P<value>[^/]+/[^/]+)"),
        ("gitlab", rf"https://gitlab.com/{_gitlab_match}"),
        ("gnome-gitlab", rf"https://gitlab.gnome.org/{_gitlab_match}"),
        ("heptapod", rf"https://foss.heptapod.net/{_gitlab_match}"),
        ("launchpad", r"https://launchpad.net/(?P<value>[^/]+)"),
        ("pypi", r"https://pypi.org/project/(?P<value>[^/]+)"),
        ("pypi", r"https://files.pythonhosted.org/packages/source/\S/(?P<value>[^/]+)"),
        ("savannah", r"https://savannah.gnu.org/projects/(?P<value>[^/]+)"),
        ("savannah-nongnu", r"https://savannah.nongnu.org/projects/(?P<value>[^/]+)"),
        ("sourceforge", r"https://downloads.sourceforge.(net|io)/(?:project/)?(?P<value>[^/]+)"),
        ("sourceforge", r"https://sourceforge.(net|io)/projects/(?P<value>[^/]+)"),
        ("sourceforge", r"https://(?P<value>[^/]+).sourceforge.(net|io)/"),
        ("sourcehut", r"https://sr.ht/(?P<value>[^/]+/[^/]+)"),
    )

    def __init__(self, options, **kwargs):
        super().__init__(options, **kwargs)
        # shadow the class-level table with a compiled per-instance copy
        compiled = [(kind, re.compile(pattern)) for kind, pattern in self.remotes_map]
        self.remotes_map = tuple(compiled)

    def feed(self, pkgset):
        """Suggest remote-ids for types metadata.xml doesn't define yet.

        Packages are visited in reverse sorted order and, per remote type,
        the first matching URL wins; shorter URLs are tried first.
        """
        reported_pkg = pkgset[0]
        # types already present map to (None, None) and are never reported
        remotes = dict.fromkeys((u.type for u in reported_pkg.upstreams), (None, None))

        for pkg in sorted(pkgset, reverse=True):
            flattened = iflatten_instance(
                pkg.generate_fetchables(
                    allow_missing_checksums=True,
                    ignore_unknown_mirrors=True,
                    skip_default_mirrors=True,
                ),
                (fetchable, Conditional),
            )
            # conditional entries are left unexpanded and dropped here
            all_urls = set(
                chain.from_iterable(item.uri for item in flattened if isinstance(item, fetchable))
            )
            candidates = {uri for uri in all_urls if not uri.endswith((".patch", ".diff"))}
            ordered = sorted(candidates.union(pkg.homepage), key=len)

            for remote_type, regex in self.remotes_map:
                if remote_type in remotes:
                    continue
                for uri in ordered:
                    mo = regex.match(uri)
                    if mo is not None:
                        remotes[remote_type] = (mo.group("value"), uri)
                        break

        for remote_type, (value, uri) in remotes.items():
            if value is None:
                continue
            yield MissingRemoteId(remote_type, value, uri, pkg=reported_pkg)