Line data Source code
/// Enables checking a package's license from pub.dev.
///
/// This library is intended to be used by Very Good CLI to help extract
/// license information. The existence of this library is likely to be
/// ephemeral. It may be obsolete once [pub.dev](https://pub.dev/) exposes
/// stable license information in their official API; you may track the
/// progress [here](https://github.com/dart-lang/pub-dev/issues/4717).
library pub_license;

import 'package:html/dom.dart' as html_dom;
import 'package:html/parser.dart' as html_parser;
import 'package:http/http.dart' as http;
import 'package:meta/meta.dart';

/// The pub.dev [Uri] used to retrieve the license of a package.
///
/// [packageName] is interpolated verbatim into the path of the license page.
Uri _pubPackageLicenseUri(String packageName) =>
    Uri.parse('https://pub.dev/packages/$packageName/license');

/// {@template pub_license_exception}
/// An exception thrown by [PubLicense].
/// {@endtemplate}
class PubLicenseException implements Exception {
  /// {@macro pub_license_exception}
  ///
  /// The given message is stored prefixed with `[pub_license] `.
  const PubLicenseException(String message)
      : message = '[pub_license] $message';

  /// The exception message.
  final String message;

  /// Returns [message], so that logging or interpolating the exception shows
  /// the failure reason instead of `Instance of 'PubLicenseException'`.
  @override
  String toString() => message;
}

/// The function signature for parsing HTML documents.
@visibleForTesting
typedef HtmlDocumentParse = html_dom.Document Function(
  dynamic input, {
  String? encoding,
  bool generateSpans,
  String? sourceUrl,
});

/// {@template pub_license}
/// Enables checking pub.dev's hosted packages license.
/// {@endtemplate}
class PubLicense {
  /// {@macro pub_license}
  ///
  /// When [client] is omitted a new [http.Client] is created; when [parse] is
  /// omitted the parser from `package:html` is used.
  PubLicense({
    @visibleForTesting http.Client? client,
    @visibleForTesting HtmlDocumentParse? parse,
  })  : _client = client ?? http.Client(),
        _parse = parse ?? html_parser.parse;

  /// The HTTP client used to request the license page.
  final http.Client _client;

  /// The HTML parser used to turn the response body into a document.
  final HtmlDocumentParse _parse;

  /// Retrieves the license of a package.
  ///
  /// Some packages may have multiple licenses, hence a [Set] is returned.
  ///
  /// It may throw a [PubLicenseException] if:
  /// * The response from pub.dev is not successful.
  /// * The response body cannot be parsed.
  Future<Set<String>> getLicense(String packageName) async {
    final response = await _client.get(_pubPackageLicenseUri(packageName));

    if (response.statusCode != 200) {
      throw PubLicenseException(
        '''Failed to retrieve the license of the package, received status code: ${response.statusCode}''',
      );
    }

    late final html_dom.Document document;
    try {
      document = _parse(response.body);
    } on html_parser.ParseError catch (e) {
      throw PubLicenseException(
        'Failed to parse the response body, received error: $e',
      );
    } catch (e) {
      // Anything else thrown by the parser is still reported as a
      // [PubLicenseException], with a message marking it as unexpected.
      throw PubLicenseException(
        '''An unknown error occurred when trying to parse the response body, received error: $e''',
      );
    }

    return _scrapeLicense(document);
  }
}

/// Scrapes the license from the pub.dev's package license page.
///
/// The expected HTML structure is:
/// ```html
/// <aside class="detail-info-box">
///   <h3> ... </h3>
///   <p> ... </p>
///   <h3 class="title">License</h3>
///   <p>
///     <img/>
///     MIT (<a href="/packages/very_good_cli/license">LICENSE</a>)
///   </p>
/// </aside>
/// ```
///
/// It may throw a [PubLicenseException] if:
/// * The detail info box is not found.
/// * The license header is not found.
/// * The license header is not followed by an element holding the license.
Set<String> _scrapeLicense(html_dom.Document document) {
  final detailInfoBox = document.querySelector('.detail-info-box');
  if (detailInfoBox == null) {
    throw const PubLicenseException(
      '''Failed to scrape license because `.detail-info-box` was not found.''',
    );
  }

  // Read the child element list once rather than on every access.
  final children = detailInfoBox.children;

  // The license header is the first child whose trimmed, lower-cased text is
  // exactly "license".
  final headerIndex = children.indexWhere(
    (child) => child.text.trim().toLowerCase() == 'license',
  );
  if (headerIndex == -1) {
    throw const PubLicenseException(
      '''Failed to scrape license because the license header was not found.''',
    );
  }

  // The license text lives in the element right after the header. Guard the
  // lookup so a header that is the last child is reported as a
  // [PubLicenseException] instead of surfacing as a [RangeError].
  final licenseIndex = headerIndex + 1;
  if (licenseIndex >= children.length) {
    throw const PubLicenseException(
      '''Failed to scrape license because the license header is not followed by the license text.''',
    );
  }
  final rawLicenseText = children[licenseIndex].text.trim();

  // Drop the trailing "(LICENSE)" link text, then split comma-separated
  // licenses into individual entries.
  final licenseText = rawLicenseText.split('(').first.trim();
  return licenseText.split(',').map((e) => e.trim()).toSet();
}