LCOV - code coverage report
Current view: top level - src/formats - lang.dart (source / functions) Coverage Total Hit
Test: lcov.info Lines: 95.7 % 117 112
Test Date: 2026-06-16 03:31:00 Functions: - 0 0
Legend: Lines: hit not hit

            Line data    Source code
       1              : // Copyright 2026 The Authors. See the AUTHORS file for details.
       2              : //
       3              : // Licensed under the Apache License, Version 2.0 (the "License");
       4              : // you may not use this file except in compliance with the License.
       5              : // You may obtain a copy of the License at
       6              : //
       7              : //      https://www.apache.org/licenses/LICENSE-2.0
       8              : //
       9              : // Unless required by applicable law or agreed to in writing, software
      10              : // distributed under the License is distributed on an "AS IS" BASIS,
      11              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      12              : // See the License for the specific language governing permissions and
      13              : // limitations under the License.
      14              : 
      15              : import 'package:betto_abnf/betto_abnf.dart';
      16              : 
      17              : /// The separator used in language tags (`-`).
      18              : const defaultSeparator = '-';
      19              : 
      20              : /// A language tag as defined in [RFC 5646](https://www.rfc-editor.org/info/rfc5646).
      21              : ///
      22              : /// A language tag ("Tag") is a sequence of one or more subtags ("Subtag"),
      23              : /// separated by a hyphen (`-`).
      24              : ///
      25              : /// Some example language tags:
      26              : ///
      27              : /// - `en`: The ISO 639-1 code for English
      28              : /// - `en-AU`: adds the ISO 3166-1 alpha-2 region code for Australia
      29              : ///
      30              : /// Language tags are not case sensitive but the following guidance
      31              : /// is recommended:
      32              : ///
      33              : /// - language tags are lowercase
      34              : /// - region subtags are UPPERCASE
      35              : /// - script subtags are Title Case
      36              : /// - all other subtags are lowercase
      37              : ///
      38              : /// This class will convert values to meet that guidance.
      39              : sealed class LanguageTag {
      40              :   String get type;
      41              : 
      42              :   String get tag;
      43              : 
      44            0 :   @override
      45              :   bool operator ==(Object other) {
      46            0 :     if (other is LanguageTag) {
      47            0 :       return other.toString() == toString();
      48              :     }
      49              :     return false;
      50              :   }
      51              : 
      52            0 :   @override
      53            0 :   int get hashCode => toString().hashCode;
      54              : 
      55              :   /// Parses a language tag string into a [LanguageTag] variant.
      56              :   ///
      57              :   /// Returns the parsed [LanguageTag] — one of [LangTag], [GrandfatheredTag],
      58              :   /// or [PrivateUseTag] — or `null` if [tag] is empty or syntactically invalid.
      59            1 :   static LanguageTag? tryParse(
      60              :     String tag, {
      61              :     String separator = defaultSeparator,
      62              :   }) {
      63            1 :     if (tag.isEmpty) {
      64              :       return null;
      65              :     }
      66              : 
      67              :     // Attempt to parse the tag using the RFC 5646 grammar.
      68            2 :     final result = rfc5646LanguageTag.parse(tag);
      69              : 
      70            1 :     if (!result.success) {
      71              :       return null;
      72              :     }
      73              : 
      74              :     // RFC 5646 defines specific rules for grandfathered and private use tags.
      75              :     // Grandfathered tags are explicitly listed and may map to a standard tag.
      76            2 :     if (result.getRuleLexemes('grandfathered').isNotEmpty) {
      77            2 :       final regular = result.getRuleLexemes('regular').isNotEmpty;
      78            1 :       return GrandfatheredTag._(tag, regular);
      79            2 :     } else if (result.getRuleLexemes('privateuse').isNotEmpty) {
      80              :       // Private use tags start with 'x-' and are for experimental or personal use.
      81            1 :       return PrivateUseTag._(tag);
      82              :     }
      83              : 
      84              :     // Standard RFC 5646 tags must have at least a primary language subtag.
      85            2 :     final language = result.getRuleLexemes('language').firstOrNull;
      86              : 
      87              :     if (language == null) {
      88              :       return null;
      89              :     }
      90              : 
      91              :     // Construct the LangTag from the parsed subtag lexemes.
      92            1 :     final langtag = LangTag._(
      93              :       language,
      94            2 :       extendedLanguageSubtags: result.getRuleLexemes('extlang').firstOrNull,
      95            2 :       script: result.getRuleLexemes('script').firstOrNull,
      96            2 :       region: result.getRuleLexemes('region').firstOrNull,
      97            2 :       variant: result.getRuleLexemes('variant').firstOrNull,
      98            2 :       extension: result.getRuleLexemes('extension').firstOrNull,
      99            2 :       privateuse: result.getRuleLexemes('privateuse').firstOrNull,
     100              :     );
     101              : 
     102              :     return langtag;
     103              :   }
     104              : 
     105            2 :   static bool isValid(String value) => tryParse(value) != null;
     106              : 
     107              :   // TODO: Canonicalization as per https://www.rfc-editor.org/rfc/rfc5646.html#section-4.5
     108              : 
     109              :   // Returns a new [LanguageTag] with the last subtag removed.
     110              :   // https://www.rfc-editor.org/rfc/rfc5646.html#section-4.4.2
     111              :   // TODO LanguageTag truncate() {}
     112              : }
     113              : 
     114              : /// A normal language tag as per RFC 5646
     115              : class LangTag implements LanguageTag {
     116              :   @override
     117              :   final type = 'langtag';
     118              : 
     119              :   /// ISO 639-\[1|2|3|5\] language code
     120              :   final String language;
     121              : 
     122              :   /// ISO 639 code
     123              :   final String? extendedLanguageSubtags;
     124              : 
     125              :   /// ISO 15924 code
     126              :   final String? script;
     127              : 
     128              :   /// ISO 3166-1 or UN M.49 code
     129              :   final String? region;
     130              :   final String? variant;
     131              :   final String? extension;
     132              :   final String? privateuse;
     133              : 
     134              :   //final bool _validRfc5646;
     135              : 
     136              :   String? _rendered;
     137              : 
     138            1 :   @override
     139            1 :   String get tag => toString();
     140              : 
     141            1 :   LangTag._(
     142              :     String language, {
     143              :     this.extendedLanguageSubtags,
     144              :     String? script,
     145              :     String? region,
     146              :     this.variant,
     147              :     this.extension,
     148              :     this.privateuse,
     149            1 :   }) : language = language.toLowerCase(),
     150            1 :        region = region?.toUpperCase(),
     151              :        script = script != null
     152            4 :            ? '${script[0].toUpperCase()}${script.substring(1).toLowerCase()}'
     153            1 :            : null;
     154              : 
     155            1 :   bool get isValid {
     156              :     return false;
     157              :   }
     158              : 
     159            1 :   @override
     160              :   String toString() {
     161            1 :     var result = _rendered;
     162              :     if (result == null) {
     163            1 :       result = [
     164            1 :         language,
     165            1 :         if (extendedLanguageSubtags != null) extendedLanguageSubtags,
     166            2 :         if (script != null) script,
     167            2 :         if (region != null) region,
     168            1 :         if (variant != null) variant,
     169            1 :         if (extension != null) extension,
     170            1 :         if (privateuse != null) privateuse,
     171            1 :       ].join(defaultSeparator);
     172            1 :       _rendered = result;
     173              :     }
     174              :     return result;
     175              :   }
     176              : }
     177              : 
     178              : /// A Grandfathered language tag as per RFC 5646
     179              : class GrandfatheredTag implements LanguageTag {
     180              :   @override
     181              :   final type = 'grandfathered';
     182              : 
     183              :   @override
     184              :   final String tag;
     185              : 
     186              :   final bool regular;
     187              : 
     188            1 :   GrandfatheredTag._(this.tag, this.regular);
     189              : }
     190              : 
     191              : /// A Private Use Tag language tag as per RFC 5646
     192              : class PrivateUseTag implements LanguageTag {
     193              :   @override
     194              :   final type = 'privateuse';
     195              : 
     196              :   @override
     197              :   final String tag;
     198              : 
     199            1 :   PrivateUseTag._(this.tag);
     200              : }
     201              : 
     202              : /// The Language Tag syntax as defined in RFC 5646.
     203              : ///
     204              : /// Language tags can match one of the following rules:
     205              : ///
     206              : /// - `langtag`
     207              : /// - `privateuse`
     208              : /// - `grandfathered`
     209            3 : final rfc5646LanguageTag = grammar(
     210              :   'RFC5646 Language Tag',
     211            1 :   rule(
     212              :     'Language-Tag',
     213            2 :     alternatives([
     214              :       // grandfathered tags are explicitly listed and can map to a `langtag`
     215              :       // so we check them before `langtag`
     216            1 :       rfc5646grandfathered,
     217              :       // private use tags start with 'x' so check this next
     218            1 :       rfc5646privateuse,
     219            1 :       rfc5646langtag,
     220              :     ]),
     221              :   ),
     222              : );
     223              : 
     224            3 : final rfc5646langtag = rule(
     225              :   'langtag',
     226            2 :   concatenation([
     227            1 :     rfc5646language,
     228            4 :     optionalSequence([literal('-'), rfc5646script]),
     229            4 :     optionalSequence([literal('-'), rfc5646region]),
     230            5 :     variableRepetition(concatenation([literal('-'), rfc5646variant])),
     231            5 :     variableRepetition(concatenation([literal('-'), rfc5646extension])),
     232            4 :     optionalSequence([literal('-'), rfc5646privateuse]),
     233              :   ]),
     234              : );
     235              : 
     236            3 : final rfc5646language = rule(
     237              :   'language',
     238            2 :   alternatives([
     239            2 :     concatenation([
     240            2 :       variableRepetition(alpha, min: 2, max: 3),
     241            2 :       negativeLookahead(alphanum),
     242            2 :       optionalSequence([
     243            4 :         concatenation([literal('-'), rfc5646extlang]),
     244              :       ]),
     245              :     ]),
     246            6 :     concatenation([repetition(alpha, 4), negativeLookahead(alphanum)]),
     247            2 :     concatenation([
     248            2 :       variableRepetition(alpha, min: 5, max: 8),
     249            2 :       negativeLookahead(alphanum),
     250              :     ]),
     251              :   ]),
     252              : );
     253              : 
     254              : /// As defined in RFC5646, Section 2.2.2. Extended language subtags
     255              : ///
     256              : ///
     257            3 : final rfc5646extlang = rule(
     258              :   'extlang',
     259            2 :   concatenation([
     260            2 :     repetition(alpha, 3),
     261            2 :     negativeLookahead(alphanum),
     262            1 :     variableRepetition(
     263            2 :       concatenation([
     264            1 :         literal('-'),
     265            2 :         repetition(alpha, 3),
     266            2 :         negativeLookahead(alphanum),
     267              :       ]),
     268              :       max: 2,
     269              :     ),
     270              :   ]),
     271              : );
     272              : 
     273            3 : final rfc5646script = rule(
     274              :   'script',
     275            6 :   concatenation([repetition(alpha, 4), negativeLookahead(alphanum)]),
     276              : );
     277              : 
     278            3 : final rfc5646region = rule(
     279              :   'region',
     280            2 :   concatenation([
     281            6 :     alternatives([repetition(alpha, 2), repetition(digit, 3)]),
     282            2 :     negativeLookahead(alphanum),
     283              :   ]),
     284              : );
     285              : 
     286            3 : final rfc5646variant = rule(
     287              :   'variant',
     288            2 :   concatenation([
     289            2 :     alternatives([
     290            2 :       variableRepetition(alphanum, min: 5, max: 8),
     291            5 :       concatenation([digit, repetition(alphanum, 3)]),
     292              :     ]),
     293            2 :     negativeLookahead(alphanum),
     294              :   ]),
     295              : );
     296              : 
     297            3 : final rfc5646singleton = rule(
     298              :   'singleton',
     299            2 :   alternatives([
     300            1 :     digit,
     301            1 :     valueRange(0x41, 0x57),
     302            1 :     valueRange(0x59, 0x5A),
     303            1 :     valueRange(0x61, 0x77),
     304            1 :     valueRange(0x79, 0x7A),
     305              :   ]),
     306              : );
     307              : 
     308            3 : final rfc5646extension = rule(
     309              :   'extension',
     310            2 :   concatenation([
     311            1 :     rfc5646singleton,
     312            1 :     variableRepetition(
     313            2 :       concatenation([
     314            1 :         literal('-'),
     315            2 :         variableRepetition(alphanum, min: 2, max: 8),
     316            2 :         negativeLookahead(alphanum),
     317              :       ]),
     318              :       min: 1,
     319              :     ),
     320              :   ]),
     321              : );
     322              : 
     323              : /// As defined in RFC5646, grandfathered tags are still allowed
     324              : /// but have generally been deprecated in favour of new subtags
     325            3 : final rfc5646grandfathered = rule(
     326              :   'grandfathered',
     327            2 :   alternatives([
     328            5 :     rule('irregular', alternatives(irregular.map((e) => literal(e)))),
     329            5 :     rule('regular', alternatives(regular.map((e) => literal(e)))),
     330              :   ]),
     331              : );
     332              : 
     333              : /// As defined in RFC5646
     334              : const irregular = [
     335              :   'en-GB-oed',
     336              :   'i-ami',
     337              :   'i-bnn',
     338              :   'i-default',
     339              :   'i-enochian',
     340              :   'i-hak',
     341              :   'i-klingon',
     342              :   'i-lux',
     343              :   'i-mingo',
     344              :   'i-navajo',
     345              :   'i-pwn',
     346              :   'i-tao',
     347              :   'i-tay',
     348              :   'i-tsu',
     349              :   'sgn-BE-FR',
     350              :   'sgn-BE-NL',
     351              :   'sgn-CH-DE',
     352              : ];
     353              : 
     354              : /// As defined in RFC5646
     355              : const regular = [
     356              :   'art-lojban',
     357              :   'cel-gaulish',
     358              :   'no-bok',
     359              :   'no-nyn',
     360              :   'zh-guoyu',
     361              :   'zh-hakka',
     362              :   'zh-min-nan',
     363              :   'zh-min',
     364              :   'zh-xiang',
     365              : ];
     366              : 
     367            3 : final rfc5646privateuse = rule(
     368              :   'privateuse',
     369            2 :   concatenation([
     370            1 :     literal('x'),
     371            1 :     variableRepetition(
     372            2 :       concatenation([
     373            1 :         literal('-'),
     374            2 :         variableRepetition(alphanum, min: 1, max: 8),
     375              :       ]),
     376              :       min: 1,
     377              :     ),
     378              :   ]),
     379              : );
        

Generated by: LCOV version 2.0-1