Line data Source code
1 : // Copyright 2026 The Authors. See the AUTHORS file for details.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // https://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 :
15 : import 'package:betto_abnf/betto_abnf.dart';
16 :
/// The character that separates subtags in a language tag: a hyphen (`-`).
const String defaultSeparator = '-';
19 :
/// A language tag as defined in [RFC 5646](https://www.rfc-editor.org/info/rfc5646).
///
/// A language tag ("Tag") is a sequence of one or more subtags ("Subtag"),
/// separated by a hyphen (`-`).
///
/// Some example language tags:
///
/// - `en`: The ISO 639-1 code for English
/// - `en-AU`: adds the ISO 3166-1 alpha-2 region code for Australia
///
/// Language tags are not case sensitive but the following guidance
/// is recommended:
///
/// - language tags are lowercase
/// - region subtags are UPPERCASE
/// - script subtags are Title Case
/// - all other subtags are lowercase
///
/// This class will convert values to meet that guidance.
sealed class LanguageTag {
  /// The name of the grammar production this tag matched.
  String get type;

  /// The textual form of this tag.
  String get tag;

  /// Whether [other] is a [LanguageTag] with the same [tag] text.
  ///
  /// The comparison uses [tag] rather than [toString] because not every
  /// variant overrides [toString]. Note that a variant declared with
  /// `implements` does not inherit this operator.
  @override
  bool operator ==(Object other) => other is LanguageTag && other.tag == tag;

  @override
  int get hashCode => tag.hashCode;

  /// Parses a language tag string into a [LanguageTag] variant.
  ///
  /// [separator] is the subtag separator used in [tag]. When it differs from
  /// [defaultSeparator] every occurrence is mapped to [defaultSeparator]
  /// before parsing, so `tryParse('en_AU', separator: '_')` is parsed as
  /// `en-AU`. An empty [separator] is ignored.
  ///
  /// Returns the parsed [LanguageTag] — one of [LangTag], [GrandfatheredTag],
  /// or [PrivateUseTag] — or `null` if [tag] is empty or syntactically invalid.
  static LanguageTag? tryParse(
    String tag, {
    String separator = defaultSeparator,
  }) {
    if (tag.isEmpty) {
      return null;
    }

    // The grammar only knows `-`, so translate a custom separator up front.
    final normalized = separator.isEmpty || separator == defaultSeparator
        ? tag
        : tag.replaceAll(separator, defaultSeparator);

    // Attempt to parse the tag using the RFC 5646 grammar.
    final result = rfc5646LanguageTag.parse(normalized);
    if (!result.success) {
      return null;
    }

    // Grandfathered tags are explicitly listed and may look like a langtag,
    // so they are recognised first.
    if (result.getRuleLexemes('grandfathered').isNotEmpty) {
      final regular = result.getRuleLexemes('regular').isNotEmpty;
      return GrandfatheredTag._(normalized, regular);
    }

    // A langtag always has a primary language subtag. Checking for it before
    // `privateuse` matters: a langtag may end in a private use sequence
    // (`en-x-foo`), and that must not be mistaken for a pure `x-...` tag.
    final languageLexeme = result.getRuleLexemes('language').firstOrNull;
    if (languageLexeme == null) {
      // Private use tags start with 'x-' and carry no language subtag.
      return result.getRuleLexemes('privateuse').isNotEmpty
          ? PrivateUseTag._(normalized)
          : null;
    }

    // Every match of a repeatable rule, joined with the separator, or `null`
    // when the rule did not match at all.
    String? joined(String ruleName) {
      final lexemes = result.getRuleLexemes(ruleName);
      return lexemes.isEmpty ? null : lexemes.join(defaultSeparator);
    }

    // The `language` rule also contains the optional extlang. Keep only the
    // primary subtag here so the extlang is not rendered twice.
    // NOTE(review): a no-op if the lexeme never includes the extlang — confirm
    // against the parser's lexeme semantics.
    final primaryLanguage = languageLexeme.split(defaultSeparator).first;

    // Construct the LangTag from the parsed subtag lexemes. Variants and
    // extensions may repeat, so all of their matches are kept.
    return LangTag._(
      primaryLanguage,
      extendedLanguageSubtags: result.getRuleLexemes('extlang').firstOrNull,
      script: result.getRuleLexemes('script').firstOrNull,
      region: result.getRuleLexemes('region').firstOrNull,
      variant: joined('variant'),
      extension: joined('extension'),
      privateuse: result.getRuleLexemes('privateuse').firstOrNull,
    );
  }

  /// Whether [value] can be parsed by [tryParse] using [defaultSeparator].
  static bool isValid(String value) => tryParse(value) != null;

  // TODO: Canonicalization as per https://www.rfc-editor.org/rfc/rfc5646.html#section-4.5

  // Returns a new [LanguageTag] with the last subtag removed.
  // https://www.rfc-editor.org/rfc/rfc5646.html#section-4.4.2
  // TODO LanguageTag truncate() {}
}
113 :
/// A normal language tag (the `langtag` production) as per RFC 5646.
///
/// Subtags are normalised on construction: the script is Title Case, the
/// region is UPPERCASE and every other subtag is lowercase.
class LangTag implements LanguageTag {
  @override
  final type = 'langtag';

  /// ISO 639-\[1|2|3|5\] language code, lowercase.
  final String language;

  /// ISO 639 code, lowercase.
  final String? extendedLanguageSubtags;

  /// ISO 15924 code, Title Case.
  final String? script;

  /// ISO 3166-1 or UN M.49 code, UPPERCASE.
  final String? region;

  /// Variant subtag text as supplied by the parser, lowercase.
  final String? variant;

  /// Extension subtag text as supplied by the parser, lowercase.
  final String? extension;

  /// Private use subtag text as supplied by the parser, lowercase.
  final String? privateuse;

  //final bool _validRfc5646;

  /// Cache for [toString]; built on first use.
  String? _rendered;

  @override
  String get tag => toString();

  LangTag._(
    String language, {
    String? extendedLanguageSubtags,
    String? script,
    String? region,
    String? variant,
    String? extension,
    String? privateuse,
  }) : language = language.toLowerCase(),
       extendedLanguageSubtags = extendedLanguageSubtags?.toLowerCase(),
       region = region?.toUpperCase(),
       script = script != null
           ? '${script[0].toUpperCase()}${script.substring(1).toLowerCase()}'
           : null,
       variant = variant?.toLowerCase(),
       extension = extension?.toLowerCase(),
       privateuse = privateuse?.toLowerCase();

  /// Always `false` at present.
  ///
  /// TODO: no validation beyond parsing is implemented yet, so this is a
  /// placeholder rather than a real check.
  bool get isValid => false;

  /// Whether [other] is a [LanguageTag] with the same [tag] text.
  ///
  /// Declared here because this class uses `implements`, which does not
  /// inherit the operator from [LanguageTag].
  @override
  bool operator ==(Object other) => other is LanguageTag && other.tag == tag;

  @override
  int get hashCode => tag.hashCode;

  /// The subtags that are present, joined by [defaultSeparator].
  @override
  String toString() => _rendered ??= [
    language,
    if (extendedLanguageSubtags != null) extendedLanguageSubtags,
    if (script != null) script,
    if (region != null) region,
    if (variant != null) variant,
    if (extension != null) extension,
    if (privateuse != null) privateuse,
  ].join(defaultSeparator);
}
177 :
/// A Grandfathered language tag as per RFC 5646
class GrandfatheredTag implements LanguageTag {
  @override
  final type = 'grandfathered';

  /// The tag text exactly as it was given to the constructor.
  @override
  final String tag;

  /// Whether the tag matched the `regular` rule of the grammar rather than
  /// the `irregular` one.
  final bool regular;

  GrandfatheredTag._(this.tag, this.regular);

  /// Whether [other] is a [LanguageTag] with the same [tag] text.
  ///
  /// Declared here because this class uses `implements`, which does not
  /// inherit the operator from [LanguageTag].
  @override
  bool operator ==(Object other) => other is LanguageTag && other.tag == tag;

  @override
  int get hashCode => tag.hashCode;

  /// Returns [tag] instead of the default `Instance of ...` text.
  @override
  String toString() => tag;
}
190 :
/// A Private Use Tag language tag as per RFC 5646
class PrivateUseTag implements LanguageTag {
  @override
  final type = 'privateuse';

  /// The tag text exactly as it was given to the constructor.
  @override
  final String tag;

  PrivateUseTag._(this.tag);

  /// Whether [other] is a [LanguageTag] with the same [tag] text.
  ///
  /// Declared here because this class uses `implements`, which does not
  /// inherit the operator from [LanguageTag].
  @override
  bool operator ==(Object other) => other is LanguageTag && other.tag == tag;

  @override
  int get hashCode => tag.hashCode;

  /// Returns [tag] instead of the default `Instance of ...` text.
  @override
  String toString() => tag;
}
201 :
/// The top-level `Language-Tag` grammar of RFC 5646.
///
/// A tag is matched against these rules, tried in this order:
///
/// 1. `grandfathered` — an explicit list whose entries can also look like a
///    `langtag`, so it has to be tried first
/// 2. `privateuse` — tags that start with `x`
/// 3. `langtag`
final rfc5646LanguageTag = grammar(
  'RFC5646 Language Tag',
  rule(
    'Language-Tag',
    alternatives([rfc5646grandfathered, rfc5646privateuse, rfc5646langtag]),
  ),
);
223 :
/// The `langtag` production of RFC 5646:
///
/// ```abnf
/// langtag = language ["-" script] ["-" region]
///           *("-" variant) *("-" extension) ["-" privateuse]
/// ```
final rfc5646langtag = rule(
  'langtag',
  concatenation([
    // Mandatory primary language (with its optional extlang).
    rfc5646language,
    // At most one script, then at most one region.
    optionalSequence([literal('-'), rfc5646script]),
    optionalSequence([literal('-'), rfc5646region]),
    // Any number of variants, then any number of extensions.
    variableRepetition(concatenation([literal('-'), rfc5646variant])),
    variableRepetition(concatenation([literal('-'), rfc5646extension])),
    // Optional trailing private use sequence.
    optionalSequence([literal('-'), rfc5646privateuse]),
  ]),
);
235 :
/// The `language` production of RFC 5646:
///
/// ```abnf
/// language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
/// ```
///
/// Each alternative ends with a negative lookahead on `alphanum` so that a
/// subtag is only accepted when it is not followed by further alphanumerics.
final rfc5646language = rule(
  'language',
  alternatives([
    // 2*3ALPHA, optionally followed by extended language subtags.
    concatenation([
      variableRepetition(alpha, min: 2, max: 3),
      negativeLookahead(alphanum),
      optionalSequence([
        concatenation([literal('-'), rfc5646extlang]),
      ]),
    ]),
    // 4ALPHA
    concatenation([repetition(alpha, 4), negativeLookahead(alphanum)]),
    // 5*8ALPHA
    concatenation([
      variableRepetition(alpha, min: 5, max: 8),
      negativeLookahead(alphanum),
    ]),
  ]),
);
253 :
/// Extended language subtags, as defined in RFC 5646, Section 2.2.2:
///
/// ```abnf
/// extlang = 3ALPHA *2("-" 3ALPHA)
/// ```
final rfc5646extlang = rule(
  'extlang',
  concatenation([
    // First three-letter subtag.
    repetition(alpha, 3),
    negativeLookahead(alphanum),
    // Up to two further three-letter subtags.
    variableRepetition(
      concatenation([
        literal('-'),
        repetition(alpha, 3),
        negativeLookahead(alphanum),
      ]),
      max: 2,
    ),
  ]),
);
272 :
/// The `script` production of RFC 5646: exactly four letters (`4ALPHA`) that
/// are not followed by another alphanumeric.
final rfc5646script = rule(
  'script',
  concatenation([
    repetition(alpha, 4),
    negativeLookahead(alphanum),
  ]),
);
277 :
/// The `region` production of RFC 5646: two letters or three digits
/// (`2ALPHA / 3DIGIT`) that are not followed by another alphanumeric.
final rfc5646region = rule(
  'region',
  concatenation([
    alternatives([
      repetition(alpha, 2),
      repetition(digit, 3),
    ]),
    negativeLookahead(alphanum),
  ]),
);
285 :
/// The `variant` production of RFC 5646:
///
/// ```abnf
/// variant = 5*8alphanum / (DIGIT 3alphanum)
/// ```
///
/// The match must not be followed by another alphanumeric.
final rfc5646variant = rule(
  'variant',
  concatenation([
    alternatives([
      // 5*8alphanum
      variableRepetition(alphanum, min: 5, max: 8),
      // DIGIT 3alphanum
      concatenation([digit, repetition(alphanum, 3)]),
    ]),
    negativeLookahead(alphanum),
  ]),
);
296 :
/// The `singleton` production of RFC 5646: one digit or one ASCII letter
/// other than `x`/`X`, which is skipped by the value ranges below.
final rfc5646singleton = rule(
  'singleton',
  alternatives([
    digit,
    valueRange(0x41, 0x57), // A-W
    valueRange(0x59, 0x5A), // Y-Z
    valueRange(0x61, 0x77), // a-w
    valueRange(0x79, 0x7A), // y-z
  ]),
);
307 :
/// The `extension` production of RFC 5646:
///
/// ```abnf
/// extension = singleton 1*("-" (2*8alphanum))
/// ```
final rfc5646extension = rule(
  'extension',
  concatenation([
    rfc5646singleton,
    // One or more subtags of two to eight alphanumerics each.
    variableRepetition(
      concatenation([
        literal('-'),
        variableRepetition(alphanum, min: 2, max: 8),
        negativeLookahead(alphanum),
      ]),
      min: 1,
    ),
  ]),
);
322 :
/// The `grandfathered` production of RFC 5646.
///
/// Grandfathered tags are still allowed but have generally been deprecated
/// in favour of new subtags. Each entry of [irregular] and [regular] becomes
/// one literal alternative of the corresponding nested rule.
final rfc5646grandfathered = rule(
  'grandfathered',
  alternatives([
    rule(
      'irregular',
      alternatives(irregular.map((tag) => literal(tag))),
    ),
    rule(
      'regular',
      alternatives(regular.map((tag) => literal(tag))),
    ),
  ]),
);
332 :
/// The irregular grandfathered tags listed in RFC 5646.
const List<String> irregular = <String>[
  'en-GB-oed',
  'i-ami',
  'i-bnn',
  'i-default',
  'i-enochian',
  'i-hak',
  'i-klingon',
  'i-lux',
  'i-mingo',
  'i-navajo',
  'i-pwn',
  'i-tao',
  'i-tay',
  'i-tsu',
  'sgn-BE-FR',
  'sgn-BE-NL',
  'sgn-CH-DE',
];
353 :
/// The regular grandfathered tags listed in RFC 5646.
///
/// `zh-min-nan` is listed before `zh-min`, its prefix.
const List<String> regular = <String>[
  'art-lojban',
  'cel-gaulish',
  'no-bok',
  'no-nyn',
  'zh-guoyu',
  'zh-hakka',
  'zh-min-nan',
  'zh-min',
  'zh-xiang',
];
366 :
/// The `privateuse` production of RFC 5646:
///
/// ```abnf
/// privateuse = "x" 1*("-" (1*8alphanum))
/// ```
///
/// Each subtag must not be followed by another alphanumeric, matching the
/// other subtag rules in this file. Without that check a subtag longer than
/// eight characters could be matched by its first eight characters.
final rfc5646privateuse = rule(
  'privateuse',
  concatenation([
    literal('x'),
    variableRepetition(
      concatenation([
        literal('-'),
        variableRepetition(alphanum, min: 1, max: 8),
        negativeLookahead(alphanum),
      ]),
      min: 1,
    ),
  ]),
);
|