import NaturalRegex from 'natural-regex';
// Parser object exposed by natural-regex; its `lexer` and `terminals_` table are used for tokenizing.
const { parser } = NaturalRegex;
/**
 * Lookup table from natural-regex token type to its French display label.
 * Keys are token type names as listed in the Jison parser's `terminals_` table.
 * @type {Record<string, string>}
 */
const TOKEN_LABELS = {
  // --- Structure ---
  STARTS_WITH: 'commence par',
  ENDS_WITH: 'finit par',
  GROUP: 'groupe',
  END_GROUP: 'fin groupe',
  CAPTURE: 'capture',
  END_CAPTURE: 'fin capture',
  CHARACTER_SET: 'un parmi',
  NOT_CHARACTER_SET: 'aucun parmi',

  // --- Logic ---
  AND: 'et',
  OR: 'ou',
  THEN: 'puis',
  FOLLOWED_BY: 'suivi de',
  NOT_FOLLOWED_BY: 'non suivi de',

  // --- Quantifiers ---
  OPTIONAL_REPETITION: 'répété optionnellement',
  ONE_OR_MORE_REPETITION: 'une ou plusieurs fois',
  ZERO_OR_ONE_REPETITION: 'optionnel',
  REPETITION: 'fois',
  MINIMUM: 'minimum',
  MAXIMUM: 'maximum',
  FOR: 'pour',
  FROM: 'de',
  TO: 'à',
  SMALLEST: 'le plus petit',

  // --- Character classes ---
  DIGIT: 'chiffre',
  NON_DIGIT: 'non-chiffre',
  LETTER: 'lettre',
  UPPERCASE: 'majuscule',
  LOWERCASE: 'minuscule',
  WORD: 'mot',
  NON_WORD: 'non-mot',
  ALPHANUMERIC: 'alphanumérique',
  SPACE: 'espace',
  NON_SPACE: 'non-espace',
  ANY_CHARACTER: 'tout caractère',
  ANYTHING: "n'importe quoi",

  // --- Special characters ---
  TAB: 'tabulation',
  VERTICAL_TAB: 'tabulation verticale',
  NULL: 'nul',
  RETURN: 'retour chariot',
  LINE_FEED: 'saut de ligne',
  FORM_FEED: 'saut de page',
  BACKSPACE: 'retour arrière',

  // --- Anchors ---
  START: 'début',
  END: 'fin',

  // --- Encoding ---
  HEX: 'hexadécimal',
  UNICODE: 'unicode',
  ESCAPED: 'échappé',

  // --- Numbers ---
  // NUMBER has no entry here (it is deliberately left unmapped), so a token of
  // that type gets no label from this table.
  TYPE_NUMBER: 'nombre',
  NEGATIVE: 'négatif',
  POSITIVE: 'positif',
  DECIMAL: 'décimal',

  // --- Common patterns ---
  HTML_TAG: 'balise HTML',
  IPV4: 'IPv4',
  IPV6: 'IPv6',
  IP_ADDRESS: 'adresse IP',
  MAC_ADDRESS: 'adresse MAC',
  URL: 'URL',
  EMAIL: 'e-mail',
  SLUG: 'slug',
  LOCALE: 'locale',
  LATITUDE: 'latitude',
  LONGITUDE: 'longitude',
  COLOR_NAME: 'nom de couleur',
  HOSTNAME: "nom d'hôte",
  UUID: 'UUID',
  GUID: 'GUID',
  US_ZIP_CODE: 'code postal US',
  CANADIAN_POSTAL_CODE: 'code postal canadien',
  BRAZILIAN_POSTAL_CODE: 'code postal brésilien',
  UK_POSTAL_CODE: 'code postal britannique',
  BIC: 'BIC',
  IBAN: 'IBAN',
  BRAINFUCK: 'brainfuck',
  MORSE: 'morse',
  YOUTUBE_CHANNEL: 'chaîne YouTube',
  YOUTUBE_VIDEO: 'vidéo YouTube',

  // --- Date / time ---
  DATE: 'date',
  DAY: 'jour',
  MONTH: 'mois',
  YEAR: 'année',
  yy: 'année courte',
  HOURS: 'heures',
  MINUTES: 'minutes',
  SECONDS: 'secondes',

  // --- Miscellaneous ---
  CONTROL_CHARACTER: 'caractère de contrôle',
  CHARACTER: 'caractère',
};
/**
 * Two-token combinations that are rendered with a single French label.
 *
 * Shape: `firstType -> secondType -> mergedLabel`. When a token of `firstType`
 * is immediately followed by a token of `secondType`, the pair is replaced by
 * one token carrying `mergedLabel`.
 * @type {Record<string, Record<string, string>>}
 */
const COMPOUND_LABELS = {
  UPPERCASE: {
    LETTER: 'lettre majuscule',
  },
  LOWERCASE: {
    LETTER: 'lettre minuscule',
  },
  NEGATIVE: {
    TYPE_NUMBER: 'nombre négatif',
    NUMBER: 'nombre négatif',
  },
  POSITIVE: {
    TYPE_NUMBER: 'nombre positif',
    NUMBER: 'nombre positif',
  },
  DECIMAL: {
    TYPE_NUMBER: 'nombre décimal',
    NUMBER: 'nombre décimal',
  },
};
/**
* @typedef {{ type: string; text: string; label: string }} NaturalRegexToken
*/
/**
 * Tokenizes a natural-regex source string and returns tokens with French labels.
 *
 * Works in two passes: first the natural-regex lexer is run over `input` to
 * collect raw `{ type, text }` tokens, then adjacent pairs listed in
 * `COMPOUND_LABELS` are merged into a single token.
 *
 * @param {string} input - Natural-regex source text.
 * @returns {NaturalRegexToken[]} Tokens in source order. A merged pair has type
 *   `FIRST+SECOND`, text made of both source texts joined by one space, and the
 *   compound label. Any other token is labelled from `TOKEN_LABELS`, falling
 *   back to its own source text when the type has no entry.
 */
function tokenize(input) {
  // Derive a fresh object from the parser's lexer rather than using it directly
  // (presumably so state written by setInput/lex stays off the shared lexer — confirm).
  const lexer = Object.create(parser.lexer);
  lexer.setInput(input, {});

  /** @type {{ type: string; text: string }[]} Raw tokens before compound merging */
  const raw = [];
  while (true) {
    const tokenId = lexer.lex();
    if (tokenId === false) break;
    // Resolve the lexer result through the terminals table; when it is not a
    // key of that table, use its string form as the type name.
    const type = parser.terminals_[tokenId] ?? String(tokenId);
    if (type === 'EOF') break;
    raw.push({ type, text: lexer.yytext });
  }

  /** @type {NaturalRegexToken[]} */
  const tokens = [];
  for (let i = 0; i < raw.length; i++) {
    const current = raw[i];
    const next = raw[i + 1];
    // `next` is undefined on the last token, which makes the lookup yield undefined.
    const compoundLabel = COMPOUND_LABELS[current.type]?.[next?.type];
    if (compoundLabel) {
      tokens.push({
        type: `${current.type}+${next.type}`,
        text: `${current.text} ${next.text}`,
        label: compoundLabel
      });
      // The following token has been consumed by the merge; skip it.
      i++;
    } else {
      tokens.push({
        type: current.type,
        text: current.text,
        label: TOKEN_LABELS[current.type] ?? current.text
      });
    }
  }
  return tokens;
}
/**
 * Builds a human-readable description of a natural-regex source string.
 *
 * The input is tokenized, each token is replaced by its label, and the labels
 * are joined with single spaces; leading/trailing whitespace is trimmed.
 * @param {string} input - Natural-regex source text.
 * @returns {string} Space-separated token labels.
 */
export function describeNaturalRegex(input) {
  const labels = [];
  for (const { label } of tokenize(input)) {
    labels.push(label);
  }
  const description = labels.join(' ');
  return description.trim();
}
// In-source tests: this block only runs when `import.meta.vitest` is defined.
if (import.meta.vitest) {
  const { test, expect, describe: group } = import.meta.vitest;

  /** Token type names produced for `source`, in order. */
  const typesOf = (source) => tokenize(source).map(({ type }) => type);

  group('tokenize', () => {
    test('tokenizes a simple pattern', () => {
      const result = tokenize('digit');
      expect(result).toHaveLength(1);
      const [only] = result;
      expect(only.type).toBe('DIGIT');
      expect(only.label).toBe('chiffre');
    });

    test('tokenizes a multi-token pattern', () => {
      expect(typesOf('starts with digit then letter end')).toEqual([
        'STARTS_WITH',
        'DIGIT',
        'THEN',
        'LETTER',
        'END'
      ]);
    });

    test('labels punctuation/literal characters with their text', () => {
      // NOTE(review): `every` is vacuously true when no token has type
      // 'CHARACTER', and TOKEN_LABELS maps CHARACTER to 'caractère' — this
      // check may not be exercising the text fallback; confirm.
      const literals = tokenize('starts with "hello" end').filter(({ type }) => type === 'CHARACTER');
      const allLabelledWithText = literals.every(({ label, text }) => label === text);
      expect(allLabelledWithText).toBe(true);
    });

    test('tokenizes date/time tokens', () => {
      expect(typesOf('year month day hours minutes seconds')).toEqual([
        'YEAR',
        'MONTH',
        'DAY',
        'HOURS',
        'MINUTES',
        'SECONDS'
      ]);
    });

    test('tokenizes quantifiers', () => {
      const types = typesOf('digit one or more times');
      expect(types).toContain('DIGIT');
      expect(types).toContain('ONE_OR_MORE_REPETITION');
    });
  });

  group('describe', () => {
    test('produces a readable description', () => {
      expect(describeNaturalRegex('starts with digit then letter end')).toBe(
        'commence par chiffre puis lettre fin'
      );
    });

    test('handles uppercase/lowercase', () => {
      expect(describeNaturalRegex('uppercase letter')).toBe('lettre majuscule');
    });

    test('handles lowercase letter', () => {
      expect(describeNaturalRegex('lowercase letter')).toBe('lettre minuscule');
    });

    test('handles negative number', () => {
      expect(describeNaturalRegex('negative number')).toBe('nombre négatif');
    });

    test('does not reorder when adjective has no following noun', () => {
      expect(describeNaturalRegex('uppercase')).toBe('majuscule');
    });

    test('merges compound tokens into a single token', () => {
      const merged = tokenize('uppercase letter');
      expect(merged).toHaveLength(1);
      const [compound] = merged;
      expect(compound.label).toBe('lettre majuscule');
      expect(compound.type).toBe('UPPERCASE+LETTER');
    });
  });
}
|