Skip to content

Commit

Permalink
Fix parser when value has backslash
Browse files Browse the repository at this point in the history
  • Loading branch information
kzrnm committed Oct 22, 2024
1 parent a7b24b5 commit 401d634
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 34 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to the "arbeditor" extension will be documented in this file

Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file.

## [Unreleased]

- Fix parser when a message value contains a backslash escape (e.g. `\t` or `\u007B`) [#73](https://github.com/google/arb-editor/issues/73).

## [0.2.1]

- Fix behavior of `use-escaping` [#72](https://github.com/google/arb-editor/issues/72).
Expand Down
155 changes: 122 additions & 33 deletions src/messageParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,24 @@ export class Parser {
placeholderLevel = nestingLevel;
}
},
onLiteralValue: (value: any, offset: number) => {
onLiteralValue: (value: any, offset: number, length: number, startLine: number, startCharacter: number, pathSupplier: () => JSONPath) => {
if (inTemplateTag) {
templatePath = value;
inTemplateTag = false;
} else if (nestingLevel === 1 && messageKey !== null) {
} else if (typeof value === 'string' && nestingLevel === 1 && messageKey !== null) {
try {
var message = parseMessage(value, offset, false);
const rawValue = document.substring(offset + 1, offset + length - 1);
const message = parseMessage(StringLiteral.build(rawValue, value), offset, false);
messages.push(new MessageEntry(messageKey!, message));
} catch (error: any) {
//Very hacky solution to catch all errors here and store them, but better than not checking at all... The error has no special type, unfortunately.
if (String(error).startsWith('Error: Unbalanced ')) {
errors.push(new Literal(String(error), offset + 1, offset + value.length + 1));
errors.push(new Literal(String(error), offset + 1, offset + length - 1));
} else {
throw error;
}
}
messageKey.endOfMessage = offset + value.length + 2;
messageKey.endOfMessage = offset + length;
}
},
onObjectEnd: (offset: number, length: number, startLine: number, startCharacter: number) => {
Expand All @@ -109,59 +110,69 @@ export class Parser {
}, { disallowComments: true });


function parseMessage(messageString: string, globalOffset: number, expectPlaceholder: boolean): Message {
function parseMessage(messageString: StringLiteral, globalOffset: number, expectPlaceholder: boolean): Message {
const vals = matchCurlyBrackets(messageString, l10nOptions);

if (vals.length === 0) {
if (expectPlaceholder) {
return new Placeholder(messageString, globalOffset, globalOffset + messageString.length);
return new Placeholder(messageString.parsed, globalOffset, globalOffset + messageString.raw.length);
} else {
return new Literal(messageString, globalOffset, globalOffset + messageString.length);
return new Literal(messageString.parsed, globalOffset, globalOffset + messageString.raw.length);
}
}
const submessages: Message[] = [];
for (const part of vals) {
const isSubmessage = part.name === 'content';
const isString = part.name === 'outside';
if (isSubmessage || isString) {
if (isSubmessage && part.value.includes(',')) {
if (isSubmessage && part.parsed.includes(',')) {
submessages.push(parseComplexMessage(part));
} else {
submessages.push(parseMessage(part.value, globalOffset + part.start + 1, isSubmessage));
submessages.push(parseMessage(part, globalOffset + part.rawStart + 1, isSubmessage));
}
}
}

if (submessages.length > 1) {
return new CombinedMessage(globalOffset, globalOffset + messageString.length, submessages);
return new CombinedMessage(globalOffset, globalOffset + messageString.raw.length, submessages);
} else {
return submessages[0];
}

/**
* Decorate ICU Message of type `select`, `plural`, or `gender`
*/
function parseComplexMessage(part: XRegExp.MatchRecursiveValueNameMatch): ComplexMessage {
function parseComplexMessage(part: MatchRecursiveValueNameMatchStringLiteral): ComplexMessage {
const submessages = new Map<Literal, Message>();
const firstComma = part.value.indexOf(',');
var start = globalOffset + part.start + 1;
var end = globalOffset + part.start + firstComma + 1;

const argument = new Literal(part.value.substring(0, firstComma), start, end);

start = firstComma + 1;
const secondComma = part.value.indexOf(',', start);
end = secondComma;
({ start, end } = trim(part.value, start, end));
const complexType = new Literal(part.value.substring(start, end), globalOffset + part.start + start + 1, globalOffset + part.start + end + 1);
const firstComma = part.parsed.indexOf(',');

const argument = new Literal(
part.parsed.substring(0, firstComma),
globalOffset + part.rawStart + 1,
globalOffset + part.rawStart + part.positions[firstComma] + 1
);

let start = firstComma + 1;
const secondComma = part.parsed.indexOf(',', start);
let end = secondComma;
({ start, end } = trim(part.parsed, start, end));
const complexType = new Literal(
part.parsed.substring(start, end),
globalOffset + part.rawStart + part.positions[start] + 1,
globalOffset + part.rawStart + part.positions[end] + 1
);
start = secondComma + 1;
const bracketedValues = matchCurlyBrackets(part.value, l10nOptions);
const bracketedValues = matchCurlyBrackets(part, l10nOptions);
for (const innerPart of bracketedValues) {
if (innerPart.name === 'content') {
end = innerPart.start - 1;
({ start, end } = trim(part.value, start, end));
var submessagekey = new Literal(part.value.substring(start, end), globalOffset + part.start + start + 1, globalOffset + part.start + end + 1);
var message = parseMessage(innerPart.value, globalOffset + part.start + innerPart.start, false);
({ start, end } = trim(part.parsed, start, end));
let submessagekey = new Literal(
part.parsed.substring(start, end),
globalOffset + part.rawStart + part.positions[start] + 1,
globalOffset + part.rawStart + part.positions[end] + 1
);
let message = parseMessage(innerPart, globalOffset + part.rawStart + innerPart.rawStart, false);
submessages.set(submessagekey, message);
start = innerPart.end + 1;
}
Expand Down Expand Up @@ -260,18 +271,19 @@ interface ParseAndDecorateOptions {
quickfixes: CodeActions;
}

function matchCurlyBrackets(v: string, l10nOptions?: L10nYaml): XRegExp.MatchRecursiveValueNameMatch[] {
function matchCurlyBrackets(v: StringLiteral, l10nOptions?: L10nYaml): MatchRecursiveValueNameMatchStringLiteral[] {
const unescaped = l10nOptions?.['use-escaping'] ?? false
? getUnescapedRegions(v) :
[[0, v.length]];
? getUnescapedRegions(v.parsed) :
[[0, v.parsed.length]];

var values: XRegExp.MatchRecursiveValueNameMatch[] = [];
const values: MatchRecursiveValueNameMatchStringLiteral[] = [];
for (var region of unescaped) {
const newLocal = XRegExp.matchRecursive(v.substring(region[0], region[1]), '\\{', '\\}', 'g', {
const subLiteral = v.sub(region[0], region[1]);
const newLocal = XRegExp.matchRecursive(subLiteral.parsed, '\\{', '\\}', 'g', {
valueNames: ['outside', 'leftBracket', 'content', 'rightBracket'],
unbalanced: 'error'
});
values.push(...newLocal);
values.push(...newLocal.map(l => subLiteral.convertMatch(l)));
}
return values;
}
Expand Down Expand Up @@ -532,3 +544,80 @@ export class PlaceholderMetadata extends Message {
return [];
}
}

/**
 * Pairs the escaped source text of a string (`raw`) with its decoded form
 * (`parsed`) and a lookup table that translates indices in `parsed` into
 * offsets in `raw`.
 *
 * `positions[i]` is the offset in `raw` at which the i-th code unit of
 * `parsed` starts; the final entry is the offset just past the last one,
 * so `positions` has one more entry than the number of units scanned.
 */
class StringLiteral {
    /** Matches the `uXXXX` tail of a unicode escape (the part after the backslash). */
    private static readonly unicodeEscapeTail = /^u[A-Fa-f0-9]{4}/;

    constructor(
        public raw: string,
        public parsed: string,
        public positions: number[],
    ) {
    }

    /**
     * Scans `raw` and records where every logical character begins.
     *
     * A backslash followed by `u` and four hex digits counts as one character
     * six units wide; any other backslash followed by a character counts as one
     * character two units wide; everything else (including a lone trailing
     * backslash) is one unit wide.
     *
     * `parsed` is not inspected here; it is stored unchanged on the result.
     * NOTE(review): callers presumably pass the decoded value of `raw` so that
     * `positions` lines up with `parsed` — confirm at the call site.
     *
     * @param raw escaped text to scan
     * @param parsed decoded counterpart of `raw`
     * @returns a StringLiteral carrying the computed offset table
     */
    static build(raw: string, parsed: string): StringLiteral {
        const offsets: number[] = [];
        const total = raw.length;
        let cursor = 0;
        while (cursor < total) {
            offsets.push(cursor);
            let width = 1;
            const startsEscape = raw.charAt(cursor) === '\\' && cursor + 1 < total;
            if (startsEscape) {
                const tail = raw.substring(cursor + 1, cursor + 6);
                width = StringLiteral.unicodeEscapeTail.test(tail) ? 6 : 2;
            }
            cursor += width;
        }
        // Sentinel entry: offset just past the final character.
        offsets.push(cursor);
        return new StringLiteral(raw, parsed, offsets);
    }

    /**
     * Returns the entries of `positions` from `start` through `end` (inclusive),
     * shifted so that the first returned entry is zero.
     */
    private slicePositions(start: number, end: number): number[] {
        const base = this.positions[start];
        const window = this.positions.slice(start, end + 1);
        return window.map(absolute => absolute - base);
    }

    /**
     * Wraps an XRegExp recursive-match record into a literal that also carries
     * the matching slice of `raw` and a rebased offset table.
     *
     * `match.start` / `match.end` are used as indices into `positions`.
     *
     * @param match record whose `start`/`end` index into `parsed`
     * @returns the match enriched with raw offsets, raw text and positions
     */
    public convertMatch(match: XRegExp.MatchRecursiveValueNameMatch): MatchRecursiveValueNameMatchStringLiteral {
        const { name, start, end, value } = match;
        const rawFrom = this.positions[start];
        const rawTo = this.positions[end];
        const rawText = this.raw.substring(rawFrom, rawTo);
        const rebased = this.slicePositions(start, end);
        return new MatchRecursiveValueNameMatchStringLiteral(
            name,
            start,
            end,
            rawFrom,
            rawTo,
            rawText,
            value,
            rebased,
        );
    }

    /**
     * Extracts the portion between parsed indices `start` (inclusive) and
     * `end` (exclusive) as a new StringLiteral whose `raw`, `parsed` and
     * `positions` all describe just that portion.
     */
    public sub(start: number, end: number): StringLiteral {
        const rawFrom = this.positions[start];
        const rawTo = this.positions[end];
        const rawPiece = this.raw.substring(rawFrom, rawTo);
        const parsedPiece = this.parsed.substring(start, end);
        return new StringLiteral(rawPiece, parsedPiece, this.slicePositions(start, end));
    }
}

/**
 * A StringLiteral describing one segment reported by a recursive bracket
 * match. Besides the inherited `raw` / `parsed` / `positions`, it stores the
 * segment's name and its bounds expressed both as `start` / `end` and as
 * `rawStart` / `rawEnd`.
 */
class MatchRecursiveValueNameMatchStringLiteral extends StringLiteral {
    /** Name of the matched segment. */
    public name: string;
    /** Start bound of the segment. */
    public start: number;
    /** End bound of the segment. */
    public end: number;
    /** Start bound of the segment in raw-text offsets. */
    public rawStart: number;
    /** End bound of the segment in raw-text offsets. */
    public rawEnd: number;

    constructor(
        name: string,
        start: number,
        end: number,
        rawStart: number,
        rawEnd: number,
        raw: string,
        parsed: string,
        positions: number[],
    ) {
        super(raw, parsed, positions);
        this.name = name;
        this.start = start;
        this.end = end;
        this.rawStart = rawStart;
        this.rawEnd = rawEnd;
    }
}
13 changes: 13 additions & 0 deletions src/test/testarb.annotated
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
^^^^^^^^^[Information]:"The message with key "unescaped" does not have metadata defined."
"singlequote": "Your pa''ssword",
^^^^^^^^^^^[Information]:"The message with key "singlequote" does not have metadata defined."
"unbalanced": "Your\tpa\u007Bssword",
^^^^^^^^^^^^^^^^^^^^[Error]:"Error: Unbalanced left delimiter found in string at position 7"
"pageHomeInboxCount": "{count, plural, zero{I have {vehicle;;Type, select, sedn{Sedan} cabrolet{Solid roof cabriolet} tuck{16 wheel truck} other{Other}} no new messages} one{You have 1 new {counts} message} other{You have {count} new messages}}",
^^^^^[decoration]placeholder
^^^^^^^^^^^^^[decoration]placeholder
Expand Down Expand Up @@ -73,6 +75,17 @@
"vehicleType": {}
}
},
"commonVehicleTypeUnicode": "\t\u0050{vehicleType, select , sedan {\u0053edan} cabriolet{Solid\troof cabriolet{\u0076ehicleName}} truck{16 wheel truck} other{Other}}",
^^^^^^^^^^^[decoration]placeholder
^^^^^^^^^^^^^^^^[decoration]placeholder
^^^^^[decoration]select
^^^^^^^^^[decoration]select
^^^^^[decoration]select
^^^^^[decoration]select
^^^^^^^^^^^[Warning]:"Placeholder "vehicleType" not defined in the message metadata."
^^^^^^^^^^^^^^^^[Warning]:"Placeholder "vehicleName" not defined in the message metadata."
"@commonVehicleTypeUnicode": {
},
"pageHomeBalance": "Your balance at {am[ount} on {date2}",
^^^^^^^[decoration]placeholder
^^^^^[decoration]placeholder
Expand Down
4 changes: 4 additions & 0 deletions src/test/testarb.arb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"escaped": "Your pa'{'ssword",
"unescaped": "Your pa'ssword",
"singlequote": "Your pa''ssword",
"unbalanced": "Your\tpa\u007Bssword",
"pageHomeInboxCount": "{count, plural, zero{I have {vehicle;;Type, select, sedn{Sedan} cabrolet{Solid roof cabriolet} tuck{16 wheel truck} other{Other}} no new messages} one{You have 1 new {counts} message} other{You have {count} new messages}}",
"@pageHomeInboxCount": {
"description": "New messages count on the Home screen",
Expand All @@ -38,6 +39,9 @@
"vehicleType": {}
}
},
"commonVehicleTypeUnicode": "\t\u0050{vehicleType, select , sedan {\u0053edan} cabriolet{Solid\troof cabriolet{\u0076ehicleName}} truck{16 wheel truck} other{Other}}",
"@commonVehicleTypeUnicode": {
},
"pageHomeBalance": "Your balance at {am[ount} on {date2}",
"@pageHomeBalance": {
"placeholders": {
Expand Down
2 changes: 1 addition & 1 deletion src/test/testarb_2.annotated
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
^^^^^^^^^^^^^^^^[Error]:"Error: Unbalanced left delimiter found in string at position 8"
}

[Warning]:"Missing messages from template: unescaped, singlequote, pageHomeInboxCount, commonVehicleType, pageHomeBalance"
[Warning]:"Missing messages from template: unescaped, singlequote, pageHomeInboxCount, commonVehicleType, commonVehicleTypeUnicode, pageHomeBalance"
9 changes: 9 additions & 0 deletions src/test/testarb_3.annotated
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@
^^^^^^^^^^^^^^^^^[Information]:"The message with key "commonVehicleType" does not have metadata defined."
^^^^^^^^^^^[Warning]:"Placeholder "vehicleType" not defined in the message metadata."
^^^^^^^[Error]:"Unknown ICU messagetype "se?lect""
"commonVehicleTypeUnicode": "\t\u0050{vehicleType, select , sedan {\u0053edan} cabriolet{Solid\troof cabriolet{\u0076ehicleName}} truck{16 wheel truck} other{Other}}",
^^^^^^^^^^^[decoration]placeholder
^^^^^^^^^^^^^^^^[decoration]placeholder
^^^^^[decoration]select
^^^^^^^^^[decoration]select
^^^^^[decoration]select
^^^^^[decoration]select
^^^^^^^^^^^[Warning]:"Placeholder "vehicleType" not defined in the message metadata."
^^^^^^^^^^^^^^^^[Warning]:"Placeholder "vehicleName" not defined in the message metadata."
"pageHomeBalance": "Your balance at {am[ount} on {date2}"
^^^^^^^[decoration]placeholder
^^^^^[decoration]placeholder
Expand Down
1 change: 1 addition & 0 deletions src/test/testarb_3.arb
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@
"pageHomeInboxCount": "{count, plural, zero{I have {vehicle;;Type, select, sedn{Sedan} cabrolet{Solid roof cabriolet} tuck{16 wheel truck} other{Other}} no new messages} one{You have 1 new {counts} message} other{You have {count} new messages}}",
"pageHomeBirthday": "Today is {sex, sele{ct, male{his b{irthday} female{her birthday} other{their birthday}}.",
"commonVehicleType": "{vehicleType, se?lect , sedan {Sedan} cabriolet{Solid roof cabriolet} truck{16 wheel truck} other{Other}}",
"commonVehicleTypeUnicode": "\t\u0050{vehicleType, select , sedan {\u0053edan} cabriolet{Solid\troof cabriolet{\u0076ehicleName}} truck{16 wheel truck} other{Other}}",
"pageHomeBalance": "Your balance at {am[ount} on {date2}"
}

0 comments on commit 401d634

Please sign in to comment.