Skip to content

Commit

Permalink
WI #2632 Use correct default encoding for alphanumeric literals writt…
Browse files Browse the repository at this point in the history
…en using hex notation (#2633)

* WI #2632 Use correct default encoding for alphanumeric literals written using hex notation

* WI #2632 Use CODEPAGE option to get encoding for alphanumeric literals
  • Loading branch information
fm-117 authored Jul 9, 2024
1 parent f568ebe commit 63cb8f7
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 47 deletions.
3 changes: 2 additions & 1 deletion TypeCobol.LanguageServer/Workspace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ internal void BindFileCompilerSourceTextDocument(DocumentContext docContext, str
StopDocumentBackgroundCompilation(docContext);
CompilationProject compilationProject = docContext.Project.Project;
string fileName = Path.GetFileName(docContext.Uri.LocalPath);
ITextDocument initialTextDocumentLines = new ReadOnlyTextDocument(fileName, Configuration.Format.Encoding,
var encodingForAlphanumericLiterals = compilationProject.CompilationOptions.GetEncodingForAlphanumericLiterals();
ITextDocument initialTextDocumentLines = new ReadOnlyTextDocument(fileName, encodingForAlphanumericLiterals,
Configuration.Format.ColumnsLayout, docContext.IsCopy, sourceText);
FileCompiler fileCompiler = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Line 46[24,27] <27, Error, Syntax> - Syntax error : mismatched input 'Var1' expe
- OutputDeviceName = SYSOUT
- WithNoAdvancing

[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,28+:X'40']<HexadecimalAlphanumericLiteral>(',Y,Y){@}
[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,28+:X'40']<HexadecimalAlphanumericLiteral>(',Y,Y){ }
- variables = X'40'

[[DisplayStatement]] [12,18:display]<DISPLAY> --> [24,33:HIGH-VALUE]<HIGH_VALUE>
Expand Down
26 changes: 15 additions & 11 deletions TypeCobol.Test/Parser/FileFormat/TestCobolFile.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using TypeCobol.Compiler;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Text;
using TypeCobol.Test.Parser.Scanner;

namespace TypeCobol.Test.Parser.FileFormat
{
static class TestCobolFile {
static class TestCobolFile
{
// Encoding passed to every ReadOnlyTextDocument created by these tests.
// IBM CCSID 1147 (EBCDIC France-Euro) is used when EUROINFO_RULES is defined,
// IBM CCSID 1140 (EBCDIC US-Canada-Euro) otherwise; these are the same values as
// the default CODEPAGE compiler option set in IBMCompilerOptionStatus.
private static readonly Encoding _EncodingForAlphanumericLiterals =
#if EUROINFO_RULES
IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147);
#else
IBMCodePages.GetDotNetEncodingFromIBMCCSID(1140);
#endif
public static readonly string SampleFolder = "Parser" + Path.DirectorySeparatorChar + "FileFormat" + Path.DirectorySeparatorChar + "Samples";
public static void Check_EBCDICCobolFile()
{
Expand All @@ -27,7 +31,7 @@ public static void Check_EBCDICCobolFile()
if (fileProvider.TryGetFile("EbcdicRefFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormat.TXT", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormat.TXT", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -84,7 +88,7 @@ public static void Check_EBCDICCobolFileWithUnsupportedChar()
try
{
// Load the CobolFile in a TextDocument;
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormatWithBadChars.TXT", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("EbcdicRefFormatWithBadChars.TXT", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
}
catch(Exception e)
{
Expand Down Expand Up @@ -117,7 +121,7 @@ public static void Check_ASCIICobolFile_ReferenceFormat()
if (fileProvider.TryGetFile("AsciiRefFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -172,7 +176,7 @@ public static void Check_ASCIICobolFile_LinuxReferenceFormat()
if (fileProvider.TryGetFile("AsciiLinuxFormat.14", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiLinuxFormat.14", docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiLinuxFormat.14", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -227,7 +231,7 @@ public static void Check_ASCIICobolFile_FreeTextFormat()
if (fileProvider.TryGetFile("AsciiFreeFormat", out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiFreeFormat.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("AsciiFreeFormat.cpy", _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down Expand Up @@ -307,7 +311,7 @@ public static void Check_UTF8File()
if (fileProvider.TryGetFile(filename, out cobolFile))
{
// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument(filename, docFormat.Encoding, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument(filename, _EncodingForAlphanumericLiterals, docFormat.ColumnsLayout, false, cobolFile.ReadChars());
// Send all text lines in one batch to the test observer
textDocument.TextChanged += textSourceListener.OnTextChanged;
textDocument.StartSendingChangeEvents();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Line 1 --
[1,9+:X'C085D0']<HexadecimalAlphanumericLiteral>(',Y,Y){{e}}

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Line 1 --
[1,9+:X'C085D0']<HexadecimalAlphanumericLiteral>(',Y,Y){éeè}

6 changes: 2 additions & 4 deletions TypeCobol.Test/Parser/Scanner/ScannerUtils.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text;
using TypeCobol.Compiler.Concurrency;
using TypeCobol.Compiler.Diagnostics;
using TypeCobol.Compiler.Directives;
Expand Down Expand Up @@ -44,8 +42,8 @@ public TextChangeMap(TextChange change, ColumnsLayout columnsLayout)

internal static class ScannerUtils
{
public static TextSourceInfo TextSourceInfo = new TextSourceInfo("test", IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147), ColumnsLayout.FreeTextFormat, false);//Assuming a program here, not a copy.
// CompilerOptions must stay declared before TextSourceInfo: static field initializers
// run in textual order and the TextSourceInfo initializer below reads CompilerOptions
// to get the encoding for alphanumeric literals.
public static TypeCobolOptions CompilerOptions = new TypeCobolOptions();
public static TextSourceInfo TextSourceInfo = new TextSourceInfo("test", CompilerOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, false);//Assuming a program here, not a copy.
public static List<RemarksDirective.TextNameVariation> CopyTextNameVariations = new List<RemarksDirective.TextNameVariation>();

public static string ScanLine(string testLine)
Expand Down
12 changes: 10 additions & 2 deletions TypeCobol.Test/Parser/Scanner/TestTokenTypes.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using TypeCobol.Compiler.Scanner;
using TypeCobol.Compiler.Scanner;

namespace TypeCobol.Test.Parser.Scanner
{
Expand Down Expand Up @@ -139,6 +138,15 @@ public static void CheckAlphanumericLiterals()
result = ScannerUtils.ScanLines(testLines);
ScannerUtils.CheckWithResultFile(result, testName);

#if EUROINFO_RULES
testName = "AlphanumericLiterals4-1147";
#else
testName = "AlphanumericLiterals4-1140";
#endif
testLines = new string[] { "X'C085D0'" }; // 'éeè' in EBCDIC 1147, '{e}' in EBCDIC 1140
result = ScannerUtils.ScanLines(testLines);
ScannerUtils.CheckWithResultFile(result, testName);

testName = "UTF8Literals";
testLines = new string[] {
@"U""This text does not include any escaped char""",
Expand Down
11 changes: 3 additions & 8 deletions TypeCobol.Test/Parser/TestParserRobustness.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using TypeCobol.Compiler;
using TypeCobol.Compiler;
using TypeCobol.Compiler.CodeElements;
using TypeCobol.Compiler.Diagnostics;
using TypeCobol.Compiler.Directives;
Expand All @@ -22,11 +17,11 @@ public static void CheckProgramCodeElements()
private static CodeElement[] ParseCodeElements(string cobolString, bool asPartOfACopy, out Diagnostic[] parserDiagnostics)
{
// Load text document from string
var textDocument = new ReadOnlyTextDocument("test string", Encoding.Default, ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
var typeCobolOptions = new TypeCobolOptions();
var textDocument = new ReadOnlyTextDocument("test string", typeCobolOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
textDocument.LoadChars(cobolString);

// Create a compilation project and a compiler for this document
var typeCobolOptions = new TypeCobolOptions();
var project = new CompilationProject("test project", ".", new[] { ".cbl", ".cpy" },
DocumentFormat.FreeTextFormat, typeCobolOptions, null);
var compiler = new FileCompiler(textDocument, project.SourceFileProvider, project, typeCobolOptions, project);
Expand Down
15 changes: 8 additions & 7 deletions TypeCobol.Test/Parser/Text/TestReadOnlyTextDocument.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using TypeCobol.Compiler;
using TypeCobol.Compiler.Directives;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Text;

Expand Down Expand Up @@ -49,7 +47,8 @@ public static void Check_DocumentFormatExceptions()

public static void Check_EmptyDocument()
{
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("empty", Encoding.Default, ColumnsLayout.CobolReferenceFormat, false, String.Empty);
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("empty", options.GetEncodingForAlphanumericLiterals(), ColumnsLayout.CobolReferenceFormat, false, String.Empty);

Exception resultException = null;
try
Expand Down Expand Up @@ -176,7 +175,8 @@ public static void Check_ReferenceFormatDocument()
}

// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCOUT.cpy", options.GetEncodingForAlphanumericLiterals(), docFormat.ColumnsLayout, true, cobolFile.ReadChars());

if(textDocument.CharAt(0) != '0')
{
Expand Down Expand Up @@ -269,7 +269,8 @@ public static void Check_FreeFormatDocument()
}

// Load the CobolFile in a TextDocument
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCINP free format.cpy", docFormat.Encoding, docFormat.ColumnsLayout, true, cobolFile.ReadChars());
var options = new TypeCobolOptions();
ReadOnlyTextDocument textDocument = new ReadOnlyTextDocument("MSVCINP free format.cpy", options.GetEncodingForAlphanumericLiterals(), docFormat.ColumnsLayout, true, cobolFile.ReadChars());

if (textDocument.CharAt(0) != '/')
{
Expand Down
10 changes: 3 additions & 7 deletions TypeCobol.Test/Utils/ParserUtils.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text;
using Antlr4.Runtime;
using TypeCobol.Compiler;
using TypeCobol.Compiler.AntlrUtils;
Expand Down Expand Up @@ -60,10 +56,10 @@ public static CompilationUnit ParseCobolFile(string textName, string folder, boo
public static CompilationUnit ParseCobolString(string cobolString, bool asPartOfACopy)
{
//Prepare
var textDocument = new ReadOnlyTextDocument("Empty doc", Encoding.Default, ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
var typeCobolOptions = new TypeCobolOptions();
var textDocument = new ReadOnlyTextDocument("Empty doc", typeCobolOptions.GetEncodingForAlphanumericLiterals(), ColumnsLayout.FreeTextFormat, asPartOfACopy, string.Empty);
textDocument.LoadChars(cobolString);

var typeCobolOptions = new TypeCobolOptions();
var project = new CompilationProject("Empty project", ".", new[] { ".cbl", ".cpy" },
DocumentFormat.FreeTextFormat, typeCobolOptions, null);

Expand Down
36 changes: 35 additions & 1 deletion TypeCobol/Compiler/Directives/IBMCompilerOptions.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#nullable enable

using System.Diagnostics.CodeAnalysis;
using System.Text;
using TypeCobol.Compiler.File;

namespace TypeCobol.Compiler.Directives
{
Expand Down Expand Up @@ -137,7 +139,14 @@ internal IBMCompilerOptionStatus(IBMCompilerOptionName name)
case IBMCompilerOptionName.BLOCK0: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.BUFSIZE: IsActivated = true; Value = "4096"; break;
case IBMCompilerOptionName.CICS: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.CODEPAGE: IsActivated = true; Value = "1140"; break;
case IBMCompilerOptionName.CODEPAGE:
// Default CCSID for the CODEPAGE option; the value depends on the EUROINFO_RULES build symbol.
IsActivated = true;
#if EUROINFO_RULES
Value = "1147"; //IBM EBCDIC (France-Euro)
#else
Value = "1140"; //IBM EBCDIC (US-Canada-Euro)
#endif
break;
case IBMCompilerOptionName.COMPILE: IsActivated = false; Value = "S"; break;
case IBMCompilerOptionName.COPYLOC: IsActivated = false; Value = null; break;
case IBMCompilerOptionName.CURRENCY: IsActivated = false; Value = null; break;
Expand Down Expand Up @@ -1015,4 +1024,29 @@ public enum IBMCompilerOptionName
/* If you compile using ZWB, the compiler removes the sign from a signed zoned decimal (DISPLAY) field before comparing this field to an alphanumeric elementary field during execution. */
ZWB
}

/// <summary>
/// Extension methods computing values derived from <see cref="IBMCompilerOptions"/>.
/// </summary>
public static class IBMCompilerOptionsExtensions
{
    /// <summary>
    /// Gets the .NET encoding corresponding to the CCSID given by the CODEPAGE compiler option.
    ///
    /// The CODEPAGE compiler option specifies:
    ///   • The encoding of literals in the source program
    ///   • The encoding for data items described with USAGE DISPLAY or DISPLAY-1
    ///   • The encoding for XML parsing and XML generation
    ///
    /// The encoding of national and UTF-8 data is not affected by the CODEPAGE compiler option. The encoding
    /// for national literals and data items described with usage NATIONAL is UTF-16BE (big endian), CCSID
    /// 1200. A reference to UTF-16 in this document is a reference to UTF-16BE. The encoding for UTF-8 literals
    /// and data items described with usage UTF-8 is UTF-8, CCSID 1208.
    /// </summary>
    /// <param name="ibmCompilerOptions">Compiler options holding the CODEPAGE option to read.</param>
    /// <returns>
    /// The encoding returned by <c>IBMCodePages.GetDotNetEncodingFromIBMCCSID</c> for the CODEPAGE value.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="ibmCompilerOptions"/> is null.</exception>
    /// <exception cref="ArgumentException">The CODEPAGE option value is not an integer.</exception>
    /// <remarks>
    /// NOTE(review): behavior for an integer that is not a supported CCSID is decided by
    /// <c>IBMCodePages.GetDotNetEncodingFromIBMCCSID</c>, which is not visible here - confirm.
    /// </remarks>
    public static Encoding GetEncodingForAlphanumericLiterals(this IBMCompilerOptions ibmCompilerOptions)
    {
        // An extension method can be invoked on a null receiver: fail with an explicit
        // argument error instead of a NullReferenceException on the member access below.
        if (ibmCompilerOptions is null)
        {
            throw new ArgumentNullException(nameof(ibmCompilerOptions));
        }

        string? codePageOption = ibmCompilerOptions.CODEPAGE.Value;

        // The option value is machine-readable data: parse it with the invariant culture
        // so the result does not depend on the current thread culture.
        bool isInteger = int.TryParse(
            codePageOption,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int codePage);
        if (isInteger)
        {
            return IBMCodePages.GetDotNetEncodingFromIBMCCSID(codePage);
        }

        throw new ArgumentException($"Invalid CODEPAGE compiler option: '{codePageOption}'.");
    }
}
}
6 changes: 3 additions & 3 deletions TypeCobol/Compiler/FileCompiler.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.Diagnostics;
using System.Diagnostics;
using JetBrains.Annotations;
using TypeCobol.Compiler.CodeModel;
using TypeCobol.Compiler.Directives;
Expand Down Expand Up @@ -165,7 +164,8 @@ private FileCompiler(Tuple<string, string, ColumnsLayout, bool> fileInfo, ITextD
{
// 2.a Load it in a new text document in memory
Debug.Assert(sourceFile != null);
TextDocument = new ReadOnlyTextDocument(sourceFile.Name, sourceFile.Encoding, fileInfo.Item3, fileInfo.Item4, sourceFile.ReadChars());
var encodingForAlphanumericLiterals = compilerOptions.GetEncodingForAlphanumericLiterals();
TextDocument = new ReadOnlyTextDocument(sourceFile.Name, encodingForAlphanumericLiterals, fileInfo.Item3, fileInfo.Item4, sourceFile.ReadChars());
}
else
{
Expand Down
3 changes: 1 addition & 2 deletions TypeCobol/Compiler/Text/CobolTextLine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

using TypeCobol.Compiler.Concurrency;
using TypeCobol.Compiler.Directives;
using TypeCobol.Compiler.File;
using TypeCobol.Compiler.Scanner;

namespace TypeCobol.Compiler.Text
Expand Down Expand Up @@ -193,7 +192,7 @@ private static IList<Tuple<string, bool> > Split(string line, int max, int min,
}
}
TokensLine tempTokensLine = TokensLine.CreateVirtualLineForInsertedToken(0, line, layout);
tempTokensLine.InitializeScanState(new MultilineScanState(IBMCodePages.GetDotNetEncodingFromIBMCCSID(1147)));
tempTokensLine.InitializeScanState(new MultilineScanState(scannerOptions.GetEncodingForAlphanumericLiterals()));

Scanner.Scanner scanner = new Scanner.Scanner(line, 0, line.Length - 1, tempTokensLine, scannerOptions, false);
Token? t;
Expand Down

0 comments on commit 63cb8f7

Please sign in to comment.