From 048b0740a87f4e89f34c8dc3725fac5e360abe05 Mon Sep 17 00:00:00 2001 From: RobertL Date: Sat, 21 May 2022 21:10:44 +0800 Subject: [PATCH 1/3] Add utf16_to_utf8 utf16_to_utf8 used in stringdecode.decode --- string_decode.lua | 154 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/string_decode.lua b/string_decode.lua index b65e09d..36448ac 100644 --- a/string_decode.lua +++ b/string_decode.lua @@ -29,8 +29,162 @@ local function utf8_to_32(utf8str) return res end +-- From https://github.com/robertlzj/lua_utf16_to_utf8 +local utf16_to_utf8 do + local private={} + + private.littleendian = true --current endiant + + function private.band(v1,v2) + -- lua 5.2 + return bit32.band(v1,v2) + -- lua 5.3 + -- return v1 & v2 + end + + function private.rshift(v1,shift) + -- lua 5.2 + return bit32.rshift(v1,shift) + -- lua 5.3 + -- return v1 >> shift + end + + function private.lshift(v1,shift) + -- lua 5.2 + return bit32.lshift(v1,shift) + -- lua 5.3 + -- return v1 << shift + end + + function private.unpack16(buf,pos,littleendian) + local c1,c2 = string.byte(buf,pos,pos+1) + if c1 == nil then c1 = 0 end + if c2 == nil then c2 = 0 end + + if littleendian == private.littleendian then + return private.lshift(c1,8) + c2 + else + return private.lshift(c2,8) + c1 + end + end + + function private.checkbom_le(s) + if string.len(s)<2 then + return false + end + local c1,c2 = string.byte(s,1,2) + if c1 ~= 0xFF then return false end + if c2 ~= 0xFE then return false end + return true + end + + function private.checkbom_be(s) + if string.len(s)<2 then + return false + end + local c1,c2 = string.byte(s,1,2) + if c1 ~= 0xFE then return false end + if c2 ~= 0xFF then return false end + return true + end + + function private.utf16_to_utf8(data,little) + if little == nil then little = true end + -- bom check + local bom = 0 + if private.checkbom_le(data) then little = true; bom = 1 end + if private.checkbom_be(data) then little = false; bom = 1 end + 
-- bom extract + if bom == 1 then + data = string.sub(data,3) + end + -- convert + if little then + return private.convert(data , private.utf16le_dec , private.utf8_enc) + else + return private.convert(data , private.utf16be_dec , private.utf8_enc) + end + end + + function private.convert(buf,decoder,encoder) + local out = {} + local cp,len,pos + pos = 1 + len = #buf + while pos= 0xD800) and (cp <= 0xDFFF) then + local high = private.lshift( cp - 0xD800,10 ) + cp = private.unpack16(buf,pos) + pos = pos + 2 --uchar( 2byte) + cp = 0x10000 + high + cp - 0xDC00 + end + return pos, cp + end + + function private.utf16be_dec(buf,pos) + local cp = private.unpack16(buf,pos) + pos = pos + 2 --uchar( 2byte) + if (cp >= 0xD800)and(cp <= 0xDFFF) then + local high = private.lshift( cp - 0xD800,10 ) + cp = private.unpack16(buf,pos) + pos = pos + 2 --uchar( 2byte) + cp = 0x10000 + high + cp - 0xDC00 + end + return pos, cp + end + + function private.utf8_enc(cp) + local shift,mask + if cp <= 0x7F then + return string.char(cp) + elseif cp <= 0x7FF then -- 2byte = 0xC0,xxxx + shift = 6 + mask = 0xC0 + elseif cp <= 0xFFFF then -- 3bytr = 0xE0,xxxx,xxxx + shift = 12 + mask = 0xE0 + elseif cp <= 0x10FFFF then -- 4byte = 0xF0,xxxx,xxxx,xxxx + shift = 18 + mask = 0xF0 + else + return nil + end + + local ss = "" + local cc + cc = private.rshift(cp,shift) + cc = private.band(cc,0x3F) + ss = string.char(mask + cc) + shift = shift - 6 + while shift >= 0 do + cc = private.rshift(cp,shift) + cc = private.band(cc,0x3F) + ss = ss..string.char(0x80 + cc) + shift = shift - 6 + end + + return ss + end + + utf16_to_utf8=private.utf16_to_utf8 +end--utf16_to_utf8 + function stringdecode.decode(str, encoding) local enc = encoding and encoding:lower() or "ascii" + + if enc=='utf-16le' then + str=utf16_to_utf8(str) + enc='utf-8' + end if enc == "ascii" then return str From 7968210835303ee2d2d1370b441540ebac23500b Mon Sep 17 00:00:00 2001 From: RobertL Date: Sat, 21 May 2022 21:31:09 +0800 Subject: [PATCH 
2/3] add usage example *document --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index a791003..861846d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,28 @@ Further reading: The second command is only required if you need support for `process: zlib`. +## Example + +```LUA +require'kaitaistruct' +assert(KaitaiStruct and KaitaiStream) +-- following parsing library contains these global variables too + +require'windows_lnk_file' +-- Lua parsing library of target source files (.ksy) +-- example from [File Format Gallery for Kaitai Struct](https://formats.kaitai.io/) +assert(WindowsLnkFile and KaitaiStruct and KaitaiStream) + +local file_path=[[C:\test.lnk]] +local file_handle=io.open(file_path,'rb') +-- must not read / seek `file_handle` manually, use internal +local windowsLnkFile=WindowsLnkFile(KaitaiStream(file_handle)) + +print(windowsLnkFile.work_dir.str)--C:\ +``` + + + ## Licensing Copyright 2017-2020 Kaitai Project: MIT license From 714c8f06e06cf732488d330b573e96084a280ee3 Mon Sep 17 00:00:00 2001 From: RobertL Date: Sat, 21 May 2022 23:56:28 +0800 Subject: [PATCH 3/3] file structure, fix example usage +close() kaitaistruct\ .class .string_stream .. 
--- README.md | 4 ++++ kaitaistruct.lua | 4 ++-- string_stream.lua | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 861846d..4e28250 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,10 @@ local file_handle=io.open(file_path,'rb') local windowsLnkFile=WindowsLnkFile(KaitaiStream(file_handle)) print(windowsLnkFile.work_dir.str)--C:\ + +file_handle:close() +--or, if the KaitaiStream was kept in a variable named `file_string_stream`: +-- file_string_stream:close() ``` diff --git a/kaitaistruct.lua b/kaitaistruct.lua index 28c1859..2f32c8d 100644 --- a/kaitaistruct.lua +++ b/kaitaistruct.lua @@ -1,5 +1,5 @@ -local class = require("class") -local stringstream = require("string_stream") +local class = require("kaitaistruct.class") +local stringstream = require("kaitaistruct.string_stream") KaitaiStruct = class.class() diff --git a/string_stream.lua b/string_stream.lua index 2f4e661..597a1fd 100644 --- a/string_stream.lua +++ b/string_stream.lua @@ -3,7 +3,7 @@ -- Inspired by https://gist.github.com/MikuAuahDark/e6428ac49248dd436f67c6c64fcec604 -- -local class = require("class") +local class = require("kaitaistruct.class") local StringStream = class.class()