Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utf16 to utf8, Add usage example #16

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@ Further reading:

The second command is only required if you need support for `process: zlib`.

## Example

```LUA
-- Load the runtime; this example relies on it defining the globals
-- KaitaiStruct and KaitaiStream.
require'kaitaistruct'
assert(KaitaiStruct and KaitaiStream)

-- Load the Lua parser generated from the target .ksy file.
-- This one is an example from the
-- [File Format Gallery for Kaitai Struct](https://formats.kaitai.io/)
require'windows_lnk_file'
assert(WindowsLnkFile and KaitaiStruct and KaitaiStream)

local file_path=[[C:\test.lnk]]
-- assert() turns a failed open (nil, message) into an immediate, readable error
local file_handle=assert(io.open(file_path,'rb'))
-- Once the handle is wrapped, do not read from or seek `file_handle` yourself;
-- let the stream do all the I/O.
local windowsLnkFile=WindowsLnkFile(KaitaiStream(file_handle))

print(windowsLnkFile.work_dir.str)--C:\

-- Close the handle that was opened above when parsing is finished.
file_handle:close()
```



## Licensing

Copyright 2017-2020 Kaitai Project: MIT license
Expand Down
4 changes: 2 additions & 2 deletions kaitaistruct.lua
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
local class = require("class")
local stringstream = require("string_stream")
local class = require("kaitaistruct.class")
local stringstream = require("kaitaistruct.string_stream")

KaitaiStruct = class.class()

Expand Down
154 changes: 154 additions & 0 deletions string_decode.lua
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,162 @@ local function utf8_to_32(utf8str)
return res
end

-- From https://github.com/robertlzj/lua_utf16_to_utf8
--- Convert a UTF-16 encoded byte string to UTF-8.
-- Exposed as the local function `utf16_to_utf8(data, little)`:
--   data   (string)  raw UTF-16 bytes, optionally starting with a BOM
--   little (boolean) byte order to assume when there is no BOM;
--                    defaults to true (little-endian)
-- Returns the UTF-8 encoded string. Malformed input (an unpaired surrogate
-- or a dangling trailing byte) is replaced by U+FFFD instead of producing a
-- wrong code point.
local utf16_to_utf8 do
  local private = {}

  local floor = math.floor
  local str_byte, str_char = string.byte, string.char
  -- bit32 only exists on Lua 5.2 (and 5.3 builds with compatibility enabled);
  -- rawget avoids tripping "strict" global checkers when it is absent.
  local bit32 = rawget(_G, "bit32")

  -- Code point emitted in place of malformed UTF-16 (REPLACEMENT CHARACTER).
  local REPLACEMENT = 0xFFFD

  -- Byte order assumed by unpack16 when no explicit flag is passed.
  private.littleendian = true

  --- Bitwise AND of two non-negative integers.
  -- Uses bit32 when available, otherwise a portable arithmetic loop.
  function private.band(v1, v2)
    if bit32 then
      return bit32.band(v1, v2)
    end
    local result, weight = 0, 1
    while v1 > 0 and v2 > 0 do
      if v1 % 2 == 1 and v2 % 2 == 1 then
        result = result + weight
      end
      v1, v2 = floor(v1 / 2), floor(v2 / 2)
      weight = weight * 2
    end
    return result
  end

  --- Logical right shift of a non-negative integer by `shift` bits.
  function private.rshift(v1, shift)
    if bit32 then
      return bit32.rshift(v1, shift)
    end
    return floor(v1 / 2 ^ shift)
  end

  --- Left shift of a non-negative integer by `shift` bits.
  -- Only small values are shifted in this module, so the 32-bit wrap-around
  -- of bit32.lshift and the unbounded arithmetic fallback agree.
  function private.lshift(v1, shift)
    if bit32 then
      return bit32.lshift(v1, shift)
    end
    return floor(v1 * 2 ^ shift)
  end

  --- Read one 16-bit code unit from `buf` at byte offset `pos`.
  -- littleendian: true = low byte first, false = high byte first,
  -- nil = use private.littleendian. Bytes past the end of `buf` read as 0.
  function private.unpack16(buf, pos, littleendian)
    if littleendian == nil then
      littleendian = private.littleendian
    end
    local c1, c2 = str_byte(buf, pos, pos + 1)
    c1 = c1 or 0
    c2 = c2 or 0
    if littleendian then
      return private.lshift(c2, 8) + c1
    end
    return private.lshift(c1, 8) + c2
  end

  --- True when `s` starts with the little-endian BOM (FF FE).
  function private.checkbom_le(s)
    local c1, c2 = str_byte(s, 1, 2)
    return c1 == 0xFF and c2 == 0xFE
  end

  --- True when `s` starts with the big-endian BOM (FE FF).
  function private.checkbom_be(s)
    local c1, c2 = str_byte(s, 1, 2)
    return c1 == 0xFE and c2 == 0xFF
  end

  --- Entry point: decode UTF-16 `data` and re-encode it as UTF-8.
  -- A leading BOM, when present, overrides `little` and is stripped.
  function private.utf16_to_utf8(data, little)
    if little == nil then little = true end
    local has_bom = false
    if private.checkbom_le(data) then little = true; has_bom = true end
    if private.checkbom_be(data) then little = false; has_bom = true end
    if has_bom then
      data = string.sub(data, 3)
    end
    local decoder = little and private.utf16le_dec or private.utf16be_dec
    return private.convert(data, decoder, private.utf8_enc)
  end

  --- Run `decoder` over `buf` and join the `encoder` output.
  -- decoder(buf, pos) must return (next_pos, code_point);
  -- encoder(code_point) returns a string, or nil for an unencodable value,
  -- which is skipped.
  function private.convert(buf, decoder, encoder)
    local out = {}
    local len = #buf
    local pos = 1
    local cp
    while pos <= len do
      pos, cp = decoder(buf, pos)
      local encoded = encoder(cp)
      if encoded then
        out[#out + 1] = encoded
      end
    end
    return table.concat(out)
  end

  --- Decode one code point starting at `pos` with the given byte order.
  -- Returns (next_pos, code_point). A surrogate pair is combined only when a
  -- high surrogate (D800..DBFF) is directly followed by a complete low
  -- surrogate (DC00..DFFF); anything else malformed yields U+FFFD and
  -- consumes just the offending unit.
  function private.utf16_dec(buf, pos, littleendian)
    local len = #buf
    if pos + 1 > len then
      -- dangling single byte at the end of the input
      return pos + 2, REPLACEMENT
    end
    local cp = private.unpack16(buf, pos, littleendian)
    pos = pos + 2
    if cp < 0xD800 or cp > 0xDFFF then
      return pos, cp
    end
    if cp <= 0xDBFF and pos + 1 <= len then
      local low = private.unpack16(buf, pos, littleendian)
      if low >= 0xDC00 and low <= 0xDFFF then
        local high = private.lshift(cp - 0xD800, 10)
        return pos + 2, 0x10000 + high + (low - 0xDC00)
      end
    end
    return pos, REPLACEMENT
  end

  --- Little-endian UTF-16 decoder: returns (next_pos, code_point).
  function private.utf16le_dec(buf, pos)
    return private.utf16_dec(buf, pos, true)
  end

  --- Big-endian UTF-16 decoder: returns (next_pos, code_point).
  function private.utf16be_dec(buf, pos)
    return private.utf16_dec(buf, pos, false)
  end

  --- Encode one code point as a UTF-8 byte string.
  -- Returns nil when `cp` is outside 0..0x10FFFF.
  function private.utf8_enc(cp)
    if cp < 0 then
      return nil
    elseif cp <= 0x7F then
      return str_char(cp)
    end

    local shift, lead
    if cp <= 0x7FF then          -- 2 bytes: 110xxxxx 10xxxxxx
      shift, lead = 6, 0xC0
    elseif cp <= 0xFFFF then     -- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      shift, lead = 12, 0xE0
    elseif cp <= 0x10FFFF then   -- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      shift, lead = 18, 0xF0
    else
      return nil
    end

    -- lead byte carries the top bits, each continuation byte the next 6 bits
    local bytes = { str_char(lead + private.band(private.rshift(cp, shift), 0x3F)) }
    for s = shift - 6, 0, -6 do
      bytes[#bytes + 1] = str_char(0x80 + private.band(private.rshift(cp, s), 0x3F))
    end
    return table.concat(bytes)
  end

  utf16_to_utf8 = private.utf16_to_utf8
end--utf16_to_utf8

function stringdecode.decode(str, encoding)
local enc = encoding and encoding:lower() or "ascii"

if enc=='utf-16le' then
str=utf16_to_utf8(str)
enc='utf-8'
end

if enc == "ascii" then
return str
Expand Down
2 changes: 1 addition & 1 deletion string_stream.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
-- Inspired by https://gist.github.com/MikuAuahDark/e6428ac49248dd436f67c6c64fcec604
--

local class = require("class")
local class = require("kaitaistruct.class")

local StringStream = class.class()

Expand Down