-
Notifications
You must be signed in to change notification settings - Fork 1
/
html.lua
145 lines (135 loc) · 3.77 KB
/
html.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
-- html.lua
-- Copyright David Hollander 2011
-- MIT License, see LICENSE
-- Short local aliases for the table-library functions used in this file.
local ti, tc, tr = table.insert, table.concat, table.remove
-- Module table: the public functions are attached to it and it is returned
-- at the end of the file.
local H={}
---Parses the attribute portion of a tag into a name -> value table.
-- Works in three passes; each pass blanks out what it consumed so that a
-- later pass cannot read it again:
--   1. name="value" / name='value'  (quoted value, ends at the same quote)
--   2. name=value                   (unquoted value, ends at whitespace)
--   3. bare names                   (boolean attributes, stored as name -> name)
-- Attribute names may contain letters, digits, '_', ':', '.' and '-', so
-- names such as data-id, http-equiv and xml:lang are kept whole. Whitespace
-- around '=' is tolerated. Values are stored exactly as written; entities
-- are not decoded.
-- @param s attribute text (what follows the tag name inside the tag)
-- @return table mapping attribute names to string values
local function _dec_attr(s)
  local attr = {}
  local name = "([%w_:%.%-]+)"
  -- Consumed text is replaced by a space, not '', so the tokens on either
  -- side of it can never fuse into one.
  s = s:gsub(name .. "%s*=%s*([\"'])(.-)%2", function (k, _, v) attr[k] = v return ' ' end)
  s = s:gsub(name .. "%s*=%s*([^%s\"'=<>`]+)", function (k, v) attr[k] = v return ' ' end)
  s:gsub(name, function (k) attr[k] = k end)
  return attr
end
-- Void elements: they have no content and no closing tag. The set covers the
-- HTML 4 list plus the void elements added by HTML5, so that e.g. <param>,
-- <source> or <wbr> are not treated as containers for whatever follows them.
local _empty = {
  area=true, base=true, basefont=true, br=true, col=true, embed=true,
  frame=true, hr=true, img=true, input=true, isindex=true, keygen=true,
  link=true, meta=true, param=true, source=true, track=true, wbr=true,
}
-- Elements that should not nest inside themselves. Not referenced by any code
-- visible in this file; kept as-is.
local no_nest = {table=true,tr=true,td=true,html=true,body=true,head=true,title=true,script=true}
-- Expected parent element for a given element name (element -> parent).
local _parents = {
  body='html', head='html', title='head', meta='head', tr='table', td='tr',
}
---Nonstrict html parser. Always sticks elements in the DOM without raising
-- errors. Inspired by Roberto's strict XML parser from LuaUsers wiki.
--
-- The result is a tree of plain tables. The root is a table with no `e` or
-- `up` field whose array part holds the top-level nodes. A text node is a
-- string (whitespace-only text is discarded). An element node is a table
-- with fields `e` (tag name as written), `a` (attribute table from
-- _dec_attr) and `up` (parent table), and with its children in the array part.
--
-- Tag handling:
--   * closing tag: closes the nearest open element of that name; a closing
--     tag with no matching open element is ignored
--   * self-closing tag or void element: inserted, not descended into
--   * opening tag: if _parents names a required parent and the current
--     element is not it, ascend to that parent (or the root) first, then
--     insert and descend
-- @param html string of markup
-- @return root table of the parsed tree
function H.decode(html)
  local dom = {}
  local node = dom   -- element currently receiving children
  local i = 1        -- index of the first character not yet consumed
  while true do
    local h, k, close, e, attr, empty = html:find("<(/?)([%w:]+)(.-)(/?)>", i)
    -- Text up to the next tag, or up to the end of input when no tag is
    -- left, so that trailing text is not lost.
    local text = html:sub(i, (h or #html + 1) - 1)
    if not text:match '^%s*$' then ti(node, text) end
    if not h then break end
    if close == '/' and e ~= 'br' then
      -- Look for the open element this tag closes before moving, so that a
      -- stray closing tag cannot unwind the whole tree.
      local open = node
      while open.up and open.e ~= e do open = open.up end
      -- Only the root has no `up`; reaching it means nothing matched.
      if open.up then node = open.up end
    elseif empty == '/' or _empty[e] then
      -- Also reached for a stray </br>, which is kept as a line break in
      -- line with the HTML parsing rules.
      ti(node, {e=e, up=node, a=_dec_attr(attr)})
    else
      if node.up and _parents[e] and _parents[e] ~= node.e then
        repeat node = node.up until _parents[e] == node.e or not node.up
      end
      local x = {e=e, up=node, a=_dec_attr(attr)}
      ti(node, x)
      node = x
    end
    i = k + 1
  end
  return dom
end
---Serializes an attribute table as space-separated name="value" pairs.
-- A value of `true` is written as the attribute's own name (checked="checked").
-- Names are emitted in sorted order, because pairs() order is unspecified and
-- would otherwise make the output vary between runs.
-- @param attr table mapping attribute names to values
-- @return string such as 'href="/a" title="x"', or '' for an empty table
local function _enc_attr(attr)
  local names = {}
  for k in pairs(attr) do names[#names + 1] = k end
  table.sort(names, function (a, b) return tostring(a) < tostring(b) end)
  local t = {}
  for i, k in ipairs(names) do
    local v = attr[k]
    if v == true then v = k end
    -- Only '"' can terminate the value early, so only it is escaped. Other
    -- characters pass through untouched: _dec_attr stores values exactly as
    -- written, so escaping '&' here would double-escape existing entities.
    t[i] = ('%s="%s"'):format(tostring(k), (tostring(v):gsub('"', '&quot;')))
  end
  return table.concat(t, ' ')
end
---Appends the markup for every child of `node` to the fragment list `t`.
-- Non-table children are appended unchanged. Table children are written as
-- elements: "<name attrs/>" when the element has no children of its own,
-- otherwise "<name attrs>", the recursively serialized children, "</name>".
-- @param t array of output fragments, appended to in place
-- @param node table whose array part holds the children to serialize
local function _enc(t, node)
  for _, child in ipairs(node) do
    if type(child) == 'table' then
      ti(t, '<')
      ti(t, child.e)
      if child.a then
        ti(t, ' ')
        ti(t, _enc_attr(child.a))
      end
      if #child >= 1 then
        ti(t, '>')
        _enc(t, child)
        ti(t, '</')
        ti(t, child.e)
        ti(t, '>')
      else
        ti(t, '/>')
      end
    else
      ti(t, child)
    end
  end
end
---Serializes a tree produced by decode back into an XHTML string.
-- @param dom table whose array part holds the nodes to serialize
-- @return string with the markup of all nodes, concatenated in order
function H.encode(dom)
  local fragments = {}
  _enc(fragments, dom)
  return tc(fragments)
end
---Get all nodes where element == e.
-- Walks the tree depth-first in document order. The search does not continue
-- inside a node that matched, so an element nested in another element of the
-- same name is not reported separately.
-- @param dom tree (or subtree) produced by decode
-- @param e element name to look for, compared with ==
-- @return array of matching element nodes (the nodes themselves, not copies)
function H.gete(dom, e)
  local out = {}
  local function walk(parent)
    for _, node in ipairs(parent) do
      if type(node) == 'table' then
        if node.e == e then
          ti(out, node)
        else
          walk(node)
        end
      end
    end
  end
  walk(dom)
  return out
end
---Get all nodes where attribute k == v.
-- Walks the tree depth-first in document order. The search does not continue
-- inside a node that matched.
-- @param dom tree (or subtree) produced by decode
-- @param k attribute name
-- @param v value the attribute must equal (compared with ==)
-- @return array of matching element nodes (the nodes themselves, not copies)
function H.geta(dom, k, v)
  local out = {}
  local function walk(parent)
    for _, node in ipairs(parent) do
      if type(node) == 'table' then
        if node.a and node.a[k] == v then
          ti(out, node)
        else
          walk(node)
        end
      end
    end
  end
  walk(dom)
  return out
end
---Get all nodes whose attribute k matches the Lua pattern v.
-- Walks the tree depth-first in document order. The search does not continue
-- inside a node that matched.
-- @param dom tree (or subtree) produced by decode
-- @param k attribute name
-- @param v Lua pattern handed to the attribute value's :match
-- @return array of matching element nodes (the nodes themselves, not copies)
function H.matcha(dom, k, v)
  local found = {}
  local function walk(parent)
    for _, child in ipairs(parent) do
      if type(child) == 'table' then
        local value = child.a and child.a[k]
        if value and value:match(v) then
          ti(found, child)
        else
          walk(child)
        end
      end
    end
  end
  walk(dom)
  return found
end
---Similar to os.time(t), but utc timezone correct if type(t)=='table'.
-- os.time(t) reads the fields of t as local time; this adds the local UTC
-- offset to the result, which yields the right timestamp when the fields of
-- t are in UTC. The offset is the one in force at the time t denotes (so
-- daylight saving is respected), and its minutes part is honoured (+0530 is
-- 5 h 30 min, not 5.3 h).
-- @param t optional date table as accepted by os.time
-- @return timestamp as a number; os.time() when t is nil or false
function H.utctime(t)
  if not t then return os.time() end
  local ts = os.time(t)
  local offset
  local sign, hh, mm = os.date('%z', ts):match('^([+-])(%d%d)(%d%d)$')
  if sign then
    offset = (tonumber(hh) * 60 + tonumber(mm)) * 60
    if sign == '-' then offset = -offset end
  else
    -- '%z' is not a numeric offset on every platform. Derive the offset
    -- from the difference between the local and UTC broken-down times.
    local utc = os.date('!*t', ts)
    local loc = os.date('*t', ts)
    -- The UTC table has isdst=false; clear it here too so both tables are
    -- converted by the same rule and the difference is the full offset.
    loc.isdst = false
    offset = os.difftime(os.time(loc), os.time(utc))
  end
  return ts + offset
end
return H