35
36:- module(rdf,
37 [ load_rdf/2, 38 load_rdf/3, 39 xml_to_rdf/3, 40 process_rdf/3 41 ]). 42
43:- meta_predicate
44 load_rdf(+, -, :),
45 process_rdf(+, :, :). 46
47:- autoload(library(lists),[select/3,append/3]). 48:- autoload(library(option),[meta_options/3,option/3]). 49:- autoload(library(rdf_parser),
50 [ make_rdf_state/3, xml_to_plrdf/3, rdf_name_space/1,
51 rdf_modify_state/3, element_to_plrdf/3
52 ]). 53:- autoload(library(rdf_triple),
54 [rdf_start_file/2,rdf_end_file/1,rdf_triples/2]). 55:- autoload(library(sgml),
56 [ load_structure/3, new_sgml_parser/2, set_sgml_parser/2,
57 open_dtd/3, xml_quote_attribute/2, sgml_parse/2,
58 free_sgml_parser/1, get_sgml_parser/2
59 ]).
%!  load_rdf(+File, -Triples) is det.
%
%   Convenience entry point: same as load_rdf/3 with an empty
%   option list.

load_rdf(Input, TripleList) :-
    load_rdf(Input, TripleList, []).
99
%!  load_rdf(+File, -Triples, :Options) is det.
%
%   Parse File as XML using load_structure/3 and translate the single
%   top-level element into Triples.  The option list is processed as
%   follows:
%
%     - entity(Name, Value) terms are split off by entity_options/3
%       and handed to load_structure/3.
%     - The remaining options go through meta_options/3 and are then
%       passed to rdf_start_file/2 and xml_to_rdf/3.
%     - namespaces(NSList) enables collecting xmlns declarations
%       (see init_ns_collect/2 and exit_ns_collect/1).
%     - expand_foreach(true) is acted upon by post_process/3.

load_rdf(Input, Triples, Module:RawOptions) :-
    entity_options(RawOptions, Entities, PlainOptions),
    meta_options(load_meta_option, Module:PlainOptions, Options),
    init_ns_collect(Options, NSList),
    ParseOptions = [ dialect(xmlns),
                     space(sgml),
                     call(xmlns, rdf:on_xmlns)
                   | Entities
                   ],
    % The document must consist of exactly one element.
    load_structure(Input, [RDFElement], ParseOptions),
    rdf_start_file(Options, Cleanup),
    call_cleanup(
        xml_to_rdf(RDFElement, RawTriples, Options),
        rdf_end_file(Cleanup)),
    exit_ns_collect(NSList),
    post_process(Options, RawTriples, Triples).
117
%!  entity_options(+Options, -Entities, -Rest) is det.
%
%   Split Options into the entity(Name, Value) terms (Entities) and
%   all other options (Rest).  Relative order is preserved in both
%   output lists.

entity_options([], [], []).
entity_options([Option|Options], Entities, Others) :-
    Option = entity(_,_),
    !,
    Entities = [Option|MoreEntities],
    entity_options(Options, MoreEntities, Others).
entity_options([Option|Options], Entities, [Option|Others]) :-
    entity_options(Options, Entities, Others).
126
%!  load_meta_option(?Name) is nondet.
%
%   Names of the options whose value is module sensitive.  Used as
%   the first argument of meta_options/3.

load_meta_option(convert_typed_literal).
%!  xml_to_rdf(+XML, -Triples, +Options) is semidet.
%!  xml_to_rdf(+XML, +BaseURI, -Triples) is semidet.
%
%   Translate a parsed XML term into a list of triples.  The first
%   form is selected when the third argument is a list of options.
%   Otherwise, if the second argument is an atom, it is taken as the
%   base URI and the call is mapped onto the first form with
%   base_uri(BaseURI) as the only option.

xml_to_rdf(XML, Arg2, Arg3) :-
    (   is_list(Arg3)
    ->  make_rdf_state(Arg3, State, _),
        xml_to_plrdf(XML, RDF, State),
        rdf_triples(RDF, Arg2)
    ;   atom(Arg2)
    ->  xml_to_rdf(XML, Arg3, [base_uri(Arg2)])
    ).
141
142
143 146
%!  post_process(+Options, +Triples0, -Triples) is det.
%
%   Walk the option list and apply expand_each/2 to the triples for
%   every expand_foreach(true) that is found.  All other options are
%   skipped.

post_process([], Triples, Triples).
post_process([Option|Options], Triples0, Triples) :-
    (   Option = expand_foreach(true)
    ->  expand_each(Triples0, Triples1)
    ;   Triples1 = Triples0
    ),
    post_process(Options, Triples1, Triples).
155
156
157 160
%!  expand_each(+Triples0, -Triples) is det.
%
%   Repeatedly remove a triple whose subject is each(Container) and
%   let each_triples/5 add a copy of it for the members of Container.
%   Stops when no such triple is left.

expand_each(Triples0, Triples) :-
    (   select(rdf(each(Container), Pred, Object), Triples0, Others)
    ->  each_triples(Others, Container, Pred, Object, Expanded),
        expand_each(Expanded, Triples)
    ;   Triples = Triples0
    ).
168
%!  each_triples(+Triples0, +Container, +Pred, +Object, -Triples) is det.
%
%   Copy Triples0 to Triples.  After every triple of the form
%   rdf(Container, rdf:Name, Member) where Name satisfies
%   member_attribute/1, insert rdf(Member, Pred, Object).

each_triples([], _, _, _, []).
each_triples([Triple|Rest0], Container, Pred, Object,
             [Triple, rdf(Member, Pred, Object)|Rest]) :-
    Triple = rdf(Container, rdf:Name, Member),
    member_attribute(Name),
    !,
    each_triples(Rest0, Container, Pred, Object, Rest).
each_triples([Triple|Rest0], Container, Pred, Object, [Triple|Rest]) :-
    each_triples(Rest0, Container, Pred, Object, Rest).
178
%!  member_attribute(+Name) is semidet.
%
%   True when Name starts with an underscore.

member_attribute(Name) :-
    sub_atom(Name, 0, 1, _, '_').
182
183
%!  process_rdf(+Input, :OnObject, :Options) is det.
%!  process_rdf(+Input, +BaseURI, :OnObject) is det.
%
%   Parse Input and hand the triples to OnObject through the begin
%   callback on_begin/3, which calls OnObject with two extra
%   arguments: the list of triples and a `File:Line` term.
%
%   Input is one of stream(In), an open stream or something that can
%   be passed to open/4.  Only in the last case is the stream opened
%   here, and only then is it bound to `Close`, so that
%   cleanup_process/3 closes it.
%
%   The second form is selected when the third argument is not a list.
%   Because the second argument is declared module sensitive, BaseURI
%   arrives as `Module:BaseURI` there.  The qualifier is stripped
%   before the value is stored as base_uri(BaseURI).

process_rdf(File, OnObject, M:Options0) :-
    is_list(Options0),
    !,
    entity_options(Options0, EntOptions, Options1),
    meta_options(load_meta_option, M:Options1, Options2),
    option(base_uri(BaseURI), Options2, ''),
    rdf_start_file(Options2, Cleanup),
    strip_module(OnObject, Module, Pred),
    % State shared with the callbacks on_begin/3 and on_xmlns/3;
    % removed again by cleanup_process/3.
    b_setval(rdf_object_handler, Module:Pred),
    nb_setval(rdf_options, Options2),
    nb_setval(rdf_state, -),
    init_ns_collect(Options2, NSList),
    (   File = stream(In)
    ->  Source = BaseURI
    ;   is_stream(File)
    ->  In = File,
        option(graph(Source), Options2, BaseURI)
    ;   open(File, read, In, [type(binary)]),
        Close = In,                     % we opened it, so we close it
        Source = File
    ),
    new_sgml_parser(Parser, [dtd(DTD)]),
    def_entities(EntOptions, DTD),
    (   Source \== []
    ->  set_sgml_parser(Parser, file(Source))
    ;   true
    ),
    set_sgml_parser(Parser, dialect(xmlns)),
    set_sgml_parser(Parser, space(sgml)),
    do_process_rdf(Parser, In, NSList, Close, Cleanup, Options2).
process_rdf(File, QBaseURI, OnObject) :-
    strip_module(QBaseURI, _, BaseURI),
    process_rdf(File, OnObject, [base_uri(BaseURI)]).
253
%!  def_entities(+Options, +DTD) is det.
%
%   Call def_entity/3 for every entity(Name, Value) term in Options.
%   Any other element of the list is ignored.

def_entities([], _).
def_entities([Option|Options], DTD) :-
    (   Option = entity(Name, Value)
    ->  def_entity(DTD, Name, Value)
    ;   true
    ),
    def_entities(Options, DTD).
261
%!  def_entity(+DTD, +Name, +Value) is det.
%
%   Add the declaration `<!ENTITY Name "Value">` to DTD by writing it
%   to a stream obtained from open_dtd/3.  Value is passed through
%   xml_quote_attribute/2 first.
%
%   The value is quoted before the stream is opened and the stream is
%   guarded by setup_call_cleanup/3, so it is closed even if writing
%   the declaration raises an exception.

def_entity(DTD, Name, Value) :-
    xml_quote_attribute(Value, QValue),
    setup_call_cleanup(
        open_dtd(DTD, [], Stream),
        format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
        close(Stream)).
267
268
%!  do_process_rdf(+Parser, +In, +NSList, ?Close, +Cleanup, +Options)
%
%   Run sgml_parse/2 on the stream In with on_begin/3 and on_xmlns/3
%   installed as callbacks, followed by exit_ns_collect/1.
%   cleanup_process/3 is registered as the cleanup handler.

do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :-
    ParseOptions = [ source(In),
                     call(begin, on_begin),
                     call(xmlns, on_xmlns)
                   | Options
                   ],
    call_cleanup(
        (   sgml_parse(Parser, ParseOptions),
            exit_ns_collect(NSList)
        ),
        cleanup_process(Close, Cleanup, Parser)).
279
%!  cleanup_process(?In, +Cleanup, +Parser) is det.
%
%   Release what process_rdf/3 set up: close In if it is bound, free
%   the parser, delete the global variables used to talk to the
%   callbacks and finally call rdf_end_file/1.

cleanup_process(In, Cleanup, Parser) :-
    (   nonvar(In)
    ->  close(In)
    ;   true
    ),
    free_sgml_parser(Parser),
    nb_delete(rdf_options),
    nb_delete(rdf_object_handler),
    nb_delete(rdf_state),
    nb_delete(rdf_nslist),
    rdf_end_file(Cleanup).
291
%!  on_begin(+Tag, +Attributes, +Parser)
%
%   Begin-element callback installed by do_process_rdf/6.
%
%   For an element NS:'RDF' where NS satisfies rdf_name_space/1, a new
%   parser state is built from the stored options and the element
%   attributes and saved in the global variable `rdf_state`.
%
%   For any other element the action depends on `rdf_state`.  If it is
%   still `-`, a warning is printed unless the options contain
%   embedded(true).  Otherwise the content of the element is parsed,
%   converted to triples and passed to the handler stored in
%   `rdf_object_handler`, together with `File:Line`.

on_begin(NS:'RDF', Attributes, _) :-
    rdf_name_space(NS),
    !,
    nb_getval(rdf_options, Options),
    make_rdf_state(Options, InitialState, _),
    rdf_modify_state(Attributes, InitialState, State),
    nb_setval(rdf_state, State).
on_begin(Tag, Attributes, Parser) :-
    nb_getval(rdf_state, State),
    (   State \== (-)
    ->  % Record the position before reading the element content.
        get_sgml_parser(Parser, line(Line)),
        get_sgml_parser(Parser, file(File)),
        sgml_parse(Parser,
                   [ document(Content),
                     parse(content)
                   ]),
        b_getval(rdf_object_handler, Handler),
        element_to_plrdf(element(Tag, Attributes, Content), Objects, State),
        rdf_triples(Objects, Triples),
        call(Handler, Triples, File:Line)
    ;   nb_getval(rdf_options, Options),
        (   memberchk(embedded(true), Options)
        ->  true
        ;   print_message(warning, rdf(unexpected(Tag, Parser)))
        )
    ).
%!  on_xmlns(+NS, +URL, +Parser) is det.
%
%   xmlns callback.  If the global variable `rdf_nslist` holds a
%   term list(L), push NS=URL in front of L.  Otherwise do nothing.

on_xmlns(NS, URL, _Parser) :-
    nb_getval(rdf_nslist, Collected),
    (   Collected = list(Seen)
    ->  nb_linkarg(1, Collected, [NS=URL|Seen])
    ;   true
    ).
331
%!  init_ns_collect(+Options, -NSList) is det.
%
%   Prepare the global variable `rdf_nslist`.  If Options contains
%   namespaces(NSList) with a value other than `-`, the variable is
%   set to list([]).  Otherwise it is set to `-` and NSList is
%   unified with `-`.

init_ns_collect(Options, NSList) :-
    (   option(namespaces(NSList), Options, -),
        NSList \== (-)
    ->  Initial = list([])
    ;   Initial = (-),
        NSList = (-)
    ),
    nb_setval(rdf_nslist, Initial).
339
%!  exit_ns_collect(?NSList) is semidet.
%
%   Finish namespace collection.  Succeeds without further action if
%   NSList is `-`.  Otherwise unifies NSList with the list stored as
%   list(NSList) in the global variable `rdf_nslist`.

exit_ns_collect(NSList) :-
    NSList == (-),
    !.
exit_ns_collect(NSList) :-
    nb_getval(rdf_nslist, list(NSList)).
345
346
347
348 351
:- multifile
    prolog:message/3.

%   prolog:message(+Term)//
%
%   Message texts for the rdf(_) terms printed by the RDF libraries.

prolog:message(rdf(unparsed(Data))) -->
    { phrase(unparse_xml(Data), Codes) },
    [ 'RDF: Failed to interpret "~s"'-[Codes] ].
prolog:message(rdf(shared_blank_nodes(Count))) -->
    [ 'RDF: Shared ~D blank nodes'-[Count] ].
prolog:message(rdf(not_a_name(Text))) -->
    [ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Text] ].
prolog:message(rdf(redefined_id(Identifier))) -->
    [ 'RDF: rdf:ID ~p: multiple definitions'-[Identifier] ].
prolog:message(rdf(unexpected(Tag, Parser))) -->
    % Ask the parser where the offending element was found.
    { get_sgml_parser(Parser, file(Path)),
      get_sgml_parser(Parser, line(LineNo))
    },
    [ 'RDF: ', url(Path:LineNo), ': Unexpected element ~w'-[Tag] ].
372
373
374 377
%   unparse_xml(+Data)//
%
%   Turn a parsed XML term (an atom, an element(Name, Attrs, Content)
%   term or a list of these) back into text, as a list of character
%   codes.  Used to show unparsed data in the rdf(unparsed(Data))
%   message.
%
%   An element without content is written as `<Name .../>`.  Any
%   other element is written with its content followed by a matching
%   end tag.

unparse_xml([]) -->
    !,
    [].
unparse_xml([H|T]) -->
    !,
    unparse_xml(H),
    unparse_xml(T).
unparse_xml(Atom) -->
    { atom(Atom)
    },
    !,
    atom(Atom).
unparse_xml(element(Name, Attr, Content)) -->
    "<",
    identifier(Name),
    attributes(Attr),
    (   { Content == []
        }
    ->  "/>"
    ;   ">",
        unparse_xml(Content),
        "</",
        identifier(Name),
        ">"
    ).
400
%   attributes(+List)//
%
%   Emit every Name=Value pair of List using attribute//1.

attributes([]) -->
    [].
attributes([Pair|Pairs]) -->
    attribute(Pair),
    attributes(Pairs).
406
%   attribute(+NameValue)//
%
%   Emit a single attribute as a space, the name, `=` and the quoted
%   value.

attribute(AttName=AttValue) -->
    " ",
    identifier(AttName),
    "=",
    value(AttValue).
412
%   identifier(+Name)//
%
%   Emit an element or attribute name.  A qualified name NS:Local is
%   written as `{NS}Local`; any other name is written as is.

identifier(Name) -->
    (   { Name = NS:Local }
    ->  "{", atom(NS), "}",
        atom(Local)
    ;   atom(Name)
    ).
419
%   atom(+Atom)//
%
%   Emit the character codes of Atom.  Written as an ordinary
%   three-argument predicate so it can be called from grammar bodies.

atom(Atom, Codes, Tail) :-
    atom_codes(Atom, AtomCodes),
    append(AtomCodes, Tail, Codes).
423
%   value(+Value)//
%
%   Emit Value between double quotes, passing each of its character
%   codes through quoted//1.

value(Text) -->
    { atom_codes(Text, Codes) },
    "\"",
    quoted(Codes),
    "\"".
430
%   quoted(+Codes)//
%
%   Emit Codes, mapping each code through quote//1.  The cut commits
%   to the first translation quote//1 offers for a code.

quoted([]) -->
    [].
quoted([Code|Codes]) -->
    quote(Code),
    !,
    quoted(Codes).
437
%   quote(+Code)//
%
%   Emit the text for a single character code of an attribute value.
%   The characters `<`, `>`, `"` and `&` are replaced by their XML
%   entity references; every other code is emitted unchanged.  The
%   last clause also matches the four special characters, so callers
%   must commit to the first solution, as quoted//1 does.

quote(0'<) --> "&lt;".
quote(0'>) --> "&gt;".
quote(0'") --> "&quot;".
quote(0'&) --> "&amp;".
quote(X)   --> [X].
443
444
445 448
%   Hook clause describing the goal argument of process_rdf/3: its
%   second argument G is called with two additional arguments (the
%   triples and the source position, see on_begin/3).
%   NOTE(review): presumably consumed by the cross-referencer; confirm.

:- multifile prolog:meta_goal/2.
prolog:meta_goal(process_rdf(_,G,_), [G+2]).
% RDF/XML parser
%
% This module parses RDF/XML documents. It defines two processing modes:
% load_rdf/2 and load_rdf/3, which process a document into a list of
% rdf(S,P,O) terms, and process_rdf/3, which processes the input
% description-by-description and uses a callback to handle the triples.