/* This file is part of ClioPatria.
Author:
HTTP: http://e-culture.multimedian.nl/
GITWEB: http://gollem.science.uva.nl/git/ClioPatria.git
GIT: git://gollem.science.uva.nl/home/git/ClioPatria.git
GIT: http://gollem.science.uva.nl/home/git/ClioPatria.git
Copyright: 2007, E-Culture/MultimediaN
ClioPatria is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
ClioPatria is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ClioPatria. If not, see .
*/
:- module(rdf_graph,
[ new_search_graph/1, % -Graph
search_graph_add_node/3, % !Graph, +Node, +Fields
search_graph_add_edge/4, % !Graph, +From, +Pred, +To
search_graph_add_edge/5, % !Graph, +From, +Pred, +To, +Weight
search_graph_set_node_type/3, % !Graph, +Node, +Type
search_graph_prune/2, % !Graph, +Nodes
search_graph_prune/3, % !Graph, +Node, -New
search_graph_drains/2, % +Graph, -Drains
search_graph_sources/2, % +Graph, -Sources
% Agenda
search_graph_next_agenda/3, % !Graph, -Node, -Score
search_graph_agenda/2, % +Graph, -Pairs
% Queries
search_graph_size/2, % +Graph, -NodeCount
search_graph_node_score/3, % +Graph, +Node, -Score
search_graph_node_type/3, % +Graph, +Node, -Type
search_graph_nodes_score_list/2, % @Graph, -list(Node-Score)
% RDF Query graph
search_graph_rdf_graph/2, % +Graph, -Triples
search_graph_rdf/4, % +Graph, ?S, ?P, ?O
search_graph_rdfs/4 % +Graph, ?S, ?P, ?O
]).
:- use_module(library(record)).
:- use_module(library(rbtrees)).
:- use_module(library(error)).
:- use_module(library(lists)).
:- use_module(library(option)).
:- use_module(library(semweb/rdf_db)).
/** Representing RDF search paths
---++ Introduction
This module represents a search-graph through an RDF graph starting at
one or more locations in the graph. This search-graph is essentially a
subgraph of the original graph. As it is intended as a dynamically
evolving structure during reasoning, it is represented as a pure Prolog
term and therefore subject to backtracking and normal Prolog memory
management.
---++ Representation
The graph is represented as an RB tree from URI to a record
node_data(Next, Previous, Score, Type)
where Next and Previous are lists of p([fi](Predicate), Target). If
f(P), the link is rdf(Prev, P, Next), if i(P), the link is rdf(Next, P,
Prev). The score of the node indicates semantic distance, ranging from 0
(very far way) to 1 (very close). Finally, Type classifies the node as
one of =start=, =node= or =target=.
The graphs as a whole contains
$ nodes :
An rb_tree mapping node-id to a node_data term as described above.
$ agenda :
An rb_tree mapping score to a list of nodes with that score.
$ size :
Nunber of nodes (for statistical reasons).
---++ Issues
---+++ Shall we deal internally with owl:sameAs?
---+++ How to deal with owl property types?
$ owl:inverseOf :
$ owl:TransitiveProperty :
$ owl:SymmetricProperty :
@tbd No weight loss passing a blank node?
*/
:- record graph(nodes, agenda, size=0).
:- record node_data(next=[], previous=[], score=1, type=node).
%% new_search_graph(-Graph)
%
% Create a new search graph. The initial graph is empty.
new_search_graph(Graph) :-
rb_empty(Nodes),
rb_empty(Agenda),
make_graph([nodes(Nodes), agenda(Agenda)], Graph).
%% search_graph_add_node(!Graph, +Node, +Fields)
%
% Add a (start-)node to the graph
search_graph_add_node(Graph, Node, Fields) :-
graph_nodes(Graph, Assoc0),
make_node_data(Fields, NodeData),
rb_insert(Assoc0, Node, NodeData, Assoc1),
set_nodes_of_graph(Assoc1, Graph),
graph_size(Graph, Size0),
Size is Size0 + 1,
set_size_of_graph(Size, Graph),
node_data_score(NodeData, Score0),
add_new_node_to_agenda(Graph, Node, Score0, Score),
( Score == Score0
-> true
; set_score_of_node_data(Score, NodeData)
).
%% search_graph_add_edge(!Graph, +From, +Pred, +To) is det.
%% search_graph_add_edge(!Graph, +From, +Pred, +To, +Options) is det.
%
% Add a link {From->Pred->To} to the graph. Options include
%
% * weight(+Weight)
% Weight to use for the predicate link instead of the default
%
% * inverse(true)
% is formed from rdf(To,Pred,From)
search_graph_add_edge(Graph, From, Pred, To) :-
search_graph_add_edge(Graph, From, Pred, To, []).
search_graph_add_edge(Graph, From, Pred, To, Options) :-
graph_nodes(Graph, Assoc0),
rb_lookup(From, FromData, Assoc0),
( option(inverse(true), Options)
-> PTerm = i(Pred)
; PTerm = f(Pred)
),
add_next_node_data(FromData, p(PTerm, To)),
node_data_score(FromData, Score0),
( option(weight(Weight), Options)
-> Score is Score0 * Weight
; update_score(Graph, From, Pred, To, Score0, Score)
),
( rb_lookup(To, ToData, Assoc0)
-> add_previous_node_data(ToData, p(PTerm, From)),
node_data_score(ToData, Score1),
join_score(Score1, Score, NewScore),
reschedule_node(Graph, To, Score1, NewScore, FinalScore),
set_score_of_node_data(FinalScore, ToData)
; search_graph_add_node(Graph, To,
[ score(Score),
previous([p(PTerm, From)])
])
).
add_next_node_data(Data, Next) :-
node_data_next(Data, Next0),
set_next_of_node_data([Next|Next0], Data).
add_previous_node_data(Data, Previous) :-
node_data_previous(Data, Previous0),
set_previous_of_node_data([Previous|Previous0], Data).
%% join_score(+Score1, +Score2, -Score)
%
% Join two scores comming from independent paths. Scores are
% considered probabilities.
join_score(S1, S2, S) :-
S is 1-((1-S1)*(1-S2)).
%% update_score(+Graph, +From, +Pred, +To, +Score0, -Score)
%
% Given that From has score ScoreO, compute the score for To based
% on Pred. First prototype used a predefined value per predicate
% in the range 0..1. Alternatively we can use statistical
% measures. Some porposals:
%
% * How many subjects of the same type have this property with
% this value?
% * How many subjects have this object (over this relation)?
%
% @param Score Float in the range 0.0..1.0
update_score(_Graph, From, Pred, To, Score0, Score) :-
by_in_links(From, Pred, To, Factor),
Score is Score0 * Factor.
by_in_links(_From, Pred, To, Factor) :-
rdf_estimate_complexity(_, Pred, To, Complexity),
Factor is 1/Complexity.
%% search_graph_set_node_type(+Graph, +Node, +Type) is det.
%
% Set the type for Node in Graph to Type.
%
% @error existence_error(node, Node)
search_graph_set_node_type(Graph, Node, Type) :-
graph_nodes(Graph, Assoc),
( rb_lookup(Node, Data, Assoc)
-> set_type_of_node_data(Type, Data)
; existence_error(node, Node)
).
%% search_graph_node_type(+Graph, +Node, -Type) is det.
%
% Type is the classification for Node in Graph.
%
% @tbd Generalise modes?
search_graph_node_type(Graph, Node, Type) :-
graph_nodes(Graph, Assoc),
( rb_lookup(Node, Data, Assoc)
-> node_data_type(Data, Type)
; existence_error(node, Node)
).
%% search_graph_node_score(+Graph, +Node, -Weight) is det.
%
% Weight is the current score for Node in Graph.
search_graph_node_score(Graph, Node, Weight) :-
graph_nodes(Graph, Assoc),
rb_lookup(Node, Data, Assoc),
node_data_score(Data, Weight).
%% search_graph_size(+Graph, -Size) is det.
%
% Size of the graph in nodes.
search_graph_size(Graph, Size) :-
graph_size(Graph, Size).
/*******************************
* PRUNING *
*******************************/
%% search_graph_prune(!Graph, +Nodes) is det.
%% search_graph_prune(!Graph, +Nodes, -NewLeaves) is det.
%
% Delete non-target leave nodes from Graph.
search_graph_prune(Graph, Prune) :-
search_graph_prune(Graph, Prune, _).
search_graph_prune(Graph, Prune, NewLeaves) :-
is_list(Prune), !,
find_ancestors(Prune, Graph, Ancestors),
group_pairs_by_key(Ancestors, Grouped),
delete_next_links(Grouped, Graph, NewLeaves).
search_graph_prune(Graph, Prune, NewLeaves) :-
search_graph_prune(Graph, [Prune], NewLeaves).
%% find_ancestors(+Prune, +Graph, -Ancestors)
%
% @param Ancestors is a difference-list of terms From-p(Pred,To)
find_ancestors(Prune, Graph, Ancestors) :-
graph_nodes(Graph, Nodes),
find_ancestors(Prune, Graph, Nodes, RestNodes,
0, Deleted, Ancestors, []),
graph_size(Graph, Size0),
Size is Size0 - Deleted,
set_size_of_graph(Size, Graph),
set_nodes_of_graph(RestNodes, Graph).
find_ancestors([], _, Nodes, Nodes, C, C, A, A).
find_ancestors([H|T], Graph, Nodes0, Nodes, C0, C, A, AT) :-
rb_lookup(H, Data, Nodes0),
( node_data_type(Data, target)
-> find_ancestors(T, Graph, Nodes0, Nodes, C0, C, A, AT)
; node_data_next(Data, Next),
Next \== []
-> permission_error(Graph, prune, H)
; node_data_previous(Data, Prev),
make_a_list(Prev, H, A, AT0),
rb_delete(Nodes0, H, Nodes1),
C1 is C0 + 1,
find_ancestors(T, Graph, Nodes1, Nodes, C1, C, AT0, AT)
).
make_a_list([], _, A, A).
make_a_list([p(Pred, From)|T0], To, [From-p(Pred,To)|T1], T) :-
make_a_list(T0, To, T1, T).
delete_next_links([], _, []).
delete_next_links([H-DelNext|T], Graph, NewLeaves) :-
graph_nodes(Graph, Nodes),
rb_lookup(H, Data, Nodes),
node_data_next(Data, Next0),
subtract_set(Next0, DelNext, Next),
set_next_of_node_data(Next, Data),
( Next == []
-> NewLeaves = [H|NewLeaves1],
delete_next_links(T, Graph, NewLeaves1)
; delete_next_links(T, Graph, NewLeaves)
).
%% subtract_set(+Set, +Subtract, -Remaining) is det.
%
% Remaining are the elements from Set that are not in Subtract.
% None of the sets is ordered. We have to apply heuristics to find
% out the best approach.
subtract_set(Set, [One], Remaining) :- !,
selectchk(One, Set, Remaining).
subtract_set(Set, Subtract, Remaining) :-
sort(Set, SSet),
sort(Subtract, SSubstract),
ord_subtract(SSet, SSubstract, Remaining).
%% search_graph_nodes_score_list(@Graph, -Scores:list(Node-Score)) is det.
%
% Scores is a list with Node-Score pairs, with all nodes in
% Graph.
search_graph_nodes_score_list(Graph, Scores) :-
graph_nodes(Graph, Assoc),
rb_visit(Assoc, NodeList),
scores(NodeList, Scores).
scores([], []).
scores([Node-Data|T0], [Node-Score|T]) :-
node_data_score(Data, Score), !,
scores(T0, T).
%% search_graph_drains(@Graph, -Nodes)
%
% List of all drains (end-of-path)
search_graph_drains(Graph, Nodes) :-
graph_nodes(Graph, Assoc),
rb_visit(Assoc, NodeList),
drains(NodeList, Nodes).
drains([], []).
drains([Node-Data|T0], [Node|T]) :-
node_data_next(Data, []), !,
drains(T0, T).
drains([_|T0], T) :-
drains(T0, T).
%% search_graph_sources(@Graph, -Nodes)
%
% List of all drains (start-of-path)
search_graph_sources(Graph, Nodes) :-
graph_nodes(Graph, Assoc),
rb_visit(Assoc, NodeList),
sources(NodeList, Nodes).
sources([], []).
sources([Node-Data|T0], [Node|T]) :-
node_data_previous(Data, []), !,
sources(T0, T).
sources([_|T0], T) :-
sources(T0, T).
/*******************************
* AGENDA *
*******************************/
%% add_new_node_to_agenda(!Graph, +Node, +Score, -FinalScore) is det.
%
% Add a new node to the agenda. We want a unique score for each
% node, so we can find the node from the score. Therefore, if
% there is already a node with this score, we use a slightly lower
% score, etc.
%
% @tbd: use rb_update/5.
add_new_node_to_agenda(Graph, Node, Score, FinalScore) :-
must_be(between(0.0,1.0), Score),
graph_agenda(Graph, RBTree0),
into_agenda(RBTree0, Node, Score, RBTree, FinalScore),
set_agenda_of_graph(RBTree, Graph).
into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :-
rb_insert_new(RBTree0, Score, Node, RBTree), !,
FinalScore = Score.
into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :-
rb_previous(RBTree0, Score, PrevScore, _), !,
( Score - PrevScore =< 0.001
-> between(1, infinite, Step),
FinalScore is PrevScore - 0.000000001*Step
; between(1, infinite, Step),
FinalScore is Score - 0.001*Step
),
rb_insert_new(RBTree0, FinalScore, Node, RBTree), !.
into_agenda(RBTree0, Node, Score, RBTree, FinalScore) :-
FinalScore is Score - 0.001,
rb_insert_new(RBTree0, FinalScore, Node, RBTree).
%% reschedule_node(!Graph, +Node, +OldScore, -NewScore)
%
% Change the score of Node in the agenda of Graph. Leaves the
% agenda untouched if Node is not in it as this implies the node
% is already expanded.
reschedule_node(Graph, Node, OldScore, NewScore, FinalScore) :-
graph_agenda(Graph, RBTree0),
( rb_delete(OldScore, Node, RBTree0)
-> into_agenda(RBTree0, Node, NewScore, RBTree, FinalScore),
set_agenda_of_graph(RBTree, Graph)
; FinalScore = NewScore % already expanded
).
%% search_graph_next_agenda(!Graph, -Node, -Score) is semidet.
%
% Fetches the best node from the agenda and deletes it
search_graph_next_agenda(Graph, Node, Score) :-
graph_agenda(Graph, RBTree0),
rb_del_max(RBTree0, Score, Node, RBTree),
set_agenda_of_graph(RBTree, Graph).
%% search_graph_agenda(+Graph, -Pairs)
%
% Get agenda from the graph, showing not-expanded nodes as pairs
% Score-Node. E.g.
%
% ==
% Agenda = [1-a, 0.5-b, 0.25-c]
% ==
search_graph_agenda(Graph, Pairs) :-
graph_agenda(Graph, RBTree),
rb_visit(RBTree, Pairs0),
reverse(Pairs0, Pairs).
/*******************************
* RDF *
*******************************/
%% search_graph_rdf_graph(+Graph, -RDF:list(rdf(S,P,O))) is det.
%
% Create a list of rdf(S,P,O) triples from Graph.
search_graph_rdf_graph(Graph, RDF) :-
graph_nodes(Graph, Nodes),
rb_visit(Nodes, Pairs),
phrase(graph_to_rdf(Pairs), RDF).
graph_to_rdf([]) -->
[].
graph_to_rdf([R-Data|T]) -->
{ node_data_next(Data, Next)
},
node_to_rdf(Next, R),
graph_to_rdf(T).
node_to_rdf([], _) -->
[].
node_to_rdf([p(P, O)|T], S) -->
link_to_rdf(P, S, O),
node_to_rdf(T, S).
link_to_rdf(f(P), S, O) -->
[ rdf(S, P, O) ].
link_to_rdf(i(P), S, O) -->
[ rdf(O, P, S) ].
%% search_graph_rdf(+Graph, ?S, ?P, ?O) is nondet.
%
% rdf(S,P,O) is an RDF statement in Graph. Provides indexed access
% if either S or O are given.
:- rdf_meta
search_graph_rdf(+, r, r, o),
search_graph_rdfs(+, r, r, o).
search_graph_rdf(Graph, S, P, O) :-
ground(S), ground(O), !,
graph_nodes(Graph, Nodes),
rb_lookup(S, SData, Nodes),
rb_lookup(O, OData, Nodes),
( node_data_next(SData, SN),
dmember(P, p(f(P),O), SN)
; node_data_next(OData, PN),
dmember(P, p(i(P),S), PN)
).
search_graph_rdf(Graph, S, P, O) :-
( search_graph_rdf_(Graph, S, f(P), O)
; search_graph_rdf_(Graph, O, i(P), S)
).
search_graph_rdf_(Graph, S, P, O) :-
graph_nodes(Graph, Nodes),
( ground(S)
-> rb_lookup(S, SData, Nodes),
node_data_next(SData, List),
member(p(P, O), List)
; ground(O)
-> rb_lookup(O, OData, Nodes),
node_data_previous(OData, List),
member(p(P, S), List)
; rb_in(S, SData, Nodes),
node_data_next(SData, List),
member(p(P, O), List)
).
dmember(Ground, Element, List) :-
ground(Ground), !,
memberchk(Element, List).
dmember(_Ground, Element, List) :-
member(Element, List), !.
%% search_graph_rdfs(+Graph, ?S, ?P, ?O) is nondet.
%
% rdf(S,P0,O) is an RDF statement in Graph and P0 is a
% subPropertyOf P. Provides indexed access if either S or O are
% given.
%
% @see This is the same as rdf_has/3 from library rdf_db.pl
search_graph_rdfs(Graph, S, P, O) :-
search_graph_rdf(Graph, S, P0, O),
rdf_reachable(P0, rdfs:subPropertyOf, P).