View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2018, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_history,
   36          [ rdfh_transaction/1,         % :Goal
   37            rdfh_assert/3,              % +S,+P,+O
   38            rdfh_retractall/3,          % +S,+P,+O
   39            rdfh_update/3,              % +S[->NS],+P[->NP],+O[->NO]
   40            rdfh_db_transaction/3,      % ?DB, +Condition, ?Transaction
   41            rdfh_triple_transaction/2,  % +Triple, -Transaction
   42            rdfh_transaction_member/2   % ?Action, +Transaction
   43          ]).   44:- use_module(library('http/http_session')).   45:- use_module(library(lists)).   46:- use_module(library(record)).   47:- use_module(library(error)).   48:- use_module(library(debug)).   49:- use_module(library('semweb/rdf_persistency')).   50:- use_module(library('semweb/rdf_db')).   51
   52
   53/** <module> RDF Persistent store change history
   54
   55This  module  deals  with  accessing  the   journal  files  of  the  RDF
   56persistency layer to get insight in the   provenance  and history of the
   57RDF database. It is designed for   Wiki-like collaborative editing of an
   58RDF graph. We make the following assumptions:
   59
   60 * Users are identified using a URI, typically an OpenID (http://openid.net/)
   61 * Triples created by a user are added to a named graph identified by the
   62   URI of the user.
   63 * Changes are grouped using rdf_transaction(Goal, log(Message, User))
   64 * The number that is associated with the named graph of a triple (normally
   65   expressing the line number in the source) is used to store the time-stamp.
   66   Although this information is redundant (the time stamp is the same as
   67   for the transaction), it allows for binary search through the history
   68   file for the enclosing transaction.
   69
   70@tbd    Cleanup thoughts on delete and update.
   71
   72@author Jan Wielemaker
   73*/
   74
   75                 /*******************************
   76                 *         DECLARATIONS         *
   77                 *******************************/
   78
   % rdfh_transaction/1 is declared module transparent.
   :- module_transparent rdfh_transaction/1.

   % Argument specifications used for RDF prefix expansion of calls to
   % the modification predicates.
   :- rdf_meta rdfh_assert(r,r,o),
               rdfh_retractall(r,r,o),
               rdfh_update(t,t,t).

   % rdfh_hook/1 may receive clauses from other files.  This module calls
   % it with session(Session) and user(User).
   :- multifile rdfh_hook/1.

   % Record describing one transaction read from a journal.  The
   % generated accessor rdf_transaction_actions/2 is used by
   % rdfh_transaction_member/2.
   :- record rdf_transaction(id:integer,
                             nesting:integer,
                             time:number,
                             message,
                             actions:list,
                             other_graphs:list).
   98
   99                 /*******************************
  100                 *         MODIFICATIONS        *
  101                 *******************************/
  102
  %!  rdfh_transaction(:Goal) is semidet.
  %
  %   Execute Goal through rdf_transaction/2.  The transaction is
  %   logged as log(rdfh([user(User)|Context]), User), where User is
  %   obtained from rdfh_user/1 and Context from transaction_context/1.

  rdfh_transaction(Goal) :-
      rdfh_user(UserURI),
      transaction_context(Extra),
      Message = rdfh([user(UserURI)|Extra]),
      rdf_transaction(Goal, log(Message, UserURI)).
  112
  113
  %!  rdfh_assert(+S, +P, +O) is det.
  %
  %   Add the triple {S,P,O} to the graph named after the user of the
  %   active rdfh transaction, using the current time stamp as the
  %   line number of the graph.
  %
  %   @error permission_error(assert, triple, rdf(S,P,O)) if there is
  %          no active transaction started by rdfh_transaction/1.

  rdfh_assert(S,P,O) :-
      rdf_active_transaction(log(rdfh(_), User)),
      !,
      rdfh_time(Stamp),
      rdf_assert(S,P,O,User:Stamp).
  rdfh_assert(S,P,O) :-
      throw(error(permission_error(assert, triple, rdf(S,P,O)),
                  context(_, 'No rdfh_transaction/1'))).
  126
  127
  %!  rdfh_retractall(+S, +P, +O) is det.
  %
  %   Remove all triples matching {S,P,O}.  Every matching triple ends
  %   up in the journal, which allows for undoing the action and for
  %   reporting what was removed, also if several triples are removed
  %   by a single call.
  %
  %   Note that the retraction is journalled for the user who owns the
  %   triple, which is not necessarily the user performing the action.
  %
  %   @error permission_error(retract, triple, rdf(S,P,O)) if there is
  %          no active transaction started by rdfh_transaction/1.

  rdfh_retractall(S,P,O) :-
      rdf_active_transaction(log(rdfh(_), _)),
      !,
      rdf_retractall(S,P,O).
  rdfh_retractall(S,P,O) :-
      throw(error(permission_error(retract, triple, rdf(S,P,O)),
                  context(_, 'No rdfh_transaction/1'))).
  145
  146
  %!  rdfh_update(+S, +P, +O) is det.
  %
  %   Replace a triple by another one.  Each of S, P and O is either
  %   a plain value, which is kept, or a term _Old_ =|->|= _New_.
  %   The update is implemented as a retract of the old triple
  %   followed by an assert of the new triple into the graph of the
  %   current user, stamped with the current time.  For example:
  %
  %   ==
  %   rdfh_update(Work, Style, wn:oldstyle -> wn:newstyle)
  %   ==
  %
  %   Which user should own the resulting triple is debatable,
  %   notably if only the predicate is changed into a related one.
  %
  %   @error permission_error(retract, triple, rdf(S,P,O)) if there is
  %          no active transaction started by rdfh_transaction/1.

  rdfh_update(S,P,O) :-
      rdf_active_transaction(log(rdfh(_), User)),
      !,
      update(S,P,O, rdf(OldS, OldP, OldO), rdf(NewS, NewP, NewO)),
      % The triple to remove must be fully specified
      must_be(ground, OldS),
      must_be(ground, OldP),
      must_be(ground, OldO),
      rdfh_time(Stamp),
      rdf_retractall(OldS, OldP, OldO),
      rdf_assert(NewS, NewP, NewO, User:Stamp).
  rdfh_update(S,P,O) :-
      throw(error(permission_error(retract, triple, rdf(S,P,O)),
                  context(_, 'No rdfh_transaction/1'))).
  175
  %   update(+SSpec, +PSpec, +OSpec, -OldTriple, -NewTriple)
  %
  %   Split the three update specifications into the rdf/3 term to
  %   retract and the rdf/3 term to assert.

  update(SSpec, PSpec, OSpec, OldTriple, NewTriple) :-
      OldTriple = rdf(OldS, OldP, OldO),
      NewTriple = rdf(NewS, NewP, NewO),
      update(SSpec, OldS, NewS),
      update(PSpec, OldP, NewP),
      update(OSpec, OldO, NewO).

  %   update(+Spec, -Old, -New)
  %
  %   If Spec unifies with Old->New we are done.  Otherwise Spec is
  %   an unchanged value that is both the old and the new value.

  update(Spec, Old, New) :-
      (   Spec = (Old->New)
      ->  true
      ;   Old = Spec,
          New = Spec
      ).
  183
  184
  %!  transaction_context(-Term) is det.
  %
  %   Term is the list of context properties that is logged with an
  %   RDF transaction: [session(Session)] if rdfh_session/1 provides
  %   a session and [] otherwise.  The user is passed separately by
  %   rdfh_transaction/1; it is not needed for simple additions, but
  %   it is needed to track deletions.

  transaction_context(Context) :-
      rdfh_session(Session),
      !,
      Context = [session(Session)].
  transaction_context([]).
  196
  %!  rdfh_session(-Session) is semidet.
  %
  %   Session is a (ground) identifier for the current session.  The
  %   hook rdfh_hook(session(Session)) is tried first.  If the hook
  %   fails we ask http_session_id/1, where any exception is mapped
  %   to failure.

  rdfh_session(Session) :-
      (   rdfh_hook(session(Session))
      ->  true
      ;   catch(http_session_id(Session), _, fail)
      ).
  206
  207
  %!  rdfh_user(-URI) is det.
  %
  %   URI identifies the user of the current session.  The hook
  %   rdfh_hook(user(URI)) is tried first.  If the hook fails, the
  %   OpenID stored in the HTTP session data is used.
  %
  %   @tbd    Use the SeRQL user/openid hooks

  rdfh_user(URI) :-
      (   rdfh_hook(user(URI))
      ->  true
      ;   http_session_data(openid(URI))
      ).
  219
  %!  rdfh_time(-Time:integer) is det.
  %
  %   Time is the current time, rounded to an integer number of
  %   seconds.  A resolution of one second suffices and using
  %   integers avoids the rounding problems of floats.

  rdfh_time(Time) :-
      get_time(Stamp),
      Time is round(Stamp).
  228
  229
  230                 /*******************************
  231                 *       EXAMINE HISTORY        *
  232                 *******************************/
  233
  %!  rdfh_triple_transaction(+Triple:rdf(S,P,O), -Transaction) is nondet.
  %
  %   True if the (partial) Triple is modified in Transaction.  For
  %   each matching triple in the database we take its graph and time
  %   stamp, enumerate the transactions of that graph starting one
  %   second before the stamp and select those that contain the
  %   assert action for the triple.

  rdfh_triple_transaction(rdf(S,P,O), Transaction) :-
      rdf(S, P, O, Graph:Stamp),
      Since is Stamp - 1,
      rdfh_db_transaction(Graph, after(Since), Transaction),
      rdfh_transaction_member(assert(S,P,O,Stamp), Transaction).
  243
  %!  rdfh_db_transaction(?DB, +Condition, ?Transaction) is nondet.
  %
  %   True if Transaction satisfying  Condition   was  executed on DB.
  %   Condition is one of:
  %
  %     * true
  %     Always true, returns all transactions.
  %     * id(Id)
  %     Specifies the identifier of the transaction.  Only makes sense
  %     if DB is specified as transaction identifiers are local to each
  %     DB.  Yields at most one transaction.
  %     * after(Time)
  %     True if transaction is executed at or after Time.
  %
  %   The journal stream is closed if the search fails, if an
  %   exception is raised and if the caller commits or runs out of
  %   solutions.
  %
  %   @error instantiation_error if Condition is unbound.
  %   @error type_error(condition, Condition) if Condition is not one
  %          of the above.
  %   @tbd  More conditions (e.g. before(Time)).

  rdfh_db_transaction(_DB, Condition, _Transaction) :-
      % Without this test an unbound Condition unifies with `true'
      % in the next clause and silently enumerates everything.
      var(Condition),
      !,
      instantiation_error(Condition).
  rdfh_db_transaction(DB, true, Transaction) :-
      !,
      rdf_journal_file(DB, Journal),
      journal_transaction(Journal, Transaction).
  rdfh_db_transaction(DB, id(Id), Transaction) :-
      !,
      must_be(atom, DB),
      rdf_journal_file(DB, Journal),
      % read_transaction/2 is nondeterministic: commit to the first
      % transaction read at the located position, so we do not
      % enumerate the transactions that follow it.
      setup_call_cleanup(
          open_journal(Journal, Fd),
          once(( seek_journal(Fd, id(Id)),
                 read_transaction(Fd, Found)
               )),
          close(Fd)),
      Transaction = Found.
  rdfh_db_transaction(DB, Condition, Transaction) :-
      valid_condition(Condition),
      rdf_journal_file(DB, Journal),
      % seek_journal/2 is part of the protected goal, such that the
      % stream is also closed if there is no matching transaction.
      setup_call_cleanup(
          open_journal(Journal, Fd),
          (   seek_journal(Fd, Condition),
              read_transaction(Fd, Transaction)
          ),
          close(Fd)).
  279
  %   valid_condition(+Condition)
  %
  %   Validate a search condition for which the journal must be
  %   scanned.  Only after(Time) with numeric Time is accepted.
  %
  %   @error instantiation_error if Condition is unbound.
  %   @error type_error(condition, Condition) for any other term.

  valid_condition(Condition) :-
      (   var(Condition)
      ->  instantiation_error(Condition)
      ;   Condition = after(Time)
      ->  must_be(number, Time)
      ;   type_error(condition, Condition)
      ).
  289
  %!  open_journal(+File, -Stream) is det.
  %
  %   Open the journal File for reading.  Journals are always read
  %   using UTF-8 encoding.

  open_journal(File, Stream) :-
      Options = [encoding(utf8)],
      open(File, read, Stream, Options).
  296
  %!  journal_transaction(+JournalFile, ?Transaction) is nondet.
  %
  %   True if Transaction is a transaction in JournalFile.  The file
  %   is opened here and closed by stream_transaction/2.

  journal_transaction(JournalFile, Transaction) :-
      open_journal(JournalFile, Stream),
      stream_transaction(Stream, Transaction).
  304
  %   stream_transaction(+Stream, ?Transaction) is nondet.
  %
  %   Enumerate the transactions that can be read from Stream using
  %   read_transaction/2 and close Stream when done.

  stream_transaction(Stream, Transaction) :-
      setup_call_cleanup(true,
                         read_transaction(Stream, Transaction),
                         close(Stream)).
  307
  %   read_transaction(+In, ?Transaction) is nondet.
  %
  %   Read terms from In until end of file.  Each term is handed to
  %   transaction/3.  If that succeeds, its result is unified with
  %   Transaction.  On backtracking we continue reading.

  read_transaction(In, Transaction) :-
      repeat,
          read(In, Term),
          (   Term \== end_of_file
          ->  transaction(Term, In, Found),   % transaction/3 is not steadfast
              Found = Transaction
          ;   !,                              % end of file: stop the repeat loop
              fail
          ).
  316
  %   transaction(+Term, +In, -Transaction) is semidet.
  %
  %   Term is a term that has just been read from In.  If it is a
  %   begin/4 term, read the remainder of the transaction from In
  %   and return it as an rdf_transaction record.  The start/1 and
  %   end/1 terms that mark opening and closing of the journal
  %   produce no result.  Any other term is returned unmodified.

  transaction(begin(Id, Nesting, Time, Message), In,
              rdf_transaction(Id, Nesting, Time, Message, Actions, Others)) :-
      !,
      read(In, Next),
      read_transaction_actions(Next, Id, In, Actions, Others).
  transaction(Term, _, _) :-
      (   Term = start(_)             % Open journal
      ;   Term = end(_)               % Close journal
      ),
      !,
      fail.
  transaction(Action, _, Action).     % Action outside transaction?
  325
  %   read_transaction_actions(+Term, +Id, +In, -Actions, -Others)
  %
  %   Collect the actions of transaction Id.  Term is the term last
  %   read from In.  Reading stops at the end/3 term carrying Id,
  %   which also provides Others.  Begin and end terms of other
  %   transactions and journal open/close markers are skipped.

  read_transaction_actions(end(Id, _, Others), Id, _, [], Others) :- !.
  read_transaction_actions(end_of_file, _, _, [], []) :- !. % TBD: Incomplete transaction (error)
  read_transaction_actions(Term, Id, In, Actions, Others) :-
      (   ignore_in_transaction(Term)
      ->  Actions = Rest
      ;   Actions = [Term|Rest]
      ),
      read(In, Next),
      read_transaction_actions(Next, Id, In, Rest, Others).

  %   ignore_in_transaction(?Term)
  %
  %   Terms that are not reported as action of a transaction.

  ignore_in_transaction(start(_)).
  ignore_in_transaction(end(_)).
  ignore_in_transaction(begin(_,_,_,_)).
  ignore_in_transaction(end(_,_,_)).
  341
  342
  %!  seek_journal(+Fd:stream, +Spec) is semidet.
  %
  %   Seek an open journal descriptor to the start of a transaction
  %   specified by Spec.  Spec is one of:
  %
  %     * after(Time)
  %     First transaction at or after Time.  Fails if there are no
  %     transactions after time.
  %     * id(Id)
  %     Start of transaction labeled with given Id.  Fails if there
  %     is no transaction labeled Id.
  %
  %   The implementation relies on the incrementing identifier numbers
  %   and time-stamps.  The binary search starts halfway the file.
  %   For after(Time), if there is no exact match we seek to the
  %   position that the search stored in the term last/1 using
  %   destructive assignment.

  seek_journal(Fd, Spec) :-
      stream_property(Fd, file_name(File)),
      size_file(File, Size),
      Middle is Size//2,
      Last = last(-),
      (   is_after_spec(Spec)
      ->  (   bsearch_journal(Fd, 0, Middle, Size, Spec, Last)
          ->  true
          ;   Last = last(Candidate),
              Candidate \== (-),          % the search stored a position
              seek(Fd, Candidate, bof, _)
          )
      ;   bsearch_journal(Fd, 0, Middle, Size, Spec, Last)
      ).

  %   is_after_spec(?Spec)
  %
  %   True if Spec is an after(Time) specification.

  is_after_spec(after(_)).
  374
  %!  bsearch_journal(+Fd, +Start, +Here, +End, +Spec, !Last) is semidet.
  %
  %   Perform a binary search in the journal opened as Fd.  We look
  %   for the first transaction following byte offset Here and
  %   compare its begin term to Spec:
  %
  %     - On a match, Fd is positioned at the begin term.
  %     - If Spec is before the transaction, the offset of the begin
  %       term is stored in the first argument of Last and we continue
  %       in Start..Here.
  %     - Otherwise we continue between the begin term and End.
  %
  %   If no transaction is found after Here, the search continues in
  %   Start..Here.  We fail if the range cannot be narrowed further.

  bsearch_journal(Fd, Start, Here, End, Spec, Last) :-
      start_of_transaction(Fd, Here, TermPos, BeginTerm),
      !,
      compare_transaction(Spec, BeginTerm, Order),
      (   Order == (=)
      ->  seek(Fd, TermPos, bof, _)
      ;   Order == (<)
      ->  Lower is Start+(Here-Start)//2,
          Lower < Here,
          nb_setarg(1, Last, TermPos),    % survives failure of the search
          bsearch_journal(Fd, Start, Lower, Here, Spec, Last)
      ;   Upper is TermPos+(End-TermPos)//2,
          Upper > TermPos,
          bsearch_journal(Fd, TermPos, Upper, End, Spec, Last)
      ).
  bsearch_journal(Fd, Start, Here, _End, Spec, Last) :-
      Lower is Start+(Here-Start)//2,
      Lower < Here,
      bsearch_journal(Fd, Start, Lower, Here, Spec, Last).
  398
  %   compare_transaction(+Spec, +Begin, -Order)
  %
  %   Compare the search specification to the begin/4 term of a
  %   transaction using compare/3.  For id(Id) the identifier is
  %   compared to the first argument of Begin and for after(Time)
  %   the time is compared to the third argument.

  compare_transaction(Spec, begin(TxId, _, TxTime, _), Order) :-
      (   Spec = id(Id)
      ->  compare(Order, Id, TxId)
      ;   Spec = after(Time)
      ->  compare(Order, Time, TxTime)
      ).
  405
  %!  start_of_transaction(+Fd, +From, -Start, -Term) is semidet.
  %
  %   Term is the begin term of the first transaction after byte
  %   position From and Start is the offset at which Term starts.
  %   Fails if no transaction can be found after From.  As From may
  %   be in the middle of a term, we skip to the end of the line
  %   before we start reading.

  start_of_transaction(Fd, From, Start, Term) :-
      seek(Fd, From, bof, _),
      skip(Fd, 0'\n),
      repeat,
          seek(Fd, 0, current, Start),
          read(Fd, Term),
          (   Term == end_of_file
          ->  !,
              fail
          ;   transaction_start(Term)
          ),
      !.

  %   transaction_start(?Term)
  %
  %   True if Term is the term that starts a transaction.

  transaction_start(begin(_Id,_Nest,_Time,_Message)).
  425
  %!  rdfh_transaction_member(?Action, +Transaction) is nondet.
  %
  %   True if Action is an action in Transaction.  The actions are
  %   obtained using the record accessor rdf_transaction_actions/2
  %   and enumerated in the order in which they are stored.

  rdfh_transaction_member(Action, Transaction) :-
      rdf_transaction_actions(Transaction, Actions),
      member(Action, Actions).
  432    member(Action, Actions)