1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2023, VU University Amsterdam 7 SWI-Prolog Solutions b.v. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 
34*/ 35 36:- module(uri, 37 [ uri_components/2, % ?URI, ?Components 38 uri_data/3, % ?Field, +Components, ?Data 39 uri_data/4, % +Field, +Components, -Data, -New 40 uri_edit/3, % +Actions,+URI0,-URI 41 42 uri_normalized/2, % +URI, -NormalizedURI 43 iri_normalized/2, % +IRI, -NormalizedIRI 44 uri_normalized_iri/2, % +URI, -NormalizedIRI 45 uri_normalized/3, % +URI, +Base, -NormalizedURI 46 iri_normalized/3, % +IRI, +Base, -NormalizedIRI 47 uri_normalized_iri/3, % +URI, +Base, -NormalizedIRI 48 uri_resolve/3, % +URI, +Base, -AbsURI 49 uri_is_global/1, % +URI 50 uri_query_components/2, % ?QueryString, ?NameValueList 51 uri_authority_components/2, % ?Authority, ?Components 52 uri_authority_data/3, % ?Field, ?Components, ?Data 53 % Encoding 54 uri_encoded/3, % +Component, ?Value, ?Encoded 55 uri_file_name/2, % ?URI, ?Path 56 uri_iri/2 % ?URI, ?IRI 57 ]). 58:- autoload(library(error), [domain_error/2]). 59:- use_foreign_library(foreign(uri)).
^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 12 3 4 5 6 7 8 9
scheme
, authority
, path
, search
and fragment
%!  uri_data(?Field, +Components, ?Data)
%
%   Relate Field to the argument of the uri_components/5 term
%   Components that holds it. The five facts below map the field
%   names `scheme`, `authority`, `path`, `search` and `fragment` to
%   argument positions 1..5 respectively. Leaving Field unbound
%   enumerates the fields in that order.

uri_data(scheme,    uri_components(Scheme, _, _, _, _),    Scheme).
uri_data(authority, uri_components(_, Authority, _, _, _), Authority).
uri_data(path,      uri_components(_, _, Path, _, _),      Path).
uri_data(search,    uri_components(_, _, _, Search, _),    Search).
uri_data(fragment,  uri_components(_, _, _, _, Fragment),  Fragment).
%!  uri_data(+Field, +Components, -Data, -New)
%
%   New is a copy of the uri_components/5 term Components in which
%   the argument selected by Field is replaced by Data. All other
%   arguments are shared between Components and New; the old value of
%   the selected argument is ignored.

uri_data(scheme,
         uri_components(_, Authority, Path, Search, Fragment),
         Scheme,
         uri_components(Scheme, Authority, Path, Search, Fragment)).
uri_data(authority,
         uri_components(Scheme, _, Path, Search, Fragment),
         Authority,
         uri_components(Scheme, Authority, Path, Search, Fragment)).
uri_data(path,
         uri_components(Scheme, Authority, _, Search, Fragment),
         Path,
         uri_components(Scheme, Authority, Path, Search, Fragment)).
uri_data(search,
         uri_components(Scheme, Authority, Path, _, Fragment),
         Search,
         uri_components(Scheme, Authority, Path, Search, Fragment)).
uri_data(fragment,
         uri_components(Scheme, Authority, Path, Search, _),
         Fragment,
         uri_components(Scheme, Authority, Path, Search, Fragment)).
%!  uri_is_global(+URI) is semidet.
%
%   True when URI, once split by uri_components/2, has a bound scheme
%   whose name is at least two characters long.
%
%   NOTE(review): the length threshold presumably keeps single-letter
%   prefixes such as a Windows drive letter from being taken for a
%   scheme -- confirm against the library documentation.

uri_is_global(URI) :-
    uri_components(URI, Parts),
    uri_data(scheme, Parts, Scheme),
    nonvar(Scheme),                     % no scheme -> not global
    atom_length(Scheme, SchemeLength),
    SchemeLength >= 2.
%!  uri_normalized(+URI, +Base, -NormalizedGlobalURI)
%
%   Resolve URI against Base using uri_resolve/3 and normalize the
%   resulting URI with uri_normalized/2.

uri_normalized(URI, Base, NormalizedGlobalURI) :-
    uri_resolve(URI, Base, Resolved),
    uri_normalized(Resolved, NormalizedGlobalURI).
%!  uri_normalized_iri(+URI, +Base, -NormalizedGlobalIRI)
%
%   Resolve URI against Base using uri_resolve/3 and normalize the
%   resulting URI with uri_normalized_iri/2.
%
%   The head used to be named uri_normalized/3. That added a second,
%   IRI-producing solution to uri_normalized/3 on backtracking and
%   left the exported uri_normalized_iri/3 without a definition.

uri_normalized_iri(URI, Base, NormalizedGlobalIRI) :-
    uri_resolve(URI, Base, GlobalURI),
    uri_normalized_iri(GlobalURI, NormalizedGlobalIRI).
?- uri_query_components(QS, [a=b, c('d+w'), n-'VU Amsterdam']). QS = 'a=b&c=d%2Bw&n=VU%20Amsterdam'. ?- uri_query_components('a=b&c=d%2Bw&n=VU%20Amsterdam', Q). Q = [a=b, c='d+w', n='VU Amsterdam'].
[ip]
,
returning the ip as host
, without the enclosing []
. When
constructing an authority string and the host contains :
, the
host is embraced in []
. If []
is not used correctly, the
behavior should be considered poorly defined. If there is no
balancing `]` or the host part does not end with `]`, these
characters are considered normal characters and part of the
(invalid) host name. user
, password
, host
and port
%!  uri_authority_data(?Field, ?Components, ?Data)
%
%   Relate Field to the argument of the uri_authority/4 term
%   Components that holds it. The field names `user`, `password`,
%   `host` and `port` map to argument positions 1..4 respectively.

uri_authority_data(user,     uri_authority(User, _, _, _),     User).
uri_authority_data(password, uri_authority(_, Password, _, _), Password).
uri_authority_data(host,     uri_authority(_, _, Host, _),     Host).
uri_authority_data(port,     uri_authority(_, _, _, Port),     Port).
query_value
, fragment
, path
or
segment
. Besides alphanumerical characters, the following
characters are passed verbatim (the set is split in logical groups
according to RFC3986).
%!  uri_file_name(?URI, ?FileName)
%
%   Convert between a `file` URI and a local file name.
%
%     - If URI is bound it is split into components. The scheme must
%       be `file` and the authority must unify with either '' or
%       `localhost`. The path is then decoded with uri_encoded/3 and
%       passed through delete_leading_slash/2 to produce FileName.
%     - Otherwise, if FileName is bound, it is made absolute, given a
%       leading `/`, encoded as a path and assembled into a URI with
%       scheme `file` and an empty authority.
%
%   Fails if neither argument is bound.

uri_file_name(URI, FileName) :-
    nonvar(URI),
    !,
    uri_components(URI, Parts),
    uri_data(scheme, Parts, Scheme),
    Scheme == file,
    uri_data(authority, Parts, Authority),
    (   Authority = ''                  % also binds a missing authority
    ->  true
    ;   Authority = localhost
    ),
    uri_data(path, Parts, EncodedPath),
    uri_encoded(path, DecodedPath, EncodedPath),
    delete_leading_slash(DecodedPath, FileName).
uri_file_name(URI, FileName) :-
    nonvar(FileName),
    !,
    absolute_file_name(FileName, AbsPath),
    ensure_leading_slash(AbsPath, SlashPath),
    uri_encoded(path, SlashPath, EncodedPath),
    uri_data(scheme,    Parts, file),
    uri_data(authority, Parts, ''),
    uri_data(path,      Parts, EncodedPath),
    uri_components(URI, Parts).
%!  ensure_leading_slash(+Path, -SlashPath) is det.
%
%   SlashPath is Path if Path already starts with `/`; otherwise it
%   is Path with a `/` prepended.

ensure_leading_slash(Path, SlashPath) :-
    sub_atom(Path, 0, _, _, /),
    !,
    SlashPath = Path.
ensure_leading_slash(Path, SlashPath) :-
    atom_concat(/, Path, SlashPath).

%!  delete_leading_slash(+Path, -FileName) is det.
%
%   On Windows only: if removing one leading `/` from Path leaves an
%   absolute file name, FileName is that remainder. In every other
%   case FileName is Path unchanged.

:- if(current_prolog_flag(windows, true)).
delete_leading_slash(Path, Stripped) :-
    atom_concat(/, Stripped, Path),
    is_absolute_file_name(Stripped),
    !.
:- endif.
delete_leading_slash(Path, Path).


                 /*******************************
                 *          MODIFYING           *
                 *******************************/
http
, https
, etc.) path
component. If Path is not absolute it
is taken relative to the path of URI0. Key=Value
pairs of the current search (query)
component. New values replace existing values. If KeyValues
is written as =(KeyValues) the current search component is
ignored. KeyValues is a list, whose elements are one of
Key=Value
, Key-Value
or `Key(Value)`.
Components can be removed by using a variable as value, except
from path
which can be reset using path(/)
and query which can
be dropped using query(=([]))
.
%!  uri_edit(+Actions, +URI0, -URI) is det.
%
%   URI is the result of applying Actions to URI0. If URI0 is unbound
%   editing starts from the URI `/`. Actions is a single action term
%   or a (nested) list of them; see edit_components/3 for the
%   recognised actions.
%
%   @error domain_error(uri_edit, Action) if an action is not
%          recognised.

uri_edit(Actions, URI0, URI) :-
    (   var(URI0)
    ->  URI1 = '/'
    ;   URI1 = URI0
    ),
    uri_components(URI1, Comp0),
    edit_components(Actions, Comp0, Comp),
    uri_components(URI, Comp).

%!  edit_components(+Action, +Comp0, -Comp) is det.
%
%   Apply Action to the uri_components/5 term Comp0. Recognised
%   actions:
%
%     - A list: the elements are applied left to right.
%     - scheme(S), fragment(F): replace that component.
%     - path(P): resolve P against the current path (or `/` if the
%       current path is unbound or empty) and store the result.
%     - user(_), password(_), host(_), port(_): replace one field of
%       the authority component.
%     - search(=(KV)): replace the search component by the encoding
%       of KV.
%     - search(KV): merge KV into the current search component using
%       join_search/3.
%     - query(X): alias for search(X).
%
%   Any other term raises a domain error.

edit_components([], Comp0, Comp) =>
    Comp = Comp0.
edit_components([H|T], Comp0, Comp) =>
    edit_components(H, Comp0, Comp1),
    edit_components(T, Comp1, Comp).
edit_components(scheme(Scheme), Comp0, Comp) =>
    uri_data(scheme, Comp0, Scheme, Comp).
edit_components(path(Path), Comp0, Comp) =>
    uri_data(path, Comp0, Path0),
    (   (   var(Path0)
        ;   Path0 == ''
        )
    ->  Path1 = '/'
    ;   Path1 = Path0
    ),
    uri_normalized(Path, Path1, Path2),
    uri_data(path, Comp0, Path2, Comp).
edit_components(fragment(Fragment), Comp0, Comp) =>
    uri_data(fragment, Comp0, Fragment, Comp).
edit_components(Authority, Comp0, Comp),
    authority_field(Authority) =>
    uri_data(authority, Comp0, Auth0),
    (   var(Auth0)
    ->  true                            % AComp0 stays unbound; all
                                        % other fields start fresh
    ;   uri_authority_components(Auth0, AComp0)
    ),
    edit_auth_components(Authority, AComp0, AComp),
    uri_authority_components(Auth, AComp),
    uri_data(authority, Comp0, Auth, Comp).
edit_components(query(Search), Comp0, Comp) =>
    edit_components(search(Search), Comp0, Comp).
edit_components(search(=(Search)), Comp0, Comp) =>
    uri_query_components(String, Search),
    uri_data(search, Comp0, String, Comp).
edit_components(search(Search), Comp0, Comp) =>
    uri_data(search, Comp0, SS0),
    (   var(SS0)
    ->  Search0 = []
    ;   uri_query_components(SS0, Search0)
    ),
    join_search(Search0, Search, Search1),
    uri_query_components(SS1, Search1),
    uri_data(search, Comp0, SS1, Comp).
edit_components(Other, _, _) =>
    domain_error(uri_edit, Other).

%!  authority_field(+Action) is semidet.
%
%   True when Action edits a field of the authority component. The
%   user(_) clause was garbled into a syntax error; without it
%   user(...) actions fell through to the domain error above.

authority_field(user(_)).
authority_field(password(_)).
authority_field(host(_)).
authority_field(port(_)).

%!  edit_auth_components(+Action, +AComp0, -AComp) is det.
%
%   AComp is the uri_authority/4 term AComp0 with the field named by
%   Action replaced; the other three fields are shared.

edit_auth_components(user(User),
                     uri_authority(_, Passwd, Host, Port),
                     uri_authority(User, Passwd, Host, Port)).
edit_auth_components(password(Passwd),
                     uri_authority(User, _, Host, Port),
                     uri_authority(User, Passwd, Host, Port)).
edit_auth_components(host(Host),
                     uri_authority(User, Passwd, _, Port),
                     uri_authority(User, Passwd, Host, Port)).
edit_auth_components(port(Port),
                     uri_authority(User, Passwd, Host, _),
                     uri_authority(User, Passwd, Host, Port)).

%!  join_search(+Old, +New, -Joined) is det.
%
%   Joined holds the Name=Value pairs of Old whose Name does not occur
%   in New (as Name=_, Name(_) or Name-_), followed by New itself.
%   Thus values in New override those in Old.

join_search([], Search, Search).
join_search([N=_|ST], New, Search) :-
    (   memberchk(N=_, New)
    ->  true
    ;   functor(T, N, 1),
        memberchk(T, New)
    ->  true
    ;   memberchk(N-_, New)
    ),
    !,                                  % overridden: drop the old pair
    join_search(ST, New, Search).
join_search([H|ST], New, [H|Search]) :-
    join_search(ST, New, Search).


                 /*******************************
                 *            SANDBOX           *
                 *******************************/

% Declare the predicates listed below as safe primitives for
% library(sandbox).

:- multifile sandbox:safe_primitive/1.

sandbox:safe_primitive(uri:uri_components(_,_)).
sandbox:safe_primitive(uri:uri_normalized(_,_)).
sandbox:safe_primitive(uri:iri_normalized(_,_)).
sandbox:safe_primitive(uri:uri_normalized_iri(_,_)).
sandbox:safe_primitive(uri:uri_normalized(_,_,_)).
sandbox:safe_primitive(uri:iri_normalized(_,_,_)).
sandbox:safe_primitive(uri:uri_normalized_iri(_,_,_)).
sandbox:safe_primitive(uri:uri_resolve(_,_,_)).
sandbox:safe_primitive(uri:uri_is_global(_)).
sandbox:safe_primitive(uri:uri_query_components(_,_)).
sandbox:safe_primitive(uri:uri_authority_components(_,_)).
sandbox:safe_primitive(uri:uri_encoded(_,_,_)).
sandbox:safe_primitive(uri:uri_iri(_,_)).
Process URIs
This library provides high-performance C-based primitives for manipulating URIs. We decided for a C-based implementation for the much better performance on raw character manipulation. Notably, URI handling primitives are used in time-critical parts of RDF processing. This implementation is based on RFC-3986:
The URI processing in this library is rather liberal. That is, we break URIs according to the rules, but we do not validate that the components are valid. Also, percent-decoding for IRIs is liberal. It first tries UTF-8; then ISO-Latin-1 and finally accepts %-characters verbatim.
Earlier experience has shown that strict enforcement of the URI syntax results in many errors that are accepted by many other web-document processing tools. */