Skip to content

Commit f061723

Browse files
committed
Read and write annotations.
1 parent cf1a677 commit f061723

File tree

4 files changed

+578
-49
lines changed

4 files changed

+578
-49
lines changed

lib/rdf/rdfxml/reader.rb

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class Reader < RDF::Reader
1818
format Format
1919
include RDF::Util::Logger
2020

21-
CORE_SYNTAX_TERMS = %w(RDF ID about parseType resource nodeID datatype).map {|n| "http://www.w3.org/1999/02/22-rdf-syntax-ns##{n}"}
21+
CORE_SYNTAX_TERMS = %w(RDF ID annotation annotationNodeID about parseType resource nodeID datatype).map {|n| "http://www.w3.org/1999/02/22-rdf-syntax-ns##{n}"}
2222
OLD_TERMS = %w(aboutEach aboutEachPrefix bagID).map {|n| "http://www.w3.org/1999/02/22-rdf-syntax-ns##{n}"}
2323

2424
# The Recursive Baggage
@@ -393,6 +393,7 @@ def nodeElement(el, ec)
393393
# is deleted.
394394
attrs = {}
395395
id = datatype = parseType = resourceAttr = nodeID = nil
396+
annotation = annotationNodeID = nil
396397

397398
child.attribute_nodes.each do |attr|
398399
if attr.namespace.to_s.empty?
@@ -406,35 +407,43 @@ def nodeElement(el, ec)
406407
# No production. Lang and base elements already extracted
407408
elsif attr.namespace.href == RDF.to_uri.to_s
408409
case attr.name
409-
when "ID" then id = attr.value
410-
when "datatype" then datatype = attr.value
411-
when "parseType" then parseType = attr.value
412-
when "resource" then resourceAttr = attr.value
413-
when "nodeID" then nodeID = attr.value
414-
when "version" then nil # version already extracted
415-
else attrs[attr] = attr.value
410+
when "annotation"
411+
annotation = ec.base.join(RDF::NTriples.unescape(attr.value))
412+
when "annotationNodeID"
413+
nodeID_check(child, RDF::NTriples.unescape(attr.value))
414+
annotationNodeID = bnode(attr.value)
415+
when "datatype" then datatype = attr.value
416+
when "ID" then id = attr.value
417+
when "nodeID" then nodeID = attr.value
418+
when "parseType" then parseType = attr.value
419+
when "resource" then resourceAttr = attr.value
420+
when "version" then nil # version already extracted
421+
else attrs[attr] = attr.value
416422
end
417423
elsif attr.namespace.href == RDF::ITS.to_s
418424
# No production. Direction already extracted
419425
else
420426
attrs[attr] = attr.value
421427
end
422428
end
423-
424-
add_error(el, "Cannot have rdf:nodeID and rdf:resource.") if nodeID && resourceAttr
429+
430+
add_error(child, "Cannot have rdf:nodeID and rdf:resource.") if nodeID && resourceAttr
431+
add_error(child, "Cannot have rdf:annotationNodeID and rdf:annotation.") if annotationNodeID && annotation
425432

426433
# Apply character transformations
427434
id = id_check(el, RDF::NTriples.unescape(id), nil) if id
428435
resourceAttr = RDF::NTriples.unescape(resourceAttr) if resourceAttr
429436
nodeID = nodeID_check(el, RDF::NTriples.unescape(nodeID)) if nodeID
430437

431-
add_debug(child) {"attrs: #{attrs.inspect}"}
438+
#add_debug(child) {"attrs: #{attrs.inspect}"}
432439
add_debug(child) {"datatype: #{datatype}"} if datatype
433440
add_debug(child) {"parseType: #{parseType}"} if parseType
434441
add_debug(child) {"resource: #{resourceAttr}"} if resourceAttr
435442
add_debug(child) {"nodeID: #{nodeID}"} if nodeID
443+
add_debug(child) {"annotation: #{nodeID}"} if annotation
444+
add_debug(child) {"annotationNodeID: #{nodeID}"} if annotationNodeID
436445
add_debug(child) {"id: #{id}"} if id
437-
446+
438447
if attrs.empty? && datatype.nil? && parseType.nil? && element_nodes.size == 1
439448
# Production resourcePropertyElt
440449

@@ -446,6 +455,9 @@ def nodeElement(el, ec)
446455
add_debug(child) {"resourcePropertyElt: #{node_path(new_node_element)}"}
447456
new_subject = nodeElement(new_node_element, new_ec)
448457
add_triple(child, subject, predicate, new_subject)
458+
reify(id, child, subject, predicate, new_subject, ec) if id
459+
annotate(annotation, child, subject, predicate, new_subject, ec) if annotation
460+
annotate(annotationNodeID, child, subject, predicate, new_subject, ec) if annotationNodeID
449461
elsif attrs.empty? && parseType.nil? && element_nodes.size == 0 && text_nodes.size > 0
450462
# Production literalPropertyElt
451463
add_debug(child, "literalPropertyElt")
@@ -460,6 +472,8 @@ def nodeElement(el, ec)
460472
literal = RDF::Literal.new(child.inner_text, **literal_opts)
461473
add_triple(child, subject, predicate, literal)
462474
reify(id, child, subject, predicate, literal, ec) if id
475+
annotate(annotation, child, subject, predicate, literal, ec) if annotation
476+
annotate(annotationNodeID, child, subject, predicate, literal, ec) if annotationNodeID
463477
elsif parseType == 'Resource'
464478
# Production parseTypeResourcePropertyElt
465479
add_debug(child, "parseTypeResourcePropertyElt")
@@ -474,7 +488,9 @@ def nodeElement(el, ec)
474488

475489
# Reification
476490
reify(id, child, subject, predicate, n, child_ec) if id
477-
491+
annotate(annotation, child, subject, predicate, n, child_ec) if annotation
492+
annotate(annotationNodeID, child, subject, predicate, n, child_ec) if annotationNodeID
493+
478494
# If the element content c is not empty, then use event n to create a new sequence of events as follows:
479495
#
480496
# start-element(URI := rdf:Description,
@@ -506,7 +522,9 @@ def nodeElement(el, ec)
506522
n = s.first || RDF["nil"]
507523
add_triple(child, subject, predicate, n)
508524
reify(id, child, subject, predicate, n, child_ec) if id
509-
525+
annotate(annotation, child, subject, predicate, n, child_ec) if annotation
526+
annotate(annotationNodeID, child, subject, predicate, n, child_ec) if annotationNodeID
527+
510528
# Add first/rest entries for all list elements
511529
s.each_index do |i|
512530
n = s[i]
@@ -591,6 +609,8 @@ def nodeElement(el, ec)
591609

592610
# Reification
593611
reify(id, child, subject, predicate, literal, child_ec) if id
612+
annotate(annotation, child, subject, predicate, literal, child_ec) if annotation
613+
annotate(annotationNodeID, child, subject, predicate, literal, child_ec) if annotationNodeID
594614
else
595615
resource = if resourceAttr
596616
uri(ec.base, resourceAttr)
@@ -619,6 +639,8 @@ def nodeElement(el, ec)
619639

620640
# Reification
621641
reify(id, child, subject, predicate, resource, child_ec) if id
642+
annotate(annotation, child, subject, predicate, resource, child_ec) if annotation
643+
annotate(annotationNodeID, child, subject, predicate, resource, child_ec) if annotationNodeID
622644
end
623645
end
624646
end
@@ -638,6 +660,12 @@ def reify(id, el, subject, predicate, object, ec)
638660
add_triple(el, rsubject, RDF.type, RDF["Statement"])
639661
end
640662

663+
# Annotate a triple by asserting that +id+ reifies the corresponding triple term.
#
# A triple term is built from +subject+, +predicate+, and +object+, and the
# triple <tt>id rdf:reifies (triple term)</tt> is added via +add_triple+.
#
# @param id
#   Reifier of the triple term; callers pass the value derived from
#   rdf:annotation or rdf:annotationNodeID.
# @param el
#   Current XML element, passed through to +add_debug+ and +add_triple+.
# @param subject
#   Subject of the annotated triple.
# @param predicate
#   Predicate of the annotated triple.
# @param object
#   Object of the annotated triple.
# @param ec
#   EvaluationContext; not used in this method.
def annotate(id, el, subject, predicate, object, ec)
  add_debug(el, "annotate, id: #{id}")
  triple_term = RDF::Statement.new(subject, predicate, object, tripleTerm: true)
  add_triple(el, id, RDF.reifies, triple_term)
end
668+
641669
# Figure out the subject from the element.
642670
def parse_subject(el, ec)
643671
old_property_check(el)

lib/rdf/rdfxml/writer.rb

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ def reset
196196
@references = {}
197197
@serialized = {}
198198
@subjects = {}
199+
@reification = {}
200+
@as_annotation = {}
199201
end
200202

201203
# Render document. Yields each subject to be rendered separately.
@@ -245,7 +247,7 @@ def render_document(subjects, lang: nil, base: nil, **options, &block)
245247
#
246248
# Yields each property to be rendered separately.
247249
#
248-
# @param [Array<RDF::Resource>] subject
250+
# @param [RDF::Resource] subject
249251
# Subject to render
250252
# @param [Builder::RdfXml] builder
251253
# @param [Hash{Symbol => Object}] options Rendering options passed to builder.
@@ -296,7 +298,7 @@ def render_subject(subject, builder, **options, &block)
296298

297299
log_depth do
298300
embed_props.each do |p, objects|
299-
render_property(p, objects, b, **options)
301+
render_property(subject, p, objects, b, **options)
300302
end
301303
end
302304
end
@@ -306,13 +308,14 @@ def render_subject(subject, builder, **options, &block)
306308
#
307309
# If a multi-valued property definition is not found within the template, the writer will use the single-valued property definition multiple times.
308310
#
311+
# @param [RDF::Resource] subject
309312
# @param [String] property
310313
# Property to render, already in QName form.
311314
# @param [Array<RDF::Resource>] objects
312315
# List of objects to render. If the list contains only a single element, the :property_value template will be used. Otherwise, the :property_values template is used.
313316
# @param [Builder::RdfXml] builder
314317
# @param [Hash{Symbol => Object}] options Rendering options.
315-
def render_property(property, objects, builder, **options)
318+
def render_property(subject, property, objects, builder, **options)
316319
log_debug {"render_property(#{property}): #{objects.inspect}"}
317320
property = get_qname(property) if property.is_a?(RDF::URI)
318321

@@ -329,7 +332,7 @@ def render_property(property, objects, builder, **options)
329332
log_debug(depth: log_depth + 1) {"properties with lists: #{lists} non-lists: #{objects - lists.map(&:subject)}"}
330333

331334
unless objects.empty?
332-
render_property(property, objects, builder, **options)
335+
render_property(subject, property, objects, builder, **options)
333336
end
334337

335338
# Render each list
@@ -347,35 +350,49 @@ def render_property(property, objects, builder, **options)
347350
if objects.length == 1
348351
recurse = log_depth <= @max_depth
349352
object = objects.first
350-
353+
attrs = {}
354+
355+
# If there is a single reifier for this statement, write out annotation
356+
tt = RDF::Statement(subject, @uri_to_qname.invert[property], object)
357+
reifs = @reification.select {|k, v| v.include?(tt)}.keys
358+
if reifs.length == 1
359+
reif = reifs.first
360+
@as_annotation[reif] = tt
361+
if reif.iri?
362+
attrs['rdf:annotation'] = reif.relativize(base_uri)
363+
else
364+
attrs['rdf:annotationNodeID'] = reif.id
365+
end
366+
end
367+
351368
if recurse && !is_done?(object)
352-
builder.tag!(property) do |b|
369+
builder.tag!(property, **attrs) do |b|
353370
render_subject(object, b, **options)
354371
end
355372
elsif object.literal? && object.datatype == RDF.XMLLiteral
356-
builder.tag!(property, "rdf:parseType": "Literal", no_whitespace: true) do |b|
373+
builder.tag!(property, "rdf:parseType": "Literal", no_whitespace: true, **attrs) do |b|
357374
b << object.value
358375
end
359376
elsif object.literal?
360-
attrs = {}
361377
attrs[:"xml:lang"] = object.language if object.language?
362378
attrs[:"rdf:datatype"] = object.datatype if object.datatype?
363379
attrs[:"its:dir"] = object.direction if object.direction?
364380
builder.tag!(property, object.value.to_s, **attrs)
365381
elsif object.statement?
382+
# Just write out the triple term, unless it is annotated
366383
builder.tag!(property, "rdf:parseType": "Triple") do |b|
367384
render_triple_term(object, b, **options)
368-
end
385+
end unless @as_annotation.key?(subject)
369386
elsif object.node?
370-
builder.tag!(property, "rdf:nodeID": object.id)
371-
else
372-
builder.tag!(property, "rdf:resource": object.relativize(base_uri))
387+
builder.tag!(property, "rdf:nodeID": object.id, **attrs)
388+
elsif object
389+
builder.tag!(property, "rdf:resource": object.relativize(base_uri), **attrs)
373390
end
374391
else
375392
# Render each property using property_value template
376393
objects.each do |object|
377394
log_depth do
378-
render_property(property, [object], builder, **options)
395+
render_property(subject, property, [object], builder, **options)
379396
end
380397
end
381398
end
@@ -389,7 +406,7 @@ def render_triple_term(term, builder, **options)
389406
attr_props = attr_props.merge("rdf:about": term.subject.relativize(base_uri)) if term.subject.uri?
390407

391408
builder.tag!("rdf:Description", **attr_props) do |b|
392-
render_property(term.predicate, [term.object], b)
409+
render_property(term.subject, term.predicate, [term.object], b)
393410
end
394411
end
395412

@@ -488,6 +505,20 @@ def preprocess_statement(statement)
488505
else
489506
@version = "1.2"
490507
end
508+
509+
bump_reference(statement.subject)
510+
511+
# If this statement is also asserted, note it as an annotation
512+
513+
# Also count references of triple terms
514+
preprocess_statement(statement.object) if statement.object.statement?
515+
516+
# If it fits, allow this to be rendered as an annotation
517+
if statement.predicate == RDF.reifies
518+
@reification[statement.subject] ||= []
519+
@reification[statement.subject] << statement.object unless
520+
@reification[statement.subject].include?(statement.object)
521+
end
491522
end
492523
end
493524

@@ -526,7 +557,8 @@ def order_subjects
526557

527558
log_debug {"order_subjects: #{recursable.inspect}"}
528559

529-
subjects += recursable.map{|r| r.last}
560+
# Partition recursable subjects into those that are not reifiers and those that are, so that reifiers come last.
561+
subjects += recursable.map{|r| r.last}.partition {|r| !@reification.key?(r)}.flatten
530562
end
531563

532564
# @param [RDF::Resource] subject

0 commit comments

Comments
 (0)