Class: RDF::NTriples::Reader

Inherits:
Reader
  • Object
show all
Defined in:
lib/rdf/ntriples/reader.rb

Overview

N-Triples parser.

Examples:

Obtaining an NTriples reader class

# Each call hands RDF::Reader.for a different hint: a format symbol, a file
# name, or an options hash keyed by file name, file extension or content type.
# Only the first call's result is shown; the others presumably resolve to the
# same class -- confirm against RDF::Reader.for.
RDF::Reader.for(:ntriples)     #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(:file_name      => "etc/doap.nt")
RDF::Reader.for(:file_extension => "nt")
RDF::Reader.for(:content_type   => "text/plain")

Parsing RDF statements from an NTriples file

# Open the file with the N-Triples reader and print each statement it yields.
RDF::NTriples::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

Parsing RDF statements from an NTriples string

# Load the whole file into memory and wrap it in a StringIO, so the reader is
# constructed from in-memory data instead of a path.
data = StringIO.new(File.read("etc/doap.nt"))
RDF::NTriples::Reader.new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

See Also:

Direct Known Subclasses

RDF::NQuads::Reader

Constant Summary

# A comment line; captures the text after the leading "#" and any whitespace.
COMMENT               = /^#\s*(.*)$/.freeze
# A blank node label; captures the identifier after "_:".
NODEID                = /^_:([A-Za-z][A-Za-z0-9\-_]*)/.freeze
# An angle-bracketed URI reference; captures the text between "<" and ">".
URIREF                = /^<([^>]+)>/.freeze
# A double-quoted string (\" allowed inside); captures the quoted content.
LITERAL_PLAIN         = /^"((?:\\"|[^"])*)"/.freeze
# A quoted string followed by "@" and a language tag; captures both parts.
LITERAL_WITH_LANGUAGE = /^"((?:\\"|[^"])*)"@([a-z]+[\-A-Za-z0-9]*)/.freeze
# A quoted string followed by "^^" and a datatype URI; captures both parts.
LITERAL_WITH_DATATYPE = /^"((?:\\"|[^"])*)"\^\^<([^>]+)>/.freeze
# A language tag suffix on its own; captures the tag after "@".
LANGUAGE_TAG          = /^@([a-z]+[\-A-Za-z0-9]*)/.freeze
# A datatype suffix on its own; captures the URI after "^^".
DATATYPE_URI          = /^\^\^<([^>]+)>/.freeze

# Any literal form. The tagged/typed forms come first because the plain
# pattern also matches their quoted prefix.
LITERAL   = Regexp.union(LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE, LITERAL_PLAIN).freeze
# Terms accepted in the subject, predicate and object positions respectively.
SUBJECT   = Regexp.union(URIREF, NODEID).freeze
PREDICATE = Regexp.union(URIREF).freeze
OBJECT    = Regexp.union(URIREF, NODEID, LITERAL).freeze

# Characters that have a single-character backslash escape.
ESCAPE_CHARS      = ["\t", "\n", "\r", "\"", "\\"].freeze
# A \uXXXX escape; captures the four hex digits.
ESCAPE_CHAR4      = /\\u([0-9A-Fa-f]{4,4})/.freeze
# A \UXXXXXXXX escape; captures the eight hex digits.
ESCAPE_CHAR8      = /\\U([0-9A-Fa-f]{8,8})/.freeze
# Either code point escape form.
ESCAPE_CHAR       = Regexp.union(ESCAPE_CHAR4, ESCAPE_CHAR8).freeze
# Two consecutive \uXXXX escapes; captures both groups of hex digits.
ESCAPE_SURROGATE  = /\\u([0-9A-Fa-f]{4,4})\\u([0-9A-Fa-f]{4,4})/.freeze
# Code unit ranges for the first and second half of a UTF-16 surrogate pair.
ESCAPE_SURROGATE1 = (0xD800..0xDBFF).freeze
ESCAPE_SURROGATE2 = (0xDC00..0xDFFF).freeze

Class Method Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from Reader

#canonicalize?, #close, each, #each_statement, #each_triple, #encoding, #fail_object, #fail_predicate, #fail_subject, for, format, #initialize, #intern?, open, #prefix, #prefixes, #prefixes=, #read_statement, #rewind, #validate?

Methods included from Util::Aliasing::LateBound

#alias_method

Methods included from Enumerable

#contexts, #dump, #each_context, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_triple, #enum_context, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_triple, #has_context?, #has_object?, #has_predicate?, #has_quad?, #has_statement?, #has_subject?, #has_triple?, #objects, #predicates, #quads, #statements, #subjects, #to_a, #to_hash, #to_set, #triples

Methods included from Countable

#count, #empty?

Methods included from Readable

#readable?

Constructor Details

This class inherits a constructor from RDF::Reader

Class Method Details

+ (RDF::Literal) parse_literal(input)

Parameters:

  • (String) input

Returns:



109
110
111
112
113
114
115
116
117
118
# File 'lib/rdf/ntriples/reader.rb', line 109

##
# Builds a literal from its serialized N-Triples form.
#
# The language-tagged and datatyped patterns are tested before the plain one
# because the plain pattern also matches the quoted prefix of the other two.
#
# @param  [String] input
# @return [RDF::Literal, nil] nil when `input` matches no literal pattern
def self.parse_literal(input)
  case input
  when LITERAL_WITH_LANGUAGE
    lexical, language = Regexp.last_match(1), Regexp.last_match(2)
    RDF::Literal.new(unescape(lexical), :language => language)
  when LITERAL_WITH_DATATYPE
    lexical, datatype = Regexp.last_match(1), Regexp.last_match(2)
    RDF::Literal.new(unescape(lexical), :datatype => datatype)
  when LITERAL_PLAIN
    lexical = Regexp.last_match(1)
    RDF::Literal.new(unescape(lexical))
  end
end

+ (RDF::Node) parse_node(input)

Parameters:

  • (String) input

Returns:



91
92
93
94
95
# File 'lib/rdf/ntriples/reader.rb', line 91

##
# Builds a blank node from its serialized form.
#
# @param  [String] input
# @return [RDF::Node, nil] a node named after the captured identifier, or nil
#   when `input` does not match NODEID
def self.parse_node(input)
  return nil unless input =~ NODEID

  RDF::Node.new(Regexp.last_match(1))
end

+ (RDF::Term) parse_object(input)

Parameters:

  • (String) input

Returns:



84
85
86
# File 'lib/rdf/ntriples/reader.rb', line 84

##
# Parses a term in object position: tries a URI, then a blank node, then a
# literal, and returns the first that succeeds.
#
# @param  [String] input
# @return [RDF::Term, nil] nil when none of the three parsers accepts `input`
def self.parse_object(input)
  term = parse_uri(input)
  term ||= parse_node(input)
  term ||= parse_literal(input)
  term
end

+ (RDF::URI) parse_predicate(input)

Parameters:

  • (String) input

Returns:



77
78
79
# File 'lib/rdf/ntriples/reader.rb', line 77

##
# Parses a term in predicate position. Delegates to parse_uri with the
# :intern option set.
#
# @param  [String] input
# @return [RDF::URI, nil] whatever parse_uri returns for `input`
def self.parse_predicate(input)
  interning = { :intern => true }
  parse_uri(input, interning)
end

+ (RDF::Resource) parse_subject(input)

Parameters:

  • (String) input

Returns:



70
71
72
# File 'lib/rdf/ntriples/reader.rb', line 70

##
# Parses a term in subject position: tries a URI first, then a blank node.
#
# @param  [String] input
# @return [RDF::Resource, nil] nil when neither parser accepts `input`
def self.parse_subject(input)
  resource = parse_uri(input)
  resource ||= parse_node(input)
  resource
end

+ (RDF::URI) parse_uri(input, options = {})

Parameters:

  • (String) input

Returns:



100
101
102
103
104
# File 'lib/rdf/ntriples/reader.rb', line 100

##
# Builds a URI from its angle-bracketed serialized form.
#
# @param  [String] input
# @param  [Hash]   options
# @option options [Boolean] :intern when truthy the URI is created through
#   RDF::URI.intern instead of RDF::URI.new
# @return [RDF::URI, nil] nil when `input` does not match URIREF
def self.parse_uri(input, options = {})
  return nil unless input =~ URIREF

  constructor = options[:intern] ? :intern : :new
  RDF::URI.send(constructor, Regexp.last_match(1))
end

+ (String) unescape(string)



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/rdf/ntriples/reader.rb', line 126

##
# Decodes the N-Triples escape sequences in `string`.
#
# Recognized escapes:
#
# * the single-character escapes for tab, newline, carriage return,
#   double quote and backslash;
# * a four-digit (lowercase u) code point escape, where a high surrogate
#   immediately followed by a low surrogate is combined into one character;
# * an eight-digit (uppercase U) code point escape.
#
# Every escape is decoded in one left-to-right pass. Decoding in several
# passes lets the output of one pass be re-read by the next: an escaped
# backslash followed by "n" would turn into a newline instead of a literal
# backslash and the letter n.
#
# The string is modified in place and that same object is returned; where the
# runtime supports string encodings it is tagged as UTF-8 on return.
#
# @param  [String] string
# @return [String] the same object as `string`, with escapes decoded
def self.unescape(string)
  encoding_aware = string.respond_to?(:force_encoding)

  # Operate on raw bytes so decoded characters can be spliced into a string
  # that may already hold non-ASCII bytes without an encoding clash.
  string.force_encoding(Encoding::ASCII_8BIT) if encoding_aware

  escape = /\\(?:
    ([tnr"\\])                                                    | # 1: single character
    u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})  | # 2,3: surrogate pair
    u([0-9A-Fa-f]{4})                                             | # 4: four hex digits
    U([0-9A-Fa-f]{8})                                               # 5: eight hex digits
  )/x

  string.gsub!(escape) do
    decoded =
      if $1
        # t, n and r map to control characters; quote and backslash map to
        # themselves.
        $1.tr('tnr', "\t\n\r")
      elsif $2
        # Combine the two UTF-16 code units into the code point they encode.
        [0x10000 + (($2.hex - 0xD800) << 10) + ($3.hex - 0xDC00)].pack('U*')
      else
        [($4 || $5).hex].pack('U*')
      end
    encoding_aware ? decoded.force_encoding(Encoding::ASCII_8BIT) : decoded
  end

  string.force_encoding(Encoding::UTF_8) if encoding_aware
  string
end

+ (RDF::Term) unserialize(input)

Reconstructs an RDF value from its serialized N-Triples representation.

Parameters:

  • (String) input

Returns:



60
61
62
63
64
65
# File 'lib/rdf/ntriples/reader.rb', line 60

##
# Reconstructs an RDF value from its serialized N-Triples representation.
#
# @param  [String, nil] input
# @return [RDF::Term, nil] nil for nil input; otherwise the result of
#   #read_value on a reader created over `input`
def self.unserialize(input)
  return nil if input.nil?

  reader = new(input)
  reader.read_value
end

Instance Method Details

- (Boolean) read_comment

Returns:

  • (Boolean)

See Also:



192
193
194
# File 'lib/rdf/ntriples/reader.rb', line 192

##
# Tries to consume a comment at the current input position.
#
# @return [Object] whatever `match(COMMENT)` returns -- presumably truthy when
#   a comment was consumed and nil/false otherwise; confirm against #match
def read_comment
  comment = match(COMMENT)
  comment
end

- (RDF::Literal) read_literal

Returns:

See Also:



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/rdf/ntriples/reader.rb', line 222

##
# Reads a literal at the current input position.
#
# After the quoted string is consumed and unescaped, an optional language tag
# or "^^" datatype marker is looked for. In the datatype case the URI that
# follows is read with #read_uriref, falling back to #fail_object when none
# is found. The literal is validated and canonicalized when the reader is
# configured to do so.
#
# @return [RDF::Literal, nil] nil when no quoted string is found
def read_literal
  quoted = match(LITERAL_PLAIN)
  return nil unless quoted

  lexical = self.class.unescape(quoted)
  literal =
    if (language = match(LANGUAGE_TAG))
      RDF::Literal.new(lexical, :language => language)
    elsif match(/^(\^\^)/) # FIXME
      RDF::Literal.new(lexical, :datatype => read_uriref || fail_object)
    else
      RDF::Literal.new(lexical) # plain string literal
    end

  literal.validate!     if validate?
  literal.canonicalize! if canonicalize?
  literal
end

- (RDF::Node) read_node

Returns:

See Also:



212
213
214
215
216
217
# File 'lib/rdf/ntriples/reader.rb', line 212

##
# Reads a blank node at the current input position.
#
# Nodes are cached per identifier in @nodes, so the same label read twice by
# this reader yields the same RDF::Node object.
#
# @return [RDF::Node, nil] nil when no blank node label is found
def read_node
  label = match(NODEID)
  return nil unless label

  (@nodes ||= {})[label] ||= RDF::Node.new(label)
end

- (Array) read_triple



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/rdf/ntriples/reader.rb', line 170

##
# Reads lines until one yields a triple.
#
# Lines for which `blank?` or #read_comment is truthy are skipped. On a
# reader error the line buffer is restored to the text of the failing line
# before the error is re-raised, so #read_value can retry on it.
#
# @return [Array] `[subject, predicate, object]`
# @raise  [EOFError] from `readline`, on end of input
# @raise  [RDF::ReaderError] when a term cannot be read
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # for backtracking input in case of parse error

    begin
      next if blank? || read_comment

      subject   = read_uriref || read_node || fail_subject
      predicate = read_uriref(:intern => true) || fail_predicate
      object    = read_uriref || read_node || read_literal || fail_object
      return [subject, predicate, object]
    rescue RDF::ReaderError => error
      @line = saved_line # this allows #read_value to work
      raise error
    end
  end
end

- (RDF::URI) read_uriref(options = {})

Returns:

See Also:



199
200
201
202
203
204
205
206
207
# File 'lib/rdf/ntriples/reader.rb', line 199

##
# Reads a URI reference at the current input position.
#
# The URI is created through RDF::URI.intern only when both the reader's
# `intern?` setting and the :intern option are truthy; otherwise through
# RDF::URI.new. It is validated and canonicalized when the reader is
# configured to do so.
#
# @param  [Hash] options
# @option options [Boolean] :intern request an interned URI
# @return [RDF::URI, nil] nil when no URI reference is found
def read_uriref(options = {})
  raw = match(URIREF)
  return nil unless raw

  text = self.class.unescape(raw)
  constructor = (intern? && options[:intern]) ? :intern : :new
  uri = RDF::URI.send(constructor, text)

  uri.validate!     if validate?
  uri.canonicalize! if canonicalize?
  uri
end

- (RDF::Term) read_value

Returns:



159
160
161
162
163
164
165
# File 'lib/rdf/ntriples/reader.rb', line 159

##
# Reads a full statement, or -- when that raises a reader error -- a single
# term (URI, blank node or literal, tried in that order).
#
# @return [RDF::Term, nil] nil when the fallback finds no term either
def read_value
  read_statement
rescue RDF::ReaderError
  read_uriref || read_node || read_literal
end