Class: RDF::NTriples::Reader
- Inherits:
-
Reader
- Object
- Reader
- RDF::NTriples::Reader
- Defined in:
- lib/rdf/ntriples/reader.rb
Overview
N-Triples parser.
Direct Known Subclasses
Constant Summary
- COMMENT =
/^#\s*(.*)$/.freeze
- NODEID =
/^_:([A-Za-z][A-Za-z0-9\-_]*)/.freeze
- URIREF =
/^<([^>]+)>/.freeze
- LITERAL_PLAIN =
/^"((?:\\"|[^"])*)"/.freeze
- LITERAL_WITH_LANGUAGE =
/^"((?:\\"|[^"])*)"@([a-z]+[\-A-Za-z0-9]*)/.freeze
- LITERAL_WITH_DATATYPE =
/^"((?:\\"|[^"])*)"\^\^<([^>]+)>/.freeze
- LANGUAGE_TAG =
/^@([a-z]+[\-A-Za-z0-9]*)/.freeze
- DATATYPE_URI =
/^\^\^<([^>]+)>/.freeze
- LITERAL =
Regexp.union(LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE, LITERAL_PLAIN).freeze
- SUBJECT =
Regexp.union(URIREF, NODEID).freeze
- PREDICATE =
Regexp.union(URIREF).freeze
- OBJECT =
Regexp.union(URIREF, NODEID, LITERAL).freeze
- ESCAPE_CHARS =
["\t", "\n", "\r", "\"", "\\"].freeze
- ESCAPE_CHAR4 =
/\\u([0-9A-Fa-f]{4,4})/.freeze
- ESCAPE_CHAR8 =
/\\U([0-9A-Fa-f]{8,8})/.freeze
- ESCAPE_CHAR =
Regexp.union(ESCAPE_CHAR4, ESCAPE_CHAR8).freeze
- ESCAPE_SURROGATE =
/\\u([0-9A-Fa-f]{4,4})\\u([0-9A-Fa-f]{4,4})/.freeze
- ESCAPE_SURROGATE1 =
(0xD800..0xDBFF).freeze
- ESCAPE_SURROGATE2 =
(0xDC00..0xDFFF).freeze
Class Method Summary (collapse)
- + (RDF::Literal) parse_literal(input)
- + (RDF::Node) parse_node(input)
- + (RDF::Term) parse_object(input)
- + (RDF::URI) parse_predicate(input)
- + (RDF::Resource) parse_subject(input)
- + (RDF::URI) parse_uri(input, options = {})
- + (String) unescape(string)
-
+ (RDF::Term) unserialize(input)
Reconstructs an RDF value from its serialized N-Triples representation.
Instance Method Summary (collapse)
- - (Boolean) read_comment
- - (RDF::Literal) read_literal
- - (RDF::Node) read_node
- - (Array) read_triple
- - (RDF::URI) read_uriref(options = {})
- - (RDF::Term) read_value
Methods inherited from Reader
#canonicalize?, #close, each, #each_statement, #each_triple, #encoding, #fail_object, #fail_predicate, #fail_subject, for, format, #initialize, #intern?, open, #prefix, #prefixes, #prefixes=, #read_statement, #rewind, #validate?
Methods included from Util::Aliasing::LateBound
Methods included from Enumerable
#contexts, #dump, #each_context, #each_graph, #each_object, #each_predicate, #each_quad, #each_statement, #each_subject, #each_triple, #enum_context, #enum_graph, #enum_object, #enum_predicate, #enum_quad, #enum_statement, #enum_subject, #enum_triple, #has_context?, #has_object?, #has_predicate?, #has_quad?, #has_statement?, #has_subject?, #has_triple?, #objects, #predicates, #quads, #statements, #subjects, #to_a, #to_hash, #to_set, #triples
Methods included from Countable
Methods included from Readable
Constructor Details
This class inherits a constructor from RDF::Reader
Class Method Details
+ (RDF::Literal) parse_literal(input)
109 110 111 112 113 114 115 116 117 118 |
# File 'lib/rdf/ntriples/reader.rb', line 109
#
# Builds an RDF literal from its N-Triples textual form.
#
# The three literal patterns are tried in order: language-tagged, then
# datatyped, then plain. The quoted lexical form is passed through
# {unescape} before the literal is constructed.
#
# @param  [String] input
# @return [RDF::Literal, nil] `nil` when `input` matches none of the patterns
def self.parse_literal(input)
  if LITERAL_WITH_LANGUAGE === input
    lexical  = Regexp.last_match(1)
    language = Regexp.last_match(2)
    RDF::Literal.new(unescape(lexical), :language => language)
  elsif LITERAL_WITH_DATATYPE === input
    lexical  = Regexp.last_match(1)
    datatype = Regexp.last_match(2)
    RDF::Literal.new(unescape(lexical), :datatype => datatype)
  elsif LITERAL_PLAIN === input
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end
+ (RDF::Node) parse_node(input)
91 92 93 94 95 |
# File 'lib/rdf/ntriples/reader.rb', line 91
#
# Builds a blank node from a `_:identifier` token.
#
# @param  [String] input
# @return [RDF::Node, nil] `nil` when `input` does not match NODEID
def self.parse_node(input)
  return nil unless input =~ NODEID

  # Group 1 of NODEID is the identifier without the leading "_:".
  RDF::Node.new(Regexp.last_match(1))
end
+ (RDF::Term) parse_object(input)
84 85 86 |
# File 'lib/rdf/ntriples/reader.rb', line 84
#
# Parses a term in object position: a URI reference, a blank node, or a
# literal, tried in that order. The first parser that yields a value wins.
#
# @param  [String] input
# @return [RDF::Term] the result of the last parser tried when none matches
def self.parse_object(input)
  uri = parse_uri(input)
  return uri if uri

  node = parse_node(input)
  return node if node

  parse_literal(input)
end
+ (RDF::URI) parse_predicate(input)
77 78 79 |
# File 'lib/rdf/ntriples/reader.rb', line 77 def self.parse_predicate(input) parse_uri(input, :intern => true) end |
+ (RDF::Resource) parse_subject(input)
70 71 72 |
# File 'lib/rdf/ntriples/reader.rb', line 70
#
# Parses a term in subject position: a URI reference if one matches,
# otherwise a blank node.
#
# @param  [String] input
# @return [RDF::Resource] the {parse_node} result when no URI matches
def self.parse_subject(input)
  uri = parse_uri(input)
  uri ? uri : parse_node(input)
end
+ (RDF::URI) parse_uri(input, options = {})
100 101 102 103 104 |
# File 'lib/rdf/ntriples/reader.rb', line 100
#
# Builds a URI from an `<...>` URI reference token.
#
# @param  [String] input
# @param  [Hash{Symbol => Object}] options
# @option options [Boolean] :intern
#   when truthy the URI is obtained through `RDF::URI.intern`, otherwise
#   through `RDF::URI.new`
# @return [RDF::URI, nil] `nil` when `input` does not match URIREF
def self.parse_uri(input, options = {})
  return nil unless input =~ URIREF

  # Group 1 of URIREF is the text between the angle brackets.
  uri_text = Regexp.last_match(1)
  options[:intern] ? RDF::URI.intern(uri_text) : RDF::URI.new(uri_text)
end
+ (String) unescape(string)
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/rdf/ntriples/reader.rb', line 126
#
# Decodes N-Triples escape sequences in `string`, in place.
#
# Recognised escapes are `\t`, `\n`, `\r`, `\"`, `\\`, `\uXXXX`,
# `\UXXXXXXXX`, and a `\uXXXX\uXXXX` UTF-16 surrogate pair, which is
# combined into a single code point.
#
# All escapes are decoded in one left-to-right pass, so the output of one
# escape is never re-read as the start of another: `\\u0041` yields a
# backslash followed by `u0041`, and `\\n` yields a backslash followed by
# `n`.
#
# @param  [String] string  modified in place; must not be frozen
# @return [String] the same `string` object, tagged as UTF-8 where the
#   runtime supports string encodings
def self.unescape(string)
  # Work on raw bytes so that decoded UTF-8 sequences can be spliced in
  # next to whatever bytes the input already holds, then re-tag at the end.
  recode = string.respond_to?(:force_encoding)
  string.force_encoding(Encoding::ASCII_8BIT) if recode

  escape = /
    \\(?:
      ([tnr"\\])                                                       # group 1: single-character escape
    | u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})     # groups 2 and 3: high + low surrogate
    | u([0-9A-Fa-f]{4})                                                # group 4: 4-digit code point
    | U([0-9A-Fa-f]{8})                                                # group 5: 8-digit code point
    )
  /x

  string.gsub!(escape) do
    m = Regexp.last_match
    if m[1]
      case m[1]
      when 't' then "\t"
      when 'n' then "\n"
      when 'r' then "\r"
      else m[1] # a quote or a backslash stands for itself
      end
    else
      codepoint =
        if m[2]
          # Standard UTF-16 surrogate pair arithmetic.
          0x10000 + ((m[2].hex - 0xD800) << 10) + (m[3].hex - 0xDC00)
        else
          (m[4] || m[5]).hex
        end
      char = [codepoint].pack('U')
      recode ? char.force_encoding(Encoding::ASCII_8BIT) : char
    end
  end

  string.force_encoding(Encoding::UTF_8) if recode
  string
end
+ (RDF::Term) unserialize(input)
Reconstructs an RDF value from its serialized N-Triples representation.
60 61 62 63 64 65 |
# File 'lib/rdf/ntriples/reader.rb', line 60
#
# Reconstructs an RDF value from its serialized N-Triples representation.
#
# @param  [String, nil] input
# @return [RDF::Term, nil] `nil` when `input` is `nil`; otherwise the result
#   of `read_value` on a new reader constructed over `input`
def self.unserialize(input)
  return nil if input.nil?

  new(input).read_value
end
Instance Method Details
- (Boolean) read_comment
192 193 194 |
# File 'lib/rdf/ntriples/reader.rb', line 192 def read_comment match(COMMENT) end |
- (RDF::Literal) read_literal
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/rdf/ntriples/reader.rb', line 222
#
# Reads a literal: a quoted lexical form optionally followed by a language
# tag or a `^^<datatype>` suffix.
#
# The lexical form is unescaped via the class-level `unescape`. The literal
# is validated and/or canonicalized when `validate?` / `canonicalize?` say so.
#
# @return [RDF::Literal, nil] `nil` when no quoted literal is matched
def read_literal
  lexical = match(LITERAL_PLAIN)
  return unless lexical

  lexical = self.class.unescape(lexical)
  literal =
    if (language = match(LANGUAGE_TAG))
      RDF::Literal.new(lexical, :language => language)
    elsif match(/^(\^\^)/) # FIXME
      # The datatype URI must follow the "^^" marker; otherwise report a bad object.
      RDF::Literal.new(lexical, :datatype => read_uriref || fail_object)
    else
      RDF::Literal.new(lexical) # plain string literal
    end

  literal.validate!     if validate?
  literal.canonicalize! if canonicalize?
  literal
end
- (RDF::Node) read_node
212 213 214 215 216 217 |
# File 'lib/rdf/ntriples/reader.rb', line 212
#
# Reads a blank node. Nodes are cached per identifier in `@nodes`, so the
# same identifier yields the same RDF::Node object on repeated reads by
# this reader.
#
# @return [RDF::Node, nil] `nil` when NODEID is not matched
def read_node
  node_id = match(NODEID)
  return unless node_id

  cache = (@nodes ||= {})
  cache[node_id] ||= RDF::Node.new(node_id)
end
- (Array) read_triple
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/rdf/ntriples/reader.rb', line 170
#
# Reads lines until one yields a triple, skipping blank lines and comments.
#
# @return [Array] `[subject, predicate, object]`
# @raise  [EOFError] from `readline` at end of input (per the inline note)
# @raise  [RDF::ReaderError] re-raised after `@line` has been restored
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # for backtracking input in case of parse error
    begin
      next if blank? || read_comment

      subject   = read_uriref || read_node || fail_subject
      predicate = read_uriref(:intern => true) || fail_predicate
      object    = read_uriref || read_node || read_literal || fail_object
      return [subject, predicate, object]
    rescue RDF::ReaderError => error
      @line = saved_line # this allows #read_value to work
      raise error
    end
  end
end
- (RDF::URI) read_uriref(options = {})
199 200 201 202 203 204 205 206 207 |
# File 'lib/rdf/ntriples/reader.rb', line 199
#
# Reads a URI reference.
#
# The text returned by `match(URIREF)` is unescaped via the class-level
# `unescape`. The URI is validated and/or canonicalized when `validate?` /
# `canonicalize?` say so.
#
# @param  [Hash{Symbol => Object}] options
# @option options [Boolean] :intern
#   request an interned URI; honoured only when the reader's `intern?` is
#   also truthy
# @return [RDF::URI, nil] `nil` when URIREF is not matched
def read_uriref(options = {})
  uri_str = match(URIREF)
  return unless uri_str

  uri_str = self.class.unescape(uri_str)
  constructor = intern? && options[:intern] ? :intern : :new
  uri = RDF::URI.send(constructor, uri_str)
  uri.validate!     if validate?
  uri.canonicalize! if canonicalize?
  uri
end
- (RDF::Term) read_value
159 160 161 162 163 164 165 |
# File 'lib/rdf/ntriples/reader.rb', line 159
#
# Reads one value. A full statement is tried first; if that raises
# RDF::ReaderError, a single term is read instead (URI reference, then
# blank node, then literal).
#
# @return [RDF::Term] the result of `read_statement`, or the first term
#   reader that yields a value
def read_value
  read_statement
rescue RDF::ReaderError
  read_uriref || read_node || read_literal
end