JSON Voorhees
Killer JSON for C++
Loading...
Searching...
No Matches
ast.hpp
Go to the documentation of this file.
1/// \file jsonv/ast.hpp
2/// Utilities for directly dealing with a JSON AST. For most cases, it is more convenient to use \c jsonv::value.
3///
4/// Copyright (c) 2020 by Travis Gockel. All rights reserved.
5///
6/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License
7/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later
8/// version.
9///
10/// \author Travis Gockel (travis@gockelhut.com)
11#pragma once
12
13#include <jsonv/config.hpp>
14#include <jsonv/kind.hpp>
15#include <string_view>
16
17#include <cstdint>
18#include <iosfwd>
19#include <utility>
20#include <variant>
21
22namespace jsonv
23{
24
25class value;
26
27namespace detail
28{
29
/// Extract the contents of a canonical (escape-free) string \a token by dropping its first and last characters, which
/// are the opening and closing quotations of the raw token. The returned view aliases \a token; no copy is made.
///
/// The second parameter is a tag which selects this overload for non-escaped strings. It carries no data, so it is
/// left unnamed.
///
/// \returns the text between the quotations, or an empty view if \a token is too short to contain both of them.
inline std::string_view string_from_token(std::string_view token, std::false_type /* is_escaped */)
{
    // Without this guard, `token.size() - 2U` wraps around for tokens shorter than two characters and the resulting
    // view would claim an enormous length.
    if (token.size() < 2U)
        return std::string_view();

    token.remove_prefix(1U);
    token.remove_suffix(1U);
    return token;
}
34
35std::string string_from_token(std::string_view token, std::true_type is_escaped);
36
37}
38
39/// \ingroup Value
40/// \{
41
42/// Marker type for an encountered token type.
43///
44/// - \c document_end
45/// The end of a document.
46/// - \c document_start
47/// The beginning of a document.
48/// - \c object_begin
49/// The beginning of an \c kind::object (`{`).
50/// - \c object_end
51/// The end of an \c kind::object (`}`).
52/// - \c array_begin
53/// The beginning of an \c kind::array (`[`).
54/// - \c array_end
55/// The end of an \c kind::array (`]`).
56/// - \c string_canonical
57/// A \c kind::string whose JSON-encoded format matches the canonical UTF-8 representation. There is no need to
58/// translate JSON escape sequences to extract a \c std::string value, so accessing the raw text is safe.
59/// - \c string_escaped
60/// A \c kind::string whose JSON-encoded format contains escape sequences, so it must be translated to extract a
61/// \c std::string value.
62/// - \c key_canonical
63/// The \c ast_node_type::string_canonical key of an \c kind::object.
64/// - \c key_escaped
65/// The \c ast_node_type::string_escaped key of an \c kind::object.
66/// - \c literal_true
67/// The \c kind::boolean literal \c true.
68/// - \c literal_false
69/// The \c kind::boolean literal \c false.
70/// - \c literal_null
71/// The \c kind::null literal \c null.
72/// - \c integer
73/// An \c kind::integer value. No decimals or exponent symbols were encountered during parsing. Note that integers are
74/// \e not bounds-checked by the AST -- values outside of \c std::int64_t are still \c integer values.
75/// - \c decimal
76/// A \c kind::decimal value.
77/// - \c error
78/// An AST parsing error.
79///
80/// \see parse_index
81/// \see ast_node
enum class ast_node_type : std::uint8_t
{
    // Document boundaries
    document_end     = 0,   ///< Streamed as `$`
    document_start   = 1,   ///< Streamed as `^`

    // Containers
    object_begin     = 2,   ///< Streamed as `{`
    object_end       = 3,   ///< Streamed as `}`
    array_begin      = 4,   ///< Streamed as `[`
    array_end        = 5,   ///< Streamed as `]`

    // String values and object keys
    string_canonical = 6,   ///< Streamed as `s`
    string_escaped   = 7,   ///< Streamed as `S`
    key_canonical    = 8,   ///< Streamed as `k`
    key_escaped      = 9,   ///< Streamed as `K`

    // Literals
    literal_true     = 10,  ///< Streamed as `t`
    literal_false    = 11,  ///< Streamed as `f`
    literal_null     = 12,  ///< Streamed as `n`

    // Numbers
    integer          = 13,  ///< Streamed as `i`
    decimal          = 14,  ///< Streamed as `d`

    // Failure
    error            = 15,  ///< Streamed as `!`
};
101
102/// \{
103/// +--------------------+--------+
104/// | `ast_node_type` | Output |
105/// +--------------------+--------+
106/// | `document_start` | `^` |
107/// | `document_end` | `$` |
108/// | `object_begin` | `{` |
109/// | `object_end` | `}` |
110/// | `array_begin` | `[` |
111/// | `array_end` | `]` |
112/// | `string_canonical` | `s` |
113/// | `string_escaped` | `S` |
114/// | `key_canonical` | `k` |
115/// | `key_escaped` | `K` |
116/// | `literal_true` | `t` |
117/// | `literal_false` | `f` |
118/// | `literal_null` | `n` |
119/// | `integer` | `i` |
120/// | `decimal` | `d` |
121/// | `error` | `!` |
122/// +--------------------+--------+
123JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_node_type& type);
124JSONV_PUBLIC std::string to_string(const ast_node_type& type);
125/// \}
126
/// Error code encountered while building the AST. The numeric values are spelled out so that they are visible at a
/// glance; they are the same values the enumerators receive from implicit numbering.
enum class ast_error : std::uint64_t
{
    none                   = 0,
    expected_document      = 1,
    expected_string        = 2,
    expected_key_delimiter = 3,
    unexpected_token       = 4,
    unexpected_comma       = 5,
    unexpected_eof         = 6,
    expected_eof           = 7,
    depth_exceeded         = 8,
    extra_close            = 9,
    mismatched_close       = 10,
    close_after_comma      = 11,
    invalid_literal        = 12,
    invalid_number         = 13,
    invalid_string         = 14,
    invalid_comment        = 15,
    internal               = 16,
};
148
149/// \{
150/// Get a description of the error \a code.
151JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_error& code);
152JSONV_PUBLIC std::string to_string(const ast_error& code);
153/// \}
154
155/// Represents an entry in a JSON AST.
156///
157/// \see parse_index
159{
160public:
161 template <typename TSelf, ast_node_type KIndexToken>
162 class base
163 {
164 public:
165 /// Get the \c ast_node_type type.
166 static constexpr ast_node_type type()
167 {
168 return KIndexToken;
169 }
170
171 /// \see ast_node::token_raw
172 std::string_view token_raw() const
173 {
174 return std::string_view(_token_begin, static_cast<const TSelf&>(*this).token_size());
175 }
176
177 /// Allow implicit conversion to the more generic \c ast_node.
178 operator ast_node() const;
179
180 protected:
181 explicit constexpr base(const char* token_begin) :
182 _token_begin(token_begin)
183 { }
184
185 private:
186 const char* _token_begin;
187 };
188
189 template <typename TSelf, ast_node_type KIndexToken, std::size_t KTokenSize>
190 class basic_fixed_size_token : public base<TSelf, KIndexToken>
191 {
192 public:
193 explicit constexpr basic_fixed_size_token(const char* token_begin) :
195 { }
196
197 constexpr std::size_t token_size() const
198 {
199 return KTokenSize;
200 }
201 };
202
203 template <typename TSelf, ast_node_type KIndexToken>
204 class basic_dynamic_size_token : public base<TSelf, KIndexToken>
205 {
206 public:
207 explicit constexpr basic_dynamic_size_token(const char* token_begin, std::size_t token_size) :
209 _token_size(token_size)
210 { }
211
212 constexpr std::size_t token_size() const
213 {
214 return _token_size;
215 }
216
217 private:
218 std::size_t _token_size;
219 };
220
221 /// The start of a document.
222 class document_start final : public basic_fixed_size_token<document_start, ast_node_type::document_start, 0U>
223 {
224 public:
225 using basic_fixed_size_token<document_start, ast_node_type::document_start, 0U>::basic_fixed_size_token;
226 };
227
228 /// The end of a document.
229 class document_end final : public basic_fixed_size_token<document_end, ast_node_type::document_end, 0U>
230 {
231 public:
232 using basic_fixed_size_token<document_end, ast_node_type::document_end, 0U>::basic_fixed_size_token;
233 };
234
235 /// The beginning of an \c kind::object (`{`).
236 class object_begin final : public basic_fixed_size_token<object_begin, ast_node_type::object_begin, 1U>
237 {
238 public:
239 explicit constexpr object_begin(const char* token_begin, std::size_t element_count) :
241 _element_count(element_count)
242 { }
243
244 /// Get the number of elements in the object this token starts. This is useful for reserving memory.
245 constexpr std::size_t element_count() const
246 {
247 return _element_count;
248 }
249
250 private:
251 std::size_t _element_count;
252 };
253
254 /// The end of an \c kind::object (`}`).
255 class object_end final : public basic_fixed_size_token<object_end, ast_node_type::object_end, 1U>
256 {
257 public:
258 using basic_fixed_size_token<object_end, ast_node_type::object_end, 1U>::basic_fixed_size_token;
259 };
260
261 /// The beginning of an \c kind::array (`[`).
262 class array_begin final : public basic_fixed_size_token<array_begin, ast_node_type::array_begin, 1U>
263 {
264 public:
265 explicit constexpr array_begin(const char* token_begin, std::size_t element_count) :
267 _element_count(element_count)
268 { }
269
270 /// Get the number of elements in the array this token starts. This is useful for reserving memory.
271 constexpr std::size_t element_count() const
272 {
273 return _element_count;
274 }
275
276 private:
277 std::size_t _element_count;
278 };
279
280 /// The end of an \c kind::array (`]`).
281 class array_end final : public basic_fixed_size_token<array_end, ast_node_type::array_end, 1U>
282 {
283 public:
284 using basic_fixed_size_token<array_end, ast_node_type::array_end, 1U>::basic_fixed_size_token;
285 };
286
287 template <typename TSelf, ast_node_type KIndexToken, bool KEscaped>
288 class basic_string_token : public basic_dynamic_size_token<TSelf, KIndexToken>
289 {
290 public:
291 /// The return type of \c value is based on if the string is \c canonical or \c escaped. Strings in canonical
292 /// representation can be returned directly from the text through a \c std::string_view.
293 using value_type = std::conditional_t<KEscaped, std::string, std::string_view>;
294
295 public:
296 explicit constexpr basic_string_token(const char* token_begin, std::size_t token_size) :
298 _token_size(token_size)
299 { }
300
301 /// Was the source JSON for this string encoded in the canonical UTF-8 representation? If this is \c true, there
302 /// is no need to translate JSON escape sequences to extract a \c std::string value. This is the opposite of
303 /// \c escaped.
304 constexpr bool canonical() const noexcept
305 {
306 return !KEscaped;
307 }
308
309 /// Did the source JSON for this string contain escape sequences? If this is \c true, JSON escape sequences must
310 /// be translated into their canonical UTF-8 representation on extraction. This is the opposite of \c canonical.
311 constexpr bool escaped() const noexcept
312 {
313 return KEscaped;
314 }
315
316 constexpr std::size_t token_size() const
317 {
318 return _token_size;
319 }
320
321 value_type value() const
322 {
323 return detail::string_from_token(this->token_raw(), std::integral_constant<bool, KEscaped>());
324 }
325
326 private:
327 std::size_t _token_size;
328 };
329
330 class string_canonical final : public basic_string_token<string_canonical, ast_node_type::string_canonical, false>
331 {
332 public:
333 using basic_string_token<string_canonical, ast_node_type::string_canonical, false>::basic_string_token;
334 };
335
336 class string_escaped final : public basic_string_token<string_escaped, ast_node_type::string_escaped, true>
337 {
338 public:
339 using basic_string_token<string_escaped, ast_node_type::string_escaped, true>::basic_string_token;
340 };
341
342 class key_canonical final : public basic_string_token<key_canonical, ast_node_type::key_canonical, false>
343 {
344 public:
345 using basic_string_token<key_canonical, ast_node_type::key_canonical, false>::basic_string_token;
346 };
347
348 class key_escaped final : public basic_string_token<key_escaped, ast_node_type::key_escaped, true>
349 {
350 public:
351 using basic_string_token<key_escaped, ast_node_type::key_escaped, true>::basic_string_token;
352 };
353
354 class literal_true final : public basic_fixed_size_token<literal_true, ast_node_type::literal_true, 4U>
355 {
356 public:
357 using basic_fixed_size_token<literal_true, ast_node_type::literal_true, 4U>::basic_fixed_size_token;
358
359 bool value() const noexcept
360 {
361 return true;
362 }
363 };
364
365 class literal_false final : public basic_fixed_size_token<literal_false, ast_node_type::literal_false, 5U>
366 {
367 public:
368 using basic_fixed_size_token<literal_false, ast_node_type::literal_false, 5U>::basic_fixed_size_token;
369
370 bool value() const noexcept
371 {
372 return false;
373 }
374 };
375
376 class literal_null final : public basic_fixed_size_token<literal_null, ast_node_type::literal_null, 4U>
377 {
378 public:
379 using basic_fixed_size_token<literal_null, ast_node_type::literal_null, 4U>::basic_fixed_size_token;
380
381 jsonv::value value() const;
382 };
383
384 class integer final : public basic_dynamic_size_token<integer, ast_node_type::integer>
385 {
386 public:
387 using basic_dynamic_size_token<integer, ast_node_type::integer>::basic_dynamic_size_token;
388
389 std::int64_t value() const;
390 };
391
392 class decimal final : public basic_dynamic_size_token<decimal, ast_node_type::decimal>
393 {
394 public:
395 using basic_dynamic_size_token<decimal, ast_node_type::decimal>::basic_dynamic_size_token;
396
397 double value() const;
398 };
399
400 class error final : public basic_dynamic_size_token<error, ast_node_type::error>
401 {
402 public:
403 explicit constexpr error(const char* token_begin, std::size_t token_size, ast_error error_code) :
405 _error_code(error_code)
406 { }
407
408 constexpr ast_error error_code() const
409 {
410 return _error_code;
411 }
412
413 private:
414 ast_error _error_code;
415 };
416
417 using storage_type = std::variant<document_end,
422 array_end,
430 integer,
431 decimal,
432 error
433 >;
434
435public:
436 ast_node(const storage_type& value) :
437 _impl(value)
438 { }
439
440 template <typename T, typename... TArgs>
441 explicit ast_node(std::in_place_type_t<T> type, TArgs&&... args) :
442 _impl(type, std::forward<TArgs>(args)...)
443 { }
444
445 /// Get the \c std::variant that backs this type.
446 ///
447 /// \see visit
448 const storage_type& storage() const
449 {
450 return _impl;
451 }
452
453 /// Convenience function for calling \c std::visit on the underlying \c storage of this node.
454 template <typename FVisitor>
455 auto visit(FVisitor&& visitor) const
456 {
457 return std::visit(std::forward<FVisitor>(visitor), _impl);
458 }
459
460 /// Convenience function for calling \c std::visit on a key (see \c as_key).
461 template <typename FVisitor>
463 {
464 return std::visit(std::forward<FVisitor>(visitor), as_key());
465 }
466
467 /// Get the \c ast_node_type that tells the underlying type of this instance.
469 {
470 return visit([](const auto& x) { return x.type(); });
471 }
472
473 /// Get a view of the raw token. For example, \c "true", \c "{", or \c "1234". Note that this includes the complete
474 /// source, so string types such as \c ast_node_type::string_canonical include the opening and closing quotations.
475 std::string_view token_raw() const
476 {
477 return visit([](const auto& x) { return x.token_raw(); });
478 }
479
480 /// Get the underlying data of this node as \c T.
481 ///
482 /// \throws std::bad_variant_access if the requested \c T is different from the \c type of this instance.
483 template <typename T>
484 const T& as() const
485 {
486 return std::get<T>(_impl);
487 }
488
489 /// Get the underlying data of this node as one of the key types: \c key_canonical or \c key_escaped.
490 ///
491 /// \throw std::bad_variant_access if the \c type of this instance is neither \c key_canonical nor \c key_escaped.
492 std::variant<key_canonical, key_escaped> as_key() const
493 {
494 if (type() == ast_node_type::key_canonical)
495 return as<key_canonical>();
496 else
497 return as<key_escaped>();
498 }
499
500private:
501 storage_type _impl;
502};
503
504template <typename TSelf, ast_node_type KIndexToken>
506{
507 return ast_node(ast_node::storage_type(static_cast<const TSelf&>(*this)));
508}
509
510/// \}
511
512}
ast_error
Error code encountered while building the AST.
Definition ast.hpp:129
The beginning of an kind::array ([).
Definition ast.hpp:263
constexpr std::size_t element_count() const
Get the number of elements in the array this token starts. This is useful for reserving memory.
Definition ast.hpp:271
The end of an kind::array (]).
Definition ast.hpp:282
static constexpr ast_node_type type()
Get the ast_node_type type.
Definition ast.hpp:166
std::string_view token_raw() const
Definition ast.hpp:172
constexpr bool escaped() const noexcept
Did the source JSON for this string contain escape sequences? If this is true, JSON escape sequences must be translated into their canonical UTF-8 representation on extraction.
Definition ast.hpp:311
constexpr bool canonical() const noexcept
Was the source JSON for this string encoded in the canonical UTF-8 representation? If this is true, there is no need to translate JSON escape sequences to extract a std::string value.
Definition ast.hpp:304
std::conditional_t< KEscaped, std::string, std::string_view > value_type
The return type of value is based on if the string is canonical or escaped.
Definition ast.hpp:293
The end of a document.
Definition ast.hpp:230
The start of a document.
Definition ast.hpp:223
The beginning of an kind::object ({).
Definition ast.hpp:237
constexpr std::size_t element_count() const
Get the number of elements in the object this token starts. This is useful for reserving memory.
Definition ast.hpp:245
The end of an kind::object (}).
Definition ast.hpp:256
Represents an entry in a JSON AST.
Definition ast.hpp:159
std::variant< key_canonical, key_escaped > as_key() const
Get the underlying data of this node as one of the key types: key_canonical or key_escaped.
Definition ast.hpp:492
ast_node_type type() const
Get the ast_node_type that tells the underlying type of this instance.
Definition ast.hpp:468
auto visit_key(FVisitor &&visitor) const
Convenience function for calling std::visit on a key (see as_key).
Definition ast.hpp:462
auto visit(FVisitor &&visitor) const
Convenience function for calling std::visit on the underlying storage of this node.
Definition ast.hpp:455
std::string_view token_raw() const
Get a view of the raw token.
Definition ast.hpp:475
const storage_type & storage() const
Get the std::variant that backs this type.
Definition ast.hpp:448
const T & as() const
Get the underlying data of this node as T.
Definition ast.hpp:484
An adapter for enumeration types.
Represents a single JSON value, which can be any one of a potential kind, each behaving slightly differently.
Definition value.hpp:107
Copyright (c) 2014-2020 by Travis Gockel.
#define JSONV_UNUSED
Note that you know the variable is unused, but make the compiler stop complaining about it.
Definition config.hpp:109
#define JSONV_PUBLIC
This function or class is part of the public API for JSON Voorhees.
Definition config.hpp:102
ast_node_type
Marker type for an encountered token type.
Definition ast.hpp:83
Copyright (c) 2019-2020 by Travis Gockel.
STL namespace.