JSON Voorhees
Killer JSON for C++
Loading...
Searching...
No Matches
ast.hpp
Go to the documentation of this file.
1/// \file jsonv/ast.hpp
2/// Utilities for directly dealing with a JSON AST. For most cases, it is more convenient to use \c jsonv::value.
3///
4/// Copyright (c) 2020 by Travis Gockel. All rights reserved.
5///
6/// This program is free software: you can redistribute it and/or modify it under the terms of the Apache License
7/// as published by the Apache Software Foundation, either version 2 of the License, or (at your option) any later
8/// version.
9///
10/// \author Travis Gockel (travis@gockelhut.com)
11#pragma once
12
13#include <jsonv/config.hpp>
14#include <jsonv/kind.hpp>
15#include <jsonv/optional.hpp>
16#include <jsonv/string_view.hpp>
17
18#include <cstdint>
19#include <iosfwd>
20#include <utility>
21#include <variant>
22
23namespace jsonv
24{
25
26class value;
27
28namespace detail
29{
30
31inline string_view string_from_token(string_view token, std::false_type is_escaped JSONV_UNUSED)
32{
33 return string_view(token.data() + 1, token.size() - 2U);
34}
35
36std::string string_from_token(string_view token, std::true_type is_escaped);
37
38}
39
40/// \ingroup Value
41/// \{
42
/// Marker type for an encountered token type. Every enumerator has an explicitly assigned value and the underlying
/// type is \c std::uint8_t.
///
/// \see parse_index
/// \see ast_node
enum class ast_node_type : std::uint8_t
{
    /// The end of a document.
    document_end = 0,

    /// The beginning of a document.
    document_start = 1,

    /// The beginning of an \c kind::object (`{`).
    object_begin = 2,

    /// The end of an \c kind::object (`}`).
    object_end = 3,

    /// The beginning of an \c kind::array (`[`).
    array_begin = 4,

    /// The end of an \c kind::array (`]`).
    array_end = 5,

    /// A \c kind::string whose JSON-encoded format matches the canonical UTF-8 representation. There is no need to
    /// translate JSON escape sequences to extract a \c std::string value, so accessing the raw text is safe.
    string_canonical = 6,

    /// A \c kind::string whose JSON-encoded format contains escape sequences, so it must be translated to extract a
    /// \c std::string value.
    string_escaped = 7,

    /// The \c ast_node_type::string_canonical key of an \c kind::object.
    key_canonical = 8,

    /// The \c ast_node_type::string_escaped key of an \c kind::object.
    key_escaped = 9,

    /// The \c kind::boolean literal \c true.
    literal_true = 10,

    /// The \c kind::boolean literal \c false.
    literal_false = 11,

    /// The \c kind::null literal \c null.
    literal_null = 12,

    /// An \c kind::integer value. No decimals or exponent symbols were encountered during parsing. Note that
    /// integers are \e not bounds-checked by the AST -- values outside of \c std::int64_t are still \c integer
    /// values.
    integer = 13,

    /// A \c kind::decimal value.
    decimal = 14,

    /// An AST parsing error.
    error = 15,
};
102
103/// \{
104/// +--------------------+--------+
105/// | `ast_node_type` | Output |
106/// +--------------------+--------+
107/// | `document_start` | `^` |
108/// | `document_end` | `$` |
109/// | `object_begin` | `{` |
110/// | `object_end` | `}` |
111/// | `array_begin` | `[` |
112/// | `array_end` | `]` |
113/// | `string_canonical` | `s` |
114/// | `string_escaped` | `S` |
115/// | `key_canonical` | `k` |
116/// | `key_escaped` | `K` |
117/// | `literal_true` | `t` |
118/// | `literal_false` | `f` |
119/// | `literal_null` | `n` |
120/// | `integer` | `i` |
121/// | `decimal` | `d` |
122/// | `error` | `!` |
123/// +--------------------+--------+
124JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_node_type& type);
125JSONV_PUBLIC std::string to_string(const ast_node_type& type);
126/// \}
127
/// Error code encountered while building the AST.
///
/// \c none is 0 and the remaining codes count upwards from there in declaration order. The values are written out
/// so that each code's number can be read directly from the declaration.
enum class ast_error : std::uint64_t
{
    none                   = 0,
    expected_document      = 1,
    expected_string        = 2,
    expected_key_delimiter = 3,
    unexpected_token       = 4,
    unexpected_comma       = 5,
    unexpected_eof         = 6,
    expected_eof           = 7,
    depth_exceeded         = 8,
    extra_close            = 9,
    mismatched_close       = 10,
    close_after_comma      = 11,
    invalid_literal        = 12,
    invalid_number         = 13,
    invalid_string         = 14,
    invalid_comment        = 15,
    internal               = 16,
};
149
150/// \{
151/// Get a description of the error \a code.
152JSONV_PUBLIC std::ostream& operator<<(std::ostream&, const ast_error& code);
153JSONV_PUBLIC std::string to_string(const ast_error& code);
154/// \}
155
156/// Represents an entry in a JSON AST.
157///
158/// \see parse_index
160{
161public:
162 template <typename TSelf, ast_node_type KIndexToken>
163 class base
164 {
165 public:
166 /// Get the \c ast_node_type type.
167 static constexpr ast_node_type type()
168 {
169 return KIndexToken;
170 }
171
172 /// \see ast_node::token_raw
174 {
175 return string_view(_token_begin, static_cast<const TSelf&>(*this).token_size());
176 }
177
178 /// Allow implicit conversion to the more generic \c ast_node.
179 operator ast_node() const;
180
181 protected:
182 explicit constexpr base(const char* token_begin) :
183 _token_begin(token_begin)
184 { }
185
186 private:
187 const char* _token_begin;
188 };
189
190 template <typename TSelf, ast_node_type KIndexToken, std::size_t KTokenSize>
191 class basic_fixed_size_token : public base<TSelf, KIndexToken>
192 {
193 public:
194 explicit constexpr basic_fixed_size_token(const char* token_begin) :
196 { }
197
198 constexpr std::size_t token_size() const
199 {
200 return KTokenSize;
201 }
202 };
203
204 template <typename TSelf, ast_node_type KIndexToken>
205 class basic_dynamic_size_token : public base<TSelf, KIndexToken>
206 {
207 public:
208 explicit constexpr basic_dynamic_size_token(const char* token_begin, std::size_t token_size) :
210 _token_size(token_size)
211 { }
212
213 constexpr std::size_t token_size() const
214 {
215 return _token_size;
216 }
217
218 private:
219 std::size_t _token_size;
220 };
221
222 /// The start of a document.
223 class document_start final : public basic_fixed_size_token<document_start, ast_node_type::document_start, 0U>
224 {
225 public:
226 using basic_fixed_size_token<document_start, ast_node_type::document_start, 0U>::basic_fixed_size_token;
227 };
228
229 /// The end of a document.
230 class document_end final : public basic_fixed_size_token<document_end, ast_node_type::document_end, 0U>
231 {
232 public:
233 using basic_fixed_size_token<document_end, ast_node_type::document_end, 0U>::basic_fixed_size_token;
234 };
235
236 /// The beginning of an \c kind::object (`{`).
237 class object_begin final : public basic_fixed_size_token<object_begin, ast_node_type::object_begin, 1U>
238 {
239 public:
240 explicit constexpr object_begin(const char* token_begin, std::size_t element_count) :
242 _element_count(element_count)
243 { }
244
245 /// Get the number of elements in the object this token starts. This is useful for reserving memory.
246 constexpr std::size_t element_count() const
247 {
248 return _element_count;
249 }
250
251 private:
252 std::size_t _element_count;
253 };
254
255 /// The end of an \c kind::object (`}`).
256 class object_end final : public basic_fixed_size_token<object_end, ast_node_type::object_end, 1U>
257 {
258 public:
259 using basic_fixed_size_token<object_end, ast_node_type::object_end, 1U>::basic_fixed_size_token;
260 };
261
262 /// The beginning of an \c kind::array (`[`).
263 class array_begin final : public basic_fixed_size_token<array_begin, ast_node_type::array_begin, 1U>
264 {
265 public:
266 explicit constexpr array_begin(const char* token_begin, std::size_t element_count) :
268 _element_count(element_count)
269 { }
270
271 /// Get the number of elements in the array this token starts. This is useful for reserving memory.
272 constexpr std::size_t element_count() const
273 {
274 return _element_count;
275 }
276
277 private:
278 std::size_t _element_count;
279 };
280
281 /// The end of an \c kind::array (`]`).
282 class array_end final : public basic_fixed_size_token<array_end, ast_node_type::array_end, 1U>
283 {
284 public:
285 using basic_fixed_size_token<array_end, ast_node_type::array_end, 1U>::basic_fixed_size_token;
286 };
287
288 template <typename TSelf, ast_node_type KIndexToken, bool KEscaped>
289 class basic_string_token : public basic_dynamic_size_token<TSelf, KIndexToken>
290 {
291 public:
292 /// The return type of \c value is based on if the string is \c canonical or \c escaped. Strings in canonical
293 /// representation can be returned directly from the text through a \c string_view.
294 using value_type = std::conditional_t<KEscaped, std::string, string_view>;
295
296 public:
297 explicit constexpr basic_string_token(const char* token_begin, std::size_t token_size) :
299 _token_size(token_size)
300 { }
301
302 /// Was the source JSON for this string encoded in the canonical UTF-8 representation? If this is \c true, there
303 /// is no need to translate JSON escape sequences to extract a \c std::string value. This is the opposite of
304 /// \c escaped.
305 constexpr bool canonical() const noexcept
306 {
307 return !KEscaped;
308 }
309
310 /// Did the source JSON for this string contain escape sequences? If this is \c true, JSON escape sequences must
311 /// be translated into their canonical UTF-8 representation on extraction. This is the opposite of \c canonical.
312 constexpr bool escaped() const noexcept
313 {
314 return KEscaped;
315 }
316
317 constexpr std::size_t token_size() const
318 {
319 return _token_size;
320 }
321
322 value_type value() const
323 {
324 return detail::string_from_token(this->token_raw(), std::integral_constant<bool, KEscaped>());
325 }
326
327 private:
328 std::size_t _token_size;
329 };
330
331 class string_canonical final : public basic_string_token<string_canonical, ast_node_type::string_canonical, false>
332 {
333 public:
334 using basic_string_token<string_canonical, ast_node_type::string_canonical, false>::basic_string_token;
335 };
336
337 class string_escaped final : public basic_string_token<string_escaped, ast_node_type::string_escaped, true>
338 {
339 public:
340 using basic_string_token<string_escaped, ast_node_type::string_escaped, true>::basic_string_token;
341 };
342
343 class key_canonical final : public basic_string_token<key_canonical, ast_node_type::key_canonical, false>
344 {
345 public:
346 using basic_string_token<key_canonical, ast_node_type::key_canonical, false>::basic_string_token;
347 };
348
349 class key_escaped final : public basic_string_token<key_escaped, ast_node_type::key_escaped, true>
350 {
351 public:
352 using basic_string_token<key_escaped, ast_node_type::key_escaped, true>::basic_string_token;
353 };
354
355 class literal_true final : public basic_fixed_size_token<literal_true, ast_node_type::literal_true, 4U>
356 {
357 public:
358 using basic_fixed_size_token<literal_true, ast_node_type::literal_true, 4U>::basic_fixed_size_token;
359
360 bool value() const noexcept
361 {
362 return true;
363 }
364 };
365
366 class literal_false final : public basic_fixed_size_token<literal_false, ast_node_type::literal_false, 5U>
367 {
368 public:
369 using basic_fixed_size_token<literal_false, ast_node_type::literal_false, 5U>::basic_fixed_size_token;
370
371 bool value() const noexcept
372 {
373 return false;
374 }
375 };
376
377 class literal_null final : public basic_fixed_size_token<literal_null, ast_node_type::literal_null, 4U>
378 {
379 public:
380 using basic_fixed_size_token<literal_null, ast_node_type::literal_null, 4U>::basic_fixed_size_token;
381
382 jsonv::value value() const;
383 };
384
385 class integer final : public basic_dynamic_size_token<integer, ast_node_type::integer>
386 {
387 public:
388 using basic_dynamic_size_token<integer, ast_node_type::integer>::basic_dynamic_size_token;
389
390 std::int64_t value() const;
391 };
392
393 class decimal final : public basic_dynamic_size_token<decimal, ast_node_type::decimal>
394 {
395 public:
396 using basic_dynamic_size_token<decimal, ast_node_type::decimal>::basic_dynamic_size_token;
397
398 double value() const;
399 };
400
401 class error final : public basic_dynamic_size_token<error, ast_node_type::error>
402 {
403 public:
404 explicit constexpr error(const char* token_begin, std::size_t token_size, ast_error error_code) :
406 _error_code(error_code)
407 { }
408
409 constexpr ast_error error_code() const
410 {
411 return _error_code;
412 }
413
414 private:
415 ast_error _error_code;
416 };
417
418 using storage_type = std::variant<document_end,
423 array_end,
431 integer,
432 decimal,
433 error
434 >;
435
436public:
437 ast_node(const storage_type& value) :
438 _impl(value)
439 { }
440
441 template <typename T, typename... TArgs>
442 explicit ast_node(std::in_place_type_t<T> type, TArgs&&... args) :
443 _impl(type, std::forward<TArgs>(args)...)
444 { }
445
446 /// Get the \c std::variant that backs this type.
447 ///
448 /// \see visit
449 const storage_type& storage() const
450 {
451 return _impl;
452 }
453
454 /// Convenience function for calling \c std::visit on the underlying \c storage of this node.
455 template <typename FVisitor>
456 auto visit(FVisitor&& visitor) const
457 {
458 return std::visit(std::forward<FVisitor>(visitor), _impl);
459 }
460
461 /// Convenience function for calling \c std::visit on a key (see \c as_key).
462 template <typename FVisitor>
464 {
465 return std::visit(std::forward<FVisitor>(visitor), as_key());
466 }
467
468 /// Get the \c ast_node_type that tells the underlying type of this instance.
470 {
471 return visit([](const auto& x) { return x.type(); });
472 }
473
474 /// Get a view of the raw token. For example, \c "true", \c "{", or \c "1234". Note that this includes the complete
475 /// source, so string types such as \c ast_node_type::string_canonical include the opening and closing quotations.
477 {
478 return visit([](const auto& x) { return x.token_raw(); });
479 }
480
481 /// Get the underlying data of this node as \c T.
482 ///
483 /// \throws std::bad_variant_access if the requested \c T is different from the \c type of this instance.
484 template <typename T>
485 const T& as() const
486 {
487 return std::get<T>(_impl);
488 }
489
490 /// Get the underlying data of this node as one of the key types: \c key_canonical or \c key_escaped.
491 ///
492 /// \throw std::bad_variant_access if the \c type of this instance is neither \c key_canonical nor \c key_escaped.
493 std::variant<key_canonical, key_escaped> as_key() const
494 {
495 if (type() == ast_node_type::key_canonical)
496 return as<key_canonical>();
497 else
498 return as<key_escaped>();
499 }
500
501private:
502 storage_type _impl;
503};
504
505template <typename TSelf, ast_node_type KIndexToken>
507{
508 return ast_node(ast_node::storage_type(static_cast<const TSelf&>(*this)));
509}
510
511/// \}
512
513}
ast_error
Error code encountered while building the AST.
Definition ast.hpp:130
The beginning of an kind::array ([).
Definition ast.hpp:264
constexpr std::size_t element_count() const
Get the number of elements in the array this token starts. This is useful for reserving memory.
Definition ast.hpp:272
The end of an kind::array (]).
Definition ast.hpp:283
string_view token_raw() const
Definition ast.hpp:173
static constexpr ast_node_type type()
Get the ast_node_type type.
Definition ast.hpp:167
constexpr bool escaped() const noexcept
Did the source JSON for this string contain escape sequences? If this is true, JSON escape sequences ...
Definition ast.hpp:312
std::conditional_t< KEscaped, std::string, string_view > value_type
The return type of value is based on if the string is canonical or escaped.
Definition ast.hpp:294
constexpr bool canonical() const noexcept
Was the source JSON for this string encoded in the canonical UTF-8 representation?...
Definition ast.hpp:305
The end of a document.
Definition ast.hpp:231
The start of a document.
Definition ast.hpp:224
The beginning of an kind::object ({).
Definition ast.hpp:238
constexpr std::size_t element_count() const
Get the number of elements in the object this token starts. This is useful for reserving memory.
Definition ast.hpp:246
The end of an kind::object (}).
Definition ast.hpp:257
Represents an entry in a JSON AST.
Definition ast.hpp:160
std::variant< key_canonical, key_escaped > as_key() const
Get the underlying data of this node as one of the key types: key_canonical or key_escaped.
Definition ast.hpp:493
ast_node_type type() const
Get the ast_node_type that tells the underlying type of this instance.
Definition ast.hpp:469
auto visit_key(FVisitor &&visitor) const
Convenience function for calling std::visit on a key (see as_key).
Definition ast.hpp:463
auto visit(FVisitor &&visitor) const
Convenience function for calling std::visit on the underlying storage of this node.
Definition ast.hpp:456
const storage_type & storage() const
Get the std::variant that backs this type.
Definition ast.hpp:449
string_view token_raw() const
Get a view of the raw token.
Definition ast.hpp:476
const T & as() const
Get the underlying data of this node as T.
Definition ast.hpp:485
An adapter for enumeration types.
A wrapper type for creating a result with result_state::error.
Definition result.hpp:626
Represents a single JSON value, which can be any one of a potential kind, each behaving slightly diff...
Definition value.hpp:107
Copyright (c) 2014-2020 by Travis Gockel.
#define JSONV_UNUSED
Note that you know the variable is unused, but make the compiler stop complaining about it.
Definition config.hpp:109
#define JSONV_PUBLIC
This function or class is part of the public API for JSON Voorhees.
Definition config.hpp:102
ast_node_type
Marker type for an encountered token type.
Definition ast.hpp:84
Copyright (c) 2019-2020 by Travis Gockel.
STL namespace.
Pulls in an implementation of optional.
Pulls in an implementation of string_view.
std::string_view string_view
A non-owning reference to a string.