// Newer
// Older
// Import / research / reflection / source / topics_db.cpp
/*
 *  Experimentation
 *  Topics/DB/tuples
 *  (C) Copyright 2019
 *  John Ryland
 *
 *  Parts based on work from:
 *    Code posted to stackoverflow
 *    https://gist.github.com/ilsken/dd91285d50197d6345f9
 */


// Make:  g++ -std=c++14 topics_db.cpp -o test


// ranges - filter map reduce
// use | operator, eg:
//        array_type values = { 0, 1, 2, 3, 4 };
//        int result = values | map([](){ ... }) | filter([](){ ... }) | reduce([](){ ... });

// views - string_view, DB view
// DBs / topics -> graphs, high charts etc, joins and a UI to build queries

// web-views - launches/opens browser
//           - establishes a bi-directional connection
//           - vue - reactive - DOM
//           - DOM manipulation in C++?


// tuples vs structs
//   tagged tuples, named tuples, 
//  enum to give static name -> id mapping
//  string -> dynamic mapping
//  can iterate members

// perhaps could be used to do  SoA vs AoS

// cool website

// UI for ESP IoT stuff



#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <deque>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

// https://gist.github.com/ilsken/dd91285d50197d6345f9
namespace tagged_tuple {
  // should probably move all these helper templates into a detail namespace

  // SFINAE probe behind has_name_tag: the first check() overload is only viable
  // when U::name_tag names a type; otherwise the variadic fallback is chosen.
  template <class T>
  struct has_name_tag_helper {
    template<class U> static std::true_type check(typename U::name_tag*);
    template<class U> static std::false_type check(...);
    static constexpr bool value = decltype(check<T>(nullptr))::value;
  };
  // detects if a type has a name_tag type defined
  template <class T>
  struct has_name_tag : std::integral_constant<bool, has_name_tag_helper<T>::value> {};

  // simple class to hold two types
  // works with incomplete types, important for our use case
  //
  // NOTE: the aliases are crossed on purpose relative to the parameter names
  // (the straight mapping is kept commented out below). In this file a pair is
  // therefore written type_pair<ValueType, TagType>: `first` is the tag/key and
  // `second` is the stored value type.
  template <class First, class Second>
  struct type_pair {
    // using first = First;
    // using second = Second;

    using second = First;
    using first = Second;
  };

  // Overload-resolution probe behind is_type_pair: a type_pair<...>* argument
  // selects the more specialised check() overload.
  template <class T>
  struct is_type_pair_helper {
    template <class T1, class T2> static std::true_type check(type_pair<T1, T2>* ptr);
    template <class T1> static std::false_type check(T1*);
    static constexpr bool value = decltype(check(static_cast<T*>(nullptr)))::value;
  };

  // detects if T is a type_pair
  template <class T>
  using is_type_pair = std::integral_constant<bool, is_type_pair_helper<T>::value>;

  // Maps a class that declares `name_tag` to its key/value pair:
  //  - name_tag is itself a type_pair -> use it unchanged
  //  - otherwise                      -> name_tag is the key and T is the value
  template <class T, bool IsTag = is_type_pair<typename T::name_tag>::value>
  struct name_tag_traits_helper;
  template <class T>
  struct name_tag_traits_helper<T, true> {
    using type = typename T::name_tag;
  };
  template <class T>
  struct name_tag_traits_helper<T, false> {
    // Value first, tag second: matches the crossed aliases of type_pair so that
    // `first` resolves to the tag. (The previous argument order made the class
    // itself the tag.)
    using type = type_pair<T, typename T::name_tag>;
  };

  template <class T, bool IsTypePair = is_type_pair<T>::value, bool HasNameTag = has_name_tag<T>::value>
  struct base_name_tag_traits;

  // T already is a type_pair: use it directly.
  template <class T>
  struct base_name_tag_traits<T, true, false>  {
    using type = T;
  };
  // T is a class carrying a name_tag: defer to name_tag_traits_helper. The old
  // std::conditional here tested is_type_pair<T>, which is always false in this
  // specialization, so a name_tag that was itself a type_pair was never used.
  template <class T>
  struct base_name_tag_traits<T, false, true> {
    using type = typename name_tag_traits_helper<T>::type;
  };

  // gets info about a classes name tag (key -> value type mapping) from the class
  // if it's a type_pair it just uses that
  // if the class has a name_tag type it will use it if it's a type_pair, otherwise
  // the class is the value type and its name_tag is the key
  template <class T>
  struct name_tag_traits : base_name_tag_traits<T>::type {
    // aliases for our specific use case
    using tag_type = typename base_name_tag_traits<T>::type::first;
    using value_type = typename base_name_tag_traits<T>::type::second;
    using pair_type = typename base_name_tag_traits<T>::type;
  };

  // Recursion terminator. It has to be declared before the recursive overload:
  // the call inside that template has no class-type arguments, so only
  // overloads visible at its point of definition are considered.
  template <class T>
  constexpr std::size_t do_type_count(std::size_t count = 0) { return count; }
  // Adds one to `count` when the head of the list (U) is T, then recurses on the tail.
  template <class T, class U, class... Types>
  constexpr std::size_t do_type_count(std::size_t count = 0) {
    // Parenthesised on purpose: `count + cond ? 1 : 0` parses as `(count + cond) ? 1 : 0`.
    return do_type_count<T, Types...>(count + (std::is_same<T, U>::value ? 1 : 0));
  }
  // counts the number of times T appears in parameter pack, eventually we should use this to make sure
  // that the name_tag_traits::tag_type only appears once in a tagged tuple list
  template <class T, class... Types>
  constexpr std::size_t type_count() {
    return do_type_count<T, Types...>();
  }


  // helper alias, turns a list of TypePairs supplied to tagged_tuple to a list of key/tag/name types
  template <class T>
  using name_tag_t = typename name_tag_traits<T>::tag_type;
  // same as above but returns a list of value types
  template <class T>
  using name_tag_value_t = typename name_tag_traits<T>::value_type;

  // Empty haystack: Needle was not found, report the caller supplied `end`.
  template <class Needle>
  constexpr std::size_t index_of_impl(std::size_t /*index*/, std::size_t end) {
    return end;
  }
  // Returns `index` when the head of the haystack is Needle, otherwise keeps
  // searching the tail with index + 1.
  template <class Needle, class T, class... Haystack>
  constexpr std::size_t index_of_impl(std::size_t index, std::size_t end) {
    return std::is_same<Needle, T>::value ? index : index_of_impl<Needle, Haystack...>(index + 1, end);
  }
  // find the index of Needle in a type list, returns sizeof...(Haystack) + 1 on failure
  // (i.e. a value that is not a valid index)
  template <class Needle, class... Haystack>
  static constexpr std::size_t index_of() {
    return index_of_impl<Needle, Haystack...>(0, sizeof...(Haystack) + 1);
  }

  // and here's our little wrapper class that enables tagged tuples
  template <class... TypePairs>
  class tagged_tuple : public std::tuple<name_tag_value_t<TypePairs>...> {
    public:
    // not really needed for now but if we switch to private inheritance it'll come in handy
    using tag_type = std::tuple<name_tag_t<TypePairs>...>;
    using value_type = std::tuple<name_tag_value_t<TypePairs>...>;
    using value_type::value_type;
    using value_type::swap;
    using value_type::operator=;
  };

  // our special get functions: get<Tag>(t) resolves Tag to its position in the
  // tag list and forwards to std::get with that index.
  template <class Name, class... TypePairs>
  auto get(tagged_tuple<TypePairs...>& tuple) ->
    typename std::tuple_element<index_of<Name, name_tag_t<TypePairs>...>(),
                       typename tagged_tuple<TypePairs...>::value_type>::type&
  {
    return std::get<index_of<Name, name_tag_t<TypePairs>...>()>(tuple);
  }
  template <class Name, class... TypePairs>
  auto get(const tagged_tuple<TypePairs...>& tuple) ->
    const typename std::tuple_element<index_of<Name, name_tag_t<TypePairs>...>(),
                       typename tagged_tuple<TypePairs...>::value_type>::type&
  {
    return std::get<index_of<Name, name_tag_t<TypePairs>...>()>(tuple);
  }
  template <class Name, class... TypePairs>
  auto get(tagged_tuple<TypePairs...>&& tuple) ->
    typename std::tuple_element<index_of<Name, name_tag_t<TypePairs>...>(),
                       typename tagged_tuple<TypePairs...>::value_type>::type&&
  {
    // `tuple` is an lvalue inside the function; it has to be moved so that
    // std::get yields the rvalue reference promised by the return type.
    return std::get<index_of<Name, name_tag_t<TypePairs>...>()>(std::move(tuple));
  }
}






/*

  stream operator << uint8_t

  Would expect 0 to print as '0', and 1 as '1' etc
  but treated as a char, so it is '\0' etc.

  So uint8_t in theory should be different to char8_t

  char8_t is not available until C++20, needed for really handling UTF-8 properly.

  C++ standard libraries are a mess
  
  Need to start clean, and start at the bottom with fixed-types with clear semantics

eg:
   uint8_t  is really a numeric type, so need new definition


   Bool
   Void
   Null

   Int8
   Int16
   Int32
   Int64

   UInt8
   UInt16
   UInt32
   UInt64

   UChar8
   UChar16
   UChar32

   Float16
   Float32
   Float64

   Fixed16
   Fixed32
   Fixed64

   BigNumber

   CString
   UTF8String
   String


   String classes based on usage:

     FormatStrings -> can produce translatable and non-translatable strings
                   -> for translatable, the format is translated and the substitution can be in other orders, subs are translated etc
                   -> for non-display, fprintf style is okay, can be cstrings
     Translatable/DisplayStrings -> need to be unicode -> size != length etc, RTL/LTR
     Non-Display Strings -> cstrings, hashable, not to be used for end-user display, lookups
     FileNames/FileSystem Strings -> doesn't change based on language, but needs to be unicode

  CString     ->   const char*
  UTF8String  ->   std::vector<uint8_t>

  template <typename T>
  class FormatString
  {
     FormatString(T format);

     T formatted(args);
  };


*/

// Implicit cast of a const char * to a pair of the compile-time hash and the const char *.
// The string and its pre-computed hash can then be passed to a constructor together, which
// avoids computing the hash at run-time. The remaining cost is the hash-map lookup, which is
// quite fast on a hit.


// Constant pointer to constant characters; every use in this file treats it as
// a NUL-terminated string that is not owned by the holder.
using cstring     = char const* const;
// Result type of hash() below.
using hash_value  = std::size_t;
using string_t    = uint64_t;      // the id of a string_id
// struct string_t { uint64_t m; };      // the id of a string_id

/*
struct literal_string
{
  template <size_t N>
  constexpr literal_string(const char s[N])
    : m_cstring(s)
  {
  }

  cstring m_cstring;
};
*/

// Hashes a NUL-terminated string; usable in constant expressions.
//
// The value is defined back to front: hash("") == 5381 and
// hash(s) == unsigned(s[0]) + 33 * hash(s + 1), with size_t wrap-around.
// It is evaluated with two loops (find the terminator, then fold from the last
// character to the first) rather than by recursion, so the call depth no longer
// grows with the length of the string. The results are unchanged.
//
// a_string must not be null.
hash_value constexpr hash(cstring a_string)
{
  char const* cursor = a_string;
  while (*cursor)
  {
    ++cursor;
  }

  hash_value result = 5381;
  while (cursor != a_string)
  {
    --cursor;
    result = static_cast<unsigned int>(*cursor) + 33 * result;
  }
  return result;
}

// User-defined literal: "text"_hash is the hash() of the literal's characters.
// The length supplied by the compiler is not needed because hash() stops at the
// terminating NUL.
constexpr hash_value operator""_hash(const char* a_literal, size_t /*a_length*/)
{
  return hash(a_literal);
}

// A C string paired with its hash, computed once at construction.
// The characters are not copied: only the pointer is stored.
struct HashString
{
  hash_value   m_hash;     // hash() of the characters m_cstring points at
  cstring      m_cstring;  // the pointer handed to the constructor

  // We really want to be able to restrict this to string literals only
  //explicit
  constexpr HashString(const char *a_text)
    : m_hash{ hash(a_text) }
    , m_cstring{ a_text }
  {
  }

  // operator string_t();
};

// User-defined literal: "text"_ yields a HashString for the literal.
// The compiler-supplied length is unused.
constexpr HashString operator""_(const char* a_literal, size_t /*a_length*/)
{
  return HashString{ a_literal };
}

// Standard-library customisation points so HashString can be used as the key
// of std::unordered_map / std::unordered_set.
namespace std
{
  // Hashing is free: the value was computed when the HashString was built.
  template<>
  struct hash<HashString>
  {
    std::size_t operator()(HashString const& s) const noexcept
    {
      return s.m_hash;
    }
  };

  // Two HashStrings are equal when their hashes match and their characters
  // match. The characters are compared with a plain loop rather than strcmp:
  // strcmp is not a constexpr function, so it could not legitimately be called
  // from this constexpr operator, and <cstring> is not needed this way.
  template<>
  struct equal_to<HashString>
  {
    constexpr bool operator()(const HashString &lhs, const HashString &rhs) const
    {
      // Cheap rejection first; differing hashes mean differing strings.
      if (lhs.m_hash != rhs.m_hash)
      {
        return false;
      }

      const char* left = lhs.m_cstring;
      const char* right = rhs.m_cstring;
      while (*left != '\0' && *left == *right)
      {
        ++left;
        ++right;
      }
      // Equal only if both strings ended at the same position.
      return *left == *right;
    }
  };
}

#include <unordered_map>

// Small handle for an interned string.
//
// Every distinct string handed to a constructor is assigned an integer id
// (1, 2, 3, ... in first-seen order); equal strings get the same id. Id 0 is
// reserved for a default-constructed StringId that names no string.
//
// The string tables are function-local statics inside the private helpers. No
// locking is performed in this class.
class StringId
{
public:
  // Creates the "no string" id (0). Converting such an id back to text or to
  // an int trips the asserts below.
  StringId()
    : m_stringId{0}
  {}

  /*
  explicit StringId(const string_t& a_stringId)
    : m_stringId(a_stringId)
  {}
  */

  // constexpr
  // explicit
  // Interns a_string (hashing it at run time). Intentionally implicit so that
  // a plain C string can be passed where a StringId is expected.
  StringId(const char *a_string)
    : m_stringId(StringToInt(HashString(a_string)))
  {
  }

  // convertability back and forth to string_t

  //explicit
  // Interns a string whose hash is already known (e.g. from the "..."_ literal).
  StringId(const HashString& a_string)
    : m_stringId(StringToInt(a_string))
  {}

  ~StringId() = default;

  // Returns the text this id stands for. The pointer refers to the interned
  // copy held by this class, not to the buffer originally passed in.
  operator const char*() const
  {
    assert(m_stringId != 0);
    return IntToString(m_stringId).m_cstring;
  }

  /*
  operator std::string()
  {
    assert(m_stringId != 0);
    return IntToString(m_stringId).m_cstring;
  }
  */

  // Returns the id as a 32-bit int. The id is stored as string_t (64-bit), so
  // the conversion is spelled out; ids beyond the int32_t range do not fit.
  int32_t asInt() const
  {
    assert(m_stringId != 0);
    return static_cast<int32_t>(m_stringId);
  }

  /*
  operator string_t()
  {
    assert(m_stringId != 0);
    return m_stringId;
  }
  */

  // Ids are unique per string, so comparing ids compares the strings.
  bool operator==(const StringId& a_other) const
  {
    return m_stringId == a_other.m_stringId;
  }

private:
  string_t m_stringId;

  // Returns the id of a_string, assigning the next free id on first sight.
  //
  // On first sight the characters are copied into storage owned by this
  // function, and both lookup tables refer to that copy. Previously the tables
  // kept the caller's pointer, which dangles as soon as a temporary buffer
  // (for example std::string::c_str()) goes away.
  static string_t StringToInt(const HashString& a_string)
  {
    static string_t lastId{ 0 };
    // std::deque never relocates existing elements on emplace_back, so the
    // c_str() pointers handed to the maps below stay valid.
    static std::deque<std::string> storage;
    static std::unordered_map<HashString, string_t> mapping;

    const auto known = mapping.find(a_string);
    if (known != mapping.end())
    {
      return known->second;
    }

    storage.emplace_back(a_string.m_cstring);
    const HashString interned(storage.back().c_str());

    ++lastId;
    mapping.emplace(interned, lastId);

    // Save the reverse mapping - populate the reverse map
    IntToString(lastId, true, interned);
    return lastId;
  }

  // Reverse lookup, id -> string.
  // With a_add == true this instead records a_mappedString under a_int and
  // returns it; that mode is only used by StringToInt. Looking up an id that
  // was never recorded asserts, and throws std::out_of_range from at() when
  // asserts are compiled out.
  static HashString IntToString(string_t a_int,
       bool a_add = false, const HashString& a_mappedString = HashString(""))
  {
    static std::unordered_map<string_t, HashString> mapping;
    if (a_add)
    {
      mapping.emplace(a_int, a_mappedString);
      return a_mappedString;
    }
    assert(mapping.count(a_int));
    return mapping.at(a_int);
  }
};


// A StringId is meant to be no bigger than one 64-bit value.
static_assert(sizeof(StringId) <= 8, "too big");

/*
HashString::operator string_t()
{
  return StringId(*this);
}
*/

// using namespace my_hash::literals;
// Placeholder actions dispatched from foo(); each one intentionally does nothing.
void one()
{
}

void two()
{
}

void other()
{
}

void foo( const std::string& value )
{
  switch( hash(value.c_str()) )
  {
    case "one"_hash: one(); break;
    case "two"_hash: two(); break;
    case "two1"_.m_hash: two(); break;
    /*many more cases*/
    default: other(); break;
  }

  HashString s = "blah"_;

  StringId id(s);

  StringId id2("dfd"_);
  
  StringId id3("dfd2");

  std::string s2 = "hlah";

  StringId id4(s2.c_str());
}



// Untagged storage for one value. All members share the same memory and the
// union does not record which member is current; `variant` below pairs it
// with a ValueType for that purpose.
// NOTE(review): StringId has a user-provided default constructor, which makes
// the implicit default constructor of this union deleted - confirm how
// instances are meant to be created.
union variant_value
{
  std::nullptr_t  m_null;
  bool            m_bool;
  uint64_t        m_uint;
  int64_t         m_int;
  double          m_float;
  //string_t        m_string;
  StringId        m_stringId;   // interned string handle (see StringId)
};

// Kinds of value used as the type tag in `variant` and as the column type in
// the column definitions.
// NOTE(review): presumably each enumerator corresponds to the like-named
// member of variant_value (String -> m_stringId) - confirm.
enum class ValueType
{
  Null,
  Bool,
  UInt,
  Int,
  Float,
  String
};

// A value together with the tag saying what kind of value it is.
// NOTE(review): nothing here keeps m_type and m_value in step; presumably
// m_type names the member of m_value that is current - confirm.
struct variant
{
  ValueType      m_type;
  variant_value  m_value;
};



// Think DB

// Tables

// Define the schema - then actual data layout is internal detail

// One table cell. It stores only the raw value, with no type tag of its own.
// NOTE(review): presumably the type comes from the owning column's definition
// (column_def::m_type) - confirm.
struct cell
{
  variant_value  m_data;
};

/*
CREATE TABLE table_name (
   id    INTEGER  PRIMARY KEY,
   col2  CHARACTER VARYING(20),
   col3  INTEGER REFERENCES other_table(column_name),
   ... )
*/

// Run-time description of one table column (the std::string based counterpart
// of the literal-friendly `column_` below).
//
// The flags and the type carry default member initializers so that a
// default-constructed column_def has defined values instead of indeterminate
// ones; the struct remains an aggregate under C++14, which this file targets.
struct column_def
{
  std::string m_name;
  std::string m_description;
  std::string m_units;  // Perhaps need a units type - units as in:  kg, km, m, seconds etc
  bool        m_primaryKey = false;   // implies table is indexed on this column. Should be unique to be a primary key. Means a foreign key can reference by this.
  bool        m_foreignKey = false;   // this links to another table
  bool        m_index = false;        // perhaps same as primary-key, this means that lookup can be made by the value of this column
  ValueType   m_type = ValueType::Null;
  std::string m_foreignKeyTable;
  std::string m_foreignKeyColumn;
};

#include <vector>
// Cell storage for a table, held as a vector of vectors of cells.
// NOTE(review): presumably the outer vector is the rows and each inner vector
// holds one cell per column - confirm; nothing in this file fills it yet.
struct table_data
{
  std::vector<std::vector<cell>>  m_data;
};

// A named table: its column definitions plus its data.
struct table
{
  std::string             m_name;
  std::vector<column_def> m_columns;
  // Initialised so that a default-constructed table (as in create_table) never
  // carries an indeterminate value.
  // NOTE(review): the unit of m_rowSize (bytes? cells?) is not established
  // anywhere in this file - confirm.
  uint64_t                m_rowSize = 0;
  table_data              m_data;
  //
};

// A collection of tables.
struct database
{
  std::vector<table> m_tables;

  // Declared only; no definition appears in this file.
  // NOTE(review): presumably validates the column definitions (e.g. that
  // foreign keys name existing tables/columns) - confirm once implemented.
  bool check_schema();
};

// Column description that can be written as a brace-initialised literal, e.g.
//   { "name", "description", ValueType::Float, "units" }
// The members are listed in that order; anything left out of the initialiser
// takes its default (false for the flags, null for the foreign-key strings).
// All strings are non-owning pointers.
struct column_
{
  cstring     m_name;
  cstring     m_description;
  ValueType   m_type;
  cstring     m_units;  // Perhaps need a units type - units as in:  kg, km, m, seconds etc

  bool        m_primaryKey;   // implies table is indexed on this column. Should be unique to be a primary key. Means a foreign key can reference by this.
  bool        m_foreignKey;   // this links to another table
  bool        m_index;        // perhaps same as primary-key, this means that lookup can be made by the value of this column
  cstring     m_foreignKeyTable = nullptr;
  cstring     m_foreignKeyColumn = nullptr;
};

// Literal-friendly table description: a name plus exactly N column
// descriptions. N has no default, so it must always be spelled out
// (table_<4>, not table_<>).
template <size_t N>
struct table_
{
  cstring     m_name;
  column_     m_columns[N];
};

// Builds a table named a_tableName with one column_def per entry of
// a_columnDefs.
//
// a_columnDefs   first of a_columnCount column descriptions; may be null, in
//                which case the table gets no columns
// a_columnCount  number of entries readable through a_columnDefs
//
// Returns the table with m_name and m_columns filled in and no row data.
// (The previous stub ignored all of its arguments and returned a table whose
// m_rowSize was never initialised.)
// NOTE(review): m_rowSize is left at 0 because its unit is not defined
// anywhere in this file - confirm what it should hold.
table create_table(std::string a_tableName, const column_ a_columnDefs[], size_t a_columnCount)
{
  // Null C strings (e.g. an unset foreign key) become empty std::strings;
  // constructing std::string from a null pointer is undefined.
  const auto text = [](const char* a_text)
  {
    return std::string(a_text != nullptr ? a_text : "");
  };

  table result{};
  result.m_name = std::move(a_tableName);

  if (a_columnDefs == nullptr)
  {
    return result;
  }

  result.m_columns.reserve(a_columnCount);
  for (size_t i = 0; i < a_columnCount; ++i)
  {
    const column_& source = a_columnDefs[i];

    column_def def{};
    def.m_name             = text(source.m_name);
    def.m_description      = text(source.m_description);
    def.m_units            = text(source.m_units);
    def.m_primaryKey       = source.m_primaryKey;
    def.m_foreignKey       = source.m_foreignKey;
    def.m_index            = source.m_index;
    def.m_type             = source.m_type;
    def.m_foreignKeyTable  = text(source.m_foreignKeyTable);
    def.m_foreignKeyColumn = text(source.m_foreignKeyColumn);
    result.m_columns.push_back(std::move(def));
  }
  return result;
}

// Convenience overload of create_table for built-in arrays: the element count
// N is deduced from the array type, so the caller cannot pass a wrong size.
template <size_t N>
table create_table_(std::string a_tableName, const column_ (&a_columnDefs)[N])
{
  const column_* const firstColumn = a_columnDefs;
  return create_table(a_tableName, firstColumn, N);
}

// Declaration only; no definition appears in this file.
// NOTE(review): presumably appends a column called a_name of type a_type to
// a_table - confirm once implemented.
void add_column(table& a_table, std::string a_name, ValueType a_type);

// Sample column descriptions used by test().
// Each row is { name, description, type, units }; the remaining column_
// members are left at their defaults.
const column_ colDefs[] =
{
  {  "blah1",     "this is blah 1",                  ValueType::Float,  "m" },
  {  "blah2",     "this is blah 2",                  ValueType::Float,  "m" },
  {  "blah3",     "this is blah 3",                  ValueType::Float,  "m" },
};


/*
Id,         EN,          JP
String,     String,      String
PrimaryKey
_COIN       coin         jp_coin
_GEM        gem          jp_gem
*/


// columns could be reordered by type size - perhaps don't need to be union/variant
// allocation of the columns by the sizeof the type times the number of rows
// then a programming language to define these, like pascal, but generates the required
// code as C code to do SoA or AoS etc 

// Column descriptions for an entity table: an id plus a 3D position.
// Each row is { name, description, type, units }.
// NOTE(review): the id row carries "key" in the units position while
// m_primaryKey stays false - presumably a placeholder for marking the primary
// key; confirm.
column_ entityTableDefinition[] =
{
  {  "id",            "the entity id",                  ValueType::Int,    "key"         },
  {  "positionX",     "the x of position",              ValueType::Float,  "m"           },
  {  "positionY",     "the y of position",              ValueType::Float,  "m"           },
  {  "positionZ",     "the z of position",              ValueType::Float,  "m"           },
};

// Column descriptions for the moving part of an entity: its own id, the id of
// the entity it belongs to, and 3D velocity and acceleration.
// Each row is { name, description, type, units }.
// NOTE(review): "key" and ".entity(id)" sit in the units position while the
// key flags and foreign-key members stay at their defaults - presumably a
// shorthand for primary key / reference to entity.id; confirm.
column_ dynamicEntityTableDefinition[] =
{
  {  "id",            "the id",                         ValueType::Int,    "key"         },
  {  "entityId",      "the entity id",                  ValueType::Int,    ".entity(id)" },
  {  "velocityX",     "the x of velocity",              ValueType::Float,  "m/s"         },
  {  "velocityY",     "the y of velocity",              ValueType::Float,  "m/s"         },
  {  "velocityZ",     "the z of velocity",              ValueType::Float,  "m/s"         },
  {  "accelerationX", "the x of acceleration",          ValueType::Float,  "m/s^2"       },
  {  "accelerationY", "the y of acceleration",          ValueType::Float,  "m/s^2"       },
  {  "accelerationZ", "the z of acceleration",          ValueType::Float,  "m/s^2"       },
};

// The entity table written as a single table_ literal: the table name followed
// by its column descriptions ({ name, description, type, units } each).
// table_ has no default for N, so the column count is spelled out; it must
// match the number of rows below.
table_<4> entityDefinition =
{
  "entity",
  {
    {  "id",            "the entity id",                  ValueType::Int,    "id"         },
    {  "positionX",     "the x of position",              ValueType::Float,  "m"          },
    {  "positionY",     "the y of position",              ValueType::Float,  "m"          },
    {  "positionZ",     "the z of position",              ValueType::Float,  "m"          },
  }
};


void test()
{
  table t = create_table_("blah", colDefs);
  //table t = create_table("blah", colDefs, array_size(colDefs));
  t = create_table("blah", colDefs, sizeof(colDefs)/sizeof(colDefs[0]));
  
  t = create_table_(entityDefinition.m_name, entityDefinition.m_columns);
}




// A "struct" whose members are stored as an array of variants and addressed by
// enumerator instead of by member name.
struct generic_struct
{
  // One enumerator per member. MembersSize is not a member: being last, its
  // value is the number of members.
  enum class MemberNames
  {
    Name,
    Number,
    Address,
    Email,
    MembersSize
  };
  // Number of members, taken from the trailing enumerator.
  constexpr static int m_size = static_cast<int>(MemberNames::MembersSize);
  // One variant per enumerator above.
  // NOTE(review): presumably indexed by static_cast<int>(MemberNames::X) - confirm.
  variant  m_members[m_size];
};



// #include "tagged_tuple.hpp"
using namespace tagged_tuple;

void test1()
{
  // defines binding between types (names/tags) 
  // easier to maintain than just going with tuple<tag, type, tag, type, etc...>
  // no chance of you accidently removing a only a tag and setting the whole list off balance
  // also they are re-usable so you can always bind a certain type to a name
//  using tagged_tuple::type_pair;
//  using tagged_tuple::tagged_tuple;
  // bring in our get function for ADL
//  using tagged_tuple::get;
  // can co-exist with std::get 
  using std::get;
  // define a tagged tuple
  // instead of using type_pair<key_t, value_t> you can define a `name_tag` type on your classes and tagged_tuple will use that type as the key
  // for example struct user; class User { using name_tag = user; }; tagged_tuple<User> foo; auto val = get<user>(foo); 
  //using user_t = tagged_tuple<type_pair<struct name, std::string>, type_pair<struct age, int>>;
  // using user_t = tagged_tuple<type_pair<std::string, struct name>, type_pair<int, struct age>>;
  using user_t = tagged_tuple<type_pair<std::string, struct name>, type_pair<int, struct age>>;
  // it's initialized the same way as a tuple created with the value types of the type pairs (so tuple<string, int> in this case)
  user_t user  { "chris", 21 };
  std::cout << "Name: " << get<name>(user) << std::endl;
  std::cout << "Age: " << get<age>(user) << std::endl;
  ++get<age>(user);
  std::cout << "Age: " << get<age>(user) << std::endl;
  // you can still access properties via numeric indexes as if the class was defined as tuple<string, int>
  std::cout << "user[0] = " << get<0>(user) << std::endl;
  // tagged_tuple is derives from tuple<value_types<TagPairs>...> (in this example tuple<string, int>)
  // so it's implicitly convertible
  std::tuple<std::string, int> regular_tuple { user };
 
//  user_t another_user { regular_tuple };

  // if you don't like this you just need to make tagged_tuple privately derive from std::tuple instead of publically
  // I don't think splicing is an issue because it adds no data members. Just keeps track of the TagType -> Index mapping
  // The mapping is done statically so there's no allocations or memory overhead. Should be just as fast as using a normal tuple with proper inlining
}


// Prints the text behind a_string followed by a newline. Taking a StringId by
// value shows that callers may pass a StringId or a plain C string.
void TestAPI(StringId a_string)
{
  const char* const text = static_cast<const char*>(a_string);  //.c_str());
  printf("%s\n", text);
}

// Small demonstration of StringId: interns a few strings, prints one through
// TestAPI twice, and uses the id of "dfd" as the process exit status.
// argc and argv are not used.
int main(int argc, char* argv[])
{
  //test1();

  // Ids are handed out in first-seen order, so the order of these statements
  // determines the values: "dfd2" (literal-hashed and plain) shares one id and
  // "dfd" receives the next one.
  StringId id("dfd2"_);
  StringId id2("dfd2");
  StringId id3("dfd"_);
  
  //StringId id4(5);
  //StringId id5 = StringId(5);
  //"dfd"_);

  // Explicit StringId, then implicit conversion from a string literal.
  TestAPI(StringId("sdfd"));
  TestAPI("sdfd");

  //return string_t(id3); // s.m_hash;
  // Exit status is the id assigned to "dfd" (2 if nothing was interned before
  // main ran).
  return id3.asInt(); // s.m_hash;
}