diff --git a/CMakeLists.txt b/CMakeLists.txt index c2d269c..ac83280 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,7 +48,7 @@ clangformat_targets() message(STATUS "Adding tests") -FILE(GLOB ALL_FILES +FILE(GLOB_RECURSE ALL_FILES CONFIGURE_DEPENDS tests/* ) @@ -62,3 +62,5 @@ endforeach() set_property(TEST three_regions.vpy PROPERTY WILL_FAIL true) set_property(TEST leak_with_global.vpy PROPERTY WILL_FAIL true) +set_property(TEST modify_cown.vpy PROPERTY WILL_FAIL true) +set_property(TEST read_region.vpy PROPERTY WILL_FAIL true) diff --git a/src/lang/bytecode.h b/src/lang/bytecode.h index 4faa24f..67eacfe 100644 --- a/src/lang/bytecode.h +++ b/src/lang/bytecode.h @@ -9,6 +9,7 @@ inline const trieste::TokenDef StoreField{"store_field"}; inline const trieste::TokenDef CreateObject{"create_object"}; inline const trieste::TokenDef Proto{"prototype"}; inline const trieste::TokenDef Dictionary{"dictionary"}; +inline const trieste::TokenDef Cown{"cown"}; inline const trieste::TokenDef String{"string", trieste::flag::print}; inline const trieste::TokenDef KeyIter{"key_iter"}; inline const trieste::TokenDef Func{"func"}; @@ -43,4 +44,5 @@ inline const trieste::TokenDef Jump{"jump", trieste::flag::print}; /// Jump if the current stack frame is `False` inline const trieste::TokenDef JumpFalse{"jump_false", trieste::flag::print}; inline const trieste::TokenDef Print("print", trieste::flag::print); +inline const trieste::TokenDef Taint{"display_taint"}; inline const trieste::TokenDef IterNext("iter_next"); diff --git a/src/lang/interpreter.cc b/src/lang/interpreter.cc index 894cbd4..ed02d0d 100644 --- a/src/lang/interpreter.cc +++ b/src/lang/interpreter.cc @@ -119,14 +119,25 @@ namespace verona::interpreter // ========================================== // Operators that shouldn't be printed // ========================================== - if (node == Print) + if (node == Print || node == Taint) { // Console output std::cout << node->location().view() << std::endl << 
std::endl; + std::vector taint = {}; + if (node == Taint) + { + auto v = pop("taint source"); + taint.push_back(v); + // Removing the reference here is a bit early, but should be safe + // since it comes from an ident. Removing it later would require an + // additional if clause + rt::remove_reference(frame(), v); + } + // Mermaid output std::vector roots{frame()}; - ui->output(roots, std::string(node->location().view())); + ui->output(roots, std::string(node->location().view()), &taint); // Continue return ExecNext{}; @@ -174,6 +185,12 @@ namespace verona::interpreter "CreateObject: A bytecode function requires a body node"); obj = rt::make_func(new Bytecode{payload->at(0)}); } + else if (payload == Cown) + { + auto v = pop("cown reagion"); + obj = rt::make_cown(v); + rt::move_reference(frame(), obj, v); + } else { assert(false && "CreateObject has to specify a value"); @@ -464,13 +481,15 @@ namespace verona::interpreter out.open(path); } - void - output(std::vector& roots, std::string message) + void output( + std::vector& roots, + std::string message, + std::vector* taint = nullptr) { out << "```" << std::endl; out << message << std::endl; out << "```" << std::endl; - rt::ui::mermaid(roots, out); + rt::ui::mermaid(roots, out, taint); if (interactive) { out.close(); diff --git a/src/lang/lang.h b/src/lang/lang.h index 8d6cee6..20711f4 100644 --- a/src/lang/lang.h +++ b/src/lang/lang.h @@ -32,7 +32,8 @@ inline const TokenDef Compile{"compile"}; namespace verona::wf { inline const auto lv = Ident | Lookup; - inline const auto rv = lv | Empty | Null | String | Create | Call | Method; + inline const auto rv = + lv | Empty | Null | String | Create | Call | Method | Cown; inline const auto cmp_values = Ident | Lookup | Null; inline const auto key = Ident | Lookup | String; inline const auto operand = Lookup | Call | Method | Ident; @@ -40,11 +41,12 @@ namespace verona::wf inline const auto grouping = (Top <<= File) | (File <<= Body) | (Body <<= Block) | (Block <<= - 
(Freeze | Region | Assign | If | For | Func | Return | ReturnValue | Call | - Method)++) | + (Freeze | Taint | Region | Assign | If | For | Func | Return | + ReturnValue | Call | Method)++) | (Assign <<= (Lhs >>= lv) * (Rhs >>= rv)) | (Lookup <<= (Op >>= operand) * (Rhs >>= key)) | (Region <<= Ident) | - (Freeze <<= Ident) | (Create <<= Ident) | (If <<= Eq * Block * Block) | + (Freeze <<= Ident) | (Taint <<= Ident) | (Cown <<= Ident) | + (Create <<= Ident) | (If <<= Eq * Block * Block) | (For <<= (Key >>= Ident) * (Value >>= Ident) * (Op >>= lv) * Block) | (Eq <<= (Lhs >>= cmp_values) * (Rhs >>= cmp_values)) | (Func <<= Ident * Params * Body) | (Call <<= Ident * List) | @@ -54,16 +56,16 @@ namespace verona::wf inline const trieste::wf::Wellformed bytecode = (Top <<= Body) | (Body <<= (LoadFrame | StoreFrame | LoadField | StoreField | Drop | Null | - CreateObject | CreateRegion | FreezeObject | IterNext | Print | Eq | Neq | - Jump | JumpFalse | Label | Call | Return | ReturnValue | ClearStack | - Dup)++) | - (CreateObject <<= (Dictionary | String | KeyIter | Proto | Func)) | + CreateObject | CreateRegion | FreezeObject | Taint | IterNext | Print | + Eq | Neq | Jump | JumpFalse | Label | Call | Return | ReturnValue | + ClearStack | Dup)++) | + (CreateObject <<= (Dictionary | String | KeyIter | Proto | Func | Cown)) | (Func <<= Body) | (Label <<= Ident)[Ident]; } inline const auto LV = T(Ident, Lookup); inline const auto RV = - T(Empty, Ident, Lookup, Null, String, Create, Call, Method); + T(Empty, Ident, Lookup, Null, String, Create, Call, Method, Cown); inline const auto CMP_V = T(Ident, Lookup, Null); inline const auto KEY = T(Ident, Lookup, String); inline const auto OPERAND = T(Lookup, Call, Method, Ident); diff --git a/src/lang/passes/bytecode.cc b/src/lang/passes/bytecode.cc index 0c3dbbc..77b5878 100644 --- a/src/lang/passes/bytecode.cc +++ b/src/lang/passes/bytecode.cc @@ -1,5 +1,7 @@ #include "../lang.h" +inline const trieste::TokenDef 
DestructiveRead{"destructive_read"}; + PassDef bytecode() { PassDef p{ @@ -74,6 +76,24 @@ PassDef bytecode() << create_print(_(Op)); }, + T(Compile) << (T(Taint)[Op] << T(Ident)[Ident]) >> + [](auto& _) { + auto print_str = std::string(create_print(_(Op))->location().view()); + return Seq << (Compile << _[Ident]) << (Taint ^ print_str); + }, + + T(Compile) << (T(DestructiveRead) << T(Ident)[Ident]) >> + [](auto& _) { + // Read the value from the frame and set the frame value to null + return Seq << (Compile << _(Ident)) << Null + << create_from(StoreFrame, _(Ident)); + }, + T(Compile) << (T(Cown)[Op] << T(Ident)[Ident]) >> + [](auto& _) { + return Seq << (Compile << (DestructiveRead << _(Ident))) + << (CreateObject << Cown); + }, + T(Compile) << (T(Create)[Op] << T(Ident)[Ident]) >> [](auto& _) { return Seq << (Compile << _[Ident]) << (CreateObject << Proto); diff --git a/src/lang/passes/call_stmts.cc b/src/lang/passes/call_stmts.cc index 6eb6ce4..029ab8b 100644 --- a/src/lang/passes/call_stmts.cc +++ b/src/lang/passes/call_stmts.cc @@ -6,8 +6,8 @@ namespace verona::wf inline const auto call_stmts = grouping | (Block <<= - (Freeze | Region | Assign | If | For | Func | Return | ReturnValue | Call | - Method | ClearStack | Print)++); + (Freeze | Taint | Region | Assign | If | For | Func | Return | + ReturnValue | Call | Method | ClearStack | Print)++); } PassDef call_stmts() diff --git a/src/lang/passes/flatten.cc b/src/lang/passes/flatten.cc index 3fcbf4a..2d89994 100644 --- a/src/lang/passes/flatten.cc +++ b/src/lang/passes/flatten.cc @@ -6,16 +6,17 @@ namespace verona::wf inline const trieste::wf::Wellformed flatten = (Top <<= File) | (File <<= Body) | (Body <<= - (Freeze | Region | Assign | Eq | Neq | Label | Jump | JumpFalse | Print | - StoreFrame | LoadFrame | CreateObject | Ident | IterNext | Create | - StoreField | Lookup | String | Call | Method | Return | ReturnValue | - ClearStack)++) | + (Freeze | Taint | Region | Assign | Eq | Neq | Label | Jump | 
JumpFalse | + Print | StoreFrame | LoadFrame | CreateObject | Ident | IterNext | + Create | StoreField | Lookup | String | Call | Method | Return | + ReturnValue | ClearStack)++) | (CreateObject <<= (KeyIter | String | Dictionary | Func)) | (Func <<= Compile) | (Compile <<= Body) | (Create <<= Ident) | (Assign <<= (Lhs >>= lv) * (Rhs >>= rv)) | (Lookup <<= (Op >>= operand) * (Rhs >>= key)) | (Region <<= Ident) | - (Freeze <<= Ident) | (Call <<= Ident * List) | (Method <<= Lookup * List) | - (List <<= rv++) | (Params <<= Ident++) | + (Freeze <<= Ident) | (Taint <<= Ident) | (Cown <<= Ident) | + (Call <<= Ident * List) | (Method <<= Lookup * List) | (List <<= rv++) | + (Params <<= Ident++) | (Eq <<= (Lhs >>= cmp_values) * (Rhs >>= cmp_values)) | (Neq <<= (Lhs >>= cmp_values) * (Rhs >>= cmp_values)) | (Label <<= Ident)[Ident]; diff --git a/src/lang/passes/grouping.cc b/src/lang/passes/grouping.cc index 2218863..3301af8 100644 --- a/src/lang/passes/grouping.cc +++ b/src/lang/passes/grouping.cc @@ -16,16 +16,12 @@ PassDef grouping() In(Group) * OPERAND[Op] * (T(Lookup)[Lookup] << (T(Group) << KEY[Rhs])) >> [](auto& _) { return Lookup << _(Op) << _(Rhs); }, - T(Group) << ((T(Region)[Region] << End) * T(Ident)[Ident] * End) >> - [](auto& _) { - _(Region)->extend(_(Ident)->location()); - return _(Region) << _(Ident); - }, - - T(Group) << ((T(Freeze)[Freeze] << End) * T(Ident)[Ident] * End) >> + T(Group) + << ((T(Freeze, Taint, Cown, Region)[Op] << End) * T(Ident)[Ident] * + End) >> [](auto& _) { - _(Freeze)->extend(_(Ident)->location()); - return _(Freeze) << _(Ident); + _(Op)->extend(_(Ident)->location()); + return _(Op) << _(Ident); }, T(Group) << ((T(Drop)[Drop] << End) * LV[Lhs] * End) >> diff --git a/src/lang/passes/parse.cc b/src/lang/passes/parse.cc index 3cc5090..8e21dab 100644 --- a/src/lang/passes/parse.cc +++ b/src/lang/passes/parse.cc @@ -5,7 +5,7 @@ namespace verona::wf using namespace trieste::wf; inline const auto parse_tokens = Region | Ident | Lookup | 
Empty | Freeze | - Drop | Null | String | Create | Parens; + Taint | Cown | Drop | Null | String | Create | Parens; inline const auto parse_groups = Group | Assign | If | Else | Block | For | Func | List | Return; @@ -154,6 +154,8 @@ trieste::Parse parser() "drop" >> [](auto& m) { m.add(Drop); }, "create" >> [](auto& m) { m.add(Create); }, "freeze" >> [](auto& m) { m.add(Freeze); }, + "taint" >> [](auto& m) { m.add(Taint); }, + "cown" >> [](auto& m) { m.add(Cown); }, "region" >> [](auto& m) { m.add(Region); }, "None" >> [](auto& m) { m.add(Null); }, "[0-9A-Za-z_]+" >> [](auto& m) { m.add(Ident); }, diff --git a/src/rt/core.h b/src/rt/core.h index e4acfc0..c1d45f3 100644 --- a/src/rt/core.h +++ b/src/rt/core.h @@ -174,6 +174,55 @@ namespace rt::core } }; + // The prototype object for cown + inline PrototypeObject* cownPrototypeObject() + { + static PrototypeObject* proto = new PrototypeObject("Cown"); + return proto; + } + + class CownObject : public objects::DynObject + { + // For now always false, but might be needed later if we want to simulate + // concurrency. 
+ bool acquired = false; + + public: + CownObject(objects::DynObject* region) + : objects::DynObject(cownPrototypeObject()) + { + // FIXME: Add once regions are reified + // assert( + // region->get_prototype() == regionPrototype() && + // "Cowns can only store regions"); + // + // FIXME: Also check that the region has a LRC == 1, with 1 + // being the reference passed into this constructor + auto old = this->set("region", region); + assert(old == nullptr); + } + + std::string get_name() + { + return ""; + } + + objects::DynObject* is_primitive() + { + return this; + } + + bool is_cown() override + { + return true; + } + + bool is_cown_acquired() override + { + return acquired; + } + }; + inline std::set* globals() { static std::set* globals = @@ -185,6 +234,7 @@ namespace rt::core keyIterPrototypeObject(), trueObject(), falseObject(), + cownPrototypeObject(), }; return globals; } diff --git a/src/rt/objects/dyn_object.h b/src/rt/objects/dyn_object.h index ac6e2b5..3e4ccac 100644 --- a/src/rt/objects/dyn_object.h +++ b/src/rt/objects/dyn_object.h @@ -27,8 +27,10 @@ namespace rt::objects { friend class Reference; friend objects::DynObject* rt::make_iter(objects::DynObject* obj); - friend void - rt::ui::mermaid(std::vector& roots, std::ostream& out); + friend void rt::ui::mermaid( + std::vector& roots, + std::ostream& out, + std::vector* taint); friend void destruct(DynObject* obj); friend void dealloc(DynObject* obj); template @@ -77,8 +79,11 @@ namespace rt::objects return; } - assert(target->parent == src); - Region::dec_prc(target); + if (src) + { + assert(target->parent == src); + Region::dec_prc(target); + } return; } @@ -232,7 +237,7 @@ namespace rt::objects // TODO SCC algorithm visit(this, [](Edge e) { auto obj = e.target; - if (obj->is_immutable()) + if (!obj || obj->is_immutable()) return false; auto r = get_region(obj); @@ -241,7 +246,8 @@ namespace rt::objects get_region(obj)->objects.erase(obj); } obj->region.set_tag(ImmutableTag); - return true; + + 
return !obj->is_cown(); }); } @@ -250,6 +256,20 @@ namespace rt::objects return region.get_tag() == ImmutableTag; } + virtual bool is_cown() + { + return false; + } + virtual bool is_cown_acquired() + { + ui::error("is_cown_acquired() should only be called on cowns"); + return false; + } + bool is_opaque() + { + return this->is_cown() && !this->is_cown_acquired(); + } + [[nodiscard]] DynObject* get(std::string name) { auto result = fields.find(name); @@ -270,7 +290,7 @@ namespace rt::objects [[nodiscard]] DynObject* set(std::string name, DynObject* value) { - if (is_immutable()) + if (is_immutable() && this->is_cown()) { ui::error("Cannot mutate immutable object"); } @@ -282,6 +302,7 @@ namespace rt::objects // The caller must provide an rc for value. [[nodiscard]] DynObject* set_prototype(DynObject* value) { + // No need to check for a cown, since cowns already have a set prototype if (is_immutable()) { ui::error("Cannot mutate immutable object"); @@ -331,6 +352,7 @@ namespace rt::objects static void move_reference(DynObject* src, DynObject* dst, DynObject* target) { + // An immutable cown can't be moved to another region if (target == nullptr || target->is_immutable()) return; diff --git a/src/rt/rt.cc b/src/rt/rt.cc index 6c46386..0bd667a 100644 --- a/src/rt/rt.cc +++ b/src/rt/rt.cc @@ -34,11 +34,18 @@ namespace rt return new core::FrameObject(parent); } + objects::DynObject* make_cown(objects::DynObject* region) + { + return new core::CownObject(region); + } + thread_local objects::RegionPointer objects::DynObject::local_region = new Region(); void freeze(objects::DynObject* obj) { + // Cown specific handling of the freeze operation is handled by the + // `freeze()` implementation of the object obj->freeze(); } @@ -49,6 +56,10 @@ namespace rt objects::DynObject* get(objects::DynObject* obj, std::string key) { + if (obj->is_opaque()) + { + ui::error("opaque objects can't be accessed"); + } return obj->get(key); } @@ -72,6 +83,13 @@ namespace rt 
objects::DynObject* set(objects::DynObject* obj, std::string key, objects::DynObject* value) { + if (obj->is_opaque()) + { + // Overwriting data can change the RC and then call destructors of the + // type this action therefore requires the cown to be acquired + ui::error("opaque objects can't be modified"); + } + return obj->set(key, value); } diff --git a/src/rt/rt.h b/src/rt/rt.h index 4fc2046..9eea970 100644 --- a/src/rt/rt.h +++ b/src/rt/rt.h @@ -15,6 +15,7 @@ namespace rt objects::DynObject* make_str(std::string str_value); objects::DynObject* make_object(); objects::DynObject* make_frame(objects::DynObject* parent); + objects::DynObject* make_cown(objects::DynObject* region); void freeze(objects::DynObject* obj); void create_region(objects::DynObject* objects); diff --git a/src/rt/ui.h b/src/rt/ui.h index 18a824e..818af4b 100644 --- a/src/rt/ui.h +++ b/src/rt/ui.h @@ -12,10 +12,17 @@ namespace rt::ui class UI { public: - virtual void output(std::vector&, std::string) {} + virtual void output( + std::vector&, + std::string, + std::vector* = nullptr) + {} }; - void mermaid(std::vector& roots, std::ostream& out); + void mermaid( + std::vector& roots, + std::ostream& out, + std::vector* taint = nullptr); [[noreturn]] inline void error(const std::string& msg) { diff --git a/src/rt/ui/mermaid.cc b/src/rt/ui/mermaid.cc index 29a432b..17d59a5 100644 --- a/src/rt/ui/mermaid.cc +++ b/src/rt/ui/mermaid.cc @@ -10,6 +10,15 @@ namespace rt::ui { + struct MermaidDecoration + { + const char* start; + const char* end; + const char* out; + }; + const inline auto NORMAL = MermaidDecoration{"[", "]", "-->"}; + const inline auto COWN = MermaidDecoration{"[[", "]]", "-.->"}; + void replace(std::string& text, std::string from, std::string replace) { size_t pos = 0; @@ -31,7 +40,10 @@ namespace rt::ui return text; } - void mermaid(std::vector& roots, std::ostream& out) + void mermaid( + std::vector& roots, + std::ostream& out, + std::vector* taint) { // Give a nice id to each 
object. std::map visited; @@ -61,9 +73,13 @@ namespace rt::ui { return false; } + auto src_deco = ((src && src->is_cown()) ? &COWN : &NORMAL); + auto dst_deco = ((dst && dst->is_cown()) ? &COWN : &NORMAL); + if (src != nullptr) { - out << " id" << visited[src] << " -->|" << escape(key) << "| "; + out << " id" << visited[src] << " " << src_deco->out << "|" + << escape(key) << "| "; } if (visited.find(dst) != visited.end()) { @@ -72,11 +88,12 @@ namespace rt::ui } auto curr_id = id++; visited[dst] = curr_id; - out << "id" << curr_id << "[ "; + out << "id" << curr_id << dst_deco->start << " "; out << escape(dst->get_name()); out << "
rc=" << dst->rc; - out << " ]" << (unreachable ? ":::unreachable" : "") << std::endl; + out << " " << dst_deco->end << (unreachable ? ":::unreachable" : "") + << std::endl; auto region = objects::DynObject::get_region(dst); if (region != nullptr) @@ -149,7 +166,32 @@ namespace rt::ui // Output object count as very useful. out << "subgraph Count " << objects::DynObject::get_count() << std::endl; out << "end" << std::endl; - out << "classDef unreachable stroke:red,stroke-width:2px" << std::endl; + out << "classDef unreachable stroke:red,stroke-width:2px;" << std::endl; + + // Taint nodes on request + if (taint && !taint->empty()) + { + out << "classDef tainted fill:#43a;" << std::endl; + std::set tainted; + + auto mark_tained = [&](objects::Edge e) { + objects::DynObject* dst = e.target; + if (dst == nullptr || tainted.contains(dst)) + { + return false; + } + out << "class id" << visited[dst] << " tainted;" << std::endl; + tainted.insert(dst); + + return !dst->is_opaque(); + }; + + for (auto root : *taint) + { + objects::visit(root, mark_tained); + } + } + // Footer (end of mermaid graph) out << "```" << std::endl; } diff --git a/tests/invalid_cowns/modify_cown.vpy b/tests/invalid_cowns/modify_cown.vpy new file mode 100644 index 0000000..c7f043b --- /dev/null +++ b/tests/invalid_cowns/modify_cown.vpy @@ -0,0 +1,7 @@ + +# A simple cown +a = {} +region a +co = cown a + +co.other = {} diff --git a/tests/invalid_cowns/read_region.vpy b/tests/invalid_cowns/read_region.vpy new file mode 100644 index 0000000..dfc7958 --- /dev/null +++ b/tests/invalid_cowns/read_region.vpy @@ -0,0 +1,7 @@ + +# A simple cown +a = {} +region a +co = cown a + +dummy = co.region diff --git a/tests/three_regions.vpy b/tests/three_regions.vpy index cbb7426..a7d94ec 100644 --- a/tests/three_regions.vpy +++ b/tests/three_regions.vpy @@ -5,25 +5,27 @@ b = {} a["b"] = b region a + c = {} c["self"] = c -d = {} -c["d"] = d +c["d"] = {} region c e = {} e["self"] = e -f = {} -e["f"] = f -region e +e["f"] = {} +region e 
+taint a # connect first region to second in two ways -a["c"] = c -b["d"] = d +a.c = c +a.b.d = c.d # Connect first region to third with single entry point -b["e"] = f +a.b.e = e.f + +taint a # Now freeze part of the first region, and the reachable parts of the # second and third regions @@ -32,8 +34,6 @@ freeze b # Drop all the references drop a -drop d -drop f -drop b +drop b drop c drop e diff --git a/tests/valid_cowns.vpy b/tests/valid_cowns.vpy new file mode 100644 index 0000000..28f7b55 --- /dev/null +++ b/tests/valid_cowns.vpy @@ -0,0 +1,17 @@ +global = {} + +# A simple cown +a = {} +a.b = {} +region a +c01 = cown a + +# Store the cown in a global +global.cown = c01 +taint global + +# Freeze global with a cown +freeze global + +drop c01 +drop global