Browse Source

Added some notes I had lying around

Neia Finch 1 year ago
parent
commit
e97c49fd49
6 changed files with 425 additions and 0 deletions
  1. 37
    0
      dlang.md
  2. 113
    0
      dpetalc/builtins.md
  3. 2
    0
      dpetalc/src/petal/semantic/typecheck.d
  4. 60
    0
      dpetalc/threadlocal_gc.md
  5. 121
    0
      metaprogramming.md
  6. 92
    0
      style.md

+ 37
- 0
dlang.md View File

@@ -0,0 +1,37 @@
1
+# Implementing the compiler in D
2
+
3
+How would we implement the compiler in D?
4
+
5
+* Use Bison as the parser generator. Recent Bison supports D.
6
+    * This requires me to write AST creation stuff manually, which kind of sucks.
7
+    * But I don't have to write an AST-to-semantic copier, which is nice.
8
+* Hand-written lexer?
9
+    * Can autogenerate the detect-literal portion of it using a prefix tree.
10
+* Use LLVM-C API instead of writing to a file.
11
+    * This lets me keep a local variable basic block open, which means fewer separate visitors.
12
+* Use RTTI-switch visitor instead of indirection visitor.
13
+* Use reflection to implement `visitChildren`.
14
+* Use reflection to implement rewrite visitor.
15
+* Use `make` instead of `dub` to build, because we really don't need dub.
16
+
17
+## String interpolation
18
+
19
+The hand-written lexer makes string interpolation easier:
20
+
21
+    "Hello $name! You are ${person:years} old!"
22
+
23
+becomes:
24
+
25
+    INTERP_START
26
+    STRING("Hello ")
27
+    IDENT("name")
28
+    STRING("! You are ")
29
+    IDENT("person")
30
+    CHILD
31
+    IDENT("years")
32
+    STRING(" old!")
33
+    INTERP_END
34
+
35
+This works because the hand-written lexer can do a lot more. Then I have productions like:
36
+
37
+    string: STRING | INTERP_START exp* INTERP_END;

+ 113
- 0
dpetalc/builtins.md View File

@@ -0,0 +1,113 @@
1
+# How to handle builtin types with nontrivial methods
2
+
3
+## Basic types
4
+
5
+We have a "builtin" library. It contains a set of types that mirror builtin types like int64.
6
+
7
+So for instance `core.Float64` is a struct containing instance methods and static methods, like so:
8
+
9
+```petal
10
+struct Float64 {
11
+  @intrinsic("llvm.fabs.f64")
12
+  extern fn abs -> float64.
13
+
14
+  @intrinsic("llvm.trunc.f64")
15
+  extern fn trunc -> float64.
16
+
17
+  @intrinsic("llvm.llrint.i64.f64")
18
+  extern fn iround -> int64.
19
+
20
+  @intrinsic("llvm.lrint.i64.f64")
21
+  extern fn to_int -> int64.
22
+
23
+  @intrinsic("llvm.maxnum.f64")
24
+  static extern fn max(float64 a, float64 b) -> float64.
25
+
26
+  static fn parse(string s) -> float64 {
27
+    float64 total!
28
+    int64 divisor!
29
+    bool has_divisor!
30
+    foreach c in s {
31
+      divisor++.
32
+      if c == '.' {
33
+        has_divisor = true.
34
+        divisor = 0.
35
+        continue.
36
+      }
37
+      total *= 10.
38
+      total += c - '0'.
39
+    }
40
+    if has_divisor { return total / (10 ^ divisor). }
41
+    return total.
42
+  }
43
+
44
+  fn to_string(int64 max_digits = 6) -> string {
45
+    let i = this:to_int.
46
+    let rem = this - i.
47
+    let s = i:to_string.
48
+    if rem == 0 { return s. }
49
+    s ~= '.'.
50
+    foreach _ in max_digits:iterate {
51
+      if rem == 0 { break. }
52
+      rem *= 10.
53
+      s ~= "0123456789"[rem:to_int].
54
+    }
55
+    return s.
56
+  }
57
+}
58
+```
59
+
60
+The compiler routes `ChildExpr` on `float64` and expressions of type `float64` to `Float64`, unless
61
+we need to handle some separately.
62
+
63
+## Array properties
64
+
65
+We're going to keep the array interface minimal:
66
+
67
+* indexing
68
+* appending
69
+* resizing
70
+* iteration
71
+* length
72
+* slicing
73
+
74
+We *could* make a `core.Array` parameterized type. This might be a good plan? But I'm not so
75
+certain. We're going to have a runtime function for resizing. Iteration is going to compile to
76
+pointer stuff that I want to put off as long as possible. Slicing is pretty easy, although the
77
+bounds checks take a bit more thought.
78
+
79
+Internally, an array is `struct Array[T] { pointer[T] ptr. uint64 len. uint64 capacity. }`. A slice
80
+is an array where `len == capacity`.
81
+
82
+The length getter just returns `len`. The length *setter* calls a runtime function to resize the
83
+array. The runtime function is essentially:
84
+
85
+```c
86
+struct Array {
87
+    void* ptr;
88
+    uint64_t len;
89
+}
90
+
91
+bool safe_mult(uint64_t a, uint64_t b, uint64_t* ret) {
92
+    if ((UINT64_MAX / a) < b) {
93
+        *ret = a * b;
94
+        return TRUE;
95
+    }
96
+    return FALSE;
97
+}
98
+
99
+void resize(Array* array, uint64_t element_size, uint64_t new_length) {
100
+    array->len = new_length;
101
+    uint64_t new_capacity = MAX(new_length, (array->len * 17) / 10);
102
+    uint64_t new_byte_capacity;
103
+    if (safe_mult(new_capacity, element_size, &new_byte_capacity))
104
+        array->ptr = realloc(array->ptr, new_byte_capacity);
105
+    else
106
+        abort();
107
+}
108
+```
109
+
110
+## Runtime functions
111
+
112
+Runtime functions that we *don't* need users to see can be hidden entirely. The compiler can define
113
+them, or we'll have an interface file that's bundled into the compiler binary, or something.

+ 2
- 0
dpetalc/src/petal/semantic/typecheck.d View File

@@ -229,6 +229,8 @@ class Typecheck : ChainedVisitor!void
229 229
         }
230 230
         else
231 231
         {
232
+            // Is this a type?
233
+            // That's actually a *little* hard to tell for us.
232 234
             // Function pointer of some sort. We don't support it just now.
233 235
             errors.report(s, ErrorType.unsupported, "function pointers not currently supported");
234 236
             return;

+ 60
- 0
dpetalc/threadlocal_gc.md View File

@@ -0,0 +1,60 @@
1
+# Option 1: semi-manual thread RC
2
+
3
+1. Everything is local to its source thread by default.
4
+2. You must explicitly call a specific function to share data with a thread.
5
+3. When you share data to a thread, the data is now *also* considered local to that thread. This
6
+   doesn't protect against race conditions, but we can modify this somewhat to account for that.
7
+4. Data that's shared to another thread gets pinned in its allocating thread.
8
+5. Data that's pinned can't be collected.
9
+6. A thread unpins an object when it GCs and has no remaining references, or when it exits.
10
+
11
+## Sharing procedure
12
+
13
+1. The source thread (S) acquires the sharing lock of the target thread (T).
14
+2. If T is completed, S drops the lock and ends the sharing procedure.
15
+3. S increments the cross-thread reference count (CTRC) for the shared object, O.
16
+4. S inserts O into T's foreign object collection.
17
+5. S drops T's sharing lock.
18
+6. T runs a garbage collection cycle, discovering that O has no more references in the local stack
19
+   or heap.
20
+7. T acquires T's sharing lock.
21
+8. T removes O from its foreign object collection.
22
+9. T decrements O's CTRC.
23
+10. If S is dead, T enqueues it for a dead-threads collection cycle. This works like a standard GC
24
+    cycle, except T's stack and registers are not scanned.
25
+11. When S executes a GC cycle, it considers any object with a CTRC greater than 0 to be alive.
26
+    Objects that are unreachable from any root and have a CTRC of 0 are collected.
27
+
28
+The CTRC is an atomic integer. We need at least 3 bytes, so atomic int32.
29
+
30
+## Collecting when you have circular references
31
+
32
+This is where it gets ugly.
33
+
34
+Let's say I share an `Object[] array` from A to B. Then I allocate an `Object o` in B and store it
35
+in that array. Now I need to mark `Object o` as being shared and ensure that every thread with a
36
+reference to `array` also marks `o` as reachable.
37
+
38
+That's expensive. That's also implicit in this model. We need to have the type system make it
39
+explicit. And if we do that, we still have the issue that we can't really go more than one level
40
+deep that easily. We'd need each object to store a list of threads referencing it.
41
+
42
+And we can't make that implicit just by having a shared and a local heap. You can't tell if
43
+something's referenced by the shared heap effortlessly.
44
+
45
+## Sharing to multiple threads
46
+
47
+If thread A shares object O to thread B, which shares it to thread C, B executes the second sharing
48
+procedure. The procedure is otherwise unchanged.
49
+
50
+## Allocating CTRCs
51
+
52
+Most objects are strictly local. We want lazy allocation.
53
+
54
+
55
+# Option 2: shared heap plus local heap
56
+
57
+We have `shared(T)` and `T`. Converting from `T` to `shared(T)` marks the object as shared, pinning
58
+it. (This only works for reference types?)
59
+
60
+

+ 121
- 0
metaprogramming.md View File

@@ -0,0 +1,121 @@
1
+# Metaprogramming in Petal: goals
2
+
3
+## First-class entities
4
+
5
+The goal for Petal is that types and functions should be first-class. However, I'm not sure this is
6
+doable.
7
+
8
+Specifically, we want:
9
+
10
+* Syntax for defining types and functions normally
11
+* Syntax for defining types and functions with some level of interpolation
12
+* The ability to pass types and functions to functions and store them in variables
13
+* Use type variables to call constructors and functions
14
+* Use the return values in a reasonable way
15
+
16
+We should be able to specify some required interface for the type to conform to and use everything
17
+in that interface seamlessly. Or perhaps the compiler should do it for us and we can optionally be
18
+more explicit.
19
+
20
+## Metaprogramming
21
+
22
+### Reading data
23
+
24
+We want to read data about things defined in our program:
25
+
26
+* general
27
+    * visibility
28
+    * attributes
29
+    * name
30
+    * where it's defined
31
+    * kind (function / struct / etc)
32
+* functions
33
+    * parameters
34
+    * return type
35
+    * linkage
36
+* types
37
+    * constructors
38
+    * fields
39
+    * member functions
40
+
41
+### Altering flow of execution
42
+
43
+We should be able to alter the flow of execution easily according to data that we've read. This
44
+should include things like calling a function only if the type defines it, accessing a field only if
45
+it's visible, that sort of thing. This should type-check properly and not cause compilation errors.
46
+
47
+The moral equivalent of:
48
+
49
+```python
50
+if hasattr(obj, 'foo', Int -> Int):
51
+    a: Int = obj.foo(10)
52
+```
53
+
54
+### Producing new data
55
+
56
+We want to be able to produce new declarations, blocks of statements, or even expressions. We want
57
+to do this easily when we have inputs that are strings, numbers, other user-defined data structures,
58
+types, etc.
59
+
60
+
61
+# What this might look like
62
+
63
+```petal
64
+Jsonizable ClassType ty -> ClassType {
65
+    let outtype = ty:clone.
66
+    outtype.base = ty.
67
+    let fn = Method "jsonize" (-> Json).
68
+    fn:body += <! let Json js! !>.
69
+    for field in outtype:fields {
70
+        # Pretend we defined :jsonize UFCS-able funcs for builtins
71
+        fn:body += <! js[${field:name}] = ${field:get this}:jsonize. !>.
72
+    }
73
+    fn:body += <! return js. !>.
74
+    outtype.members += fn.
75
+    return outtype.
76
+}
77
+class Foo {
78
+    string a.
79
+    int b.
80
+}
81
+main {
82
+    let (Jsonizable Foo) f!
83
+    # or:
84
+    # let JFoo = Jsonizable Foo.
85
+    # let JFoo f!
86
+    f:a = "hello world".
87
+    f:b = 10.
88
+    println f:jsonize:str.
89
+}
90
+```
91
+
92
+This would output:
93
+
94
+```json
95
+{"a":"hello world","b":10}
96
+```
97
+
98
+## Issues
99
+* How do we tell if we need to execute something at compile time?
100
+    * If it appears in an "execute this at compile time" context.
101
+* How do we parse and lex the `<! ... !>` stuff?
102
+    * We can't do a top-down, context-insensitive parse because we don't know where to start. So a
103
+      recursive descent parser is not an option. If we use recursive descent for the main parser,
104
+      we'll need a different child parser.
105
+        * LALR(1) is bottom-up and can be provided by bison.
106
+    * We won't always know what sort of thing to expect.
107
+    * Maybe we can only support a limited number of things: `decl+`, `decl_or_statement+`, `attr`,
108
+      `expr`?
109
+    * At what level do we stitch in external references?
110
+        * If it's in the token stream, we can trivially switch between struct and class, for
111
+          instance.
112
+        * If it's in the AST, there's more safety.
113
+* How hygienic is that concatenation stuff?
114
+    * How do we distinguish lookups in the mixed in scope from lookups in the external scope?
115
+        * Using `${}` syntax
116
+    * What can you append?
117
+        * The obvious sorts of things. Blocks let you append blocks, function arg lists let you
118
+          append args, etc.
119
+* What object model do we need?
120
+* How do we typecheck the code literal stuff?
121
+* How do we do this sort of thing inline? There's value in that.

+ 92
- 0
style.md View File

@@ -0,0 +1,92 @@
1
+# Petal style guide
2
+
3
+It is easier to read and write code written in a consistent style. Here are
4
+guidelines for how to write Petal in the canonical way.
5
+
6
+## Identifiers
7
+
8
+Type names should be `PascalCase` and all other identifiers should be
9
+`snake_case`.
10
+
11
+Short initialisms / acronyms should be capitalized in type names. Initialisms of
12
+five or more characters should be treated as a word. For instance:
13
+
14
+    # These are short acronyms, so they're capitalized.
15
+    struct URI {}
16
+    enum HTTPStatusCode {}
17
+    # But "NPAPI" is too long, so it's a word.
18
+    class NpapiWrapper {}
19
+
20
+A type intended to be a drop-in replacement for a builtin type, or to serve a
21
+similar role, should be `lowercase`, without underscores.
22
+
23
+    # Good: Like string, but encoded in utf16.
24
+    struct wstring { let uint16[] data. }
25
+    # Good: Rational numbers that can be used similarly to float.
26
+    struct rational64 { let int64 num = 0.  let int64 denom = 0. }
27
+    # Bad: test cases don't mimic a builtin type, and builtins don't behave like classes.
28
+    class testcase { let string name. let (unit -> unit) fn. }
29
+
30
+You MAY violate these rules to bind to an external library to make porting
31
+easier, but it is recommended to use the Petal style.
32
+
33
+## Spacing
34
+
35
+### Indentation and newlines
36
+
37
+Use two spaces to indent. Do not try to align code with tabs or spaces.
38
+
39
+Try to limit column width to 80 characters. This lets someone using a moderately
40
+large font fit two columns of code comfortably on a 1920x1200 screen.
41
+
42
+Opening curly brackets `{` go on the same line as the thing they belong to.
43
+Short blocks can be written on the same line; blocks containing more than one
44
+statement should almost always have a line for each.
45
+
46
+    if some_condition { print "equal!". }
47
+    if 1 == 2 { print "equal!". }
48
+
49
+Use the Rectangle Rule to determine how to lay out your code: you should be able
50
+to draw a rectangle around each independent piece of code that doesn't intersect
51
+with anything else (ignoring brackets).
52
+
53
+When indenting a continuation of a previous line, double-indent. This better
54
+distinguishes the continuation from a subsequent indent.
55
+
56
+    # Bad: the argument list is an independent piece of code, but its rectangle
57
+    # intersects with the parent's name.
58
+    funcWithLotsOfArgs int32 a, string b,
59
+        int64 c, MyType d -> int32 {
60
+      return 0.
61
+    }
62
+    # Good: all in one line means nothing intersects
63
+    funcWithLotsOfArgs int32 a, string b, int64 c, MyType d -> int32 {}
64
+    # Good: multiple lines without intersect
65
+    funcWithLotsOfArgs
66
+        int32 a, string b,
67
+        int64 c, MyType d -> int32 {
68
+      return 0.
69
+    }
70
+
71
+### Operators and braces
72
+
73
+Operators should have spaces around them.
74
+
75
+    # Good
76
+    return 1 + 3 * 2.
77
+    # Bad: inconsistent and misleading
78
+    return 1+3 * 2.
79
+    # Bad: everything's squished together and harder to read
80
+    return 1+3*2.
81
+
82
+Curly braces should have spaces between them and any other symbol:
83
+
84
+    # Good
85
+    if 1 == 2 { println "Math is broken". } else { println "Safe!". }
86
+    # Bad
87
+    if 1 == 2{println "Math is broken".}else{println "Safe!".}
88
+
89
+Short flow control statements can be all on one line, as above.
90
+
91
+You can't omit the curly braces; that simply won't compile.
92
+