From bd58085d9ed1bc5a917ee6e7078fe3681595b18e Mon Sep 17 00:00:00 2001 From: Annwan Date: Mon, 1 Jun 2026 18:33:05 +0200 Subject: [PATCH] docs: ucsd abi --- docs/.template.typ | 21 ++- docs/ain48.sublime-syntax | 4 +- docs/isa.typ | 48 ++---- docs/struct.sublime-syntax | 17 +++ docs/ucsd_abi.typ | 303 ++++++++++++++++++++++++++++++++++--- 5 files changed, 336 insertions(+), 57 deletions(-) create mode 100644 docs/struct.sublime-syntax diff --git a/docs/.template.typ b/docs/.template.typ index b1c741d..e4f7ef6 100644 --- a/docs/.template.typ +++ b/docs/.template.typ @@ -6,7 +6,6 @@ set text( font: "Iosevka Etoile", features: (cv47: 10), - number-type: "old-style", weight: 400, size: 12pt, ) @@ -26,5 +25,25 @@ })) set raw(theme: "./gruvbox-white.tmTheme") show raw.where(lang: "ain48"): set raw(syntaxes: "./ain48.sublime-syntax") + show raw.where(lang: "struct"): set raw(syntaxes: "./struct.sublime-syntax") doc } + +#let byte_diag(..bytes, voff: 0) = { + import cetz.draw: * + for (i, b) in bytes.pos().enumerate() { + let j = 0 + for (l, d) in b { + content((i * 12.5 + j + l / 2, 1 + voff), d) + j = j + l + line((i * 12.5 + j, voff), (i * 12.5 + j, 2 + voff), stroke: gray) + } + rect((i * 12.5, voff), (i * 12.5 + 12, 2 + voff)) + for j in range(0, 13) { + line((i * 12.5 + j, .125 + voff), (i * 12.5 + j, -.125 + voff)) + line((i * 12.5 + j, 2.125 + voff), (i * 12.5 + j, 1.875 + voff)) + } + content((i * 12.5, voff + 2.25), anchor: "south-west")[11] + content((i * 12.5 + 12, voff + 2.25), anchor: "south-east")[0] + } +} diff --git a/docs/ain48.sublime-syntax b/docs/ain48.sublime-syntax index d510cc5..6fcab7f 100644 --- a/docs/ain48.sublime-syntax +++ b/docs/ain48.sublime-syntax @@ -17,9 +17,9 @@ contexts: - match: ';' scope: comment push: comments - - match: '^[a-zA-Z0-9_-]+:$' + - match: '^[a-zA-Z0-9_-]+:' scope: keyword.control.label.ain48 - - match: '^.[a-zA-Z]+' + - match: '\.[a-zA-Z]+' scope: keyword.directive.ain48 comments: - meta_scope: comment diff --git 
a/docs/isa.typ b/docs/isa.typ index 608a039..b17a4a5 100644 --- a/docs/isa.typ +++ b/docs/isa.typ @@ -64,24 +64,6 @@ In instruction diagrams, each large rectangle represents a byte, that are disposed in memory as implied by the reading order, starting at the lowest memory address. -#let instruction_diagram(..bytes, voff: 0) = { - import cetz.draw: * - for (i, b) in bytes.pos().enumerate() { - let j = 0 - for (l, d) in b { - content((i * 12.5 + j + l / 2, 1 + voff), d) - j = j + l - line((i * 12.5 + j, voff), (i * 12.5 + j, 2 + voff), stroke: gray) - } - rect((i * 12.5, voff), (i * 12.5 + 12, 2 + voff)) - for j in range(0, 13) { - line((i * 12.5 + j, .125 + voff), (i * 12.5 + j, -.125 + voff)) - line((i * 12.5 + j, 2.125 + voff), (i * 12.5 + j, 1.875 + voff)) - } - content((i * 12.5, voff + 2.25), anchor: "south-west")[11] - content((i * 12.5 + 12, voff + 2.25), anchor: "south-east")[0] - } -} #let amal = [Actió Machinae Arithméticae Logicaeque] == #amal @@ -92,7 +74,7 @@ memory address. { import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [0]), (1, [0]), (1, [0]), @@ -104,7 +86,7 @@ memory address. (4, [ac]), ), ((4, [r#sub[f]]), (4, [r#sub[2]]), (4, [r#sub[1]]))) content((12.25, -1))[or] - instruction_diagram(voff: -5, ( + byte_diag(voff: -5, ( (1, [0]), (1, [0]), (1, [0]), @@ -116,7 +98,7 @@ memory address. (4, [ac]), ), ((4, [r#sub[f]]), (4, [r#sub[2]]), (4, text(size: .9em)[imm[0:3]]))) content((12.25, -6))[or] - instruction_diagram(voff: -10, ( + byte_diag(voff: -10, ( (1, [0]), (1, [0]), (1, [0]), @@ -128,7 +110,7 @@ memory address. (4, [op]), ), ((4, [r#sub[f]]), (4, text(size: .9em)[imm[0:3]]), (4, [r#sub[1]]))) content((12.25, -11))[or] - instruction_diagram(voff: -15, ( + byte_diag(voff: -15, ( (1, [0]), (1, [0]), (1, [0]), @@ -152,7 +134,7 @@ memory address. { import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [0]), (1, [0]), (1, [0]), @@ -163,8 +145,8 @@ memory address. 
(1, [s]), (4, [r]), ), ((12, [imm[0:11]]),)) - instruction_diagram(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3) - instruction_diagram(((12, [imm[36:47]]),), voff: -6) + byte_diag(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3) + byte_diag(((12, [imm[36:47]]),), voff: -6) }, ), ) @@ -175,7 +157,7 @@ memory address. #align(center, cetz.canvas({ import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [0]), (1, [0]), (1, [0]), @@ -193,7 +175,7 @@ memory address. #align(center, cetz.canvas({ import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [0]), (1, [0]), (1, [1]), @@ -208,7 +190,7 @@ memory address. #align(center, cetz.canvas({ import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [1]), (1, [1]), (1, [1]), @@ -231,7 +213,7 @@ memory address. { import cetz.draw: * scale(.65) - instruction_diagram(( + byte_diag(( (1, [1]), (1, [0]), (1, [0]), @@ -245,8 +227,8 @@ memory address. (1, [U]), (1, [R]), ), ((12, [imm[0:11]]),)) - instruction_diagram(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3) - instruction_diagram(((12, [imm[36:47]]),), voff: -6) + byte_diag(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3) + byte_diag(((12, [imm[36:47]]),), voff: -6) }, ), ) @@ -256,7 +238,7 @@ memory address. #align(center, cetz.canvas({ cetz.draw.scale(.65) - instruction_diagram(( + byte_diag(( (1, [1]), (1, [0]), (1, [1]), @@ -274,7 +256,7 @@ memory address. 
#align(center, cetz.canvas({ cetz.draw.scale(.65) - instruction_diagram(( + byte_diag(( (1, [1]), (1, [1]), (1, [0]), diff --git a/docs/struct.sublime-syntax b/docs/struct.sublime-syntax new file mode 100644 index 0000000..e774246 --- /dev/null +++ b/docs/struct.sublime-syntax @@ -0,0 +1,17 @@ +%YAML 1.2 +--- +name: struct +file_extensions: + - struct +scope: source.struct +contexts: + main: + - match: '(?i)\b(byte|hword|word|bit)\b' + scope: storage.type.struct + - match: '\b[0-9_]+\b' + scope: constant.numeric.struct + - match: '\b(struct|enum|union|flags)\s+[a-zA-Z0-9_]+\b' + scope: storage.type.struct + - match: '%.*$' + scope: comment + diff --git a/docs/ucsd_abi.typ b/docs/ucsd_abi.typ index dd53929..a3a30be 100644 --- a/docs/ucsd_abi.typ +++ b/docs/ucsd_abi.typ @@ -1,34 +1,295 @@ #import ".template.typ": * #show: conf +#show regex("[\u{e3e0}-\u{e3ff}]+(\s+[\u{e3e0}-\u{e3ff}]+)*"): text.with(font: "AndikaAmbNaran") +#show raw.where(block: true): block.with(breakable: false) + #title[Unichal Software Distribution System ABI\ Version 3.4 for AIN-48] © University of Chalmosique, Cross. 301 -- Cross. 319 -= Object Format -b += The RIM file format +The RIM#footnote[ +    --- Rainsihen Iehanac Móscirts --- Computer + Instruction Format +] format is the format used by UCSD to encode machine code object files and executables. It is defined as follows, where each box represents a byte, in execution order. +All numbers bigger than a byte are represented in little endian.
+The sizes of the types are as follows: +/ ```struct byte```: 12 bit +/ ```struct hword```: 24 bit +/ ```struct word```: 48 bit +All numbers are unsigned. -= Integer representation -= Calling convention -== Parameter passing -== Return value passing -== Call Sequence +== RIM Header +Placed at the start of the file, it identifies the file as a RIM object and contains some additional flags. -```ain48 -.at #o00000000 -fib_n: - ilg %2, #0 - ilg %3, #1 -__loop: - padd %4, %3, %2 - tsc %2, %3 - tsc %3, %3 - pstr %1, #1 - nui __loop - - tsc %1, %2 - rv +```struct +enum PLATFORM as hword: + UCSD-at12 = 00000000 + UCSD-ain24 = 00000001 + UCSD-ain48 = 00000002 + % other formats may exist, as defined in their own implementation + % of the RIM specification +``` +```struct +flags FILETYPE as byte: + EXECUTABLE = 0001 + SHARED = 0002 + REPOSITIONABLE = 0004 +``` +```struct +struct RIM_HEADER: + byte[4] magic_number = [0206, 0203, 0213, 7777] + % \7777 in the old AT12-IM codepage + enum PLATFORM platform + hword version % For UCSD 3.4: version = 0001 + flags FILETYPE type + word index_address % offset in the file where the section table is ``` + +== Section Table +Placed at the file offset indicated by `index_address` in the header, the section table contains a list of all the sections, their lengths and their offsets into the file. +```struct +enum SEC_TYPE as byte: + RO_DATA = 0 % Data to be copied into non-executable read-only memory + CODE = 1 % Data to be copied into executable read-only memory + RW_DATA = 2 % Data to be copied into non-executable read-write memory + RW_CODE = 3 % Data to be copied into executable read-write memory + SYMBOLS_INTERNAL = 4 % Table listing all the linkable symbols of + % the object + SYMBOLS_EXTERNAL = 5 % Table listing all the symbols the object needs + % to link to to be usable.
+ STRINGS = 6 % Container for all the strings for symbols and debug info + DEBUG = 7 % Debug information + META = 7777 % Extra Information about the object +``` +```struct +struct SEC_TABLE_ENTRY: + enum SEC_TYPE type + % TODO: confirm whether a section length field belongs here + word offset +``` +```struct +struct SECTION_TABLE: + hword count + struct SEC_TABLE_ENTRY[count] entries +``` + +== Code and Data Sections +Sections tagged `RO_DATA`, `CODE`, `RW_DATA` and `RW_CODE` are all encoded the same way. +Ideally the start of such a section is to be at an offset that is a multiple of 4 bytes. +```struct +struct DATA_SECTION: + word length % How many bytes of data + word req_addr % Where should the data be located in memory. Note + % that this field is ignored if the REPOSITIONABLE flag + % is set in the header. + byte[length] data +``` + +== Symbol tables +The symbol tables contain data about the linkable symbols of the object. +```struct +struct SYMBOL_TABLE: + hword count + struct SYMBOL_TABLE_ENTRY[count] entries +``` +```struct +struct SYMBOL_TABLE_ENTRY: + % Number of the string section and index into that section + % referencing the name of the symbol + hword string_section + hword string_number + % For internal symbol tables: the section and offset referring to the + % symbol. For external symbol tables: the section and offset where + % the address for that symbol needs to be replaced. If a symbol needs + % to be linked between two data sections of the object, have it be + % present in both the internal and external tables. + hword data_section + word data_offset +``` + +== Strings section +```struct +struct STRINGS_SECTION: + hword count % number of string entries + word len % length of the data part + word[count] entry_offsets % offsets into `data` of each string + byte[len] data % binary data containing the strings +``` + +== Debug +The contents of debug tables are platform-specific.
+/ TODO: Write the debug info format for UCSD eventually™ + +== Meta +The meta section contains additional metadata about the file. + +```struct +struct META_TABLE: + hword count + struct META_ENTRY[count] entries +``` +```struct +struct META_ENTRY: + enum META_ENTRY_TYPE type + union META_VALUE val +``` +```struct +union META_VALUE: % variant depends on meta entry type + struct META_VALUE_STRING string_value + word int_value +``` +```struct +struct META_VALUE_STRING: + hword section % strings section number + hword id % entry in string section +``` +```struct +enum META_ENTRY_TYPE as hword: + ENTRY_POINT = 0 % Kind: int_value, only useful when the file is said to + % be executable, gives the entry point for the program + DYNAMIC_LINKER = 1 % Kind: string_value, the name of the dynamic linker + % in charge of resolving the symbols upon load of + % the object, only useful if the SHARED flag is set. + HASH = 2 % Kind: string_value, a hash against which to verify the + % integrity of the object file, supported algorithms depend on + % the linker used. The string is of the format + % `algorithm:octal-dump-of-hash` + + % META_ENTRY_TYPEs between 1000 0000 and 3777 7777 (inclusive) are + % reserved for platform-specific information. Here are the ones for + % UCSD 3.4 + UCSD_APPLICATION_NAME = 1000 0000 % Kind: string_value, user-facing name + % of the application + UCSD_ICON_DATA_SYM = 1000 0001 % Kind: string_value, name of a symbol + % referencing bitmap image data for an + % application icon + UCSD_ICON_DATA_NAME = 1000 0002 % Kind: string_value, name of a system- + % wide icon that may be installed on + % the system + % Multiple instances of UCSD_ICON_DATA_NAME and UCSD_ICON_DATA_SYM can + % be specified, the system will make use of the first one that resolves + % to a usable image.
+ + % META_ENTRY_TYPEs greater than or equal to 4000 0000 are reserved for + % private use by external tooling and are not specified in this + % document +``` + += Representation of types + +== Integers +#block(breakable: false)[This ABI defines 6 integer types: `u12`, `u24` and `u48` which are unsigned 12, 24 and 48 bit integers, as well as `i12`, `i24` and `i48` which are signed (by the two's complement method) 12, 24 and 48 bit integers. + +#grid(columns: (1fr, 1fr), row-gutter: .65em)[ +/ `u12`: Unsigned 12-bit integer + / Storage size: 1 byte + / Alignment: 1 byte +][ +/ `i12`: Signed 12-bit integer + / Storage size: 1 byte + / Alignment: 1 byte +][ +/ `u24`: Unsigned 24-bit integer + / Storage size: 2 bytes + / Alignment: 2 bytes +][ +/ `i24`: Signed 24-bit integer + / Storage size: 2 bytes + / Alignment: 2 bytes +][ +/ `u48`: Unsigned 48-bit integer + / Storage size: 4 bytes + / Alignment: 4 bytes +][ +/ `i48`: Signed 48-bit integer + / Storage size: 4 bytes + / Alignment: 4 bytes +]] + +== Booleans +Booleans are represented as `u12`s where 0 is false and any non-zero value (but canonically 7777) is true. + +== Strings +Strings are represented by pointers to a `u12`, followed by that number of bytes of data representing the contents of the string. + + += Userspace invocation convention +== Parameter passing +Parameters are expanded to take a full word and are placed in order: +#columns(2)[ ++ in ```ain48 %01``` ++ in ```ain48 %02``` ++ in ```ain48 %03``` ++ in ```ain48 %04``` +#colbreak() +5. in ```ain48 %05``` ++ in ```ain48 %06``` ++ in ```ain48 %07``` ++ pushed on the stack, with the 8#super[th] argument at the lowest address and the last argument at the highest address. +] +Arguments placed on the stack are placed starting at the word with the address immediately higher than that of the return address. + +== Return value passing +The return value is to be placed in ```ain48 %01```.
+ +== Clobbering +Registers ```ain48 %01```, ```ain48 %02```, ```ain48 %03```, ```ain48 %04```, ```ain48 %05```, +```ain48 %06``` and ```ain48 %07``` may freely be clobbered by the invokee. It is the responsibility of the invoker to save them if it is desired to keep their value. Registers ```ain48 %10```, ```ain48 %11```, ```ain48 %12```, ```ain48 %13```, ```ain48 %14```, ```ain48 %15```, ```ain48 %16``` and ```ain48 %it``` must be returned to the invoker in the same state they were upon call. It is the responsibility of the invokee to save them if they are needed for other uses. + +== Invocation Sequence ++ The invoker pushes all the registers it needs saved to the stack ++ The invoker pushes the stack arguments to the stack ++ The invoker loads the registers with the register arguments ++ The invoker executes the ```ain48 IVC``` instruction. This pushes the return address to the stack and transfers control flow to the invokee ++ The invokee allocates its local variables on top of the stack. ++ The invokee saves any register it needs to prevent the clobbering of. ++ The invokee runs its course ++ The invokee restores the registers ++ The invokee frees its local variables ++ The invokee loads its return value into ```ain48 %01``` ++ The invokee runs the ```ain48 RV``` instruction. This pops the return address off the stack and transfers control flow back to the invoker ++ The invoker frees the stack arguments. ++ The invoker restores the registers it pushed + + +```ain48 +; An example +.init _init +.pars data_sl +.univ arg8, arg9 +arg8: .word #o7612 +arg9: .hword #o5 .byte #o0, #o0, #o0, #o0, #o0 +.pars actiones +.univ _init +_init: CST %02 ; 1. Push registers to be saved + ILG %02, arg9 ; 2. Push 9th argument + CST %02 + ILG %02, arg8 ; 2. Push 8th argument + LG %02, (%02) + CST %02 + ILG %01, #o1 ; 3. Load Register arguments + ... + ILG %07, #o7 + IVC res ; 4. Transfer to the invokee + PADD %it, #o10 ; 12. Free the 2 words of stack arguments + DST %02 ; 13.
Restore the saved register + +res: PSTR %it, #o14 ; 5. Allocate local variables (here 3 words) + CST %10 ; 6. Save invoker-safe registers + ... ; 7. The invokee does whatever it does + DST %10 ; 8. The invokee restores the invoker-safe + ; registers + PADD %it, #o14 ; 9. Free the local variables + ; 10. Assuming that the invokee has already put its + ; return value in %01 + RV ; 11. Transfer back to the invoker + +``` + += Kernel invocation convention +Same as userspace invocation, except for the fact that all arguments are shifted by one, and that ```ain48 %01``` is used to hold the kernel invocation number. #pagebreak(weak: true) #outline(depth: 2)