1
0

docs: ucsd abi

This commit is contained in:
2026-06-01 18:33:05 +02:00
parent 3e3a39963e
commit bd58085d9e
5 changed files with 336 additions and 57 deletions

View File

@ -6,7 +6,6 @@
set text(
font: "Iosevka Etoile",
features: (cv47: 10),
number-type: "old-style",
weight: 400,
size: 12pt,
)
@ -26,5 +25,25 @@
}))
set raw(theme: "./gruvbox-white.tmTheme")
show raw.where(lang: "ain48"): set raw(syntaxes: "./ain48.sublime-syntax")
show raw.where(lang: "struct"): set raw(syntaxes: "./struct.sublime-syntax")
doc
}
// Draws one row of byte boxes on the current CeTZ canvas.
//
// - ..bytes: one positional argument per byte; each is an array of
//   (width, body) pairs describing the fields of that byte, drawn left to
//   right. Each box is 12 units wide, so the widths are expected to sum to 12.
// - voff: vertical offset of the row's baseline (default 0).
#let byte_diag(..bytes, voff: 0) = {
  import cetz.draw: *
  for (idx, fields) in bytes.pos().enumerate() {
    // Boxes are 12 units wide and placed every 12.5 units.
    let x0 = idx * 12.5
    let used = 0
    for (span, body) in fields {
      // Field body centred in its span, halfway up the 2-unit-high box.
      content((x0 + used + span / 2, 1 + voff), body)
      used += span
      // Gray separator at the right-hand edge of the field.
      line((x0 + used, voff), (x0 + used, 2 + voff), stroke: gray)
    }
    // Outline of the whole byte.
    rect((x0, voff), (x0 + 12, 2 + voff))
    // Unit tick marks straddling the bottom and top edges.
    for tick in range(0, 13) {
      line((x0 + tick, .125 + voff), (x0 + tick, -.125 + voff))
      line((x0 + tick, 2.125 + voff), (x0 + tick, 1.875 + voff))
    }
    // Position labels above the box: 11 at the left end, 0 at the right end.
    content((x0, voff + 2.25), anchor: "south-west")[11]
    content((x0 + 12, voff + 2.25), anchor: "south-east")[0]
  }
}

View File

@ -17,9 +17,9 @@ contexts:
- match: ';'
scope: comment
push: comments
- match: '^[a-zA-Z0-9_-]+:$'
- match: '^[a-zA-Z0-9_-]+:'
scope: keyword.control.label.ain48
- match: '^.[a-zA-Z]+'
- match: '\.[a-zA-Z]+'
scope: keyword.directive.ain48
comments:
- meta_scope: comment

View File

@ -64,24 +64,6 @@ In instruction diagrams, each large rectangle represents a byte, that are
disposed in memory as implied by the reading order, starting at the lowest
memory address.
// Draws one row of byte boxes on the current CeTZ canvas.
// Each positional argument is one byte, given as an array of (width, content)
// pairs; `voff` shifts the whole row vertically.
#let instruction_diagram(..bytes, voff: 0) = {
import cetz.draw: *
for (i, b) in bytes.pos().enumerate() {
// `j` accumulates the width already consumed inside byte `i`.
let j = 0
for (l, d) in b {
// Field content centred in its span, then a gray separator at its right edge.
content((i * 12.5 + j + l / 2, 1 + voff), d)
j = j + l
line((i * 12.5 + j, voff), (i * 12.5 + j, 2 + voff), stroke: gray)
}
// Byte outline: 12 units wide, 2 units high, one box every 12.5 units.
rect((i * 12.5, voff), (i * 12.5 + 12, 2 + voff))
// Unit tick marks straddling the bottom and top edges.
for j in range(0, 13) {
line((i * 12.5 + j, .125 + voff), (i * 12.5 + j, -.125 + voff))
line((i * 12.5 + j, 2.125 + voff), (i * 12.5 + j, 1.875 + voff))
}
// Labels above the box: 11 at the left end, 0 at the right end
// (presumably bit indices of a 12-bit byte — confirm).
content((i * 12.5, voff + 2.25), anchor: "south-west")[11]
content((i * 12.5 + 12, voff + 2.25), anchor: "south-east")[0]
}
}
#let amal = [Actió Machinae Arithméticae Logicaeque]
== #amal
@ -92,7 +74,7 @@ memory address.
{
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [0]),
(1, [0]),
(1, [0]),
@ -104,7 +86,7 @@ memory address.
(4, [ac]),
), ((4, [r#sub[f]]), (4, [r#sub[2]]), (4, [r#sub[1]])))
content((12.25, -1))[or]
instruction_diagram(voff: -5, (
byte_diag(voff: -5, (
(1, [0]),
(1, [0]),
(1, [0]),
@ -116,7 +98,7 @@ memory address.
(4, [ac]),
), ((4, [r#sub[f]]), (4, [r#sub[2]]), (4, text(size: .9em)[imm[0:3]])))
content((12.25, -6))[or]
instruction_diagram(voff: -10, (
byte_diag(voff: -10, (
(1, [0]),
(1, [0]),
(1, [0]),
@ -128,7 +110,7 @@ memory address.
(4, [op]),
), ((4, [r#sub[f]]), (4, text(size: .9em)[imm[0:3]]), (4, [r#sub[1]])))
content((12.25, -11))[or]
instruction_diagram(voff: -15, (
byte_diag(voff: -15, (
(1, [0]),
(1, [0]),
(1, [0]),
@ -152,7 +134,7 @@ memory address.
{
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [0]),
(1, [0]),
(1, [0]),
@ -163,8 +145,8 @@ memory address.
(1, [s]),
(4, [r]),
), ((12, [imm[0:11]]),))
instruction_diagram(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3)
instruction_diagram(((12, [imm[36:47]]),), voff: -6)
byte_diag(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3)
byte_diag(((12, [imm[36:47]]),), voff: -6)
},
),
)
@ -175,7 +157,7 @@ memory address.
#align(center, cetz.canvas({
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [0]),
(1, [0]),
(1, [0]),
@ -193,7 +175,7 @@ memory address.
#align(center, cetz.canvas({
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [0]),
(1, [0]),
(1, [1]),
@ -208,7 +190,7 @@ memory address.
#align(center, cetz.canvas({
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [1]),
(1, [1]),
(1, [1]),
@ -231,7 +213,7 @@ memory address.
{
import cetz.draw: *
scale(.65)
instruction_diagram((
byte_diag((
(1, [1]),
(1, [0]),
(1, [0]),
@ -245,8 +227,8 @@ memory address.
(1, [U]),
(1, [R]),
), ((12, [imm[0:11]]),))
instruction_diagram(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3)
instruction_diagram(((12, [imm[36:47]]),), voff: -6)
byte_diag(((12, [imm[12:23]]),), ((12, [imm[24:35]]),), voff: -3)
byte_diag(((12, [imm[36:47]]),), voff: -6)
},
),
)
@ -256,7 +238,7 @@ memory address.
#align(center, cetz.canvas({
cetz.draw.scale(.65)
instruction_diagram((
byte_diag((
(1, [1]),
(1, [0]),
(1, [1]),
@ -274,7 +256,7 @@ memory address.
#align(center, cetz.canvas({
cetz.draw.scale(.65)
instruction_diagram((
byte_diag((
(1, [1]),
(1, [1]),
(1, [0]),

View File

@ -0,0 +1,17 @@
%YAML 1.2
---
# Sublime Text syntax definition for the `struct` notation.
name: struct
file_extensions:
- struct
scope: source.struct
contexts:
main:
# Primitive type keywords, matched case-insensitively.
- match: '(?i)\b(byte|hword|word|bit)\b'
scope: storage.type.struct
# Numeric literals: runs of digits and underscores.
- match: '\b[0-9_]+\b'
scope: constant.numeric.struct
# An aggregate keyword together with the type name that follows it.
- match: '\b(struct|enum|union|flags)\s+[a-zA-Z0-9_]+\b'
scope: storage.type.struct
# `%` starts a comment that runs to the end of the line.
- match: '%.*$'
scope: comment

View File

@ -1,34 +1,295 @@
#import ".template.typ": *
#show: conf
// Typeset runs of private-use code points U+E3E0–U+E3FF (optionally separated
// by whitespace) in the AndikaAmbNaran font. Both character classes must span
// the same range; the second one previously lacked the closing brace of
// `\u{e3e0}`.
#show regex("[\u{e3e0}-\u{e3ff}]+(\s+[\u{e3e0}-\u{e3ff}]+)*"): text.with(font: "AndikaAmbNaran")
#show raw.where(block: true): block.with(breakable: false)
#title[Unichal Software Distribution System ABI\ Version 3.4 for AIN-48]
© University of Chalmosique, Cross. 301 -- Cross. 319
= Object Format
b
= The RIM file format
The RIM#footnote[
   --- Rainsihen Iehanac Móscirts --- Computer
Instruction Format
] format is the format used by UCSD to encode machine code object files and executables. It is defined as follows, where each box represents a byte, in execution order.
All numbers bigger than a byte are represented in little endian.
The sizes of the types are as follows:
/ ```struct byte```: 12 bit
/ ```struct hword```: 24 bit
/ ```struct word```: 48 bit
All numbers are unsigned
= Integer representation
= Calling convention
== Parameter passing
== Return value passing
== Call Sequence
== RIM Header
Placed at the start of the file, it identifies the file as a RIM object and contains some additional flags
```ain48
.at #o00000000
fib_n:
ilg %2, #0
ilg %3, #1
__loop:
padd %4, %3, %2
tsc %2, %3
tsc %3, %3
pstr %1, #1
nui __loop
tsc %1, %2
rv
```struct
enum PLATFORM as hword:
UCSD-at12 = 00000000
UCSD-ain24 = 00000001
UCSD-ain48 = 00000002
% other formats may exist, as defined in their own implementation
% of the RIM specification
```
```struct
flags FILETYPE as byte:
EXECUTABLE = 0001
SHARED = 0002
REPOSITIONABLE = 0004
```
```struct
struct RIM_HEADER:
byte[4] magic_number = [0206, 0203, 0213, 7777]
% \7777 in the old AT12-IM codepage
enum PLATFORM platform
hword version % For UCSD 3.4: version = 0001
flags FILETYPE type
word index_address % offset in the file where the section table is
```
adjective
== Section Table
Placed at the file offset indicated by `index_address` in the header, the section table contains a list of all the sections, their lengths and their offsets into the file.
```struct
enum SEC_TYPE as byte:
RO_DATA = 0 % Data to be copied into non-executable read-only memory
CODE = 1 % Data to be copied into executable read-only memory
RW_DATA = 2 % Data to be copied into non-executable read-write memory
RW_CODE = 3 % Data to be copied into executable read-write memory
SYMBOLS_INTERNAL = 4 % Table listing all the linkable symbols of
% the object
SYMBOLS_EXTERNAL = 5 % Table listing all the symbols the object needs
% to link to to be usable.
STRINGS = 6 % Container for all the strings for symbols and debug info
DEBUG = 7 % Debug information
META = 7777 % Extra Information about the object
```
```struct
struct SEC_TABLE_ENTRY:
enum SEC_TYPE type
word length
word offset
```
```struct
struct SECTION_TABLE:
hword count
struct SEC_TABLE_ENTRY[count] entries
```
== Code and Data Sections
Sections tagged `RO_DATA`, `CODE`, `RW_DATA` and `RW_CODE` are all encoded the same way.
Ideally the start of such a section is to be at an offset that is a multiple of 4 bytes.
```struct
struct DATA_SECTION:
word length % How many bytes of data
word req_addr % Where should the data be located in memory. Note
% that this field is ignored if the REPOSITIONABLE flag
% is set in the header.
byte[length] data
```
== Symbol tables
The symbol tables contain data about the linkable symbols of the object.
```struct
struct SYMBOL_TABLE:
hword count
struct SYMBOL_TABLE_ENTRY[count] entries
```
```struct
struct SYMBOL_TABLE_ENTRY:
% Number of the string section and index into that section
% referencing the name of the symbol
hword string_section
hword string_number
% For internal symbol tables: the section and offset referring to the
% symbol. For external symbol tables: the section and offset where
% the address for that symbol needs to be replaced. If a symbol needs
% to be linked between two data sections of the object, have it be
% present in both the internal and external tables.
hword data_section
word data_offset
```
== Strings section
```struct
struct STRINGS_SECTION:
hword count % number of string entries
word len % length of the data part
word[count] entry_offsets % offsets into `data` of each string
byte[len] data % binary data containing the strings
```
== Debug
The contents of debug tables are platform-specific.
/ TODO: Write the debug info format for UCSD eventually™
== Meta
The meta section contains additional metadata about the file
```struct
struct META_TABLE:
hword count
struct META_ENTRY[count] entries
```
```struct
struct META_ENTRY:
enum META_ENTRY_TYPE type
union META_VALUE val
```
```struct
union META_VALUE: % variant depends on meta entry type
struct META_VALUE_STRING string_value
word int_value
```
```struct
struct META_VALUE_STRING:
hword section % strings section number
hword id % entry in string section
```
```struct
enum META_ENTRY_TYPE as hword:
ENTRY_POINT = 0 % Kind: int_value, only useful when the file is said to
% be executable, gives the entry point for the program
DYNAMIC_LINKER = 1 % Kind: string_value, the name of the dynamic linker
% in charge of resolving the symbols upon load of
% the object, only useful if the SHARED flag is set.
HASH = 2 % Kind: string_value, a hash against which to verify the
% integrity of the object file, supported algorithms depend on
% the linker used. The string is of the format
% `algorithm:octal-dump-of-hash`
% META_ENTRY_TYPEs between 1000 0000 and 3777 7777 (inclusive) are
% reserved for platform-specific information. Here are the ones for
% UCSD 3.4
UCSD_APPLICATION_NAME = 1000 0000 % Kind: string_value user facing name
% of the application
UCSD_ICON_DATA_SYM = 1000 0001 % Kind: string_value, name of a symbol
% referencing bitmap image data for an
% application icon
UCSD_ICON_DATA_NAME = 1000 0002 % Kind: string_value, name of a system-
% wide icon that may be installed on
% the system
% Multiple instances of UCSD_ICON_DATA_NAME and UCSD_ICON_DATA_SYM can
% be specified, the system will make use of the first one that resolves
% to a usable image.
% META_ENTRY_TYPEs greater or equal to 4000 0000 are reserved for
% private use by external tooling and are not specified in this
% document
```
= Representation of types
== Integers
#block(breakable: false)[This ABI defines 6 integer types: `u12`, `u24` and `u48` which are unsigned 12, 24 and 48 bit integers, as well as `i12`, `i24` and `i48` which are signed (by the two's complement method) 12, 24 and 48 bit integers.
#grid(columns: (1fr, 1fr), row-gutter: .65em)[
/ `u12`: Unsigned 12-bit integer
/ Storage size: 1 byte
/ Alignment: 1 byte
][
/ `i12`: Signed 12-bit integer
/ Storage size: 1 byte
/ Alignment: 1 byte
][
/ `u24`: Unsigned 24-bit integer
/ Storage size: 2 bytes
/ Alignment: 2 bytes
][
/ `i24`: Signed 24-bit integer
/ Storage size: 2 bytes
/ Alignment: 2 bytes
][
/ `u48`: Unsigned 48-bit integer
/ Storage size: 4 bytes
/ Alignment: 4 bytes
][
/ `i48`: Signed 48-bit integer
/ Storage size: 4 bytes
/ Alignment: 4 bytes
]]
== Booleans
Booleans are represented as `u12`s where 0 is false and any non-zero value (but canonically 7777) is true.
== Strings
Strings are represented by pointers to a `u12`, followed by that number of bytes of data representing the contents of the string.
= Userspace invocation convention
== Parameter passing
Parameters are expanded to take a full word and are placed in order:
#columns(2)[
+ in ```ain48 %01```
+ in ```ain48 %02```
+ in ```ain48 %03```
+ in ```ain48 %04```
#colbreak()
5. in ```ain48 %05```
+ in ```ain48 %06```
+ in ```ain48 %07```
+ pushed on the stack, with the 8#super[th] argument at the lowest address and the last argument at the highest address.
]
Arguments placed on the stack are placed starting at the word with the address immediately higher than that of the return address.
== Return value passing
The return value is to be placed in ```ain48 %01```.
== Clobbering
Registers ```ain48 %01```, ```ain48 %02```, ```ain48 %03```, ```ain48 %04```, ```ain48 %05```,
```ain48 %06``` and ```ain48 %07``` may freely be clobbered by the invokee. It is the responsibility of the invoker to save them if it is desired to keep their value. Registers ```ain48 %10```, ```ain48 %11```, ```ain48 %12```, ```ain48 %13```, ```ain48 %14```, ```ain48 %15```, ```ain48 %16``` and ```ain48 %it``` must be returned to the invoker in the same state they were upon call. It is the responsibility of the invokee to save them if they are needed for other uses.
== Invocation Sequence
+ The invoker pushes all the registers it needs saved to the stack
+ The invoker pushes the stack arguments to the stack
+ The invoker loads the registers with the register arguments
+ The invoker executes the ```ain48 IVC``` instruction. This pushes the return address to the stack and transfers control flow to the invokee
+ The invokee allocates its local variables on top of the stack.
+ The invokee saves any register it needs to prevent the clobbering of.
+ The invokee runs its course
+ The invokee restores the registers
+ The invokee frees its local variables
+ The invokee loads its return value into ```ain48 %01```
+ The invokee runs the ```ain48 RV``` instruction. This pops the return address off the stack and transfers control flow back to the invoker
+ The invoker frees the stack arguments.
+ The invoker restores the registers it pushed
```ain48
; An example
.init _init
.pars data_sl
.univ arg8, arg9
arg8: .word #o7612
arg9: .hword #o5 .byte #o0, #o0, #o0, #o0, #o0
.pars actiones
.univ _init
_init: CST %02 ; 1. Push registers to be saved
ILG %02, arg9 ; 2. Push 9th argument
CST %02
ILG %02, arg8 ; 2. Push 8th argument
LG %02, (%02)
CST %02
ILG %01, #o1 ; 3. Load Register arguments
...
ILG %07, #o7
IVC res ; 4. Transfer to the invokee
PADD %it, #o10 ; 12. Free the 2 words of stack arguments
DST %02 ; 13. Restore the saved register
res: PSTR %it, #o14 ; 5. Allocate local variables (here 3 words)
CST %10 ; 6. Save invoker-safe registers
... ; 7. Foo does whatever it does
DST %10 ; 8. The invokee restores the invoker-safe
; registers
PADD %it, #o14 ; 9. Free the local variables
; 10. Assuming that foo has already put its
; return value in %01
RV ; 11. Transfer back to the invoker
```
= Kernel invocation convention
Same as userspace invocation, except for the fact that all arguments are shifted by one, and that ```ain48 %01``` is used to hold the kernel invocation number.
#pagebreak(weak: true)
#outline(depth: 2)