Skip to content

Commit 03449c2

Browse files
committed
Added labels in .data and made number handling better
1 parent 430a57c commit 03449c2

11 files changed

Lines changed: 741 additions & 590 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SRC=as4.c
1+
SRC=as4.c num.c label.c output.c
22
EXE=as4
33
EXTRACFLAGS=
44
EXTRALDFLAGS=

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ The Nibble Knowledge CPU has 8 instructions which are split into two different t
3030
### 2 data types ###
3131
AS4 recognises two inbuilt data types:
3232
* Numerical values. Format: ".data SIZE INITIALVALUE"
33+
* .data can also be used to create a static reference to a label. The SIZE must be 4. The format is ".data SIZE LABEL" or ".data SIZE LABEL[OFFSET]". This will save the static 16-bit memory location pointed to by LABEL or LABEL + OFFSET to the .data section.
3334
* Strings, both plain and zero terminated. Format: ".ascii "String"" or ".asciiz "String""
3435
* Strings must start and end with double quotes.
3536
* AS4 recognises standard escape characters
@@ -42,13 +43,18 @@ Labels when referenced in instructions can be used in two forms:
4243
* INST LABEL
4344
* Where the instruction INST simply references the memory location pointed to by LABEL
4445
* INST LABEL[OFFSET]
45-
* Where the instruction INST references the memory location pointed to by LABEL + OFFSET. OFFSET must be a hexidecimal value, optionally preceded by "0x".
46+
* Where the instruction INST references the memory location pointed to by LABEL + OFFSET. OFFSET is usually a hexadecimal value, optionally preceded by "0x". To use a binary value, prefix it with "0b". For an octal value, prefix it with "0" or "0o". For a decimal value, prefix it with "0d".
4647

4748
An example of usage would be "LOD sum[F]", which loads the memory address pointed to by "sum" plus the offset of "F" (15 in decimal) into the accumulator.
4849

4950
### Comments ###
5051
Comments in AS4 start with a semicolon, ";" or an octothorp, "#".
5152

53+
### Numbers ###
54+
AS4 accepts binary, octal, decimal and hexadecimal numbers. Binary numbers must always be preceded by "0b" and octal numbers by "0" or "0o". The rules for decimal and hexadecimal vary depending on the use case.
55+
* When used for an offset, in the form LABEL[OFFSET], it is assumed the default is hexadecimal. Thus, hexadecimal numbers can be written with or without a preceding "0x". Decimal numbers must be written with a preceding "0d".
56+
* In all other cases, decimal is assumed to be the default. "0d" can optionally precede the decimal number. Hexadecimal numbers must be written with a preceding "0x".
57+
5258
### Example code ###
5359
```nasm
5460
; This program reads in integers and adds them together

examples/labeltest.asm

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
LOD derp
2+
derp: .data 4 hurp[0b10]
3+
derp2: .data 4 slurp
4+
hurp: CXA
5+
slurp: NOP
6+
pad: .data 1 0

examples/offsettest.asm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
LOD beep[0x2]
2-
beep: .data 3 2
1+
LOD beep[0d2]
2+
beep: .data 0o3 0b1110

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SRC=as4.c
1+
SRC=as4.c num.c label.c output.c
22
EXE=as4
33
EXTRACFLAGS=
44
EXTRALDFLAGS=

src/as4.c

Lines changed: 19 additions & 582 deletions
Large diffs are not rendered by default.

src/as4.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,24 @@
3333
#define EOFADDR 0x4444
3434
#define UNKNOWNADDR 0xFFFF
3535

36+
/* Macros to determine if we are using strange, macro-generated hex values (no 0x) or standard. */
37+
#define STDHEX 0
38+
#define NSTDHEX 1
39+
40+
/* When a label (rather than an instruction) references a label, we need to account for the reference being 4 nibbles in size instead of the 5 nibbles of an instruction */
41+
#define INST 0
42+
#define LABEL 1
43+
3644
/* The singular global variable - what line of the assembly file we are on. Helps with error messages */
3745
extern unsigned long long FILELINE;
3846

39-
/* This is the data structure used to identify labels - it merely contains the name of the label and it's address */
47+
/* This is the data structure used to identify labels */
4048
typedef struct _label
4149
{
4250
uint16_t addr;
4351
char *str;
4452
uint16_t offset;
53+
uint8_t type;
4554
} label;
4655

4756
/* Help() prints the help. More useful in large programs */
@@ -52,11 +61,18 @@ void addinst(char *outbuf, uint8_t op, uint16_t addr, unsigned long long *bits,
5261
void adddata(char **outbuf, size_t bufsize, unsigned long long size, long long value, unsigned long long *bits, unsigned long long *bytes);
5362
/* addlabel() adds a label to the collection of labels, to be used by other functions when a label reference is made. */
5463
/* Additionally, if there are outstanding "queries" for a certain label (if the label has been used before it has been declared) it replaces the "unknown address" address in an instruction with the address of the label on declaration */
55-
void addlabel(char *outbuf, label **labels, label **unknownlabels, unsigned long long *numlabels, unsigned long long numunknownlabels, const char *labelstr, unsigned long long bits, unsigned short int baseaddr);
64+
void addlabel(char *outbuf, label **labels, label **unknownlabels, unsigned long long *numlabels, unsigned long long numunknownlabels, char *labelstr, unsigned long long bits, unsigned short int baseaddr);
5665
/* findlabel() determines the memory address that follows the opcode. If the memory address is already a number, returns it */
5766
/* If it is an undeclared label, it adds the label name and the instruction location to the "unknown labels" collection. If the label is declared, it returns its address */
58-
unsigned short int findlabel(label **unknownlabels, label **labels, const char *labelstr, unsigned long long numlabels, unsigned long long *numunknownlabels, unsigned long long bits);
67+
unsigned short int findlabel(label **unknownlabels, label **labels, char *labelstr, unsigned long long numlabels, unsigned long long *numunknownlabels, unsigned long long bits, uint8_t type);
5968
/* addstring() handles adding .ascii and .asciiz (zero terminated ascii string) data to the output buffer, including handling escape characters. */
6069
void addstring(char *outbuf, char *string, char zeroterm, unsigned long long *bits, unsigned long long *bytes);
6170

71+
/* estrtol (extended strtol) and estrtoul (extended strtoul) are functions to intelligently determine the base of the number to be converted by the strtol functions and return them. */
72+
/* They function the same as strtol, except that instead of a base as the final parameter they accept a type parameter, which switches them from expecting hex values in the form 0xF (STDHEX or 0) to just F (NSTDHEX or 1). This is to accommodate the macro assembler. */
73+
/* Switching to NSTDHEX means the default assumption for numbers is hex instead of decimal, so 12 will be assumed to be 0x12, or 18 in decimal. Decimal values will be accepted in this mode in the form 0d9. */
74+
long estrtol(char *str, char **endptr, uint8_t type);
75+
76+
unsigned long estrtoul(char *str, char **endptr, uint8_t type);
77+
6278
#endif /* _AS4_H_ */

src/label.c

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#include "as4.h"
2+
3+
/* These are the label operations. */
4+
5+
/* addlabel() adds a label to the collection of labels, to be used by other functions when a label reference is made. */
6+
/* Additionally, if there are outstanding "queries" for a certain label (if the label has been used before it has been declared) it replaces the "unknown address" address in an instruction with the address of the label on declaration */
7+
void addlabel(char *outbuf, label **labels, label **unknownlabels, unsigned long long *numlabels, unsigned long long numunknownlabels, char *labelstr, unsigned long long bits, unsigned short int baseaddr)
{
	/*
	 * Parameters:
	 *   outbuf           - output buffer holding the assembled nibbles (two per byte); may be NULL, in which case no back-patching is done.
	 *   labels           - pointer to the collection of declared labels; the collection is created or grown here.
	 *   unknownlabels    - pointer to the collection of references made before their label was declared.
	 *   numlabels        - number of elements in *labels; updated here.
	 *   numunknownlabels - number of elements in *unknownlabels.
	 *   labelstr         - the label token. Its final character is not copied (presumably the trailing ':' - confirm against the caller).
	 *   bits             - current size of the output in bits; bits/4 is the nibble the label points to.
	 *   baseaddr         - base address added to the label's address.
	 *
	 * Exits with status 3/4/5 when memory cannot be allocated and 6 when the label name is already declared.
	 */

	/* Heap copy of the label name. Once stored in the collection it is owned by the collection. */
	char *name = NULL;
	/* Number of characters of labelstr that form the name. */
	size_t namelen = 0;
	/* Result of growing the collection, kept separate so the old pointer is not overwritten before we know the call worked. */
	label *grown = NULL;
	/* The element of the collection that describes the label being declared. */
	label *entry = NULL;
	/* Generic looping unit. The same width as the counts it is compared against. */
	unsigned long long i = 0;

	/* Without somewhere to store the label, a counter to update, or a name, we cannot do any work. */
	if(labels == NULL || numlabels == NULL || labelstr == NULL)
	{
		return;
	}

	/* If the pointer to the collection is NULL, it means we haven't made it yet. */
	if(*labels == NULL)
	{
		/* Make the collection 1 structure big at the start. */
		grown = malloc(sizeof *grown);
		if(grown == NULL)
		{
			/* Quit and complain. */
			perror("Could not allocate memory");
			exit(3);
		}
		*labels = grown;
		*numlabels = 1;
	}
	/* Otherwise make room for one more label. */
	else
	{
		grown = realloc(*labels, sizeof *grown * ((*numlabels) + 1));
		if(grown == NULL)
		{
			/* Quit and complain. */
			perror("Could not reallocate memory");
			exit(4);
		}
		*labels = grown;
		(*numlabels)++;
	}

	/* Start the new element from a known state instead of whatever malloc/realloc handed us. */
	entry = &(*labels)[(*numlabels) - 1];
	entry->str = NULL;
	entry->addr = 0;
	entry->offset = 0;
	entry->type = 0;

	/* Copy everything except the final character of the token. An empty token gives an empty name instead of underflowing the length. */
	namelen = strlen(labelstr);
	if(namelen > 0)
	{
		namelen--;
	}
	/* calloc zeroes the memory, so the extra byte is the zero terminator. */
	name = calloc(namelen + 1, sizeof *name);
	if(name == NULL)
	{
		/* Quit and complain. */
		perror("Could not allocate memory");
		exit(5);
	}
	memcpy(name, labelstr, namelen);

	/* Before we commit to keeping this label, check if we've used the name already. */
	/* Because using the same label for two different locations makes no sense. */
	for(i = 0; i < ((*numlabels) - 1); i++)
	{
		if((*labels)[i].str != NULL && !strcmp((*labels)[i].str, name))
		{
			/* Quit and complain. */
			fprintf(stderr, "Line %llu: Label %s already used.\n", FILELINE, name);
			exit(6);
		}
	}

	/* We're sure we want to keep it. */
	entry->str = name;
	/* The label points at the next nibble to be written, plus the base address. */
	entry->addr = (uint16_t) ((bits/4) + baseaddr);

	/* Now check whether the label was used before it was declared. Without an output buffer or any recorded references there is nothing to patch. */
	if(outbuf == NULL || unknownlabels == NULL || (*unknownlabels) == NULL)
	{
		return;
	}

	for(i = 0; i < numunknownlabels; i++)
	{
		const label *ref = &(*unknownlabels)[i];
		/* Nibble index in the output where the 4 nibble address field starts. */
		unsigned short int instaddress = 0;
		/* Value written into that address field. */
		unsigned short int labeladdr = 0;

		/* Skip references without a name and references to other labels. */
		if(ref->str == NULL || strcmp(ref->str, name) != 0)
		{
			continue;
		}

		/* A reference made by a label (type 1) has no opcode nibble in front of the address, so step back one nibble to cancel the increment below. */
		instaddress = ref->addr - ref->type;
		/* NOTE(review): the value patched in also has the type added, so a forward reference from a label stores address + offset + 1 while findlabel() returns address + offset for an already declared label. Behaviour kept as is - confirm whether this is intended. */
		labeladdr = entry->addr + ref->offset + ref->type;

		/* Move past the opcode nibble to the start of the address field. */
		instaddress++;

		/* Two nibbles make a byte. An odd nibble index means the field starts in the lower nibble of a byte. */
		if((instaddress % 2) != 0)
		{
			/* Unknown addresses were written as 0xFFFF, so we cannot just OR things: mask the old nibble away first. */
			/* Keep the upper nibble of this byte and put the highest nibble of the address in the lower nibble. */
			outbuf[instaddress / 2] = (char) ((outbuf[instaddress / 2] & 0xF0) | ((labeladdr >> 12) & 0x000F));
			/* Move up a nibble (and implicitly a byte). */
			instaddress++;
			/* The middle two nibbles fill this byte exactly. */
			outbuf[instaddress / 2] = (char) ((labeladdr >> 4) & 0x00FF);
			/* Move up 2 nibbles. */
			instaddress += 2;
			/* The lowest nibble goes in the upper nibble of the next byte; keep whatever is in its lower nibble. */
			outbuf[instaddress / 2] = (char) ((outbuf[instaddress / 2] & 0x0F) | (((labeladdr) & 0x000F) << 4));
		}
		/* An even nibble index means the field starts on a byte boundary. */
		else
		{
			/* We can simply assign the top two nibbles and lower two nibbles to 2 bytes. */
			outbuf[instaddress / 2] = (char) (0xFF & (labeladdr >> 8));
			instaddress += 2;
			outbuf[instaddress / 2] = (char) (0xFF & (labeladdr));
		}
	}
}
142+
/* findlabel() determines the memory address that follows the opcode. If the memory address is already a number, it returns it */
143+
/* If it is an undeclared label, it adds the label name and the instruction location to the "unknown labels" collection. If the label is declared, it returns its address */
144+
unsigned short int findlabel(label **unknownlabels, label **labels, char *labelstr, unsigned long long numlabels, unsigned long long *numunknownlabels, unsigned long long bits, uint8_t type)
{
	/*
	 * Parameters:
	 *   unknownlabels    - pointer to the collection of not yet declared label references; created or grown here.
	 *   labels           - pointer to the collection of declared labels.
	 *   labelstr         - the token to resolve: a number, LABEL or LABEL[OFFSET].
	 *   numlabels        - number of elements in *labels.
	 *   numunknownlabels - number of elements in *unknownlabels; updated when a reference is queued.
	 *   bits             - current size of the output in bits; bits/4 is recorded as the location of the reference.
	 *   type             - 0 when an instruction makes the reference, 1 when a label does.
	 *
	 * Returns the number itself, the label's address plus the offset, or UNKNOWNADDR when the label is not declared yet.
	 * Exits with status 33 on a bad type, 26 on a malformed offset and 9/10/11 when memory cannot be allocated.
	 */

	/* endptr tells us where number parsing stopped. If it stopped at the very start, the token is likely a label. */
	char *endptr = NULL;
	/* Heap copy of the label name. It is owned by the unknown label collection if it gets stored there, otherwise it is freed before returning. */
	char *name = NULL;
	/* Position of the '[' that starts an offset, if there is one. */
	char *bracket = NULL;
	/* Length of the token and a cursor into the copy. */
	size_t len = 0;
	size_t pos = 0;
	/* Generic looping unit. The same width as the counts it is compared against. */
	unsigned long long i = 0;
	/* If we can't find the address of the label, return UNKNOWNADDR (0xFFFF) and wait for it to be changed later. */
	unsigned short int address = UNKNOWNADDR;
	/* The value of the token when it is read as a number. */
	unsigned short int tempaddress = address;
	/* The offset in nibbles from the label. */
	unsigned short int offset = 0;
	/* Set once the name has been handed over to the unknown label collection. */
	int stored = 0;
	/* Result of growing the unknown label collection, and the element being filled in. */
	label *grown = NULL;
	label *entry = NULL;

	if(type > 1)
	{
		fprintf(stderr, "findlabel: Type can only be 0 or 1\n");
		exit(33);
	}

	/* Set errno to 0 so we can tell afterwards whether the conversion reported an error. */
	errno = 0;
	tempaddress = (unsigned short int) estrtol(labelstr, &endptr, STDHEX);
	/* No error and at least one character consumed: the token is a numerical value. */
	if(errno == 0 && labelstr != endptr)
	{
		/* Assume the programmer knows what he's doing and directly use it. */
		return tempaddress;
	}

	/* Otherwise treat the token as a label. Take a private, zero terminated copy we are allowed to cut up. */
	/* The extra byte holds the terminator; calloc has already zeroed it. */
	len = strlen(labelstr);
	name = calloc(len + 1, sizeof *name);
	if(name == NULL)
	{
		/* Quit and complain. */
		perror("Could not allocate memory");
		exit(9);
	}
	memcpy(name, labelstr, len);

	/* Cut the copy at the first whitespace character. */
	for(pos = 0; name[pos] != '\0'; pos++)
	{
		if(isspace((unsigned char)name[pos]))
		{
			name[pos] = '\0';
			break;
		}
	}

	/* Search for the square bracket to determine the label offset. */
	bracket = strchr(name, '[');
	if(bracket != NULL)
	{
		/* The macro assembler produces hex offsets that have no 0x, and usually hex offsets only. Assume this. */
		offset = (unsigned short int) estrtol(bracket + 1, &endptr, NSTDHEX);
		/* The number must be followed by a ']' that is also the last character of the token (so LABEL[OFFSET]uh oh] is rejected). */
		if((*endptr) != ']' || endptr[1] != '\0')
		{
			fprintf(stderr, "Line %llu: Label offsets must be declared in the form LABEL[OFFSET].\n", FILELINE);
			exit(26);
		}
		/* Leave just the label name in the copy. */
		*bracket = '\0';
	}

	/* Check whether the label has already been declared. */
	if(labels != NULL && *labels != NULL)
	{
		for(i = 0; i < numlabels; i++)
		{
			/* Always check that the dynamically allocated string has been allocated. */
			if((*labels)[i].str != NULL && !strcmp((*labels)[i].str, name))
			{
				/* Use the address the label points to and add the offset we want. */
				address = (*labels)[i].addr + offset;
			}
		}
	}

	/* If address still equals UNKNOWNADDR, remember the reference so addlabel() can fill it in when the label is declared. */
	/* The unknown labels collection, unlike the known labels collection, may contain the same name more than once: every place a label is referenced gets its own element. */
	if(address == UNKNOWNADDR && unknownlabels != NULL && numunknownlabels != NULL)
	{
		/* If the pointer to the collection is NULL, we haven't made the collection yet. */
		if((*unknownlabels) == NULL)
		{
			grown = malloc(sizeof *grown);
			if(grown == NULL)
			{
				/* Quit and complain. */
				perror("Could not allocate memory");
				exit(10);
			}
			(*numunknownlabels) = 1;
		}
		/* Otherwise make room for one more reference. */
		else
		{
			grown = realloc(*unknownlabels, sizeof *grown * ((*numunknownlabels) + 1));
			if(grown == NULL)
			{
				/* Quit and complain. */
				perror("Could not reallocate memory");
				exit(11);
			}
			(*numunknownlabels)++;
		}
		*unknownlabels = grown;

		entry = &grown[(*numunknownlabels) - 1];
		/* The name we need to look for later. */
		entry->str = name;
		/* Where we used it, so the unknown address can be replaced later. */
		entry->addr = (uint16_t) (bits/4);
		/* The offset, so it can be added later. */
		entry->offset = offset;
		/* And whether a label or an instruction made the reference. */
		entry->type = type;
		stored = 1;
	}

	/* Nobody kept the copy of the name, so release it. */
	if(!stored)
	{
		free(name);
	}

	/* Return the final address found. */
	return address;
}

0 commit comments

Comments
 (0)