|
1 |
/**
 * Lexical categories a token can belong to.
 *
 * One member exists for each keyword/directive family the tokenizer knows
 * about, plus the generic operand categories.
 */
type TokenType =
  | 'INSTRUCTION'
  | 'REGISTER'
  | 'NUMBER'
  | 'COMMA'
  | 'SEGMENT'
  | 'ENDS'
  | 'LABEL'
  | 'INCLUDE'
  | 'PROCESSOR_DIRECTIVE'
  | 'MODEL_DIRECTIVE'
  | 'STACK_DIRECTIVE'
  | 'DATA_DIRECTIVE'
  | 'CODE_DIRECTIVE';
| 1 | +// 使用typescript 将汇编语言解析成AST,注释使用英语,支持段定义,包括简单段定义,支持include命令,支持宏和子程序,支持label,支持字符串,支持.386 这样的命令,如果代码有问题,请支持输出代码的问题,包括问题类型,位置。 |
3 | 2 |
|
4 |
/** A single lexical unit handed from the tokenizer to the parser. */
interface Token {
  /** Lexical category of this token. */
  type: TokenType;
  /** Text payload of the token (for example a mnemonic, a name, or digits). */
  value: string;
  /** Character offset into the tokenized input associated with this token. */
  position: number;
}
10 |
| - |
11 |
/**
 * Any node that may appear in the top-level AST or inside a segment body.
 * The `type` field is the discriminant of the union.
 */
type ASTNode =
  | InstructionNode
  | SegmentNode
  | LabelNode
  | IncludeNode
  | ProcessorDirectiveNode
  | ModelDirectiveNode
  | StackDirectiveNode
  | DataDirectiveNode
  | CodeDirectiveNode;

/** One instruction together with its operands in source order. */
interface InstructionNode {
  type: 'INSTRUCTION';
  /** Instruction mnemonic as written in the token. */
  name: string;
  /** Operands of the instruction; comma separators are not represented. */
  operands: (RegisterNode | NumberNode | LabelReferenceNode)[];
}

/** Operand that names a register (or any other plain identifier token). */
interface RegisterNode {
  type: 'REGISTER';
  name: string;
}

/** Numeric literal operand. */
interface NumberNode {
  type: 'NUMBER';
  value: number;
}

/** A named segment and the nodes collected while it was open. */
interface SegmentNode {
  type: 'SEGMENT';
  name: string;
  /** Nodes added between the SEGMENT keyword and the matching ENDS. */
  instructions: ASTNode[];
}

/** Definition of a label. */
interface LabelNode {
  type: 'LABEL';
  name: string;
  /** Position of the token that defined the label. */
  position: number;
}

/** Operand that refers to a label by name. */
interface LabelReferenceNode {
  type: 'LABEL_REFERENCE';
  name: string;
}

/** An INCLUDE statement together with the AST parsed from the included file. */
interface IncludeNode {
  type: 'INCLUDE';
  filename: string;
  /** Result of tokenizing and parsing the included file's content. */
  ast: ASTNode[];
}

/** A dot-directive such as `.386` that has no dedicated node type. */
interface ProcessorDirectiveNode {
  type: 'PROCESSOR_DIRECTIVE';
  /** Directive text, including the leading dot. */
  directive: string;
}

/** The `.MODEL` directive. */
interface ModelDirectiveNode {
  type: 'MODEL_DIRECTIVE';
  /** Argument that followed `.MODEL`. */
  model: string;
}

/** The `.STACK` directive. */
interface StackDirectiveNode {
  type: 'STACK_DIRECTIVE';
  /** Stack size argument, parsed as a base-10 integer. */
  size: number;
}

/** The `.DATA` directive; carries no payload. */
interface DataDirectiveNode {
  type: 'DATA_DIRECTIVE';
}

/** The `.CODE` directive; carries no payload. */
interface CodeDirectiveNode {
  type: 'CODE_DIRECTIVE';
}
87 |
| - |
88 |
/** A problem found while tokenizing or parsing. */
interface ErrorInfo {
  /** Error category, e.g. 'SyntaxError' or 'LabelError'. */
  type: string;
  /** Position in the input that the problem is attributed to. */
  position: number;
  /** Human-readable description of the problem. */
  message: string;
}
94 |
| - |
95 |
/**
 * Upper-case mnemonics that are classified as instructions.
 * Identifiers not listed here are not treated as instructions.
 */
const commonInstructions: string[] = [
  // Data movement and arithmetic
  'MOV',
  'ADD',
  'SUB',
  'MUL',
  'DIV',
  'INC',
  'DEC',
  'CMP',
  // Jumps
  'JMP',
  'JE',
  'JNE',
  'JG',
  'JGE',
  'JL',
  'JLE',
  // Stack and procedure calls
  'PUSH',
  'POP',
  'CALL',
  'RET',
];
101 |
| - |
102 |
/**
 * Placeholder for file access used when processing INCLUDE statements.
 *
 * This stub ignores `filename` and always yields an empty string; replace the
 * body with real file-system access in production.
 *
 * @param filename Name of the file whose content is requested.
 * @returns The file content (always `''` in this stub).
 */
function readFile(filename: string): string {
  const emptyContent = '';
  return emptyContent;
}
108 |
| - |
109 |
/**
 * Lexical analyzer: splits assembly source text into tokens.
 *
 * Recognized input:
 * - whitespace and `;` line comments (skipped),
 * - `,` as COMMA,
 * - runs of decimal digits as NUMBER,
 * - `name:` as LABEL (value excludes the colon),
 * - the keywords SEGMENT, ENDS and INCLUDE (case-insensitive); INCLUDE takes
 *   the file name from the same line and stores it as the token value,
 * - dot-directives: `.MODEL <arg>`, `.STACK <digits>`, `.DATA`, `.CODE`; any
 *   other dot-directive (such as `.386`) becomes PROCESSOR_DIRECTIVE,
 * - any other identifier: INSTRUCTION when `isInstruction` accepts it,
 *   otherwise REGISTER.
 *
 * Directive and INCLUDE arguments are only taken from the same line, so a
 * directive never swallows the first token of the following line. Characters
 * that fit none of the rules are reported as errors instead of being dropped.
 *
 * @param input Assembly source text.
 * @returns The tokens in source order and the lexical problems found. Each
 *   `position` is the offset of the first character of the token or problem.
 */
export function tokenize(input: string): { tokens: Token[]; errors: ErrorInfo[] } {
  const tokens: Token[] = [];
  const errors: ErrorInfo[] = [];

  // Sticky patterns match only at `lastIndex`, which lets the scanner test
  // each rule at the cursor without slicing the input.
  const whitespace = /\s+/y;
  const lineComment = /;[^\r\n]*/y;
  const decimalNumber = /\d+/y;
  const directiveName = /\.[A-Za-z0-9]+/y;
  const identifier = /[A-Za-z_][A-Za-z0-9_]*/y;
  const directiveArgument = /[ \t]+([^\s,;]+)/y;
  const includeArgument = /[ \t]+([^\s]+)/y;

  const matchAt = (pattern: RegExp, index: number): RegExpExecArray | null => {
    pattern.lastIndex = index;
    return pattern.exec(input);
  };

  let cursor = 0;
  while (cursor < input.length) {
    const position = cursor;

    const skipped = matchAt(whitespace, cursor) ?? matchAt(lineComment, cursor);
    if (skipped) {
      cursor += skipped[0].length;
      continue;
    }

    if (input[cursor] === ',') {
      tokens.push({ type: 'COMMA', value: ',', position });
      cursor += 1;
      continue;
    }

    const numberMatch = matchAt(decimalNumber, cursor);
    if (numberMatch) {
      tokens.push({ type: 'NUMBER', value: numberMatch[0], position });
      cursor += numberMatch[0].length;
      continue;
    }

    const directiveMatch = matchAt(directiveName, cursor);
    if (directiveMatch) {
      const directive = directiveMatch[0];
      cursor += directive.length;
      switch (directive.toUpperCase()) {
        case '.MODEL': {
          const argument = matchAt(directiveArgument, cursor);
          if (argument) {
            cursor += argument[0].length;
            tokens.push({ type: 'MODEL_DIRECTIVE', value: argument[1], position });
          } else {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected model type after .MODEL directive'
            });
          }
          break;
        }
        case '.STACK': {
          const argument = matchAt(directiveArgument, cursor);
          if (argument) {
            // A malformed size is consumed too, so it is reported once here
            // rather than re-tokenized as a stray operand.
            cursor += argument[0].length;
          }
          // Digits only: the parser reads this value with parseInt(…, 10).
          if (argument && /^\d+$/.test(argument[1])) {
            tokens.push({ type: 'STACK_DIRECTIVE', value: argument[1], position });
          } else {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected a valid number for stack size after .STACK directive'
            });
          }
          break;
        }
        case '.DATA':
          tokens.push({ type: 'DATA_DIRECTIVE', value: directive, position });
          break;
        case '.CODE':
          tokens.push({ type: 'CODE_DIRECTIVE', value: directive, position });
          break;
        default:
          // `.386` and every other dot-directive.
          tokens.push({ type: 'PROCESSOR_DIRECTIVE', value: directive, position });
      }
      continue;
    }

    const identifierMatch = matchAt(identifier, cursor);
    if (identifierMatch) {
      const word = identifierMatch[0];
      cursor += word.length;

      // A directly following colon turns the identifier into a label definition.
      if (input[cursor] === ':') {
        cursor += 1;
        tokens.push({ type: 'LABEL', value: word, position });
        continue;
      }

      const keyword = word.toUpperCase();
      if (keyword === 'SEGMENT') {
        tokens.push({ type: 'SEGMENT', value: word, position });
      } else if (keyword === 'ENDS') {
        tokens.push({ type: 'ENDS', value: word, position });
      } else if (keyword === 'INCLUDE') {
        const fileName = matchAt(includeArgument, cursor);
        if (fileName) {
          cursor += fileName[0].length;
          tokens.push({ type: 'INCLUDE', value: fileName[1], position });
        } else {
          errors.push({
            type: 'SyntaxError',
            position,
            message: 'Expected file name after INCLUDE'
          });
        }
      } else if (isInstruction(word)) {
        tokens.push({ type: 'INSTRUCTION', value: word, position });
      } else {
        tokens.push({ type: 'REGISTER', value: word, position });
      }
      continue;
    }

    // Nothing matched: report the character (as a whole code point) and move on.
    const unexpected = String.fromCodePoint(input.codePointAt(cursor) ?? 0);
    errors.push({
      type: 'SyntaxError',
      position,
      message: `Unexpected character '${unexpected}'`
    });
    cursor += unexpected.length;
  }

  return { tokens, errors };
}
175 |
| - |
176 |
/**
 * Tells whether `value` names an instruction.
 *
 * The comparison upper-cases `value` only; entries of `instructions` are
 * compared as given.
 *
 * @param value Candidate mnemonic.
 * @param instructions Mnemonics to compare against; defaults to `commonInstructions`.
 * @returns `true` when the upper-cased `value` equals one of the entries.
 */
function isInstruction(value: string, instructions = commonInstructions): boolean {
  const mnemonic = value.toUpperCase();
  return instructions.some((known) => known === mnemonic);
}
180 |
| - |
181 |
/**
 * Syntax analyzer: builds an AST from a token stream.
 *
 * Behavior:
 * - `SEGMENT <name>` opens a segment and `ENDS` closes it. While a segment is
 *   open, labels, instructions, includes and processor directives are stored
 *   in its `instructions`; otherwise they go to the top-level AST.
 * - `.MODEL`, `.STACK`, `.DATA` and `.CODE` nodes always go to the top level.
 * - An instruction takes the REGISTER / NUMBER tokens (and separating commas)
 *   that directly follow it as operands. A LABEL token is a definition
 *   (`name:`) and therefore ends the operand list instead of being consumed.
 * - After all tokens are processed, identifier operands whose name equals a
 *   label defined in this token stream become LABEL_REFERENCE operands.
 * - INCLUDE reads the file through `readFile`, tokenizes and parses it, and
 *   prefixes any resulting error messages with the file name.
 * - Tokens that cannot start a statement (stray operands or commas) are
 *   ignored.
 *
 * NOTE(review): include processing recurses without a cycle guard; a file
 * that includes itself would recurse until the call stack is exhausted once
 * `readFile` returns real content.
 *
 * @param tokens Tokens in source order.
 * @returns The AST and the list of problems found; this function reports
 *   problems through `errors` rather than throwing.
 */
export function parse(tokens: Token[]): { ast: ASTNode[]; errors: ErrorInfo[] } {
  const ast: ASTNode[] = [];
  const errors: ErrorInfo[] = [];
  const labelMap = new Map<string, LabelNode>();
  const instructionNodes: InstructionNode[] = [];
  let currentSegment: SegmentNode | null = null;

  // Stores a node in the open segment, or at top level when none is open.
  const emit = (node: ASTNode): void => {
    if (currentSegment) {
      currentSegment.instructions.push(node);
    } else {
      ast.push(node);
    }
  };

  const syntaxError = (position: number, message: string): void => {
    errors.push({ type: 'SyntaxError', position, message });
  };

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    switch (token.type) {
      case 'SEGMENT': {
        const nameToken = tokens[i + 1];
        if (!nameToken || nameToken.type !== 'REGISTER') {
          // Leave the following token in place so it is still parsed.
          syntaxError(token.position, 'Expected segment name after SEGMENT keyword');
          break;
        }
        i++;
        if (currentSegment) {
          // The open segment would otherwise be forgotten without a report.
          syntaxError(
            token.position,
            `Segment '${currentSegment.name}' is not closed before a new SEGMENT begins`
          );
        }
        currentSegment = { type: 'SEGMENT', name: nameToken.value, instructions: [] };
        ast.push(currentSegment);
        break;
      }
      case 'ENDS': {
        if (currentSegment) {
          currentSegment = null;
        } else {
          syntaxError(token.position, 'ENDS keyword without corresponding SEGMENT');
        }
        break;
      }
      case 'LABEL': {
        const existing = labelMap.get(token.value);
        if (existing) {
          errors.push({
            type: 'LabelError',
            position: token.position,
            message: `Label '${token.value}' is already defined at position ${existing.position}`
          });
          break;
        }
        const labelNode: LabelNode = {
          type: 'LABEL',
          name: token.value,
          position: token.position
        };
        labelMap.set(token.value, labelNode);
        emit(labelNode);
        break;
      }
      case 'INSTRUCTION': {
        const operands: (RegisterNode | NumberNode | LabelReferenceNode)[] = [];
        // Look ahead without consuming, so the token that ends the operand
        // list (including a label definition) is handled by the outer loop.
        for (let next = tokens[i + 1]; next; next = tokens[i + 1]) {
          if (next.type === 'REGISTER') {
            operands.push({ type: 'REGISTER', name: next.value });
          } else if (next.type === 'NUMBER') {
            operands.push({ type: 'NUMBER', value: parseInt(next.value, 10) });
          } else if (next.type !== 'COMMA') {
            break;
          }
          i++;
        }
        const instructionNode: InstructionNode = {
          type: 'INSTRUCTION',
          name: token.value,
          operands
        };
        instructionNodes.push(instructionNode);
        emit(instructionNode);
        break;
      }
      case 'INCLUDE': {
        const filename = token.value;
        try {
          const fileContent = readFile(filename);
          const inFile = (err: ErrorInfo): ErrorInfo => ({
            ...err,
            message: `In included file ${filename}: ${err.message}`
          });
          const { tokens: includedTokens, errors: includedErrors } = tokenize(fileContent);
          errors.push(...includedErrors.map(inFile));
          const { ast: includedAst, errors: parseErrors } = parse(includedTokens);
          errors.push(...parseErrors.map(inFile));
          const includeNode: IncludeNode = { type: 'INCLUDE', filename, ast: includedAst };
          emit(includeNode);
        } catch (e: unknown) {
          // A failure while reading or processing the file is reported at the
          // INCLUDE token instead of aborting the whole parse.
          syntaxError(token.position, e instanceof Error ? e.message : 'Unknown error');
        }
        break;
      }
      case 'PROCESSOR_DIRECTIVE': {
        const directiveNode: ProcessorDirectiveNode = {
          type: 'PROCESSOR_DIRECTIVE',
          directive: token.value
        };
        emit(directiveNode);
        break;
      }
      case 'MODEL_DIRECTIVE': {
        const modelDirectiveNode: ModelDirectiveNode = {
          type: 'MODEL_DIRECTIVE',
          model: token.value
        };
        ast.push(modelDirectiveNode);
        break;
      }
      case 'STACK_DIRECTIVE': {
        const stackDirectiveNode: StackDirectiveNode = {
          type: 'STACK_DIRECTIVE',
          size: parseInt(token.value, 10)
        };
        ast.push(stackDirectiveNode);
        break;
      }
      case 'DATA_DIRECTIVE': {
        const dataDirectiveNode: DataDirectiveNode = { type: 'DATA_DIRECTIVE' };
        ast.push(dataDirectiveNode);
        break;
      }
      case 'CODE_DIRECTIVE': {
        const codeDirectiveNode: CodeDirectiveNode = { type: 'CODE_DIRECTIVE' };
        ast.push(codeDirectiveNode);
        break;
      }
      default:
        // REGISTER, NUMBER and COMMA tokens outside an instruction are ignored.
        break;
    }
  }

  // Labels may be defined after their first use, so references are resolved
  // only once every definition has been seen.
  for (const node of instructionNodes) {
    node.operands.forEach((operand, index) => {
      if (operand.type === 'REGISTER' && labelMap.has(operand.name)) {
        const reference: LabelReferenceNode = { type: 'LABEL_REFERENCE', name: operand.name };
        node.operands[index] = reference;
      }
    });
  }

  if (currentSegment) {
    syntaxError(tokens[tokens.length - 1]?.position ?? 0, 'Unclosed segment');
  }

  return { ast, errors };
}
0 commit comments