Skip to content

Commit b9e07c1

Browse files
committed
gpt
1 parent 32c1239 commit b9e07c1

File tree

7 files changed

+570
-353
lines changed

7 files changed

+570
-353
lines changed

masm-tasm/src/language/Hover.ts

+9-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { MarkdownString, Uri } from "vscode";
55
import { Cppdoc } from './hoverFromCppdoc';
66
import { FELIX } from './hoverFelix';
77
import { HoverFromMarkdown } from './hoverFromMarkdown';
8-
import * as ast from "./ast";
8+
import * as ast from "./ast/main";
99

1010
export enum keywordType {
1111
other = 0,
@@ -48,8 +48,14 @@ export class AsmHoverProvider implements vscode.HoverProvider {
4848
const docinfo = DocInfo.getDocInfo(document); //scan the document
4949
const line = docinfo.lines[position.line];
5050

51-
const {tokens, errors}=ast.tokenize(document.getText());
52-
const b=ast.parse(tokens);
51+
const text=document.getText();
52+
const {tokens, errors}=ast.tokenize(text);
53+
const pos=ast.lineAndColumnToPosition(text,position.line,position.character);
54+
for(const token of tokens){
55+
if(pos>token.position && pos<token.value.length+token.position){
56+
return new vscode.Hover(JSON.stringify(token));
57+
}
58+
}
5359

5460
if (range) {
5561
const wordGet = document.getText(range);

masm-tasm/src/language/ast.ts

+1-350
Original file line numberDiff line numberDiff line change
@@ -1,351 +1,2 @@
1-
// Goal (translated from the original Chinese prompt comment): use TypeScript to
// parse assembly language into an AST, with comments in English. Support
// segment definitions (including simplified segment directives), the INCLUDE
// command, macros and subprograms, labels, strings, and directives such as
// .386. If the code has problems, report them with their type and position.
//
// Only part of that goal is implemented below: macros, procedures and string
// literals have no token or node type yet, and characters that belong to no
// token (brackets, quotes, operators) are skipped silently.

/** Every kind of lexical token that `tokenize` can emit. */
type TokenType =
  | 'INSTRUCTION'
  | 'REGISTER'
  | 'NUMBER'
  | 'COMMA'
  | 'SEGMENT'
  | 'ENDS'
  | 'LABEL'
  | 'INCLUDE'
  | 'PROCESSOR_DIRECTIVE'
  | 'MODEL_DIRECTIVE'
  | 'STACK_DIRECTIVE'
  | 'DATA_DIRECTIVE'
  | 'CODE_DIRECTIVE';

/** A single lexical token. */
interface Token {
  type: TokenType;
  /**
   * Token text. For LABEL it is the name without the colon; for INCLUDE,
   * MODEL_DIRECTIVE and STACK_DIRECTIVE it is the argument (file name, model,
   * stack size) rather than the keyword itself.
   */
  value: string;
  /** Zero-based offset of the token's first character in the input. */
  position: number;
  /**
   * Zero-based source line. Filled in by `tokenize`; optional so that
   * hand-built token lists without line information are still accepted.
   */
  line?: number;
}

/** Any node that may appear in the AST returned by `parse`. */
type ASTNode =
  | InstructionNode
  | SegmentNode
  | LabelNode
  | IncludeNode
  | ProcessorDirectiveNode
  | ModelDirectiveNode
  | StackDirectiveNode
  | DataDirectiveNode
  | CodeDirectiveNode;

/** An instruction together with its operands, in source order. */
interface InstructionNode {
  type: 'INSTRUCTION';
  name: string;
  operands: (RegisterNode | NumberNode | LabelReferenceNode)[];
}

/** A register operand. */
interface RegisterNode {
  type: 'REGISTER';
  name: string;
}

/** A numeric operand, already converted to a number. */
interface NumberNode {
  type: 'NUMBER';
  value: number;
}

/** A segment and the nodes parsed between its opening and its ENDS. */
interface SegmentNode {
  type: 'SEGMENT';
  name: string;
  instructions: ASTNode[];
}

/** A label definition (`name:`). */
interface LabelNode {
  type: 'LABEL';
  name: string;
  position: number;
}

/** An operand that names a symbol instead of a register. */
interface LabelReferenceNode {
  type: 'LABEL_REFERENCE';
  name: string;
}

/** An INCLUDE command and the AST parsed from the included file. */
interface IncludeNode {
  type: 'INCLUDE';
  filename: string;
  ast: ASTNode[];
}

/** A dot directive without dedicated handling, e.g. `.386`. */
interface ProcessorDirectiveNode {
  type: 'PROCESSOR_DIRECTIVE';
  directive: string;
}

/** The `.MODEL` directive. */
interface ModelDirectiveNode {
  type: 'MODEL_DIRECTIVE';
  model: string;
}

/** The `.STACK` directive. */
interface StackDirectiveNode {
  type: 'STACK_DIRECTIVE';
  size: number;
}

/** The `.DATA` directive. */
interface DataDirectiveNode {
  type: 'DATA_DIRECTIVE';
}

/** The `.CODE` directive. */
interface CodeDirectiveNode {
  type: 'CODE_DIRECTIVE';
}

/** A problem found while tokenizing or parsing. */
interface ErrorInfo {
  type: string;
  /** Zero-based offset in the input the problem relates to. */
  position: number;
  message: string;
}

// Mnemonics recognised as instructions (compared case-insensitively).
const commonInstructions = [
  'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'INC', 'DEC', 'CMP',
  'JMP', 'JE', 'JNE', 'JG', 'JGE', 'JL', 'JLE',
  'PUSH', 'POP', 'CALL', 'RET'
];

// Operand names treated as registers; any other identifier operand becomes a
// label reference.
const knownRegisters = new Set([
  'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', 'IP',
  'AL', 'AH', 'BL', 'BH', 'CL', 'CH', 'DL', 'DH',
  'CS', 'DS', 'ES', 'SS', 'FS', 'GS',
  'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', 'EIP'
]);

// Sticky patterns: each is matched exactly at `lastIndex`, never further on.
const NUMBER_RE = /\d[0-9A-Fa-f]*[HhOoQq]?/y;
const DIRECTIVE_RE = /\.[A-Za-z0-9]+/y;
const IDENTIFIER_RE = /[A-Za-z_@][A-Za-z0-9_@$?]*/y;
const ARGUMENT_RE = /[^\s,;]+/y;
const FILENAME_RE = /[^\s;]+/y;

/**
 * Placeholder for reading an included file; always returns an empty string.
 * Replace with real file access in production.
 *
 * NOTE(review): once this returns real content, `parse` needs a guard against
 * files that include themselves, because INCLUDE is processed recursively.
 *
 * @param filename Name given to the INCLUDE command (currently unused).
 * @returns The file content; always `''` in this stub.
 */
function readFile(filename: string): string {
  return '';
}

/**
 * Converts an assembler integer literal to a number.
 *
 * Accepts plain decimal digits and the radix suffixes `h` (hex), `o`/`q`
 * (octal), `b` (binary) and `d` (decimal), in either case.
 *
 * @param text Literal text such as `10`, `0FFh` or `1010b`.
 * @returns The numeric value, or `NaN` when the text is not a valid literal.
 */
function parseNumericLiteral(text: string): number {
  const lower = text.toLowerCase();
  const digits = lower.slice(0, -1);
  // The hex check comes first because `b` and `d` are also hex digits.
  if (/^[0-9][0-9a-f]*h$/.test(lower)) {
    return parseInt(digits, 16);
  }
  if (/^[0-7]+[oq]$/.test(lower)) {
    return parseInt(digits, 8);
  }
  if (/^[01]+b$/.test(lower)) {
    return parseInt(digits, 2);
  }
  if (/^[0-9]+d$/.test(lower)) {
    return parseInt(digits, 10);
  }
  return /^[0-9]+$/.test(lower) ? parseInt(lower, 10) : NaN;
}

/**
 * Splits assembly source text into tokens.
 *
 * Behaviour:
 * - `;` starts a comment that runs to the end of the line.
 * - `name:` yields a LABEL token whose value excludes the colon.
 * - SEGMENT, ENDS, INCLUDE and instruction mnemonics are matched
 *   case-insensitively; every other identifier is a REGISTER token.
 * - `.MODEL`, `.STACK` and INCLUDE take their argument from the same line
 *   only, so a directive never swallows the first word of the next line.
 * - `position` is the offset of the token's first character, not of any
 *   whitespace in front of it.
 *
 * @param input Complete source text.
 * @returns The tokens in source order plus any lexical errors.
 */
export function tokenize(input: string): { tokens: Token[]; errors: ErrorInfo[] } {
  const tokens: Token[] = [];
  const errors: ErrorInfo[] = [];
  let pos = 0;
  let line = 0;

  // Returns the text `pattern` matches exactly at offset `at`, if any.
  const matchAt = (pattern: RegExp, at: number): string | undefined => {
    pattern.lastIndex = at;
    return pattern.exec(input)?.[0];
  };

  // Reads an argument located on the current line, after optional blanks.
  const readInlineArgument = (
    from: number,
    pattern: RegExp
  ): { text: string; end: number } | undefined => {
    let start = from;
    while (input.charAt(start) === ' ' || input.charAt(start) === '\t') {
      start++;
    }
    const text = matchAt(pattern, start);
    return text === undefined ? undefined : { text, end: start + text.length };
  };

  while (pos < input.length) {
    const ch = input.charAt(pos);

    if (ch === '\n') {
      line++;
      pos++;
      continue;
    }
    if (/\s/.test(ch)) {
      pos++;
      continue;
    }
    if (ch === ';') {
      // Stop in front of the newline so the line counter still sees it.
      const eol = input.indexOf('\n', pos);
      pos = eol === -1 ? input.length : eol;
      continue;
    }
    if (ch === ',') {
      tokens.push({ type: 'COMMA', value: ch, position: pos, line });
      pos++;
      continue;
    }

    const numberText = matchAt(NUMBER_RE, pos);
    if (numberText !== undefined) {
      if (Number.isNaN(parseNumericLiteral(numberText))) {
        errors.push({
          type: 'SyntaxError',
          position: pos,
          message: `Invalid numeric literal '${numberText}'`
        });
      } else {
        tokens.push({ type: 'NUMBER', value: numberText, position: pos, line });
      }
      pos += numberText.length;
      continue;
    }

    const directive = matchAt(DIRECTIVE_RE, pos);
    if (directive !== undefined) {
      const position = pos;
      pos += directive.length;
      switch (directive.toUpperCase()) {
        case '.MODEL': {
          const model = readInlineArgument(pos, ARGUMENT_RE);
          if (!model) {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected model type after .MODEL directive'
            });
          } else {
            tokens.push({ type: 'MODEL_DIRECTIVE', value: model.text, position, line });
            pos = model.end;
          }
          break;
        }
        case '.STACK': {
          const size = readInlineArgument(pos, ARGUMENT_RE);
          if (size) {
            // Consume the argument even when invalid so it is not re-lexed.
            pos = size.end;
          }
          if (!size || Number.isNaN(parseNumericLiteral(size.text))) {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected a valid number for stack size after .STACK directive'
            });
          } else {
            tokens.push({ type: 'STACK_DIRECTIVE', value: size.text, position, line });
          }
          break;
        }
        case '.DATA':
          tokens.push({ type: 'DATA_DIRECTIVE', value: directive, position, line });
          break;
        case '.CODE':
          tokens.push({ type: 'CODE_DIRECTIVE', value: directive, position, line });
          break;
        default:
          // `.386` and every other dot directive share this token type.
          tokens.push({ type: 'PROCESSOR_DIRECTIVE', value: directive, position, line });
      }
      continue;
    }

    const word = matchAt(IDENTIFIER_RE, pos);
    if (word !== undefined) {
      const position = pos;
      const keyword = word.toUpperCase();
      pos += word.length;
      if (input.charAt(pos) === ':') {
        tokens.push({ type: 'LABEL', value: word, position, line });
        pos++;
      } else if (keyword === 'SEGMENT') {
        tokens.push({ type: 'SEGMENT', value: word, position, line });
      } else if (keyword === 'ENDS') {
        tokens.push({ type: 'ENDS', value: word, position, line });
      } else if (keyword === 'INCLUDE') {
        const file = readInlineArgument(pos, FILENAME_RE);
        if (!file) {
          errors.push({
            type: 'SyntaxError',
            position,
            message: 'Expected file name after INCLUDE'
          });
        } else {
          tokens.push({ type: 'INCLUDE', value: file.text, position, line });
          pos = file.end;
        }
      } else if (isInstruction(word)) {
        tokens.push({ type: 'INSTRUCTION', value: word, position, line });
      } else {
        tokens.push({ type: 'REGISTER', value: word, position, line });
      }
      continue;
    }

    // Character that belongs to no supported token: skip it silently.
    pos++;
  }

  return { tokens, errors };
}

/**
 * Tells whether `value` is an instruction mnemonic (case-insensitive).
 *
 * @param value Candidate identifier.
 * @param instructions Upper-case mnemonics to check against.
 */
function isInstruction(value: string, instructions = commonInstructions): boolean {
  return instructions.includes(value.toUpperCase());
}

/** True when both tokens carry line information and sit on the same line. */
function sharesLine(a: Token, b: Token): boolean {
  return a.line !== undefined && a.line === b.line;
}

/** True when both tokens carry line information and sit on different lines. */
function startsNewLine(previous: Token, next: Token): boolean {
  return previous.line !== undefined && next.line !== undefined && previous.line !== next.line;
}

/**
 * Builds an AST from a token list.
 *
 * Segments may be written MASM style (`name SEGMENT` ... `name ENDS`, both
 * tokens on one line) or in the original form (`SEGMENT name` ... `ENDS`).
 * Labels, instructions, includes and generic dot directives are added to the
 * open segment when there is one, otherwise to the top level. `.MODEL`,
 * `.STACK`, `.DATA` and `.CODE` always go to the top level.
 *
 * An instruction's operands end at the end of its line (when the tokens carry
 * line information) or at the first token that is not a register, number or
 * comma. A label definition is therefore never taken as an operand.
 *
 * @param tokens Tokens, normally produced by `tokenize`.
 * @returns The AST plus any errors found. Errors are collected, never thrown.
 */
export function parse(tokens: Token[]): { ast: ASTNode[]; errors: ErrorInfo[] } {
  const ast: ASTNode[] = [];
  const errors: ErrorInfo[] = [];
  let currentSegment: SegmentNode | null = null;
  const labelMap = new Map<string, LabelNode>();

  // Adds a node to the open segment, or to the top level when none is open.
  const emit = (node: ASTNode): void => {
    const target = currentSegment ? currentSegment.instructions : ast;
    target.push(node);
  };
  const syntaxError = (position: number, message: string): void => {
    errors.push({ type: 'SyntaxError', position, message });
  };

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    switch (token.type) {
      case 'REGISTER': {
        // MASM form: an identifier directly followed by SEGMENT or ENDS.
        const next = tokens[i + 1];
        if (!next || !sharesLine(token, next)) {
          break;
        }
        if (next.type === 'SEGMENT') {
          currentSegment = { type: 'SEGMENT', name: token.value, instructions: [] };
          ast.push(currentSegment);
          i++;
        } else if (next.type === 'ENDS') {
          if (!currentSegment) {
            syntaxError(next.position, 'ENDS keyword without corresponding SEGMENT');
          }
          currentSegment = null;
          i++;
        }
        break;
      }
      case 'SEGMENT': {
        // Original form: `SEGMENT name`. The next token is only consumed
        // when it really is a name, so nothing is lost on error.
        const nameToken = tokens[i + 1];
        if (!nameToken || nameToken.type !== 'REGISTER') {
          syntaxError(token.position, 'Expected segment name after SEGMENT keyword');
          break;
        }
        i++;
        currentSegment = { type: 'SEGMENT', name: nameToken.value, instructions: [] };
        ast.push(currentSegment);
        break;
      }
      case 'ENDS': {
        if (!currentSegment) {
          syntaxError(token.position, 'ENDS keyword without corresponding SEGMENT');
        }
        currentSegment = null;
        break;
      }
      case 'LABEL': {
        const existing = labelMap.get(token.value);
        if (existing) {
          errors.push({
            type: 'LabelError',
            position: token.position,
            message: `Label '${token.value}' is already defined at position ${existing.position}`
          });
        } else {
          const labelNode: LabelNode = {
            type: 'LABEL',
            name: token.value,
            position: token.position
          };
          labelMap.set(token.value, labelNode);
          emit(labelNode);
        }
        break;
      }
      case 'INSTRUCTION': {
        const operands: (RegisterNode | NumberNode | LabelReferenceNode)[] = [];
        while (i + 1 < tokens.length) {
          const candidate = tokens[i + 1];
          if (startsNewLine(token, candidate)) {
            break;
          }
          if (candidate.type === 'REGISTER') {
            operands.push(
              knownRegisters.has(candidate.value.toUpperCase())
                ? { type: 'REGISTER', name: candidate.value }
                : { type: 'LABEL_REFERENCE', name: candidate.value }
            );
          } else if (candidate.type === 'NUMBER') {
            operands.push({ type: 'NUMBER', value: parseNumericLiteral(candidate.value) });
          } else if (candidate.type !== 'COMMA') {
            // Anything else (including a label definition) ends the operands.
            break;
          }
          i++;
        }
        emit({ type: 'INSTRUCTION', name: token.value, operands });
        break;
      }
      case 'INCLUDE': {
        const filename = token.value;
        const prefix = (err: ErrorInfo): ErrorInfo => ({
          ...err,
          message: `In included file ${filename}: ${err.message}`
        });
        const lexed = tokenize(readFile(filename));
        errors.push(...lexed.errors.map(prefix));
        const parsed = parse(lexed.tokens);
        errors.push(...parsed.errors.map(prefix));
        emit({ type: 'INCLUDE', filename, ast: parsed.ast });
        break;
      }
      case 'PROCESSOR_DIRECTIVE':
        emit({ type: 'PROCESSOR_DIRECTIVE', directive: token.value });
        break;
      case 'MODEL_DIRECTIVE':
        ast.push({ type: 'MODEL_DIRECTIVE', model: token.value });
        break;
      case 'STACK_DIRECTIVE':
        ast.push({ type: 'STACK_DIRECTIVE', size: parseNumericLiteral(token.value) });
        break;
      case 'DATA_DIRECTIVE':
        ast.push({ type: 'DATA_DIRECTIVE' });
        break;
      case 'CODE_DIRECTIVE':
        ast.push({ type: 'CODE_DIRECTIVE' });
        break;
      // NUMBER and COMMA outside an instruction have no AST node: ignored.
    }
  }

  if (currentSegment) {
    syntaxError(tokens[tokens.length - 1]?.position ?? 0, 'Unclosed segment');
  }

  return { ast, errors };
}

0 commit comments

Comments
 (0)