xLucene Parser AST Structure
This document provides a comprehensive guide to the Abstract Syntax Tree (AST) structure generated by the xLucene Parser.
Overview
The xLucene Parser converts query strings into a tree structure where each node represents a different type of query operation.
Node Hierarchy
Node (base interface)
├── TermLikeNode (searchable terms)
│   ├── Term
│   ├── Regexp
│   ├── Wildcard
│   ├── Range
│   ├── FunctionNode
│   └── TermList
├── GroupLikeNode (logical containers)
│   ├── LogicalGroup
│   └── FieldGroup
├── Conjunction (term grouping)
├── Negation (NOT operations)
├── Exists (field existence)
└── EmptyNode (filtered/empty results)
Node Types in Detail
Base Node Interface
All AST nodes implement the base `Node` interface:
interface Node {
type: NodeType;
}
Term Node
Represents basic field-value pairs or standalone search terms.
interface Term extends AnyDataType, TermLikeNode {
type: NodeType.Term;
field: string | null; // Field name, null for standalone terms
field_type: xLuceneFieldType; // Coerced field type
value: FieldValue<any>; // The search value
quoted?: boolean; // Whether value was quoted
restricted?: boolean; // Whether value has restricted characters
analyzed?: boolean; // Whether field is analyzed
}
Examples:
// Query: "hello"
{
type: "term",
field: null,
field_type: "string",
quoted: false,
value: { type: "value", value: "hello" }
}
// Query: "name:John"
{
type: "term",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "John" }
}
// Query: "age:25" (with type config)
{
type: "term",
field: "age",
field_type: "integer",
value: { type: "value", value: 25 }
}
Range Node
Represents numeric or string ranges with comparison operators.
interface Range extends TermLikeNode {
type: NodeType.Range;
field: string;
field_type: xLuceneFieldType;
left: RangeNode; // Left boundary
right?: RangeNode; // Right boundary (optional)
}
interface RangeNode {
operator: 'gte' | 'gt' | 'lt' | 'lte';
field_type: xLuceneFieldType;
value: FieldValue<number | string>;
}
Examples:
// Query: "age:>=18"
{
type: "range",
field: "age",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 18 }
}
}
// Query: "score:[80 TO 95]"
{
type: "range",
field: "score",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 80 }
},
right: {
operator: "lte",
field_type: "integer",
value: { type: "value", value: 95 }
}
}
// Query: "score:[10 TO 20}"
{
type: "range",
field: "score",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 10 }
},
right: {
operator: "lt",
field_type: "integer",
value: { type: "value", value: 20 }
}
}
Wildcard Node
Represents pattern matching with `*` (multiple characters) and `?` (single character).
interface Wildcard extends StringDataType, TermLikeNode {
type: NodeType.Wildcard;
field: string;
field_type: xLuceneFieldType.String;
value: FieldValue<string>;
quoted: boolean;
}
Examples:
// Query: "name:J*n"
{
type: "wildcard",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "J*n" }
}
// Query: "name:J?n"
{
type: "wildcard",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "J?n" }
}
Regexp Node
Represents regular expression patterns.
interface Regexp extends StringDataType, TermLikeNode {
type: NodeType.Regexp;
field: string;
field_type: xLuceneFieldType.String;
value: FieldValue<string>;
quoted: boolean;
}
Example:
// Query: "email:/.*@example\.com/"
{
type: "regexp",
field: "email",
field_type: "string",
quoted: false,
value: { type: "value", value: ".*@example\\.com" }
}
Function Node
Represents specialized function calls that add unique capabilities to Lucene.
interface FunctionNode extends TermLikeNode {
type: NodeType.Function;
field: string;
name: string; // Function name
description?: string; // Optional description
params: (Term | TermList)[]; // Function parameters
}
Example:
// Query: "location:geoDistance(point:\"40,-74\", distance:\"10km\")"
{
type: "function",
field: "location",
name: "geoDistance",
params: [
{
type: "term",
field: "point",
value: { type: "value", value: "40,-74" }
},
{
type: "term",
field: "distance",
value: { type: "value", value: "10km" }
}
]
}
Exists Node
Represents field existence checks.
interface Exists extends Node {
type: NodeType.Exists;
field: string;
}
Example:
// Query: "_exists_:email"
{
type: "exists",
field: "email"
}
LogicalGroup Node
Represents boolean operations connecting multiple terms or groups.
interface LogicalGroup extends GroupLikeNode {
type: NodeType.LogicalGroup;
flow: Conjunction[];
}
interface Conjunction extends Node {
type: NodeType.Conjunction;
nodes: Node[];
}
Examples:
// Query: "name:John AND age:25"
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
},
{
type: "term",
field: "age",
value: { type: "value", value: 25 }
}
]
}
]
}
// Query: "name:John OR name:Jane"
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
}
]
},
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "Jane" }
}
]
}
]
}
FieldGroup Node
Represents multiple operations on the same field.
interface FieldGroup extends GroupLikeNode {
type: NodeType.FieldGroup;
field: string;
field_type: xLuceneFieldType;
flow: Conjunction[];
}
Example:
// Query: "age:(>=18 AND <=65)"
{
type: "field-group",
field: "age",
field_type: "integer",
flow: [
{
type: "conjunction",
nodes: [
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 18 }
}
},
{
type: "range",
field: "age",
left: {
operator: "lte",
value: { type: "value", value: 65 }
}
}
]
}
]
}
Negation Node
Represents NOT operations that negate other nodes.
interface Negation extends Node {
type: NodeType.Negation;
node: Node;
}
Example:
// Query: "NOT status:inactive"
{
type: "negation",
node: {
type: "term",
field: "status",
value: { type: "value", value: "inactive" }
}
}
Empty Node
Represents filtered-out or empty results.
interface EmptyNode extends Node {
type: NodeType.Empty;
}
Example:
// Created when variables are filtered out
{
type: "empty"
}
Field Values
Field values can be either literal values or variable references:
type FieldValue<T> = FieldValueValue<T> | FieldValueVariable;
type FieldValueValue<T> = {
type: 'value';
value: T;
};
type FieldValueVariable = {
type: 'variable';
scoped: boolean; // true for @var, false for $var
value: string; // variable name
};
Examples:
// Literal value
{ type: "value", value: "John" }
// Variable reference
{ type: "variable", scoped: false, value: "username" }
// Scoped variable
{ type: "variable", scoped: true, value: "user.name" }
Complex AST Examples
Nested Logical Operations
Query: "(name:John OR name:Jane) AND age:>=18"
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
}
]
},
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "Jane" }
}
]
}
]
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 18 }
}
}
]
}
]
}
Variable Resolution
Query: "name:$username AND age:>=$minAge"
Before resolution:
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "variable", scoped: false, value: "username" }
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "variable", scoped: false, value: "minAge" }
}
}
]
}
]
}
After resolution with `{ username: "John", minAge: 25 }`:
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 25 }
}
}
]
}
]
}
Working with the AST
Traversing Nodes
Use the Parser methods to traverse the AST:
// Visit all nodes in the AST (depth-first traversal)
parser.walkAST((node) => {
console.log(`Node type: ${node.type}`);
});
// Visit all term-like nodes
parser.forTermTypes((node) => {
console.log(`${node.type}: ${node.field}`);
});
// Visit all field values
parser.forEachFieldValue((value, node) => {
if (value.type === 'variable') {
console.log(`Variable: ${value.value}`);
}
});
// Visit specific node types
parser.forTypes(['term', 'range'], (node) => {
// Process terms and ranges
});
Transforming the AST
Use `mapNode` to transform the AST:
// Transform field names
const transformed = parser.mapNode((node) => {
if ('field' in node && node.field === 'old_field') {
return { ...node, field: 'new_field' };
}
return node;
});
Type Guards
Use utility functions to check node types:
import { isTerm, isRange, isLogicalGroup } from 'xlucene-parser';
function processNode(node: Node) {
if (isTerm(node)) {
// TypeScript knows this is a Term
console.log(node.field, node.value);
} else if (isRange(node)) {
// TypeScript knows this is a Range
console.log(node.left.operator, node.left.value);
} else if (isLogicalGroup(node)) {
// TypeScript knows this is a LogicalGroup
console.log(`${node.flow.length} conjunctions`);
}
}
Best Practices for AST Manipulation
- Use Type Guards: Always check node types before accessing type-specific properties
- Preserve Immutability: Use `mapNode` for transformations rather than mutating nodes directly
- Handle Variables: Check if values are variables before accessing their literal values
- Validate Field Types: Ensure field types match expected data types
- Consider Edge Cases: Handle empty nodes and missing optional properties
This AST structure provides a flexible foundation for building search interfaces, query validators, and query transformation tools.