Skip to main content

xLucene Parser AST Structure

This document provides a comprehensive guide to the Abstract Syntax Tree (AST) structure generated by the xLucene Parser.

Overview

The xLucene Parser converts a query string into a tree structure in which each node represents one part of the query, such as a term, a range, or a logical group.

Node Hierarchy

Node (base interface)
├── TermLikeNode (searchable terms)
│ ├── Term
│ ├── Regexp
│ ├── Wildcard
│ ├── Range
│ ├── FunctionNode
│ └── TermList
├── GroupLikeNode (logical containers)
│ ├── LogicalGroup
│ └── FieldGroup
├── Conjunction (term grouping)
├── Negation (NOT operations)
├── Exists (field existence)
└── EmptyNode (filtered/empty results)

Node Types in Detail

Base Node Interface

All AST nodes implement the base Node interface:

interface Node {
type: NodeType;
}

Term Node

Represents basic field-value pairs or standalone search terms.

interface Term extends AnyDataType, TermLikeNode {
type: NodeType.Term;
field: string | null; // Field name, null for standalone terms
field_type: xLuceneFieldType; // Coerced field type
value: FieldValue<any>; // The search value
quoted?: boolean; // Whether value was quoted
restricted?: boolean; // Whether value has restricted characters
analyzed?: boolean; // Whether field is analyzed
}

Examples:

// Query: "hello" 
{
type: "term",
field: null,
field_type: "string",
quoted: false,
value: { type: "value", value: "hello" }
}

// Query: "name:John"
{
type: "term",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "John" }
}

// Query: "age:25" (with type config)
{
type: "term",
field: "age",
field_type: "integer",
value: { type: "value", value: 25 }
}

Range Node

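Represents a range over numeric or string values, written either with a comparison operator (for example, age:>=18) or in interval form (for example, score:[80 TO 95]). In the interval examples below, a square bracket produces an inclusive bound (gte/lte) and a closing curly brace produces an exclusive bound (lt).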

interface Range extends TermLikeNode {
type: NodeType.Range;
field: string;
field_type: xLuceneFieldType;
left: RangeNode; // Left boundary
right?: RangeNode; // Right boundary (optional)
}

interface RangeNode {
operator: 'gte' | 'gt' | 'lt' | 'lte';
field_type: xLuceneFieldType;
value: FieldValue<number | string>;
}

Examples:

// Query: "age:>=18"
{
type: "range",
field: "age",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 18 }
}
}

// Query: "score:[80 TO 95]"
{
type: "range",
field: "score",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 80 }
},
right: {
operator: "lte",
field_type: "integer",
value: { type: "value", value: 95 }
}
}

// Query: "score:[10 TO 20}"
{
type: "range",
field: "score",
field_type: "integer",
left: {
operator: "gte",
field_type: "integer",
value: { type: "value", value: 10 }
},
right: {
operator: "lt",
field_type: "integer",
value: { type: "value", value: 20 }
}
}

Wildcard Node

Represents pattern matching with * (matches zero or more characters) and ? (matches exactly one character).

interface Wildcard extends StringDataType, TermLikeNode {
type: NodeType.Wildcard;
field: string;
field_type: xLuceneFieldType.String;
value: FieldValue<string>;
quoted: boolean;
}

Examples:

// Query: "name:J*n"
{
type: "wildcard",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "J*n" }
}

// Query: "name:J?n"
{
type: "wildcard",
field: "name",
field_type: "string",
quoted: false,
value: { type: "value", value: "J?n" }
}

Regexp Node

Represents regular expression patterns.

interface Regexp extends StringDataType, TermLikeNode {
type: NodeType.Regexp;
field: string;
field_type: xLuceneFieldType.String;
value: FieldValue<string>;
quoted: boolean;
}

Example:

// Query: "email:/.*@example\.com/"
{
type: "regexp",
field: "email",
field_type: "string",
quoted: false,
value: { type: "value", value: ".*@example\\.com" }
}

Function Node

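Represents a function call applied to a field, such as geoDistance. Functions extend standard Lucene query syntax with capabilities it does not provide on its own; each call records the function name and its parameters.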

interface FunctionNode extends TermLikeNode {
type: NodeType.Function;
field: string;
name: string; // Function name
description?: string; // Optional description
params: (Term | TermList)[]; // Function parameters
}

Example:

// Query: "location:geoDistance(point:\"40,-74\", distance:\"10km\")"
{
type: "function",
field: "location",
name: "geoDistance",
params: [
{
type: "term",
field: "point",
value: { type: "value", value: "40,-74" }
},
{
type: "term",
field: "distance",
value: { type: "value", value: "10km" }
}
]
}

Exists Node

Represents field existence checks.

interface Exists extends Node {
type: NodeType.Exists;
field: string;
}

Example:

// Query: "_exists_:email"
{
type: "exists",
field: "email"
}

LogicalGroup Node

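Represents boolean operations connecting multiple terms or groups. Each entry in flow is a conjunction: nodes joined by AND are placed in the same conjunction, while each OR alternative becomes its own conjunction, as the examples below show.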

interface LogicalGroup extends GroupLikeNode {
type: NodeType.LogicalGroup;
flow: Conjunction[];
}

interface Conjunction extends Node {
type: NodeType.Conjunction;
nodes: Node[];
}

Examples:

// Query: "name:John AND age:25"
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
},
{
type: "term",
field: "age",
value: { type: "value", value: 25 }
}
]
}
]
}

// Query: "name:John OR name:Jane"
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
}
]
},
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "Jane" }
}
]
}
]
}

FieldGroup Node

Represents multiple operations on the same field.

interface FieldGroup extends GroupLikeNode {
type: NodeType.FieldGroup;
field: string;
field_type: xLuceneFieldType;
flow: Conjunction[];
}

Example:

// Query: "age:(>=18 AND <=65)"
{
type: "field-group",
field: "age",
field_type: "integer",
flow: [
{
type: "conjunction",
nodes: [
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 18 }
}
},
{
type: "range",
field: "age",
left: {
operator: "lte",
value: { type: "value", value: 65 }
}
}
]
}
]
}

Negation Node

Represents NOT operations that negate other nodes.

interface Negation extends Node {
type: NodeType.Negation;
node: Node;
}

Example:

// Query: "NOT status:inactive"
{
type: "negation",
node: {
type: "term",
field: "status",
value: { type: "value", value: "inactive" }
}
}

Empty Node

Represents filtered out or empty results.

interface EmptyNode extends Node {
type: NodeType.Empty;
}

Example:

// Created when variables are filtered out
{
type: "empty"
}

Field Values

Field values can be either literal values or variable references:

type FieldValue<T> = FieldValueValue<T> | FieldValueVariable;

type FieldValueValue<T> = {
type: 'value';
value: T;
};

type FieldValueVariable = {
type: 'variable';
scoped: boolean; // true for @var, false for $var
value: string; // variable name
};

Examples:

// Literal value
{ type: "value", value: "John" }

// Variable reference
{ type: "variable", scoped: false, value: "username" }

// Scoped variable
{ type: "variable", scoped: true, value: "user.name" }

Complex AST Examples

Nested Logical Operations

Query: "(name:John OR name:Jane) AND age:>=18"

{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
}
]
},
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "Jane" }
}
]
}
]
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 18 }
}
}
]
}
]
}

Variable Resolution

Query: "name:$username AND age:>=$minAge"

Before resolution:

{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "variable", scoped: false, value: "username" }
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "variable", scoped: false, value: "minAge" }
}
}
]
}
]
}

After resolution with { username: "John", minAge: 25 }:

{
type: "logical-group",
flow: [
{
type: "conjunction",
nodes: [
{
type: "term",
field: "name",
value: { type: "value", value: "John" }
},
{
type: "range",
field: "age",
left: {
operator: "gte",
value: { type: "value", value: 25 }
}
}
]
}
]
}

Working with the AST

Traversing Nodes

Use the Parser methods to traverse the AST:

// Visit all nodes in the AST (depth-first traversal)
parser.walkAST((node) => {
console.log(`Node type: ${node.type}`);
});

// Visit all term-like nodes
parser.forTermTypes((node) => {
console.log(`${node.type}: ${node.field}`);
});

// Visit all field values
parser.forEachFieldValue((value, node) => {
if (value.type === 'variable') {
console.log(`Variable: ${value.value}`);
}
});

// Visit specific node types
parser.forTypes(['term', 'range'], (node) => {
// Process terms and ranges
});

Transforming the AST

Use mapNode to transform the AST:

// Transform field names
const transformed = parser.mapNode((node) => {
if ('field' in node && node.field === 'old_field') {
return { ...node, field: 'new_field' };
}
return node;
});

Type Guards

Use utility functions to check node types:

import { isTerm, isRange, isLogicalGroup } from 'xlucene-parser';

function processNode(node: Node) {
if (isTerm(node)) {
// TypeScript knows this is a Term
console.log(node.field, node.value);
} else if (isRange(node)) {
// TypeScript knows this is a Range
console.log(node.left.operator, node.left.value);
} else if (isLogicalGroup(node)) {
// TypeScript knows this is a LogicalGroup
console.log(`${node.flow.length} conjunctions`);
}
}

Best Practices for AST Manipulation

  1. Use Type Guards: Always check node types before accessing type-specific properties
  2. Preserve Immutability: Use mapNode for transformations rather than mutating nodes directly
  3. Handle Variables: Check whether a field value has type 'variable' before treating its value as a literal
  4. Validate Field Types: Ensure field types match expected data types
  5. Consider Edge Cases: Handle empty nodes and missing optional properties (for example, a Range without right, or a Term without quoted)

This AST structure provides a flexible foundation for building search interfaces, query validators, and query transformation tools.