Get group names in java regex

There is no API in Java to obtain the names of the named capturing groups. I think this is a missing feature.

The easy way out is to pick out candidate named capturing groups from the pattern, then try to access the named group from the match. In other words, you don’t know the exact names of the named capturing groups, until you plug in a string that matches the whole pattern.

The Pattern to capture the names of the named capturing group is \(\?<([a-zA-Z][a-zA-Z0-9]*)> (derived based on Pattern class documentation).

(The hard way is to implement a parser for regex and get the names of the capturing groups).

A sample implementation:

import java.util.Scanner;
import java.util.Set;
import java.util.TreeSet;
import java.util.Iterator;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.MatchResult;

class RegexTester {

    public static void main(String args[]) {
        Scanner scanner = new Scanner(System.in);

        String regex = scanner.nextLine();
        StringBuilder input = new StringBuilder();
        while (scanner.hasNextLine()) {
            input.append(scanner.nextLine()).append('\n');
        }

        Set<String> namedGroups = getNamedGroupCandidates(regex);

        Pattern p = Pattern.compile(regex);
        Matcher m = p.matcher(input);
        int groupCount = m.groupCount();

        int matchCount = 0;

        if (m.find()) {
            // Remove invalid groups
            Iterator<String> i = namedGroups.iterator();
            while (i.hasNext()) {
                try {
                    m.group(i.next());
                } catch (IllegalArgumentException e) {
                    i.remove();
                }
            }

            matchCount += 1;
            System.out.println("Match " + matchCount + ":");
            System.out.println("=" + m.group() + "=");
            System.out.println();
            printMatches(m, namedGroups);

            while (m.find()) {
                matchCount += 1;
                System.out.println("Match " + matchCount + ":");
                System.out.println("=" + m.group() + "=");
                System.out.println();
                printMatches(m, namedGroups);
            }
        }
    }

    private static void printMatches(Matcher matcher, Set<String> namedGroups) {
        for (String name: namedGroups) {
            String matchedString = matcher.group(name);
            if (matchedString != null) {
                System.out.println(name + "=" + matchedString + "=");
            } else {
                System.out.println(name + "_");
            }
        }

        System.out.println();

        for (int i = 1; i < matcher.groupCount(); i++) {
            String matchedString = matcher.group(i);
            if (matchedString != null) {
                System.out.println(i + "=" + matchedString + "=");
            } else {
                System.out.println(i + "_");
            }
        }

        System.out.println();
    }

    private static Set<String> getNamedGroupCandidates(String regex) {
        Set<String> namedGroups = new TreeSet<String>();

        Matcher m = Pattern.compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>").matcher(regex);

            while (m.find()) {
                namedGroups.add(m.group(1));
            }

            return namedGroups;
        }
    }
}

There is a caveat to this implementation, though. It currently doesn’t work with regex in Pattern.COMMENTS mode.

Leave a Comment