How can I check if a Python unicode string contains non-Western letters?

import unicodedata as ud

latin_letters= {}

def is_latin(uchr):
    try: return latin_letters[uchr]
    except KeyError:
         return latin_letters.setdefault(uchr, 'LATIN' in ud.name(uchr))

def only_roman_chars(unistr):
    return all(is_latin(uchr)
           for uchr in unistr
           if uchr.isalpha()) # isalpha suggested by John Machin

>>> only_roman_chars(u"ελληνικά means greek")
False
>>> only_roman_chars(u"frappé")
True
>>> only_roman_chars(u"hôtel lœwe")
True
>>> only_roman_chars(u"123 ångstrom ð áß")
True
>>> only_roman_chars(u"russian: гага")
False

Leave a Comment