Generating Emoji.java using Python
3 min read · Oct 23, 2024
A supplement to the original article, "How to Easily Handle Emoji Unicode in Java".
Thanks to https://www.unicode.org/emoji/techindex.html for providing a text version of the files for faster download and processing.
Here is a Python program, tested with Python 3, that downloads the emoji sequence list and generates Emoji.java from it.
import requests
import re
import os
def download_file(url, output_file):
    """
    Fetch `url` over HTTP and store the raw response body in `output_file`.

    Returns True when the body was written, False when the request failed.
    Only requests.RequestException is handled here; any other error
    (for example a failure to open `output_file`) propagates to the caller.
    """
    succeeded = False
    try:
        # The 30 second timeout keeps an unresponsive server from hanging the script.
        reply = requests.get(url, timeout=30)
        # Turn 4xx/5xx status codes into an exception instead of saving an error page.
        reply.raise_for_status()
        # Binary mode: the payload is written exactly as received.
        with open(output_file, 'wb') as sink:
            sink.write(reply.content)
        print(f"File downloaded successfully and saved as '{output_file}'")
        succeeded = True
    except requests.RequestException as error:
        print(f"Failed to download the file: {error}")
    return succeeded
def transform_column_3(data):
    """
    Turn an emoji description into a lowercase name usable as a Java enum constant.

    Asterisks become the word 'asterisk', every other character outside
    a-z, A-Z and 0-9 becomes an underscore, runs of underscores are collapsed
    to one, and '1st' is spelled out as 'first'. If the result would still
    begin with a digit (e.g. '2nd place medal'), an underscore is prefixed,
    because a Java identifier cannot start with a digit.

    Returns the transformed string; an empty input yields an empty string.
    """
    # Replace asterisks with the word "asterisk" first, before the generic
    # substitution below would turn them into underscores.
    data = re.sub(r'\*', 'asterisk', data)
    # Replace all other non-alphanumeric characters with underscores
    data = re.sub(r'[^a-zA-Z0-9]', '_', data)
    # Collapse consecutive underscores
    data = re.sub(r'_+', '_', data)
    # Spell out "1st", which would otherwise give the name a leading digit
    data = re.sub(r'1st', 'first', data)
    data = data.lower()
    # Any remaining leading digit would make the enum constant invalid Java;
    # names that were already valid are left untouched.
    if data[:1].isdigit():
        data = f"_{data}"
    return data
def parse_file(file_path):
    """
    Parse a ';'-separated data file into (code points, name) entries.

    For every line, text after the first '#' is discarded; blank lines and
    lines with fewer than three ';'-separated columns are skipped. Column 1
    holds either a 'START..END' hexadecimal range or one or more
    space-separated hexadecimal values; column 3 is passed through
    transform_column_3.

    Returns a list of (hex_values, name) tuples where hex_values is a
    space-separated string of '0x'-prefixed code points. A range is expanded
    into all of its values inside a single entry. If the file cannot be read,
    the error is printed and the entries collected so far are returned.
    """
    entries = []
    try:
        # Decode explicitly as UTF-8: the emoji data files contain non-ASCII
        # text (emoji in comments, accented names), and the platform default
        # encoding may be unable to decode it.
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                # Remove inline comments and strip whitespaces
                line = line.split('#')[0].strip()
                if not line:
                    continue
                # Split by ';' and skip lines without enough columns
                columns = line.split(';')
                if len(columns) < 3:
                    continue
                column_1 = columns[0].strip()
                column_3 = transform_column_3(columns[2].strip())
                if '..' in column_1:
                    # Range: enumerate every code point between both ends
                    # (inclusive) and format each as '0x' + upper-case hex,
                    # zero-padded to at least four digits.
                    start_hex, end_hex = column_1.split('..')
                    start_val = int(start_hex, 16)
                    end_val = int(end_hex, 16)
                    hex_values = " ".join(
                        f"0x{val:04X}" for val in range(start_val, end_val + 1)
                    )
                else:
                    # Single or multiple space-separated values: prefix each
                    # one with '0x' and keep its digits as written.
                    hex_values = " ".join(f"0x{value}" for value in column_1.split())
                entries.append((hex_values, column_3))
    except IOError as e:
        print(f"Error processing the file: {e}")
    return entries
def generate_java_enum(entries, output_file):
    """
    Write the Java source of an 'Emoji' enum to `output_file`.

    `entries` is an iterable of (code points, constant name) pairs, where the
    code points are a whitespace-separated string of integer literals. Each
    pair becomes one enum constant whose constructor arguments are those
    literals joined with ', '. An IOError while writing is reported on stdout
    rather than raised.
    """
    # Everything that follows the constants: the list terminator, the
    # code-point field, the varargs constructor and a toString() that
    # concatenates the characters of every code point.
    class_tail = (
        " ;\n\n"
        " private final int[] codePoints;\n\n"
        " Emoji(int... codePoints) {\n"
        " this.codePoints = codePoints;\n"
        " }\n\n"
        " @Override\n"
        " public String toString() {\n"
        " StringBuilder stringBuilder = new StringBuilder();\n"
        " for (int codePoint : codePoints) {\n"
        " stringBuilder.append(Character.toChars(codePoint));\n"
        " }\n"
        " return stringBuilder.toString();\n"
        " }\n"
        "}\n"
    )
    try:
        with open(output_file, 'w') as out:
            # Opening line of the enum
            out.write("public enum Emoji {\n")
            # One constant per entry
            for code_points, constant_name in entries:
                arguments = ', '.join(code_points.split())
                out.write(f" {constant_name}({arguments}),\n")
            out.write(class_tail)
        print(f"Java Enum class generated successfully as '{output_file}'")
    except IOError as error:
        print(f"Error generating Java Enum: {error}")
def main(url, output_file, java_enum_file):
    """
    Run the whole pipeline: download `url` to `output_file`, parse it, and
    write the resulting enum to `java_enum_file`.

    Parsing only happens after a successful download, and the enum is only
    generated when parsing produced at least one entry. The downloaded file
    is treated as temporary and is deleted on the way out, even if an earlier
    step raised.
    """
    try:
        downloaded = download_file(url, output_file)
        parsed_entries = parse_file(output_file) if downloaded else []
        if parsed_entries:
            generate_java_enum(parsed_entries, java_enum_file)
    finally:
        # Clean up the intermediate download, if one was created.
        if os.path.exists(output_file):
            os.remove(output_file)
            print(f"Temporary file '{output_file}' has been removed.")
# Example usage: download the latest emoji sequence list and turn it into Emoji.java
java_enum_filename = "Emoji.java"
output_filename = "emoji_sequences.txt"
file_url = "https://unicode.org/Public/emoji/latest/emoji-sequences.txt"
main(
    url=file_url,
    output_file=output_filename,
    java_enum_file=java_enum_filename,
)
Running this script generates an Emoji.java enum that encodes the emojis as the original program intended. The following Java class prints every generated constant next to the emoji it renders:
import java.util.Arrays;
public class EmojiTest {
    /**
     * Prints one line per Emoji constant, in declaration order.
     */
    public static void main(String[] args) {
        Arrays.stream(Emoji.values()).forEachOrdered(EmojiTest::printRow);
    }

    /**
     * Prints the constant's name right-aligned in a 75-character column,
     * followed by the constant formatted with %s (its toString() value).
     */
    private static void printRow(Emoji emoji) {
        System.out.printf("%75s : %s\n", emoji.name(), emoji);
    }
}