写了个自动制作小字库的程序 (amobbs.com 阿莫电子技术论坛) -

tomzbj 发表于 2018-2-16 00:45:22

写了个自动制作小字库的程序

本帖最后由 tomzbj 于 2018-2-16 15:38 编辑

需要在点阵LCD之类场合使用少量汉字时一般是做成小字库.
以前用纯c做过, 这次用python3重新实现了一遍, gb2312和utf-8编码的源文件都可以识别.

import sys
import chardet

# Build target. When set to "AVR" the generated C file includes
# <avr/pgmspace.h> and every array is tagged PROGMEM; any other value
# produces plain const arrays with no extra header.
target = "AVR"

def GetContent(path):
    """Return the entire content of the file at *path* as bytes.

    The file is opened in binary mode, so no decoding or newline
    translation is applied; callers decode the result themselves.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, mode="rb") as f:
        return f.read()
# main
#
# Usage: font_gen.py <font file> <source file> <output file>
#
# Collects every non-ASCII character used in <source file>, looks up its
# 32-byte glyph in <font file> (addressed by GB2312 code) and writes a C file
# holding the character codes (GB2312 and UTF-8) plus the glyph bitmaps.
def _wrap_items(items, per_line, indent="    "):
    """Lay out pre-formatted C literals as comma-separated, indented rows.

    items is a list of strings such as "0x1f"; per_line is the number of
    literals per row. The result has no trailing comma or newline.
    """
    rows = [
        ", ".join(items[i:i + per_line])
        for i in range(0, len(items), per_line)
    ]
    return (",\n" + indent).join(rows)


def _c_array(ctype, name, suffix, items, per_line):
    """Render one C array definition: const <ctype> <name>[] <suffix> = {...};"""
    qualifier = " " + suffix if suffix else ""
    # An empty initializer list is not valid C, so emit a single 0
    # placeholder when there are no items (cfont_num is 0 in that case).
    body = _wrap_items(items, per_line) if items else "0"
    return "const %s %s[]%s = {\n    %s\n};\n" % (ctype, name, qualifier, body)


if __name__ == "__main__":

    if len(sys.argv) < 4:
        print("usage: %s <font file> <source file> <output file>" % sys.argv[0],
              file=sys.stderr)
        sys.exit(1)
    font_path, source_path, output_path = sys.argv[1:4]

    # On AVR the tables must carry PROGMEM to be placed in flash; targets
    # with a unified address space (e.g. STM32) only need const.
    suffix = "PROGMEM" if target == "AVR" else ""

    ziku = GetContent(font_path)      # the whole font file, kept in memory
    source = GetContent(source_path)  # source file containing the strings

    # Detect the encoding of the source file.
    enc = chardet.detect(source)["encoding"]
    if enc is not None and enc.upper() == "UTF-8-SIG":
        # Drop the 3-byte UTF-8 signature (BOM), then detect again so the
        # reported encoding is the one of the remaining data.
        source = source[3:]
        enc = chardet.detect(source)["encoding"]
    if enc is None:
        print("cannot detect the encoding of %s" % source_path, file=sys.stderr)
        sys.exit(1)
    enc = enc.upper()
    text = source.decode(encoding=enc)

    # Every non-ASCII character is assumed to be a wanted glyph. De-duplicate
    # and sort so the output order is deterministic.
    cstr = "".join(sorted({c for c in text if ord(c) > 127}))

    icodes_gb2312 = []
    icodes_utf8 = []
    glyphs = []
    for ch in cstr:
        # Encode one character at a time so that characters whose UTF-8 form
        # is not 3 bytes long cannot shift the codes of the ones after them.
        try:
            gb = ch.encode("gb2312")
        except UnicodeEncodeError:
            print("character %r (U+%04X) has no GB2312 code"
                  % (ch, ord(ch)), file=sys.stderr)
            sys.exit(1)
        icodes_gb2312.append(int.from_bytes(gb, "big"))
        icodes_utf8.append(int.from_bytes(ch.encode("utf-8"), "big"))

        # Offset of the glyph in the font file, derived from the GB2312
        # code: 94 characters per row, both bytes start at 0xa1, and each
        # glyph occupies 32 bytes.
        ofs = ((gb[0] - 0xa1) * 94 + (gb[1] - 0xa1)) * 32
        glyph = ziku[ofs:ofs + 32]
        if len(glyph) != 32:
            print("font file %s has no glyph at offset %d for %r"
                  % (font_path, ofs, ch), file=sys.stderr)
            sys.exit(1)
        glyphs.append(glyph)

    parts = []
    if target == "AVR":
        # Required on AVR, otherwise PROGMEM is not declared.
        parts.append("#include <avr/pgmspace.h>\n\n")
    parts.append("const int cfont_num = %d;\n" % len(cstr))
    parts.append("const char cfont_source_encoding[] = \"" + enc + "\";\n")
    # Items per row (9 / 7 / 12) keep each generated line under 80 columns.
    parts.append(_c_array("unsigned short", "cfont_icodes_gb2312", suffix,
                          ["0x%04x" % code for code in icodes_gb2312], 9))
    parts.append(_c_array("unsigned long", "cfont_icodes_utf8", suffix,
                          ["0x%06x" % code for code in icodes_utf8], 7))
    parts.append(_c_array("unsigned char", "cfont_mask", suffix,
                          ["0x%02x" % b for glyph in glyphs for b in glyph],
                          12))
    output = "".join(parts)

    # Write the target file; the context manager closes it even on error.
    try:
        with open(output_path, "w") as f:
            f.write(output)
    except OSError as err:
        print("err 3: cannot write %s: %s" % (output_path, err),
              file=sys.stderr)
        sys.exit(1)

使用方法: font_gen.py HZK16V cstring.c cfont.c
其中HZK16V是16点阵/纵向取模的字库文件, 见附件:
把所有需要用到的汉字字符串常量放进cstring.c, 比如:
unsigned char STRING_1[] = "浔阳江头夜送客";
unsigned char STRING_2[] = "枫叶荻花秋瑟瑟";

cfont.c是输出文件名, 上面例子的效果:
#include <avr/pgmspace.h>

const int cfont_num = 13;
const char cfont_source_encoding[] = "UTF-8";
const unsigned short cfont_icodes_gb2312[] PROGMEM = {
   0xd2b6, 0xd2b9, 0xcdb7, 0xbfcd, 0xb7e3, 0xbdad, 0xe4b1, 0xc9aa, 0xc7ef,
   0xbba8, 0xddb6, 0xcbcd, 0xd1f4
};
const unsigned long cfont_icodes_utf8[] PROGMEM = {
   0xe58fb6, 0xe5a49c, 0xe5a4b4, 0xe5aea2, 0xe69eab, 0xe6b19f, 0xe6b594,
   0xe7919f, 0xe7a78b, 0xe88ab1, 0xe88dbb, 0xe98081, 0xe998b3
};
const unsigned char cfont_mask[] PROGMEM = {
   0x00, 0x00, 0xfc, 0x1f, 0x04, 0x08, 0x04, 0x08, 0xfe, 0x1f, 0x44, 0x00,
   0x40, 0x00, 0x40, 0x00, 0x40, 0x00, 0xff, 0xff, 0x40, 0x00, 0x40, 0x00,
   0x40, 0x00, 0x60, 0x00, 0x40, 0x00, 0x00, 0x00, 0x04, 0x02, 0x04, 0x01,
   0x84, 0x00, 0xe4, 0xff, 0x1c, 0x82, 0x04, 0x81, 0x85, 0x41, 0x46, 0x22,
   0xbc, 0x14, 0x24, 0x09, 0x24, 0x14, 0x24, 0x23, 0xe4, 0x40, 0x06, 0xc0,
   0x04, 0x40, 0x00, 0x00, 0x00, 0x01, 0x00, 0x81, 0x10, 0x81, 0x20, 0x41,
   0x64, 0x41, 0x08, 0x21, 0x18, 0x11, 0x00, 0x0d, 0xff, 0x03, 0x00, 0x09,
   0x00, 0x09, 0x00, 0x11, 0x00, 0x61, 0x80, 0xc1, 0x00, 0x01, 0x00, 0x00,
   0x10, 0x04, 0x0c, 0x04, 0x84, 0x02, 0x44, 0x02, 0x3c, 0xfd, 0x54, 0x45,
   0x95, 0x44, 0x96, 0x44, 0x94, 0x44, 0x54, 0x45, 0x34, 0xfd, 0x14, 0x02,
   0x04, 0x02, 0x14, 0x06, 0x0c, 0x02, 0x00, 0x00, 0x10, 0x04, 0x10, 0x03,
   0xd0, 0x00, 0xff, 0xff, 0x50, 0x80, 0x90, 0x60, 0xfc, 0x1f, 0x44, 0x08,
   0x84, 0x04, 0x04, 0x03, 0x84, 0x04, 0x44, 0x08, 0xfe, 0x7f, 0x04, 0x80,
   0x00, 0xe0, 0x00, 0x00, 0x10, 0x04, 0x21, 0x04, 0x62, 0xfe, 0x06, 0x01,
   0x80, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0x04, 0x20, 0xfc, 0x3f,
   0x04, 0x20, 0x04, 0x20, 0x06, 0x20, 0x04, 0x30, 0x00, 0x20, 0x00, 0x00,
   0x10, 0x04, 0x62, 0x04, 0x04, 0xfe, 0x8c, 0x01, 0x60, 0x02, 0x02, 0x02,
   0x92, 0x06, 0x92, 0x1a, 0x92, 0x02, 0x92, 0x42, 0x92, 0x82, 0x92, 0x7f,
   0xff, 0x02, 0x02, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x82, 0x98,
   0x92, 0x80, 0x92, 0x7c, 0xfe, 0xc0, 0x93, 0xa0, 0x92, 0xa1, 0x00, 0x96,
   0x92, 0x88, 0x92, 0x84, 0xfe, 0x82, 0x92, 0x80, 0xd3, 0xe0, 0x82, 0x08,
   0x00, 0x30, 0x00, 0x00, 0x24, 0x08, 0x24, 0x06, 0xa4, 0x01, 0xfe, 0xff,
   0x23, 0x81, 0x22, 0x42, 0x80, 0x20, 0x70, 0x18, 0x00, 0x06, 0xff, 0x01,
   0x00, 0x06, 0x40, 0x08, 0x20, 0x30, 0x30, 0x60, 0x00, 0x20, 0x00, 0x00,
   0x04, 0x04, 0x04, 0x02, 0x04, 0x01, 0xc4, 0xff, 0x3f, 0x00, 0x04, 0x20,
   0x04, 0x10, 0x04, 0x08, 0xe4, 0x3f, 0x04, 0x42, 0x1f, 0x41, 0x84, 0x40,
   0xc4, 0x40, 0x06, 0x40, 0x04, 0x70, 0x00, 0x00, 0x04, 0x10, 0x04, 0x11,
   0x14, 0x49, 0xa4, 0x84, 0x44, 0x42, 0xaf, 0x3f, 0x04, 0x80, 0x04, 0x42,
   0xc4, 0x31, 0x0f, 0x0c, 0xf4, 0x03, 0x04, 0x1c, 0x04, 0x61, 0xc6, 0xc0,
   0x04, 0x40, 0x00, 0x00, 0x40, 0x00, 0x42, 0x40, 0x44, 0x20, 0xcc, 0x1f,
   0x80, 0x20, 0x88, 0x40, 0x89, 0xa0, 0x8a, 0x90, 0x8c, 0x8c, 0xf8, 0x83,
   0x8c, 0x88, 0x8a, 0x90, 0x89, 0xb0, 0xc8, 0xc0, 0x80, 0x40, 0x00, 0x00,
   0x00, 0x00, 0xfe, 0xff, 0x02, 0x08, 0x22, 0x10, 0xda, 0x08, 0x06, 0x07,
   0x00, 0x00, 0xfe, 0xff, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40, 0x82, 0x40,
   0x82, 0x40, 0xff, 0xff, 0x02, 0x00, 0x00, 0x00
};

在程序中调用时先判断cfont_source_encoding是"GB2312"还是"UTF-8". 如果是前者, 则依次在cfont_icodes_gb2312[]查找需要显示的汉字的内码, 找到序号后从cfont_mask[]里读取相应的字模, 之后源字符串指针+=2. 如果是"UTF-8"则在cfont_icodes_utf8[]里查找, 之后源字符串指针要+=3.

把这个py文件加入到工程中, 编译前先执行, 这样就实现了修改cstring.c里的汉字字符串常量之后自动更新cfont.c里的字库.
每个汉字需要32+2+4=38字节的存储空间. 如果不需要自动判断源程序的编码, 还可以去掉cfont_icodes_gb2312和cfont_icodes_utf8其中之一, 可以再节约几个字节.

这里没有把内码和字模按内码排序,查找时需要逐个比较, 效率比较低. 如果需要显示的汉字稍微多一些, 可以把内码和字模按其中一种内码排序, 读取时可以用二分查找, 速度就快多了. 不过不知道GB2312和UTF-8的汉字排列顺序是不是一致, 如果不一致的话就不容易实现自动兼容两者了.

如果需要显示的汉字很多... 还是外挂一片spi flash把整个字库存进去比较好. ps. GT23/GT30系列的字库IC, 其实就是一片GD25Q16.

6623A 发表于 2018-2-16 01:08:43

新年第一个技术帖

ourdemo 发表于 2018-2-16 01:25:35

向你致敬{:lol:}新年快乐，祝大家新春快乐！财运旺旺!

我是一个大白菜 发表于 2018-2-16 09:18:45

新年快乐，祝大家万事如意，财运连连！

huangqi412 发表于 2018-2-16 10:38:21

。。。凌晨写帖

duxingkei 发表于 2018-2-16 11:22:28

新年快乐，我也干过类似的事情{:lol:}

cu_ice 发表于 2018-2-16 11:38:44

为新年都在做技术工作的点个赞，我今天闲时也在看PDF{:sweat:}

wei669 发表于 2018-2-16 11:56:17

佩服楼主

Excellence 发表于 2018-2-16 18:44:36

新年快乐！新帖子。

gzhua20088ssj 发表于 2018-2-16 18:55:05

新年快乐！取模软件类的吧

亲爱的混蛋 发表于 2018-2-18 18:10:48

感谢楼主开源

页: [1]

amobbs.com 阿莫电子技术论坛's Archiver

写了个自动制作小字库的程序