最近想从 hexo 迁移到 typecho,但是 typecho 没有一键导入 md 文章的功能,手动导入又很烦,怎么办呢?于是我就想用 Python 写一个自动解析 md 并导入 typecho 的脚本。
于是就开始编码了。首先用正则表达式提取文章头部信息:hexo 的 md 头部(front matter)是 YAML 格式的,只要解析 `title`、`date`、`tags`、`categories` 这四个字段就行了。其中难解析的是 tags 和 categories:它们可能不止一个值,也可能一个都没有。
对于这两个字段,我采用两层判断加捕获异常的方式解析,tags 和 categories 的写法略有不同。实现如下:
python
def _front_matter_values(key, text):
    """Return the value(s) stored under a front-matter key.

    Returns the inline string for ``key: value``, a list of items for a YAML
    block list (``key:`` followed by ``- item`` lines), and ``''`` when the
    key is missing or has no value.
    """
    header = re.search(r'^' + re.escape(key) + r':[ ]*(.*)$', text, re.M)
    if header is None:
        return ''
    inline = header.group(1).strip()
    if inline:
        return inline
    values = []
    # Collect the "- item" lines that directly follow the key. At least one
    # blank is required after the dash so the closing "---" fence is never
    # mistaken for a list item.
    for line in text[header.end():].splitlines()[1:]:
        item = re.match(r'[ \t]*-[ \t]+(.+)', line)
        if item is None:
            break
        values.append(item.group(1).strip())
    return values if values else ''


# Title: everything after "title: " up to the end of that line.
title = re.search(r'title: (.*?)\n', s).group(1)
# Date: parse "YYYY-mm-dd HH:MM:SS" (local time) into a Unix timestamp.
date = re.search(r'date: (.*?)\n', s).group(1).strip()
date = int(time.mktime(time.strptime(date, "%Y-%m-%d %H:%M:%S")))
# Tags / categories: inline string, list of names, or '' when absent.
tags = _front_matter_values('tags', s)
categories = _front_matter_values('categories', s)
# Body: everything after the closing "---" fence and the blank line after it.
post = re.search(r'---\n\n(.*?)$', s, re.S).group(1)
COPY
这里的踩坑点主要是正则中的 `\s`:它不仅匹配空格,还会匹配换行符等其他空白字符,所以我改用 `[ ]*` 只匹配零个或多个空格。如果匹配不到,`re.search` 会返回 None,再调用 `.group(1)` 就会抛出 AttributeError 异常;捕获该异常后让 tags(categories)= '' 就行了。
然后是插表方面:连接数据库之后,批量解析文件,将字段插入表中。这里需要插入三个表,分别是 `typecho_metas`、`typecho_contents`、`typecho_relationships`:向 `typecho_contents` 中插入文章内容,向 `typecho_metas` 中插入分类和标签,在 `typecho_relationships` 中建立文章与分类、标签的关系。
这里需要为 `typecho_metas` 表中的 `name` 和 `type` 两个字段建立联合唯一索引(UNIQUE KEY),避免重复插入;重复插入时改为执行更新操作,使得 `count = count + 1`。
python
def insert_post(self, file):
    """Parse one Hexo Markdown file and insert it into ``typecho_contents``.

    The parsed tuple is also stored on ``self.data`` so later steps can read
    the same post.

    Args:
        file: File name of the post inside the ``_posts`` directory.

    On any error during the insert the exception is printed and the
    transaction is rolled back; nothing is re-raised.
    """
    # Parsed layout: (title, date, tags, categories, text)
    data = self.parse_hexo_md(file)
    self.data = data
    db = self.db
    cur = self.cur
    # The file's last-modification time becomes the post's "modified" stamp.
    modified = int(os.stat(os.path.join('_posts', file)).st_mtime)
    # The slug is the file name without its extension.
    slug = os.path.splitext(file)[0]
    sql = '''
    INSERT INTO typecho_contents(title,slug, created,modified, text,type,status,allowComment,allowFeed,allowPing,authorId) VALUES (%s,%s,%s,%s,%s,'post','publish',1,1,1,1)
    '''

    try:
        cur.execute(sql, (data[0], slug, data[1], modified, data[4]))
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()
17
def insert_tags_category(self):
    """Insert the current post's tags and categories into ``typecho_metas``.

    Reads ``self.data`` laid out as (title, date, tags, categories, text).
    Tags and categories may each be a list of names, a single name, or an
    empty value (nothing is inserted for an empty value).

    Each group is committed on success; on ``pymysql.DatabaseError`` the
    error is printed and the transaction is rolled back.
    """
    data = self.data
    cur = self.cur
    # ON DUPLICATE KEY UPDATE only fires when (name, type) is unique, e.g.:
    #   ALTER TABLE typecho_metas ADD UNIQUE KEY(name,type)
    sql = '''
    INSERT INTO typecho_metas(name,slug,type,count) VALUES (%s,%s,%s,1) ON DUPLICATE KEY UPDATE count = count + 1
    '''
    for meta_type, value in (('tag', data[2]), ('category', data[3])):
        # Normalise "list of names", "single name" and "empty" to one list.
        if isinstance(value, list):
            names = value
        else:
            names = [value] if value else []
        if not names:
            continue
        try:
            for name in names:
                # The name doubles as the slug.
                cur.execute(sql, (name, name, meta_type))
            self.db.commit()
        except pymysql.DatabaseError as e:
            print(e)
            self.db.rollback()
57
def relationships(self):
    """Link the current post to its tags and categories.

    Reads ``self.data`` laid out as (title, date, tags, categories, text),
    looks up the post's ``cid`` by title and each meta's ``mid`` by
    (name, type), and inserts one ``typecho_relationships`` row per pair.

    A missing post aborts the whole call; a missing tag or category is
    reported and skipped, so the remaining ones are still linked. On
    ``pymysql.DatabaseError`` the error is printed and the transaction is
    rolled back.
    """
    db = self.db
    cur = self.cur
    data = self.data
    print('tag = ', data[2], 'type = ', type(data[2]), 'cet = ', data[3])
    select_mid = '''
    SELECT mid FROM typecho_metas WHERE name = %s AND type = %s
    '''
    select_cid = '''
    SELECT cid FROM typecho_contents WHERE title = %s
    '''
    add_relationship = '''
    INSERT INTO typecho_relationships(cid,mid) VALUES (%s,%s)
    '''

    try:
        cur.execute(select_cid, (data[0],))
        row = cur.fetchone()
        if row is None:
            # Without a content row there is nothing to attach metas to.
            print('不能建立关系', data[0])
            return
        cid = row[0]

        for meta_type, value in (('tag', data[2]), ('category', data[3])):
            # Normalise "list of names", "single name" and "empty" to a list
            # so a post without tags still gets its categories linked.
            if isinstance(value, list):
                names = value
            else:
                names = [value] if value else []
            for name in names:
                cur.execute(select_mid, (name, meta_type))
                row = cur.fetchone()
                if row is None:
                    print('不能建立关系', name)
                    continue
                cur.execute(add_relationship, (cid, row[0]))
        # NOTE(review): nothing is committed here; presumably the caller (or
        # the next insert's commit) persists these rows - confirm.
    except pymysql.DatabaseError as e:
        print(e)
        db.rollback()
COPY
全部源码见:
https://github.com/Innei/move-hexo-to-typecho
欢迎交流