|
本帖最后由 henices 于 2018-1-5 17:32 编辑
简单处理了一下:没有处理单词重复出现的问题,但能够应付“/”多次出现的情况。
- #! /usr/bin/python
- import sys
- import logging
def find_node(s):
    """Build an edge-less graph whose nodes are the words of ``s``.

    ``s`` is split on single spaces into positions, and every position is
    split on ``/`` into alternative words.  Each word becomes a key mapped
    to a fresh empty list (its future successor list).

    Returns the dict, or ``None`` as soon as any word occurs a second time
    (repeated words are not supported by this script).
    """
    graph = {}
    for position in s.split(' '):
        for word in position.split('/'):
            if word in graph:
                # Duplicate word: signal "cannot handle this line".
                return None
            graph[word] = []
    return graph
-
def find_edge(s, graph):
    """Connect every word of each position to every word of the next one.

    ``s`` is split on single spaces into positions and each position on
    ``/`` into alternative words.  For each pair of adjacent positions,
    every word of the later position is appended to the successor list of
    every word of the earlier position.

    ``graph`` is mutated in place and also returned.  It must already have
    a list for every word of every position except the last; a missing
    word raises ``KeyError``.
    """
    positions = [token.split('/') for token in s.split(' ')]
    # zip() over the list and its one-step shift walks adjacent pairs and
    # yields nothing for a single-position line.
    for current, following in zip(positions, positions[1:]):
        # A token without '/' splits into a one-element list, so one loop
        # covers both the plain and the alternative ("x/y") case.
        for successor in following:
            for word in current:
                graph[word].append(successor)
    return graph
def find_path(start, graph, path=None, paths=None):
    """Collect every path from ``start`` down to a node without successors.

    Parameters:
        start: node to begin from.
        graph: dict mapping each node to the list of its successors.
        path:  nodes already walked to reach ``start`` (not mutated).
        paths: list that receives each complete path as a list of nodes.
               Pass your own list to get the results back; when omitted a
               throw-away list is used.

    Returns ``path + [start]`` when ``start`` itself has no successors.
    Returns ``None`` when ``start`` is not in ``graph`` or after its
    successors have been explored (the results are then in ``paths``).

    The graph is assumed to be acyclic; a cycle would recurse until the
    interpreter's recursion limit is hit.
    """
    # Fresh containers per call: list defaults in the signature would be
    # shared between calls and silently accumulate results.
    if paths is None:
        paths = []
    path = ([] if path is None else path) + [start]

    if start not in graph:
        return None
    successors = graph[start]
    if not successors:
        return path

    for node in successors:
        complete = find_path(node, graph, path, paths)
        # Only a leaf returns a path; inner nodes return None because they
        # have already recorded their own leaves in ``paths``.
        if complete:
            paths.append(complete)
    return None
# Read phrase patterns (one per line) from the file named by the first
# command-line argument and write one entry per expanded phrase to
# 'lnk.mdict'.  Each entry is the phrase, an '@@@LINK=<original line>' line
# and a '</>' terminator -- presumably MDict source-format redirects, going
# by the output file name; confirm against the consumer of this file.
#
# Both files are managed by ``with`` so the output is flushed and closed
# even if processing a line raises.
with open('lnk.mdict', 'w') as out_fd, open(sys.argv[1], 'r') as f:
    for line in f:
        line_ = line.strip()
        g = find_node(line_)
        # Reject lines with a repeated word (find_node returned None) and
        # lines consisting of a single token, which have nothing to expand.
        if not g or len(line_.split(' ')) == 1:
            # Single pre-formatted argument keeps the output identical
            # under both the print statement and the print function.
            print('[ERR]:  %s' % line_)
            continue
        logging.debug('[OK]: %s', line_)
        g = find_edge(line_, g)
        # Every alternative of the first position starts its own set of
        # expanded phrases.
        for first_word in line_.split(' ')[0].split('/'):
            paths = []
            find_path(first_word, g, [], paths)
            logging.debug(paths)
            for path in paths:
                out_fd.write(' '.join(path) + "\n")
                out_fd.write('@@@LINK=%s\n' % line_)
                out_fd.write('</>\n')
复制代码
以下内容需要积分高于 200 才可浏览
py.zip
(718 Bytes, 下载次数: 42)
|
评分
-
2
查看全部评分
-
|