chinese-independent-blogs/formatter.py

27 lines
650 B
Python
Raw Normal View History

2019-12-05 09:51:46 +08:00
"""Normalize the formatting of ``blogs-original.csv`` in place.

The header line is kept verbatim. Every following row is split on commas,
each field is stripped, and the fourth field (a semicolon-separated tag list)
is normalized; the row is then re-joined with ``', '``. Blank rows and rows
that do not have exactly four comma-separated fields are dropped.
"""

_CSV_PATH = 'blogs-original.csv'

# Full-width semicolon (U+FF1B). Written as an escape so the character cannot
# be lost to an encoding round-trip. The previous code searched for the empty
# string '', which makes str.replace insert ';' between every character and
# therefore split each tag into single letters; the full-width semicolon is
# presumably what was intended there.
_FULLWIDTH_SEMICOLON = '\uff1b'


def normalize_tags(raw_tags):
    """Return *raw_tags* as a clean ``'; '``-separated tag list.

    Full-width semicolons are treated as separators, surrounding whitespace
    is removed from every tag, and empty tags are discarded.
    """
    unified = raw_tags.strip().replace(_FULLWIDTH_SEMICOLON, ';')
    tags = (tag.strip() for tag in unified.split(';'))
    return '; '.join(tag for tag in tags if tag)


def format_content(file_content):
    """Return the normalized text for the CSV text *file_content*.

    The first line is copied unchanged. The result always ends with a
    newline.
    """
    lines = file_content.split('\n')
    formatted = [lines[0]]
    for line in lines[1:]:
        line = line.strip()
        if not line:
            continue
        parts = [part.strip() for part in line.split(',')]
        # Rows with a different field count (e.g. a comma inside a field)
        # are dropped, exactly as before.
        if len(parts) != 4:
            continue
        if parts[3]:
            parts[3] = normalize_tags(parts[3])
        formatted.append(', '.join(parts))
    return '\n'.join(formatted) + '\n'


def main():
    """Rewrite ``blogs-original.csv`` with its normalized content."""
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(_CSV_PATH, 'r', encoding='utf-8') as f:
        file_content = f.read()
    content = format_content(file_content)
    with open(_CSV_PATH, 'w', encoding='utf-8') as f:
        f.write(content)


if __name__ == '__main__':
    main()