Skip to content

Commit 6ed7395

Browse files
authored
Merge pull request #3221 from avaris/importer-fixes
2 parents a20bbb5 + 11c13ce commit 6ed7395

File tree

2 files changed

+47
-52
lines changed

2 files changed

+47
-52
lines changed

pelican/tests/test_importer.py

+6 -12
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,9 @@
1-
import datetime
21
import locale
32
import os
43
import re
54
from posixpath import join as posix_join
65
from unittest.mock import patch
76

8-
import dateutil.tz
9-
107
from pelican.settings import DEFAULT_CONFIG
118
from pelican.tests.support import (mute, skipIfNoExecutable, temporary_folder,
129
unittest)
@@ -46,12 +43,9 @@ class TestWithOsDefaults(unittest.TestCase):
4643
def setUp(self):
4744
self.old_locale = locale.setlocale(locale.LC_ALL)
4845
locale.setlocale(locale.LC_ALL, 'C')
49-
self.old_timezone = datetime.datetime.now(dateutil.tz.tzlocal()).tzname()
50-
os.environ['TZ'] = 'UTC'
5146

5247
def tearDown(self):
5348
locale.setlocale(locale.LC_ALL, self.old_locale)
54-
os.environ['TZ'] = self.old_timezone
5549

5650

5751
@skipIfNoExecutable(['pandoc', '--version'])
@@ -502,7 +496,7 @@ def get_posts(api_key, blogname, offset=0):
502496
{
503497
"type": "photo",
504498
"blog_name": "testy",
505-
"date": "2019-11-07 21:26:40 GMT",
499+
"date": "2019-11-07 21:26:40 UTC",
506500
"timestamp": 1573162000,
507501
"format": "html",
508502
"slug": "a-slug",
@@ -528,7 +522,7 @@ def get_posts(api_key, blogname, offset=0):
528522
self.assertEqual(
529523
[('Photo',
530524
'<img alt="" src="https://..fccdc2360ba7182a.jpg" />\n',
531-
'2019-11-07-a-slug', '2019-11-07 21:26:40', 'testy', ['photo'],
525+
'2019-11-07-a-slug', '2019-11-07 21:26:40+0000', 'testy', ['photo'],
532526
['economics'], 'published', 'article', 'html')],
533527
posts,
534528
posts)
@@ -544,7 +538,7 @@ def get_posts(api_key, blogname, offset=0):
544538
"type": "video",
545539
"blog_name": "testy",
546540
"slug": "the-slug",
547-
"date": "2017-07-07 20:31:41 GMT",
541+
"date": "2017-07-07 20:31:41 UTC",
548542
"timestamp": 1499459501,
549543
"state": "published",
550544
"format": "html",
@@ -583,7 +577,7 @@ def get_posts(api_key, blogname, offset=0):
583577
'<iframe>2</iframe>\n'
584578
'<iframe>3</iframe>\n',
585579
'2017-07-07-the-slug',
586-
'2017-07-07 20:31:41', 'testy', ['video'], [], 'published',
580+
'2017-07-07 20:31:41+0000', 'testy', ['video'], [], 'published',
587581
'article', 'html')],
588582
posts,
589583
posts)
@@ -599,7 +593,7 @@ def get_posts(api_key, blogname, offset=0):
599593
"type": "video",
600594
"blog_name": "testy",
601595
"slug": "the-slug",
602-
"date": "2016-08-14 16:37:35 GMT",
596+
"date": "2016-08-14 16:37:35 UTC",
603597
"timestamp": 1471192655,
604598
"state": "published",
605599
"format": "html",
@@ -638,7 +632,7 @@ def get_posts(api_key, blogname, offset=0):
638632
'v=b">via</a></p>\n<p>Caption</p>'
639633
'<p>(This video isn\'t available anymore.)</p>\n',
640634
'2016-08-14-the-slug',
641-
'2016-08-14 16:37:35', 'testy', ['video'], ['interviews'],
635+
'2016-08-14 16:37:35+0000', 'testy', ['video'], ['interviews'],
642636
'published', 'article', 'html')],
643637
posts,
644638
posts)

pelican/tools/pelican_import.py

+41 -40
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
#!/usr/bin/env python
22

33
import argparse
4+
import datetime
45
import logging
56
import os
67
import re
78
import subprocess
89
import sys
10+
import tempfile
911
import time
1012
from collections import defaultdict
1113
from html import unescape
@@ -416,10 +418,12 @@ def tumblr2fields(api_key, blogname):
416418
slug = post.get('slug') or slugify(title, regex_subs=subs)
417419
tags = post.get('tags')
418420
timestamp = post.get('timestamp')
419-
date = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
420-
"%Y-%m-%d %H:%M:%S")
421-
slug = SafeDatetime.fromtimestamp(int(timestamp)).strftime(
422-
"%Y-%m-%d-") + slug
421+
date = SafeDatetime.fromtimestamp(
422+
int(timestamp), tz=datetime.timezone.utc
423+
).strftime("%Y-%m-%d %H:%M:%S%z")
424+
slug = SafeDatetime.fromtimestamp(
425+
int(timestamp), tz=datetime.timezone.utc
426+
).strftime("%Y-%m-%d-") + slug
423427
format = post.get('format')
424428
content = post.get('body')
425429
type = post.get('type')
@@ -782,9 +786,8 @@ def fields2pelican(
782786
print(out_filename)
783787

784788
if in_markup in ('html', 'wp-html'):
785-
html_filename = os.path.join(output_path, filename + '.html')
786-
787-
with open(html_filename, 'w', encoding='utf-8') as fp:
789+
with tempfile.TemporaryDirectory() as tmpdir:
790+
html_filename = os.path.join(tmpdir, 'pandoc-input.html')
788791
# Replace newlines with paragraphs wrapped with <p> so
789792
# HTML is valid before conversion
790793
if in_markup == 'wp-html':
@@ -793,40 +796,38 @@ def fields2pelican(
793796
paragraphs = content.splitlines()
794797
paragraphs = ['<p>{}</p>'.format(p) for p in paragraphs]
795798
new_content = ''.join(paragraphs)
796-
797-
fp.write(new_content)
798-
799-
if pandoc_version < (2,):
800-
parse_raw = '--parse-raw' if not strip_raw else ''
801-
wrap_none = '--wrap=none' \
802-
if pandoc_version >= (1, 16) else '--no-wrap'
803-
cmd = ('pandoc --normalize {0} --from=html'
804-
' --to={1} {2} -o "{3}" "{4}"')
805-
cmd = cmd.format(parse_raw,
806-
out_markup if out_markup != 'markdown' else "gfm",
807-
wrap_none,
808-
out_filename, html_filename)
809-
else:
810-
from_arg = '-f html+raw_html' if not strip_raw else '-f html'
811-
cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"')
812-
cmd = cmd.format(from_arg,
813-
out_markup if out_markup != 'markdown' else "gfm",
814-
out_filename, html_filename)
815-
816-
try:
817-
rc = subprocess.call(cmd, shell=True)
818-
if rc < 0:
819-
error = 'Child was terminated by signal %d' % -rc
820-
exit(error)
821-
822-
elif rc > 0:
823-
error = 'Please, check your Pandoc installation.'
799+
with open(html_filename, 'w', encoding='utf-8') as fp:
800+
fp.write(new_content)
801+
802+
if pandoc_version < (2,):
803+
parse_raw = '--parse-raw' if not strip_raw else ''
804+
wrap_none = '--wrap=none' \
805+
if pandoc_version >= (1, 16) else '--no-wrap'
806+
cmd = ('pandoc --normalize {0} --from=html'
807+
' --to={1} {2} -o "{3}" "{4}"')
808+
cmd = cmd.format(parse_raw,
809+
out_markup if out_markup != 'markdown' else "gfm",
810+
wrap_none,
811+
out_filename, html_filename)
812+
else:
813+
from_arg = '-f html+raw_html' if not strip_raw else '-f html'
814+
cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"')
815+
cmd = cmd.format(from_arg,
816+
out_markup if out_markup != 'markdown' else "gfm",
817+
out_filename, html_filename)
818+
819+
try:
820+
rc = subprocess.call(cmd, shell=True)
821+
if rc < 0:
822+
error = 'Child was terminated by signal %d' % -rc
823+
exit(error)
824+
825+
elif rc > 0:
826+
error = 'Please, check your Pandoc installation.'
827+
exit(error)
828+
except OSError as e:
829+
error = 'Pandoc execution failed: %s' % e
824830
exit(error)
825-
except OSError as e:
826-
error = 'Pandoc execution failed: %s' % e
827-
exit(error)
828-
829-
os.remove(html_filename)
830831

831832
with open(out_filename, encoding='utf-8') as fs:
832833
content = fs.read()

0 commit comments

Comments
 (0)