1
1
#!/usr/bin/env python
2
2
3
3
import argparse
4
+ import datetime
4
5
import logging
5
6
import os
6
7
import re
7
8
import subprocess
8
9
import sys
10
+ import tempfile
9
11
import time
10
12
from collections import defaultdict
11
13
from html import unescape
@@ -416,10 +418,12 @@ def tumblr2fields(api_key, blogname):
416
418
slug = post .get ('slug' ) or slugify (title , regex_subs = subs )
417
419
tags = post .get ('tags' )
418
420
timestamp = post .get ('timestamp' )
419
- date = SafeDatetime .fromtimestamp (int (timestamp )).strftime (
420
- "%Y-%m-%d %H:%M:%S" )
421
- slug = SafeDatetime .fromtimestamp (int (timestamp )).strftime (
422
- "%Y-%m-%d-" ) + slug
421
+ date = SafeDatetime .fromtimestamp (
422
+ int (timestamp ), tz = datetime .timezone .utc
423
+ ).strftime ("%Y-%m-%d %H:%M:%S%z" )
424
+ slug = SafeDatetime .fromtimestamp (
425
+ int (timestamp ), tz = datetime .timezone .utc
426
+ ).strftime ("%Y-%m-%d-" ) + slug
423
427
format = post .get ('format' )
424
428
content = post .get ('body' )
425
429
type = post .get ('type' )
@@ -782,9 +786,8 @@ def fields2pelican(
782
786
print (out_filename )
783
787
784
788
if in_markup in ('html' , 'wp-html' ):
785
- html_filename = os .path .join (output_path , filename + '.html' )
786
-
787
- with open (html_filename , 'w' , encoding = 'utf-8' ) as fp :
789
+ with tempfile .TemporaryDirectory () as tmpdir :
790
+ html_filename = os .path .join (tmpdir , 'pandoc-input.html' )
788
791
# Replace newlines with paragraphs wrapped with <p> so
789
792
# HTML is valid before conversion
790
793
if in_markup == 'wp-html' :
@@ -793,40 +796,38 @@ def fields2pelican(
793
796
paragraphs = content .splitlines ()
794
797
paragraphs = ['<p>{}</p>' .format (p ) for p in paragraphs ]
795
798
new_content = '' .join (paragraphs )
796
-
797
- fp .write (new_content )
798
-
799
- if pandoc_version < (2 ,):
800
- parse_raw = '--parse-raw' if not strip_raw else ''
801
- wrap_none = '--wrap=none' \
802
- if pandoc_version >= (1 , 16 ) else '--no-wrap'
803
- cmd = ('pandoc --normalize {0} --from=html'
804
- ' --to={1} {2} -o "{3}" "{4}"' )
805
- cmd = cmd .format (parse_raw ,
806
- out_markup if out_markup != 'markdown' else "gfm" ,
807
- wrap_none ,
808
- out_filename , html_filename )
809
- else :
810
- from_arg = '-f html+raw_html' if not strip_raw else '-f html'
811
- cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"' )
812
- cmd = cmd .format (from_arg ,
813
- out_markup if out_markup != 'markdown' else "gfm" ,
814
- out_filename , html_filename )
815
-
816
- try :
817
- rc = subprocess .call (cmd , shell = True )
818
- if rc < 0 :
819
- error = 'Child was terminated by signal %d' % - rc
820
- exit (error )
821
-
822
- elif rc > 0 :
823
- error = 'Please, check your Pandoc installation.'
799
+ with open (html_filename , 'w' , encoding = 'utf-8' ) as fp :
800
+ fp .write (new_content )
801
+
802
+ if pandoc_version < (2 ,):
803
+ parse_raw = '--parse-raw' if not strip_raw else ''
804
+ wrap_none = '--wrap=none' \
805
+ if pandoc_version >= (1 , 16 ) else '--no-wrap'
806
+ cmd = ('pandoc --normalize {0} --from=html'
807
+ ' --to={1} {2} -o "{3}" "{4}"' )
808
+ cmd = cmd .format (parse_raw ,
809
+ out_markup if out_markup != 'markdown' else "gfm" ,
810
+ wrap_none ,
811
+ out_filename , html_filename )
812
+ else :
813
+ from_arg = '-f html+raw_html' if not strip_raw else '-f html'
814
+ cmd = ('pandoc {0} --to={1}-smart --wrap=none -o "{2}" "{3}"' )
815
+ cmd = cmd .format (from_arg ,
816
+ out_markup if out_markup != 'markdown' else "gfm" ,
817
+ out_filename , html_filename )
818
+
819
+ try :
820
+ rc = subprocess .call (cmd , shell = True )
821
+ if rc < 0 :
822
+ error = 'Child was terminated by signal %d' % - rc
823
+ exit (error )
824
+
825
+ elif rc > 0 :
826
+ error = 'Please, check your Pandoc installation.'
827
+ exit (error )
828
+ except OSError as e :
829
+ error = 'Pandoc execution failed: %s' % e
824
830
exit (error )
825
- except OSError as e :
826
- error = 'Pandoc execution failed: %s' % e
827
- exit (error )
828
-
829
- os .remove (html_filename )
830
831
831
832
with open (out_filename , encoding = 'utf-8' ) as fs :
832
833
content = fs .read ()
0 commit comments