Implement --write-pages option (#736)
* Implement --write-pages option
* Fix long lines
* Fix file mode to binary
* Fix pattern for Windows compatibility
This commit is contained in:
@@ -96,6 +96,17 @@ class Extractor():
|
||||
(400 <= code < 429 or 431 <= code < 500):
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
|
||||
if config.get((), "write_pages", False):
|
||||
# Write the response content to a .dump file
|
||||
# in the current directory.
|
||||
# The file name is derived from the response
|
||||
# url, replacing special characters with "_"
|
||||
r = re.compile(r"[\\\\|/<>:\"?*&=#]+")
|
||||
outfilename = r.sub('_', response.url) + '.dump'
|
||||
with open(outfilename, 'wb') as outfile:
|
||||
outfile.write(response.content)
|
||||
|
||||
return response
|
||||
if notfound and code == 404:
|
||||
raise exception.NotFoundError(notfound)
|
||||
|
||||
@@ -173,6 +173,12 @@ def build_parser():
|
||||
help=("Write URLs, which get emitted by other extractors but cannot "
|
||||
"be handled, to FILE"),
|
||||
)
|
||||
output.add_argument(
|
||||
"--write-pages",
|
||||
dest="write_pages", nargs=0, action=ConfigConstAction, const=True,
|
||||
help=("Write downloaded intermediary pages to files "
|
||||
"in the current directory to debug problems"),
|
||||
)
|
||||
|
||||
downloader = parser.add_argument_group("Downloader Options")
|
||||
downloader.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user