Implement --write-pages option (#736)
* Implement --write-pages option
* Fix long lines
* Fix file mode to binary
* Fix pattern for Windows compatibility
This commit is contained in:
@@ -96,6 +96,17 @@ class Extractor():
|
|||||||
(400 <= code < 429 or 431 <= code < 500):
|
(400 <= code < 429 or 431 <= code < 500):
|
||||||
if encoding:
|
if encoding:
|
||||||
response.encoding = encoding
|
response.encoding = encoding
|
||||||
|
|
||||||
|
if config.get((), "write_pages", False):
|
||||||
|
# Write the response content to a .dump file
|
||||||
|
# in the current directory.
|
||||||
|
# The file name is derived from the response
|
||||||
|
# url, replacing special characters with "_"
|
||||||
|
r = re.compile(r"[\\\\|/<>:\"?*&=#]+")
|
||||||
|
outfilename = r.sub('_', response.url) + '.dump'
|
||||||
|
with open(outfilename, 'wb') as outfile:
|
||||||
|
outfile.write(response.content)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
if notfound and code == 404:
|
if notfound and code == 404:
|
||||||
raise exception.NotFoundError(notfound)
|
raise exception.NotFoundError(notfound)
|
||||||
|
|||||||
@@ -173,6 +173,12 @@ def build_parser():
|
|||||||
help=("Write URLs, which get emitted by other extractors but cannot "
|
help=("Write URLs, which get emitted by other extractors but cannot "
|
||||||
"be handled, to FILE"),
|
"be handled, to FILE"),
|
||||||
)
|
)
|
||||||
|
output.add_argument(
|
||||||
|
"--write-pages",
|
||||||
|
dest="write_pages", nargs=0, action=ConfigConstAction, const=True,
|
||||||
|
help=("Write downloaded intermediary pages to files "
|
||||||
|
"in the current directory to debug problems"),
|
||||||
|
)
|
||||||
|
|
||||||
downloader = parser.add_argument_group("Downloader Options")
|
downloader = parser.add_argument_group("Downloader Options")
|
||||||
downloader.add_argument(
|
downloader.add_argument(
|
||||||
|
|||||||
Reference in New Issue
Block a user