Commit a078202c1b5823f1c13a4c559619158054029e73

Authored by Jay Berkenbilt
Committed by GitHub
2 parents 7b3134ef 4aac7c32

Merge pull request #752 from jberkenbilt/report-mem-usage

Report mem usage
... ... @@ -4,10 +4,12 @@ Next
4 4  
5 5 Before Release:
6 6  
7   -* Review in order #729, #726, #747
  7 +* Review in order #726
8 8 * Make ./performance_check usable by other people by having published
9 9 files to use for testing.
10 10 * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
  11 +* Incorporate --report-mem-usage into performance testing. Make sure
  12 + there is some test somewhere that exercises the millions of nulls case.
11 13 * Evaluate issues tagged with `next`
12 14 * Stay on top of https://github.com/pikepdf/pikepdf/pull/315
13 15  
... ...
cSpell.json
... ... @@ -131,6 +131,7 @@
131 131 "esize",
132 132 "eval",
133 133 "extlibdir",
  134 + "fclose",
134 135 "fdict",
135 136 "ffield",
136 137 "fghij",
... ... @@ -268,6 +269,7 @@
268 269 "maxdepth",
269 270 "maxobjectid",
270 271 "mdash",
  272 + "memstream",
271 273 "mindepth",
272 274 "mkdir",
273 275 "mkinstalldirs",
... ...
include/qpdf/QPDFJob.hh
... ... @@ -711,6 +711,7 @@ class QPDFJob
711 711 bool json_input;
712 712 bool json_output;
713 713 std::string update_from_json;
  714 + bool report_mem_usage;
714 715 };
715 716 std::shared_ptr<Members> m;
716 717 };
... ...
include/qpdf/QUtil.hh
... ... @@ -525,7 +525,17 @@ namespace QUtil
525 525 wchar_t const* const argv[],
526 526 std::function<int(int, char const* const[])> realmain);
527 527 #endif // QPDF_NO_WCHAR_T
528   -}; // namespace QUtil
  528 +
  529 + // Try to return the maximum amount of memory allocated by the
  530 + // current process and its threads. Return 0 if unable to
  531 + // determine. This is Linux-specific and not implemented to be
  532 + // completely reliable. It is used during development for
  533 + // performance testing to detect changes that may significantly
  534 + // change memory usage. It is not recommended for use for other
  535 + // purposes.
  536 + QPDF_DLL
  537 + size_t get_max_memory_usage();
  538 +}; // namespace QUtil
529 539  
530 540 inline bool
531 541 QUtil::is_hex_digit(char ch)
... ...
include/qpdf/auto_job_c_main.hh
... ... @@ -33,6 +33,7 @@ QPDF_DLL Config* qdf();
33 33 QPDF_DLL Config* rawStreamData();
34 34 QPDF_DLL Config* recompressFlate();
35 35 QPDF_DLL Config* removePageLabels();
  36 +QPDF_DLL Config* reportMemUsage();
36 37 QPDF_DLL Config* requiresPassword();
37 38 QPDF_DLL Config* showEncryption();
38 39 QPDF_DLL Config* showEncryptionKey();
... ...
job.sums
... ... @@ -3,15 +3,15 @@ generate_auto_job 9abe2ec994fb98526f5e3c0c199ce2e61a868463cb522a5bc6e9730b655341
3 3 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
4 4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
5 5 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
6   -include/qpdf/auto_job_c_main.hh cdba1ae6ea5525a585d10a3dd95b7996d62b17de4211fe658b78d9d463b0f313
  6 +include/qpdf/auto_job_c_main.hh 493b9798f5ff8bbcb07c0238693554d77eefa4ae71ce1a0d466de94e3a7a3966
7 7 include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c18911614fe8e568ec
8 8 include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
9   -job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412
  9 +job.yml a6f22d425980ed960c77c0a4197f46924c14e7943358cd9f0b75811bb1c480ad
10 10 libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da
11   -libqpdf/qpdf/auto_job_help.hh 53306e4aef8aaca641c0087bc9e064ada1c44a94b826c0bcac7b4eb0c8c41fd5
12   -libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2
  11 +libqpdf/qpdf/auto_job_help.hh 3e9385a7e0dae993467647466fa30f30baa5968f9270c73ff4e664f5aa415dbe
  12 +libqpdf/qpdf/auto_job_init.hh ccb881733849dff5c05721f1aa5c35447cedd415e881c3fef6573901e45be056
13 13 libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
14   -libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268
15   -libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b
  14 +libqpdf/qpdf/auto_job_json_init.hh 7ac8f42fb39eda56144ab62b30152a56e9bb2224d0596eb826b7bc421a78d26b
  15 +libqpdf/qpdf/auto_job_schema.hh 17352791b09c3b8a8db766375cce31d70c98b67b44ecc398e2ac78984e34fe90
16 16 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
17   -manual/cli.rst 41ee93f23f46160fe9eaf7c99fd2ab3bd2e0f6792a341a35bdac1a41cb853ed5
  17 +manual/cli.rst e3fa48bb30c981df1f74d474887155cd6a46f9010b91cd1b7b57e582bf3bf877
... ...
... ... @@ -127,6 +127,7 @@ options:
127 127 - recompress-flate
128 128 - remove-page-labels
129 129 - replace-input
  130 + - report-mem-usage
130 131 - requires-password
131 132 - show-encryption
132 133 - show-encryption-key
... ... @@ -413,6 +414,7 @@ json:
413 414 Pages.password:
414 415 _range: "page range"
415 416 remove-page-labels:
  417 + report-mem-usage:
416 418 rotate:
417 419 overlay:
418 420 _file: "source file for overlay"
... ...
libqpdf/CMakeLists.txt
... ... @@ -376,6 +376,29 @@ int main(int argc, char* argv[]) {
376 376 endif()
377 377 endfunction()
378 378  
  379 +check_c_source_compiles(
  380 +"#include <malloc.h>
  381 +#include <stdio.h>
  382 +int main(int argc, char* argv[]) {
  383 + malloc_info(0, stdout);
  384 + return 0;
  385 +}"
  386 + HAVE_MALLOC_INFO)
  387 +
  388 +check_c_source_compiles(
  389 +"#include <stdio.h>
  390 +#include <stdlib.h>
  391 +int main(int argc, char* argv[]) {
  392 + char* buf;
  393 + size_t size;
  394 + FILE* f;
  395 + f = open_memstream(&buf, &size);
  396 + fclose(f);
  397 + free(buf);
  398 + return 0;
  399 +}"
  400 + HAVE_OPEN_MEMSTREAM)
  401 +
379 402 qpdf_check_ll_fmt("%lld" fmt_lld)
380 403 qpdf_check_ll_fmt("%I64d" fmt_i64d)
381 404 qpdf_check_ll_fmt("%I64lld" fmt_i64lld)
... ...
libqpdf/QPDFJob.cc
... ... @@ -417,7 +417,8 @@ QPDFJob::Members::Members() :
417 417 check_is_encrypted(false),
418 418 check_requires_password(false),
419 419 json_input(false),
420   - json_output(false)
  420 + json_output(false),
  421 + report_mem_usage(false)
421 422 {
422 423 }
423 424  
... ... @@ -625,6 +626,14 @@ QPDFJob::run()
625 626 << ": operation succeeded with warnings\n";
626 627 }
627 628 }
  629 + if (m->report_mem_usage) {
  630 + // Call get_max_memory_usage before generating output. When
  631 + // debugging, it's easier if print statements from
  632 + // get_max_memory_usage are not interleaved with the output.
  633 + auto mem_usage = QUtil::get_max_memory_usage();
  634 + *this->m->log->getWarn()
  635 + << "qpdf-max-memory-usage " << mem_usage << "\n";
  636 + }
628 637 }
629 638  
630 639 bool
... ...
libqpdf/QPDFJob_config.cc
... ... @@ -503,6 +503,13 @@ QPDFJob::Config::removePageLabels()
503 503 }
504 504  
505 505 QPDFJob::Config*
  506 +QPDFJob::Config::reportMemUsage()
  507 +{
  508 + o.m->report_mem_usage = true;
  509 + return this;
  510 +}
  511 +
  512 +QPDFJob::Config*
506 513 QPDFJob::Config::requiresPassword()
507 514 {
508 515 o.m->check_requires_password = true;
... ...
libqpdf/QUtil.cc
... ... @@ -37,6 +37,9 @@
37 37 # include <sys/stat.h>
38 38 # include <unistd.h>
39 39 #endif
  40 +#ifdef HAVE_MALLOC_INFO
  41 +# include <malloc.h>
  42 +#endif
40 43  
41 44 // First element is 24
42 45 static unsigned short pdf_doc_low_to_unicode[] = {
... ... @@ -1968,3 +1971,73 @@ QUtil::call_main_from_wmain(
1968 1971 }
1969 1972  
1970 1973 #endif // QPDF_NO_WCHAR_T
  1974 +
  1975 +size_t
  1976 +QUtil::get_max_memory_usage()
  1977 +{
  1978 +#if defined(HAVE_MALLOC_INFO) && defined(HAVE_OPEN_MEMSTREAM)
  1979 + static std::regex tag_re("<(/?\\w+)([^>]*?)>");
  1980 + static std::regex attr_re("(\\w+)=\"(.*?)\"");
  1981 +
  1982 + char* buf;
  1983 + size_t size;
  1984 + FILE* f = open_memstream(&buf, &size);
  1985 + if (f == nullptr) {
  1986 + return 0;
  1987 + }
  1988 + malloc_info(0, f);
  1989 + fclose(f);
  1990 + if (QUtil::get_env("QPDF_DEBUG_MEM_USAGE")) {
  1991 + fprintf(stderr, "%s", buf);
  1992 + }
  1993 +
  1994 + // Warning: this code uses regular expression to extract data from
  1995 + // an XML string. This is generally a bad idea, but we're going to
  1996 + // do it anyway because QUtil.hh warns against using this function
  1997 + // for other than development/testing, and if this function fails
  1998 + // to generate reasonable output during performance testing, it
  1999 + // will be noticed.
  2000 +
  2001 + // This is my best guess at how to interpret malloc_info. Anyway
  2002 + // it seems to provide useful information for detecting code
  2003 + // changes that drastically change memory usage.
  2004 + size_t result = 0;
  2005 + try {
  2006 + std::cregex_iterator m_begin(buf, buf + size, tag_re);
  2007 + std::cregex_iterator cr_end;
  2008 + std::sregex_iterator sr_end;
  2009 +
  2010 + int in_heap = 0;
  2011 + for (auto m = m_begin; m != cr_end; ++m) {
  2012 + std::string tag(m->str(1));
  2013 + if (tag == "heap") {
  2014 + ++in_heap;
  2015 + } else if (tag == "/heap") {
  2016 + --in_heap;
  2017 + } else if (in_heap == 0) {
  2018 + std::string rest = m->str(2);
  2019 + std::map<std::string, std::string> attrs;
  2020 + std::sregex_iterator a_begin(rest.begin(), rest.end(), attr_re);
  2021 + for (auto m2 = a_begin; m2 != sr_end; ++m2) {
  2022 + attrs[m2->str(1)] = m2->str(2);
  2023 + }
  2024 + if (tag == "total") {
  2025 + if (attrs.count("size") > 0) {
  2026 + result += QIntC::to_size(
  2027 + QUtil::string_to_ull(attrs["size"].c_str()));
  2028 + }
  2029 + } else if (tag == "system" && attrs["type"] == "max") {
  2030 + result += QIntC::to_size(
  2031 + QUtil::string_to_ull(attrs["size"].c_str()));
  2032 + }
  2033 + }
  2034 + }
  2035 + } catch (...) {
  2036 + // ignore -- just return 0
  2037 + }
  2038 + free(buf);
  2039 + return result;
  2040 +#else
  2041 + return 0;
  2042 +#endif
  2043 +}
... ...
libqpdf/qpdf/auto_job_help.hh
... ... @@ -883,6 +883,9 @@ for debugging qpdf.
883 883 ap.addOptionHelp("--test-json-schema", "testing", "test generated json against schema", R"(This is used by qpdf's test suite to check consistency between
884 884 the output of qpdf --json and the output of qpdf --json-help.
885 885 )");
  886 +ap.addOptionHelp("--report-mem-usage", "testing", "best effort report of memory usage", R"(This is used by qpdf's performance test suite to report the
  887 +maximum amount of memory used in supported environments.
  888 +)");
886 889 }
887 890 static void add_help(QPDFArgParser& ap)
888 891 {
... ...
libqpdf/qpdf/auto_job_init.hh
... ... @@ -69,6 +69,7 @@ this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
69 69 this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
70 70 this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
71 71 this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
  72 +this->ap.addBare("report-mem-usage", [this](){c_main->reportMemUsage();});
72 73 this->ap.addBare("requires-password", [this](){c_main->requiresPassword();});
73 74 this->ap.addBare("show-encryption", [this](){c_main->showEncryption();});
74 75 this->ap.addBare("show-encryption-key", [this](){c_main->showEncryptionKey();});
... ...
libqpdf/qpdf/auto_job_json_init.hh
... ... @@ -409,6 +409,9 @@ popHandler(); // key: pages
409 409 pushKey("removePageLabels");
410 410 addBare([this]() { c_main->removePageLabels(); });
411 411 popHandler(); // key: removePageLabels
  412 +pushKey("reportMemUsage");
  413 +addBare([this]() { c_main->reportMemUsage(); });
  414 +popHandler(); // key: reportMemUsage
412 415 pushKey("rotate");
413 416 addParameter([this](std::string const& p) { c_main->rotate(p); });
414 417 popHandler(); // key: rotate
... ...
libqpdf/qpdf/auto_job_schema.hh
... ... @@ -144,6 +144,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
144 144 }
145 145 ],
146 146 "removePageLabels": "remove explicit page numbers",
  147 + "reportMemUsage": "best effort report of memory usage",
147 148 "rotate": "rotate pages",
148 149 "overlay": {
149 150 "file": "source file for overlay",
... ...
libqpdf/qpdf/qpdf-config.h.in
... ... @@ -21,6 +21,8 @@
21 21 #cmakedefine HAVE_LOCALTIME_R 1
22 22 #cmakedefine HAVE_RANDOM 1
23 23 #cmakedefine HAVE_TM_GMTOFF 1
  24 +#cmakedefine HAVE_MALLOC_INFO 1
  25 +#cmakedefine HAVE_OPEN_MEMSTREAM 1
24 26  
25 27 /* printf format for long long */
26 28 #cmakedefine LL_FMT "${LL_FMT}"
... ...
libtests/qtest/qutil/qutil.out
... ... @@ -134,3 +134,5 @@ D:20210209191925Z
134 134 2021-02-09T19:19:25Z
135 135 ---- is_long_long
136 136 done
  137 +---- memory usage
  138 +memory usage okay
... ...
libtests/qutil.cc
... ... @@ -703,6 +703,18 @@ is_long_long_test()
703 703 std::cout << "done" << std::endl;
704 704 }
705 705  
  706 +void
  707 +memory_usage_test()
  708 +{
  709 + auto u1 = QUtil::get_max_memory_usage();
  710 + if (u1 > 0) {
  711 + auto x = QUtil::make_shared_array<int>(10 << 20);
  712 + auto u2 = QUtil::get_max_memory_usage();
  713 + assert(u2 > u1);
  714 + }
  715 + std::cout << "memory usage okay" << std::endl;
  716 +}
  717 +
706 718 int
707 719 main(int argc, char* argv[])
708 720 {
... ... @@ -739,6 +751,8 @@ main(int argc, char* argv[])
739 751 timestamp_test();
740 752 std::cout << "---- is_long_long" << std::endl;
741 753 is_long_long_test();
  754 + std::cout << "---- memory usage" << std::endl;
  755 + memory_usage_test();
742 756 } catch (std::exception& e) {
743 757 std::cout << "unexpected exception: " << e.what() << std::endl;
744 758 }
... ...
manual/cli.rst
... ... @@ -3463,6 +3463,16 @@ Related Options
3463 3463 memory and is therefore unsuitable for use with large files. This
3464 3464 is why it's also not on by default.
3465 3465  
  3466 +.. qpdf:option:: --report-mem-usage
  3467 +
  3468 + .. help: best effort report of memory usage
  3469 +
  3470 + This is used by qpdf's performance test suite to report the
  3471 + maximum amount of memory used in supported environments.
  3472 +
  3473 + This is used by qpdf's performance test suite to report the maximum
  3474 + amount of memory used in supported environments.
  3475 +
3466 3476 .. _unicode-passwords:
3467 3477  
3468 3478 Unicode Passwords
... ...