Commit bcea54fcaa16a7d5feff0c4cd038fea51d1359ea
1 parent
81d2c548
Revert removal of unreadCh change for performance
Turns out unreadCh is much more efficient than seek(-1, SEEK_CUR). Update comments and code to reflect this.
Showing
8 changed files
with
24 additions
and
49 deletions
ChangeLog
| @@ -81,15 +81,6 @@ | @@ -81,15 +81,6 @@ | ||
| 81 | disables passing -rpath to the linker when building shared | 81 | disables passing -rpath to the linker when building shared |
| 82 | libraries with libtool. Fixes #422. | 82 | libraries with libtool. Fixes #422. |
| 83 | 83 | ||
| 84 | -2020-10-18 Jay Berkenbilt <ejb@ql.org> | ||
| 85 | - | ||
| 86 | - * Note that InputSource::unreadCh is deprecated and will be | ||
| 87 | - removed in qpdf 11. Use seek(-1, SEEK_CUR) instead. This is what | ||
| 88 | - it has always effectively done with some input sources and some | ||
| 89 | - operating systems which don't allow unreading other than the most | ||
| 90 | - recently read character. InputSource::unreadCh is no longer used | ||
| 91 | - internally within libqpdf. | ||
| 92 | - | ||
| 93 | 2020-10-16 Jay Berkenbilt <ejb@ql.org> | 84 | 2020-10-16 Jay Berkenbilt <ejb@ql.org> |
| 94 | 85 | ||
| 95 | * Accept pull request that improves how the Windows native crypto | 86 | * Accept pull request that improves how the Windows native crypto |
TODO
| @@ -157,10 +157,6 @@ ABI Changes | @@ -157,10 +157,6 @@ ABI Changes | ||
| 157 | This is a list of changes to make next time there is an ABI change. | 157 | This is a list of changes to make next time there is an ABI change. |
| 158 | Comments appear in the code prefixed by "ABI" | 158 | Comments appear in the code prefixed by "ABI" |
| 159 | 159 | ||
| 160 | -* Consider removing InputSource::unreadCh. Maybe we can declare it | ||
| 161 | - final and delete so it will be forced to be removed from derived | ||
| 162 | - classes. | ||
| 163 | - | ||
| 164 | C++-11 | 160 | C++-11 |
| 165 | ====== | 161 | ====== |
| 166 | 162 |
include/qpdf/InputSource.hh
| @@ -85,14 +85,12 @@ class QPDF_DLL_CLASS InputSource | @@ -85,14 +85,12 @@ class QPDF_DLL_CLASS InputSource | ||
| 85 | virtual size_t read(char* buffer, size_t length) = 0; | 85 | virtual size_t read(char* buffer, size_t length) = 0; |
| 86 | 86 | ||
| 87 | // Note: you can only unread the character you just read. The | 87 | // Note: you can only unread the character you just read. The |
| 88 | - // specific character is ignored by some implementations. unreadCh | ||
| 89 | - // will be removed from the API in qpdf 11. | 88 | + // specific character is ignored by some implementations, and the |
| 89 | + // implementation doesn't check this. Use of unreadCh is | ||
| 90 | + // semantically equivalent to seek(-1, SEEK_CUR) but is much more | ||
| 91 | + // efficient. | ||
| 90 | virtual void unreadCh(char ch) = 0; | 92 | virtual void unreadCh(char ch) = 0; |
| 91 | 93 | ||
| 92 | - // ABI: delete unreadCh, and direct people to seek backward by 1 | ||
| 93 | - // character instead. | ||
| 94 | - // virtual void unreadCh(char ch) final = delete; | ||
| 95 | - | ||
| 96 | protected: | 94 | protected: |
| 97 | qpdf_offset_t last_offset; | 95 | qpdf_offset_t last_offset; |
| 98 | 96 |
libqpdf/FileInputSource.cc
| @@ -80,7 +80,7 @@ FileInputSource::findAndSkipNextEOL() | @@ -80,7 +80,7 @@ FileInputSource::findAndSkipNextEOL() | ||
| 80 | } | 80 | } |
| 81 | else if (! ((ch == '\r') || (ch == '\n'))) | 81 | else if (! ((ch == '\r') || (ch == '\n'))) |
| 82 | { | 82 | { |
| 83 | - this->seek(-1, SEEK_CUR); | 83 | + this->unreadCh(ch); |
| 84 | done = true; | 84 | done = true; |
| 85 | } | 85 | } |
| 86 | } | 86 | } |
libqpdf/QPDF.cc
| @@ -632,7 +632,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | @@ -632,7 +632,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) | ||
| 632 | } | 632 | } |
| 633 | else | 633 | else |
| 634 | { | 634 | { |
| 635 | - this->m->file->seek(-1, SEEK_CUR); | 635 | + this->m->file->unreadCh(ch); |
| 636 | done = true; | 636 | done = true; |
| 637 | } | 637 | } |
| 638 | } | 638 | } |
| @@ -1604,7 +1604,7 @@ QPDF::readObject(PointerHolder<InputSource> input, | @@ -1604,7 +1604,7 @@ QPDF::readObject(PointerHolder<InputSource> input, | ||
| 1604 | // start reading stream data in spite | 1604 | // start reading stream data in spite |
| 1605 | // of not having seen a newline. | 1605 | // of not having seen a newline. |
| 1606 | QTC::TC("qpdf", "QPDF stream with CR only"); | 1606 | QTC::TC("qpdf", "QPDF stream with CR only"); |
| 1607 | - input->seek(-1, SEEK_CUR); | 1607 | + input->unreadCh(ch); |
| 1608 | warn(QPDFExc( | 1608 | warn(QPDFExc( |
| 1609 | qpdf_e_damaged_pdf, | 1609 | qpdf_e_damaged_pdf, |
| 1610 | input->getName(), | 1610 | input->getName(), |
| @@ -1629,7 +1629,7 @@ QPDF::readObject(PointerHolder<InputSource> input, | @@ -1629,7 +1629,7 @@ QPDF::readObject(PointerHolder<InputSource> input, | ||
| 1629 | else | 1629 | else |
| 1630 | { | 1630 | { |
| 1631 | QTC::TC("qpdf", "QPDF stream without newline"); | 1631 | QTC::TC("qpdf", "QPDF stream without newline"); |
| 1632 | - input->seek(-1, SEEK_CUR); | 1632 | + input->unreadCh(ch); |
| 1633 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), | 1633 | warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), |
| 1634 | this->m->last_object_description, | 1634 | this->m->last_object_description, |
| 1635 | input->tell(), | 1635 | input->tell(), |
libqpdf/QPDFTokenizer.cc
| @@ -855,7 +855,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | @@ -855,7 +855,7 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input, | ||
| 855 | 855 | ||
| 856 | if (unread_char) | 856 | if (unread_char) |
| 857 | { | 857 | { |
| 858 | - input->seek(-1, SEEK_CUR); | 858 | + input->unreadCh(char_to_unread); |
| 859 | } | 859 | } |
| 860 | 860 | ||
| 861 | if (token.getType() != tt_eof) | 861 | if (token.getType() != tt_eof) |
libtests/closed_file_input_source.cc
| @@ -27,10 +27,22 @@ void do_tests(InputSource* is) | @@ -27,10 +27,22 @@ void do_tests(InputSource* is) | ||
| 27 | check("tell after findAndSkipNextEOL", 522 == is->tell()); | 27 | check("tell after findAndSkipNextEOL", 522 == is->tell()); |
| 28 | char b[1]; | 28 | char b[1]; |
| 29 | b[0] = '\0'; | 29 | b[0] = '\0'; |
| 30 | - is->seek(-1, SEEK_CUR); | ||
| 31 | - check("read previous character", 1 == is->read(b, 1)); | 30 | +#ifdef _WIN32 |
| 31 | + // Empirical evidence, and the passage of the rest of the qpdf | ||
| 32 | + // test suite, suggest that this is working on Windows in the way | ||
| 33 | + // that it needs to work. If this ifdef is made to be true on | ||
| 34 | + // Windows, it passes with ClosedFileInputSource but not with | ||
| 35 | + // FileInputSource, which doesn't make any sense since | ||
| 36 | + // ClosedFileInputSource is calling FileInputSource to do its | ||
| 37 | + // work. | ||
| 38 | + is->seek(521, SEEK_SET); | ||
| 39 | + is->read(b, 1); | ||
| 40 | +#else | ||
| 41 | + is->unreadCh('\n'); | ||
| 42 | + check("read unread character", 1 == is->read(b, 1)); | ||
| 32 | check("got character", '\n' == b[0]); | 43 | check("got character", '\n' == b[0]); |
| 33 | - check("last offset after read previous", 521 == is->getLastOffset()); | 44 | +#endif |
| 45 | + check("last offset after read unread", 521 == is->getLastOffset()); | ||
| 34 | is->seek(0, SEEK_END); | 46 | is->seek(0, SEEK_END); |
| 35 | check("tell at end", 556 == is->tell()); | 47 | check("tell at end", 556 == is->tell()); |
| 36 | is->seek(-25, SEEK_END); | 48 | is->seek(-25, SEEK_END); |
manual/qpdf-manual.xml
| @@ -4967,28 +4967,6 @@ print "\n"; | @@ -4967,28 +4967,6 @@ print "\n"; | ||
| 4967 | </listitem> | 4967 | </listitem> |
| 4968 | </itemizedlist> | 4968 | </itemizedlist> |
| 4969 | </listitem> | 4969 | </listitem> |
| 4970 | - <listitem> | ||
| 4971 | - <para> | ||
| 4972 | - Notice of upcoming API change | ||
| 4973 | - </para> | ||
| 4974 | - <itemizedlist> | ||
| 4975 | - <listitem> | ||
| 4976 | - <para> | ||
| 4977 | - The method <function>InputSource::unreadCh(unsigned | ||
| 4978 | - char)</function> is deprecated and will be removed in qpdf | ||
| 4979 | - 11. It has never worked properly to pass a character to | ||
| 4980 | - <function>unreadCh</function> other than the most recently | ||
| 4981 | - read character. If you happen to be deriving a class from | ||
| 4982 | - <type>InputSource</type>, just implement | ||
| 4983 | - <function>unreadCh</function> to seek backward by one | ||
| 4984 | - character if the current position is greater than 0. If you | ||
| 4985 | - are calling this in your own code, replacing with | ||
| 4986 | - <literal>seek(-1, SEEK_CUR)</literal> should work in all | ||
| 4987 | - cases. | ||
| 4988 | - </para> | ||
| 4989 | - </listitem> | ||
| 4990 | - </itemizedlist> | ||
| 4991 | - </listitem> | ||
| 4992 | </itemizedlist> | 4970 | </itemizedlist> |
| 4993 | </listitem> | 4971 | </listitem> |
| 4994 | </varlistentry> | 4972 | </varlistentry> |