Commit 9ac16d036cb92388fd132dd009ba1599671cfc16

Authored by qooxzuub
1 parent eef51d0c

libqpdf: implement BaseHandle::equivalent_to for structural PDF object comparison

Implements structural equivalence following ISO 32000-2 Annex J,
with a recursion depth limit (default 10) in place of cycle detection.
Handles numeric promotion, sparse arrays, streams (compared by raw,
undecoded data, a deliberate deviation from Annex J), indirect references,
and null/uninitialized normalization. Includes tests in libtests/objects.cc.
include/qpdf/ObjectHandle.hh
... ... @@ -68,6 +68,9 @@ namespace qpdf
68 68 return obj == other.obj;
69 69 }
70 70  
  71 + // Structural equivalence check per ISO 32000-2 Annex J rules; recursion is limited to depth levels.
  72 + bool equivalent_to(BaseHandle const& other, int depth = 10) const;
  73 +
71 74 // For arrays, return the number of items in the array.
72 75 // For null-like objects, return 0.
73 76 // For all other objects, return 1.
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -327,6 +327,148 @@ BaseHandle::copy(bool shallow) const
327 327 return {}; // unreachable
328 328 }
329 329  
  330 +// This method determines structural equivalence up to a given depth.
  331 +// The default depth is 10.
  332 +//
  333 +// Nomenclature note: ISO 32000-2 Annex J uses the term "equal" for this
  334 +// strict recursive comparison (J.4.1). We use "equivalent_to" here to
  335 +// implement Annex J's "equality", distinguishing it from C++ shallow
  336 +// pointer equality.
  337 +//
  338 +// Implementation notes:
  339 +//
  340 +// (1) We deviate from Annex J by comparing raw streams only, without
  341 +// decoding.
  342 +//
  343 +// (2) Loop detection is expensive and is avoided; recursion is bounded by depth instead. A comparison
  344 +// that must descend through a cycle, or more than depth levels, returns false. Identical objects always match.
  345 +
  346 +bool
  347 +BaseHandle::equivalent_to(BaseHandle const& other, int depth) const
  348 +{
  349 + // A. Identity, size & limit checks
  350 + if (obj == other.obj) {
  351 + return true;
  352 + }
  353 + if (depth < 0) {
  354 + return false;
  355 + }
  356 + size_t size1 = size();
  357 + size_t size2 = other.size();
  358 + if (size1 != size2) {
  359 + return false;
  360 + }
  361 + // B. Structural comparison
  362 + qpdf_object_type_e t1 = resolved_type_code();
  363 + qpdf_object_type_e t2 = other.resolved_type_code();
  364 + if (t1 == ::ot_reference) {
  365 + return referenced_object().equivalent_to(other, depth - 1);
  366 + }
  367 + if (t2 == ::ot_reference) {
  368 + return equivalent_to(other.referenced_object(), depth - 1);
  369 + }
  370 + if (t1 != t2) {
  371 + if ((t1 == ::ot_integer || t1 == ::ot_real) && (t2 == ::ot_integer || t2 == ::ot_real)) {
  372 + // Numeric equivalence per Annex J
  373 + return oh().getNumericValue() == other.oh().getNumericValue();
  374 + }
  375 + // normalize uninitialized and null
  376 + return (t1 == ::ot_uninitialized && t2 == ::ot_null) ||
  377 + (t2 == ::ot_uninitialized && t1 == ::ot_null);
  378 + }
  379 + switch (t1) {
  380 + case ::ot_uninitialized:
  381 + case ::ot_null:
  382 + return true;
  383 + case ::ot_boolean:
  384 + return std::get<QPDF_Bool>(obj->value).val == std::get<QPDF_Bool>(other.obj->value).val;
  385 + case ::ot_string:
  386 + return std::get<QPDF_String>(obj->value).val == std::get<QPDF_String>(other.obj->value).val;
  387 + case ::ot_name:
  388 + return std::get<QPDF_Name>(obj->value).name == std::get<QPDF_Name>(other.obj->value).name;
  389 + case ::ot_array:
  390 + {
  391 + auto const& a1 = std::get<QPDF_Array>(obj->value);
  392 + auto const& a2 = std::get<QPDF_Array>(other.obj->value);
  393 + // sizes size1, size2 were calculated above and checked to be equal
  394 + if (!a1.sp && !a2.sp) {
  395 + for (size_t i = 0; i < size1; ++i) {
  396 + if (!a1.elements[i].equivalent_to(a2.elements[i], depth - 1)) {
  397 + return false;
  398 + }
  399 + }
  400 + return true;
  401 + }
  402 + // at least one array is sparse
  403 + auto get_item = [](QPDF_Array const& arr, size_t idx) -> BaseHandle const& {
  404 + if (arr.sp) {
  405 + auto it = arr.sp->elements.find(idx);
  406 + if (it == arr.sp->elements.end()) {
  407 + static QPDFObjectHandle null_oh = Null();
  408 + return null_oh;
  409 + }
  410 + return it->second;
  411 + }
  412 + return arr.elements[idx];
  413 + };
  414 + for (size_t i = 0; i < size1; ++i) {
  415 + if (!get_item(a1, i).equivalent_to(get_item(a2, i), depth - 1)) {
  416 + return false;
  417 + }
  418 + }
  419 + return true;
  420 + }
  421 + case ::ot_dictionary:
  422 + {
  423 + auto const& map1 = std::get<QPDF_Dictionary>(obj->value).items;
  424 + auto const& map2 = std::get<QPDF_Dictionary>(other.obj->value).items;
  425 + auto it2 = map2.begin();
  426 + auto end2 = map2.end();
  427 + for (auto const& [key1, value1]: map1) {
  428 + if (value1.null()) {
  429 + continue;
  430 + }
  431 + while (it2 != end2 && it2->second.null()) {
  432 + ++it2;
  433 + }
  434 + if (it2 == end2 || key1 != it2->first ||
  435 + !value1.equivalent_to(it2->second, depth - 1)) {
  436 + return false;
  437 + }
  438 + ++it2;
  439 + }
  440 + while (it2 != end2 && it2->second.null()) {
  441 + ++it2;
  442 + }
  443 + return it2 == end2;
  444 + }
  445 + case ::ot_stream:
  446 + {
  447 + auto const& s1 = std::get<QPDF_Stream>(obj->value);
  448 + auto const& s2 = std::get<QPDF_Stream>(other.obj->value);
  449 + if (!s1.m->stream_dict.equivalent_to(s2.m->stream_dict, depth - 1)) {
  450 + return false;
  451 + }
  452 + return s1.m->stream_data->view() == s2.m->stream_data->view();
  453 + }
  454 + case ::ot_operator:
  455 + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_operator");
  456 + case ::ot_inlineimage:
  457 + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_inlineimage");
  458 + case ::ot_integer:
  459 + return std::get<QPDF_Integer>(obj->value).val ==
  460 + std::get<QPDF_Integer>(other.obj->value).val;
  461 + case ::ot_real:
  462 + return oh().getNumericValue() == other.oh().getNumericValue();
  463 + case ::ot_unresolved: // cannot determine equivalence so return false
  464 + case ::ot_reference: // handled above
  465 + case ::ot_destroyed: // should not happen
  466 + case ::ot_reserved: // should not happen
  467 + return false;
  468 + }
  469 + return false; // unreachable
  470 +}
  471 +
330 472 std::string
331 473 BaseHandle::unparse() const
332 474 {
... ...
libtests/objects.cc
... ... @@ -304,6 +304,458 @@ test_2(QPDF&amp; pdf, char const* arg2)
304 304 assert(!default_limits());
305 305 }
306 306  
  307 +// test equivalent_to
  308 +static void
  309 +test_3(QPDF& pdf, char const* arg2)
  310 +{
  311 + // Scenario 1: Basic Equality: Name, Scalars
  312 + {
  313 + auto name = "/Test"_qpdf;
  314 + auto integer = Integer(42);
  315 + assert(name.equivalent_to("/Test"_qpdf));
  316 + assert(!name.equivalent_to(integer));
  317 + }
  318 + // Scenario 2: Numeric Types (Int vs Real)
  319 + {
  320 + auto integer = Integer(1);
  321 + auto real = QPDFObjectHandle::newReal("1.0");
  322 + assert(real.equivalent_to(integer));
  323 + assert(integer.equivalent_to(real));
  324 + }
  325 + // Scenario 3: Array Order Sensitivity
  326 + {
  327 + auto a1 = "[1 2]"_qpdf;
  328 + auto a2 = "[2 1]"_qpdf;
  329 + assert(!a1.equivalent_to(a2));
  330 + assert(!a2.equivalent_to(a1));
  331 + }
  332 + // Scenario 4: Dictionary Key Order Insensitivity
  333 + {
  334 + auto d1 = "<< /A 1 >>"_qpdf;
  335 + d1.replaceKey("/B", Integer(2));
  336 + auto d2 = "<< /B 2 >>"_qpdf;
  337 + d2.replaceKey("/A", Integer(1));
  338 + assert(d1.equivalent_to(d2));
  339 + assert(d2.equivalent_to(d1));
  340 + }
  341 + // Scenario 5: Direct vs Indirect Equality
  342 + {
  343 + auto obj = Integer(100);
  344 + auto indirect = pdf.makeIndirectObject(Integer(100));
  345 + assert(obj.equivalent_to(indirect));
  346 + assert(indirect.equivalent_to(obj));
  347 + }
  348 + // Scenario 6: Diamond Graph Isomorphism
  349 + {
  350 + auto d = pdf.makeIndirectObject(Integer(99));
  351 + auto b = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d}));
  352 + auto c = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d}));
  353 + assert(Array({b, c}).equivalent_to(Array({b, c})));
  354 + }
  355 + // Scenario 7: Circular References (Self-Loop): Compares as False
  356 + {
  357 + auto a1 = pdf.makeIndirectObject("[]"_qpdf);
  358 + a1.appendItem(a1);
  359 + auto a2 = pdf.makeIndirectObject("[]"_qpdf);
  360 + a2.appendItem(a2);
  361 + // Two distinct self-referential arrays exhaust the depth limit (no cycle detection), so they compare as not equivalent
  362 + assert(!a1.equivalent_to(a2));
  363 + }
  364 + // Scenario 8: Cross-Document Comparison (Objects from Different QPDF Instances)
  365 + {
  366 + QPDF pdf2;
  367 + pdf2.emptyPDF();
  368 + auto a1 = pdf.makeIndirectObject("[1]"_qpdf);
  369 + auto a2 = pdf2.makeIndirectObject("[1]"_qpdf);
  370 + auto a3 = pdf2.makeIndirectObject("[2]"_qpdf);
  371 + assert(a1.equivalent_to(a2)); // Same content, different documents
  372 + assert(a2.equivalent_to(a1)); // Same content, different documents
  373 + assert(!a1.equivalent_to(a3)); // Different content, different documents
  374 + assert(!a3.equivalent_to(a1)); // Different content, different documents
  375 + }
  376 + // Scenario 9: Stream Content: Match
  377 + {
  378 + assert(pdf.newStream("Stream data").equivalent_to(pdf.newStream("Stream data")));
  379 + }
  380 + // Scenario 10: Stream Content: Mismatch
  381 + {
  382 + auto s1 = pdf.newStream("Data A");
  383 + auto s2 = pdf.newStream("Data B");
  384 + assert(!s1.equivalent_to(s2));
  385 + assert(!s2.equivalent_to(s1));
  386 + }
  387 + // Scenario 11: Stream Dictionary Differences
  388 + {
  389 + auto s1 = pdf.newStream("same");
  390 + auto s2 = pdf.newStream("same");
  391 + s2.getDict().replaceKey("/Extra", QPDFObjectHandle::newName("/Value"));
  392 + assert(!s1.equivalent_to(s2));
  393 + assert(!s2.equivalent_to(s1));
  394 + }
  395 + // Scenario 12: J.3.6: Absent Keys vs Null
  396 + {
  397 + auto d0 = Dictionary::empty();
  398 + auto d1 = "<</Present null>>"_qpdf;
  399 + auto d2 = "<</Present << >> >>"_qpdf;
  400 + auto d3 = "<</Present [] >>"_qpdf;
  401 + assert(d0.equivalent_to(d1));
  402 + assert(d1.equivalent_to(d0));
  403 + assert(!d0.equivalent_to(d2));
  404 + assert(!d2.equivalent_to(d0));
  405 + assert(!d0.equivalent_to(d3));
  406 + assert(!d3.equivalent_to(d0));
  407 + assert(!d1.equivalent_to(d2));
  408 + assert(!d2.equivalent_to(d1));
  409 + assert(!d1.equivalent_to(d3));
  410 + assert(!d3.equivalent_to(d1));
  411 + }
  412 + // Scenario 13: String Syntax: Hex vs Literal (Annex J)
  413 + {
  414 + auto literal = "(A)"_qpdf;
  415 + auto hex = "<41>"_qpdf;
  416 + assert(literal.equivalent_to(hex));
  417 + assert(hex.equivalent_to(literal));
  418 + }
  419 + // Scenario 14: Name Syntax (Parser) vs Distinct Names (Model)
  420 + {
  421 + auto name1 = "/Name"_qpdf;
  422 + auto name2 = "/Na#6d#65"_qpdf;
  423 + assert(name1.equivalent_to(name2));
  424 + assert(name2.equivalent_to(name1));
  425 + }
  426 + // Scenario 15: Annex J Oddities: Keys, Octals, and Zeros
  427 + {
  428 + auto key1 = "<< /Key 1 >>"_qpdf;
  429 + auto key2 = "<< /K#65#79 1 >>"_qpdf;
  430 + auto lit_A = "(A)"_qpdf;
  431 + auto oct_A = "(\\101)"_qpdf;
  432 + auto zero_i = Integer(0);
  433 + auto zero_r = QPDFObjectHandle::newReal("-0.0");
  434 + auto r1 = QPDFObjectHandle::newReal("12.345");
  435 + auto r2 = QPDFObjectHandle::newReal("12.345000000000000");
  436 + auto i12 = Integer(12);
  437 + // note: we rely on double rounding here
  438 + auto r_lo = QPDFObjectHandle::newReal("11.99999999999999999999999999999999");
  439 + auto i12b = Integer(12);
  440 + auto r_hi = QPDFObjectHandle::newReal("12.00000000000000000000000000000000");
  441 + auto i1 = Integer(1);
  442 + auto r_1 = QPDFObjectHandle::newReal("1.");
  443 + assert(key1.equivalent_to(key2));
  444 + assert(key2.equivalent_to(key1));
  445 + assert(lit_A.equivalent_to(oct_A));
  446 + assert(oct_A.equivalent_to(lit_A));
  447 + assert(zero_i.equivalent_to(zero_r));
  448 + assert(zero_r.equivalent_to(zero_i));
  449 + assert(r1.equivalent_to(r2));
  450 + assert(r2.equivalent_to(r1));
  451 + assert(i12.equivalent_to(r_lo));
  452 + assert(r_lo.equivalent_to(i12));
  453 + assert(i12b.equivalent_to(r_hi));
  454 + assert(r_hi.equivalent_to(i12b));
  455 + assert(i1.equivalent_to(r_1));
  456 + assert(r_1.equivalent_to(i1));
  457 + }
  458 + // Scenario 16: Nested Containers
  459 + {
  460 + assert(Dictionary({{"/K", "[5]"_qpdf}}).equivalent_to(Dictionary({{"/K", "[5]"_qpdf}})));
  461 + }
  462 + // Scenario 17: Boolean and Null mismatch
  463 + {
  464 + auto b_true = QPDFObjectHandle::newBool(true);
  465 + auto b_false = QPDFObjectHandle::newBool(false);
  466 + auto null = QPDFObjectHandle::newNull();
  467 + auto null2 = QPDFObjectHandle::newNull();
  468 + auto one = Integer(1);
  469 + auto zero = Integer(0);
  470 + assert(null.equivalent_to(null));
  471 + assert(null.equivalent_to(null2));
  472 + assert(!b_true.equivalent_to(b_false));
  473 + assert(!b_true.equivalent_to(null));
  474 + assert(!b_true.equivalent_to(one));
  475 + assert(!b_true.equivalent_to(zero));
  476 + assert(!b_false.equivalent_to(null));
  477 + assert(!b_false.equivalent_to(one));
  478 + assert(!b_false.equivalent_to(zero));
  479 + assert(!null.equivalent_to(one));
  480 + assert(!null.equivalent_to(zero));
  481 + assert(!one.equivalent_to(zero));
  482 + }
  483 + // Scenario 18: Stream Semantics (J.3.7) - Strictness Check
  484 + {
  485 + auto s1 = pdf.newStream("test stream");
  486 + auto s2 = pdf.newStream("DIFFERENT_RAW_BYTES");
  487 + auto s3 = pdf.newStream("test stream");
  488 + s2.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode"));
  489 + s3.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode"));
  490 + assert(!s1.equivalent_to(s2));
  491 + assert(!s2.equivalent_to(s1));
  492 + assert(!s1.equivalent_to(s3));
  493 + assert(!s3.equivalent_to(s1));
  494 + assert(!s2.equivalent_to(s3));
  495 + assert(!s3.equivalent_to(s2));
  496 + }
  497 + // Scenario 19: Dictionary Value Type Mismatch
  498 + {
  499 + auto d1 = "<< /Key 1 >>"_qpdf;
  500 + auto d2 = "<< /Key (1) >>"_qpdf;
  501 + assert(!d1.equivalent_to(d2));
  502 + assert(!d2.equivalent_to(d1));
  503 + }
  504 + // Scenario 20: Mixed Direct vs Indirect Nesting
  505 + {
  506 + assert(
  507 + QPDFObjectHandle::newArray({Integer(7)})
  508 + .equivalent_to(QPDFObjectHandle::newArray({pdf.makeIndirectObject(Integer(7))})));
  509 + }
  510 + // Scenario 21: Dictionary Subset vs Superset
  511 + {
  512 + auto d1 = "<< /A 1 /B 2 >>"_qpdf;
  513 + auto d2 = "<< /A 1 >>"_qpdf;
  514 + assert(!d1.equivalent_to(d2));
  515 + assert(!d2.equivalent_to(d1));
  516 + }
  517 + // Scenario 22: Stream Semantic Decode Equivalence
  518 + {
  519 + auto s1 = pdf.newStream("Hello World");
  520 + auto s2 = pdf.newStream("HELLO WORLD RAW");
  521 + s2.getDict().replaceKey("/Filter", "/FlateDecode"_qpdf);
  522 + s2.getDict().replaceKey("/DecodeParms", Dictionary::empty());
  523 + assert(!s1.equivalent_to(s2));
  524 + assert(!s2.equivalent_to(s1));
  525 + }
  526 + // Scenario 23: Indirect Object Identity Independence
  527 + {
  528 + auto i1 = pdf.makeIndirectObject(Integer(123));
  529 + auto i2 = Integer(123);
  530 + assert(i1.equivalent_to(pdf.makeIndirectObject(Integer(123))));
  531 + assert(i1.equivalent_to(i2));
  532 + assert(i2.equivalent_to(i1));
  533 + }
  534 + // Scenario 24: Deep Recursive Structure (Stack Safety)
  535 + {
  536 + QPDFObjectHandle a1 = "[]"_qpdf;
  537 + QPDFObjectHandle a2 = "[]"_qpdf;
  538 + QPDFObjectHandle cur1 = a1;
  539 + QPDFObjectHandle cur2 = a2;
  540 + for (int i = 0; i < 200; ++i) {
  541 + auto n1 = "[]"_qpdf;
  542 + auto n2 = "[]"_qpdf;
  543 + cur1.appendItem(n1);
  544 + cur2.appendItem(n2);
  545 + cur1 = n1;
  546 + cur2 = n2;
  547 + }
  548 + assert(!a1.equivalent_to(a2)); // Default depth = 10 -> fails
  549 + assert(a1.equivalent_to(a2, 500)); // Explicit depth -> passes
  550 + }
  551 + // Scenario 25: Wide Graph Fan-out
  552 + {
  553 + auto a1 = "[]"_qpdf;
  554 + auto a2 = "[]"_qpdf;
  555 + auto a3 = "[]"_qpdf;
  556 + for (int i = 0; i < 200; ++i) {
  557 + a1.appendItem(Integer(i));
  558 + a2.appendItem(Integer(i));
  559 + a3.appendItem(Integer(i));
  560 + }
  561 + a3.appendItem(Integer(200));
  562 + assert(a1.equivalent_to(a2));
  563 + assert(!a1.equivalent_to(a3));
  564 + assert(!a3.equivalent_to(a1));
  565 + }
  566 + // Scenario 26: Two Self-Referential Arrays
  567 + {
  568 + auto a1 = pdf.makeIndirectObject("[]"_qpdf);
  569 + auto a2 = pdf.makeIndirectObject("[]"_qpdf);
  570 + a1.appendItem(a1);
  571 + a2.appendItem(a2);
  572 + assert(!a1.equivalent_to(a2));
  573 + assert(!a1.equivalent_to(a2)); // Check idempotency
  574 + }
  575 + // Scenario 27: Nested Dictionary Reuse / Shared Indirect Objects
  576 + {
  577 + auto shared_array = pdf.makeIndirectObject("[42 99]"_qpdf);
  578 + auto dict1 = "<< /Unique1 /A >>"_qpdf;
  579 + dict1.replaceKey("/Shared", shared_array);
  580 + auto dict2 = "<< /Unique1 /A >>"_qpdf;
  581 + dict2.replaceKey("/Shared", shared_array);
  582 + auto dict3 = "<< /Unique1 /B >>"_qpdf;
  583 + dict3.replaceKey("/Shared", shared_array);
  584 + assert(dict1.equivalent_to(dict2));
  585 + assert(!dict1.equivalent_to(dict3));
  586 + assert(!dict3.equivalent_to(dict1));
  587 + }
  588 + // Scenario 28: Shared Indirect Leaves Reached via Two Paths
  589 + {
  590 + auto leaf1 = pdf.makeIndirectObject("[1]"_qpdf);
  591 + auto leaf2 = pdf.makeIndirectObject("[2]"_qpdf);
  592 + auto mid1 = pdf.makeIndirectObject(Dictionary::empty());
  593 + mid1.replaceKey("/Leaf1", leaf1);
  594 + mid1.replaceKey("/Leaf2", leaf2);
  595 + auto mid2 = pdf.makeIndirectObject(Dictionary::empty());
  596 + mid2.replaceKey("/Leaf1", leaf1);
  597 + mid2.replaceKey("/Leaf2", leaf2);
  598 + assert(
  599 + QPDFObjectHandle::newArray({mid1, mid2})
  600 + .equivalent_to(QPDFObjectHandle::newArray({mid1, mid2})));
  601 + }
  602 + // Scenario 29: Direct vs Indirect Integer
  603 + {
  604 + assert(Integer(42).equivalent_to(pdf.makeIndirectObject(Integer(42))));
  605 + }
  606 + // Scenario 30: Nested Diamond with Direct & Indirect Objects
  607 + {
  608 + assert(
  609 + QPDFObjectHandle::newArray(
  610 + {pdf.makeIndirectObject("[42]"_qpdf), pdf.makeIndirectObject("[42]"_qpdf)})
  611 + .equivalent_to(
  612 + QPDFObjectHandle::newArray(
  613 + {pdf.makeIndirectObject("[42]"_qpdf),
  614 + pdf.makeIndirectObject("[42]"_qpdf)})));
  615 + }
  616 + // Scenario 31: Image XObjects sharing an SMask
  617 + {
  618 + auto smask = pdf.newStream();
  619 + smask.replaceStreamData(
  620 + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  621 + auto img1 = pdf.makeIndirectObject(pdf.newStream());
  622 + img1.replaceStreamData(
  623 + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  624 + img1.getDict().replaceKey("/SMask", smask);
  625 + auto img2 = pdf.makeIndirectObject(pdf.newStream());
  626 + img2.replaceStreamData(
  627 + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  628 + img2.getDict().replaceKey("/SMask", smask);
  629 + assert(img1.equivalent_to(img2));
  630 + }
  631 + // Scenario 32: Image XObjects with two distinct but identical SMasks
  632 + {
  633 + auto smask1 = pdf.newStream();
  634 + smask1.replaceStreamData(
  635 + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  636 + auto smask2 = pdf.makeIndirectObject(pdf.newStream());
  637 + smask2.replaceStreamData(
  638 + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  639 + auto img1 = pdf.newStream();
  640 + img1.replaceStreamData(
  641 + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  642 + img1.getDict().replaceKey("/SMask", smask1);
  643 + auto img2 = pdf.makeIndirectObject(pdf.newStream());
  644 + img2.replaceStreamData(
  645 + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  646 + img2.getDict().replaceKey("/SMask", smask2);
  647 + assert(img1.equivalent_to(img2));
  648 + }
  649 + // Scenario 33: Dictionary Key Equivalence with Value Mismatch (Annex J)
  650 + {
  651 + assert(!"<< /Key 1 >>"_qpdf.equivalent_to("<< /K#65#79 2 >>"_qpdf));
  652 + }
  653 + // Scenario 34: Uninitialized vs. Uninitialized (!obj)
  654 + {
  655 + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle()));
  656 + }
  657 + // Scenario 35: Uninitialized vs. PDF Null
  658 + {
  659 + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle::newNull()));
  660 + }
  661 + // Scenario 36: Distinct Direct Null Objects
  662 + {
  663 + assert(QPDFObjectHandle::newNull().equivalent_to(QPDFObjectHandle::newNull()));
  664 + }
  665 + // Scenario 37: Distinct Indirect Nulls (Different IDs)
  666 + {
  667 + assert(pdf.newIndirectNull().equivalent_to(pdf.newIndirectNull()));
  668 + }
  669 + // Scenario 38: Broken References to Different Missing Objects
  670 + {
  671 + // Both missing objects resolve to null, so we expect equivalence
  672 + assert(pdf.getObject(999999, 0).equivalent_to(pdf.getObject(888888, 0)));
  673 + }
  674 + // Scenario 39: Uninitialized Handle vs PDF Null
  675 + {
  676 + auto h_valid_null = QPDFObjectHandle::newNull();
  677 + QPDFObjectHandle h_uninit;
  678 + assert(h_uninit.equivalent_to(h_valid_null));
  679 + assert(h_valid_null.equivalent_to(h_uninit));
  680 + }
  681 + // Scenario 40: Recursion Depth Limit (The Stack Protector)
  682 + {
  683 + auto make_deep_array = [](int levels) {
  684 + QPDFObjectHandle root = Integer(1);
  685 + for (int i = 0; i < levels; ++i) {
  686 + QPDFObjectHandle arr = "[]"_qpdf;
  687 + arr.appendItem(root);
  688 + root = arr;
  689 + }
  690 + return root;
  691 + };
  692 + auto h_pass_1 = make_deep_array(500);
  693 + auto h_pass_2 = make_deep_array(500);
  694 + assert(h_pass_1.equivalent_to(h_pass_1));
  695 + assert(!h_pass_1.equivalent_to(h_pass_2));
  696 + assert(h_pass_1.equivalent_to(h_pass_1, 499));
  697 + assert(!h_pass_1.equivalent_to(h_pass_2, 499));
  698 + assert(h_pass_1.equivalent_to(h_pass_1, 500));
  699 + assert(h_pass_1.equivalent_to(h_pass_2, 500));
  700 + assert(h_pass_1.equivalent_to(h_pass_1, 501));
  701 + assert(h_pass_1.equivalent_to(h_pass_2, 501));
  702 + auto h_fail_1 = make_deep_array(501);
  703 + auto h_fail_2 = make_deep_array(501);
  704 + assert(h_fail_1.equivalent_to(h_fail_1, 499));
  705 + assert(!h_fail_1.equivalent_to(h_fail_2, 499));
  706 + assert(h_fail_1.equivalent_to(h_fail_1, 500));
  707 + assert(!h_fail_1.equivalent_to(h_fail_2, 500));
  708 + assert(h_fail_1.equivalent_to(h_fail_1, 501));
  709 + assert(h_fail_1.equivalent_to(h_fail_2, 501));
  710 + }
  711 + // Scenario 41: Sparse Arrays (null_count > 100 triggers sparse representation)
  712 + {
  713 + auto dense1 = "[]"_qpdf;
  714 + auto null = "null"_qpdf;
  715 + // Build a parse string with 101 nulls to trigger the sparse path
  716 + std::string sparse_str = "[";
  717 + for (int i = 0; i < 101; ++i) {
  718 + sparse_str += "null ";
  719 + dense1.appendItem(null);
  720 + }
  721 + sparse_str += "]";
  722 + auto sparse1 = QPDFObjectHandle::parse(sparse_str);
  723 + assert(sparse1.equivalent_to(QPDFObjectHandle::parse(sparse_str)));
  724 + assert(dense1.equivalent_to(sparse1));
  725 + assert(sparse1.equivalent_to(dense1));
  726 + // Mismatch: replace one null with an integer
  727 + std::string sparse_diff = "[";
  728 + for (int i = 0; i < 100; ++i) {
  729 + sparse_diff += "null ";
  730 + }
  731 + sparse_diff += "42]";
  732 + auto sparse3 = QPDFObjectHandle::parse(sparse_diff);
  733 + assert(!sparse1.equivalent_to(sparse3));
  734 + assert(!sparse3.equivalent_to(sparse1));
  735 + assert(!dense1.equivalent_to(sparse3));
  736 + assert(!sparse3.equivalent_to(dense1));
  737 + std::string sparse_with_value = "[";
  738 + for (int i = 0; i < 101; ++i) {
  739 + sparse_with_value += "null ";
  740 + }
  741 + sparse_with_value += "42 ]"; // one non-null element at index 101
  742 + assert(
  743 + QPDFObjectHandle::parse(sparse_with_value)
  744 + .equivalent_to(QPDFObjectHandle::parse(sparse_with_value)));
  745 + }
  746 + // Scenario 42: equivalent_to on ot_reference (post-replaceObject)
  747 + {
  748 + auto obj = pdf.makeIndirectObject(Integer(42));
  749 + auto replacement = Integer(42);
  750 + // Hold a handle to replacement before it becomes ot_reference
  751 + auto stale = replacement;
  752 + pdf.replaceObject(obj.getObjGen(), replacement);
  753 + // stale's underlying QPDFObject is now ot_reference
  754 + assert(stale.raw_type_code() == ::ot_reference);
  755 + assert(!stale.equivalent_to(Integer(42)));
  756 + }
  757 +}
  758 +
307 759 void
308 760 runtest(int n, char const* filename1, char const* arg2)
309 761 {
... ... @@ -311,7 +763,7 @@ runtest(int n, char const* filename1, char const* arg2)
311 763 // the test suite to see how the test is invoked to find the file
312 764 // that the test is supposed to operate on.
313 765  
314   - std::set<int> ignore_filename = {1, 2};
  766 + std::set<int> ignore_filename = {1, 2, 3};
315 767  
316 768 QPDF pdf;
317 769 std::shared_ptr<char> file_buf;
... ... @@ -325,7 +777,7 @@ runtest(int n, char const* filename1, char const* arg2)
325 777 }
326 778  
327 779 std::map<int, void (*)(QPDF&, char const*)> test_functions = {
328   - {0, test_0}, {1, test_1}, {2, test_2}};
  780 + {0, test_0}, {1, test_1}, {2, test_2}, {3, test_3}};
329 781  
330 782 auto fn = test_functions.find(n);
331 783 if (fn == test_functions.end()) {
... ...
libtests/qtest/objects.test
... ... @@ -11,7 +11,7 @@ require TestDriver;
11 11  
12 12 my $td = new TestDriver('objects');
13 13  
14   -my $n_tests = 3;
  14 +my $n_tests = 4;
15 15  
16 16 $td->runtest("integer type checks",
17 17 {$td->COMMAND => "objects 0 minimal.pdf"},
... ... @@ -28,4 +28,9 @@ $td-&gt;runtest(&quot;global limits&quot;,
28 28 {$td->FILE => "test2.out", $td->EXIT_STATUS => 0},
29 29 $td->NORMALIZE_NEWLINES);
30 30  
  31 +$td->runtest("equivalent_to structural comparisons",
  32 + {$td->COMMAND => "objects 3 -"},
  33 + {$td->STRING => "test 3 done\n", $td->EXIT_STATUS => 0},
  34 + $td->NORMALIZE_NEWLINES);
  35 +
31 36 $td->report($n_tests);
... ...