Commit 8c886812817487dfec4482d59029ccaf3b501503
Committed by GitHub
Merge pull request #1665 from qooxzuub/basehandle-is-equivalent
Implement BaseHandle::equivalent_to
Showing 4 changed files with 605 additions and 3 deletions
include/qpdf/ObjectHandle.hh
| ... | ... | @@ -68,6 +68,9 @@ namespace qpdf |
| 68 | 68 | return obj == other.obj; |
| 69 | 69 | } |
| 70 | 70 | |
| 71 | + // Structural equivalence check per ISO 32000-2 Annex J rules; returns false once the comparison would descend more than `depth` levels. | |
| 72 | + bool equivalent_to(BaseHandle const& other, int depth = 10) const; | |
| 73 | + | |
| 71 | 74 | // For arrays, return the number of items in the array. |
| 72 | 75 | // For null-like objects, return 0. |
| 73 | 76 | // For all other objects, return 1. | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -327,6 +327,148 @@ BaseHandle::copy(bool shallow) const |
| 327 | 327 | return {}; // unreachable |
| 328 | 328 | } |
| 329 | 329 | |
| 330 | +// This method determines structural equivalence up to a given depth. | |
| 331 | +// The default depth is 10. | |
| 332 | +// | |
| 333 | +// Nomenclature note: ISO 32000-2 Annex J uses the term "equal" for this | |
| 334 | +// strict recursive comparison (J.4.1). We use "equivalent_to" here to | |
| 335 | +// implement Annex J's "equality", distinguishing it from C++ shallow | |
| 336 | +// pointer equality. | |
| 337 | +// | |
| 338 | +// Implementation notes: | |
| 339 | +// | |
| 340 | +// (1) We deviate from Annex J by comparing raw streams only, without | |
| 341 | +// decoding. | |
| 342 | +// | |
| 343 | +// (2) Loop detection is expensive and is avoided. If either object has | |
| 344 | +// a cycle in its forward orbit, this implementation will return false. | |
| 345 | + | |
| 346 | +bool | |
| 347 | +BaseHandle::equivalent_to(BaseHandle const& other, int depth) const | |
| 348 | +{ | |
| 349 | + // A. Identity, size & limit checks | |
| 350 | + if (obj == other.obj) { | |
| 351 | + return true; | |
| 352 | + } | |
| 353 | + if (depth < 0) { | |
| 354 | + return false; | |
| 355 | + } | |
| 356 | + size_t size1 = size(); | |
| 357 | + size_t size2 = other.size(); | |
| 358 | + if (size1 != size2) { | |
| 359 | + return false; | |
| 360 | + } | |
| 361 | + // B. Structural comparison | |
| 362 | + qpdf_object_type_e t1 = resolved_type_code(); | |
| 363 | + qpdf_object_type_e t2 = other.resolved_type_code(); | |
| 364 | + if (t1 == ::ot_reference) { | |
| 365 | + return referenced_object().equivalent_to(other, depth - 1); | |
| 366 | + } | |
| 367 | + if (t2 == ::ot_reference) { | |
| 368 | + return equivalent_to(other.referenced_object(), depth - 1); | |
| 369 | + } | |
| 370 | + if (t1 != t2) { | |
| 371 | + if ((t1 == ::ot_integer || t1 == ::ot_real) && (t2 == ::ot_integer || t2 == ::ot_real)) { | |
| 372 | + // Numeric equivalence per Annex J | |
| 373 | + return oh().getNumericValue() == other.oh().getNumericValue(); | |
| 374 | + } | |
| 375 | + // normalize uninitialized and null | |
| 376 | + return (t1 == ::ot_uninitialized && t2 == ::ot_null) || | |
| 377 | + (t2 == ::ot_uninitialized && t1 == ::ot_null); | |
| 378 | + } | |
| 379 | + switch (t1) { | |
| 380 | + case ::ot_uninitialized: | |
| 381 | + case ::ot_null: | |
| 382 | + return true; | |
| 383 | + case ::ot_boolean: | |
| 384 | + return std::get<QPDF_Bool>(obj->value).val == std::get<QPDF_Bool>(other.obj->value).val; | |
| 385 | + case ::ot_string: | |
| 386 | + return std::get<QPDF_String>(obj->value).val == std::get<QPDF_String>(other.obj->value).val; | |
| 387 | + case ::ot_name: | |
| 388 | + return std::get<QPDF_Name>(obj->value).name == std::get<QPDF_Name>(other.obj->value).name; | |
| 389 | + case ::ot_array: | |
| 390 | + { | |
| 391 | + auto const& a1 = std::get<QPDF_Array>(obj->value); | |
| 392 | + auto const& a2 = std::get<QPDF_Array>(other.obj->value); | |
| 393 | + // sizes size1, size2 were calculated above and checked to be equal | |
| 394 | + if (!a1.sp && !a2.sp) { | |
| 395 | + for (size_t i = 0; i < size1; ++i) { | |
| 396 | + if (!a1.elements[i].equivalent_to(a2.elements[i], depth - 1)) { | |
| 397 | + return false; | |
| 398 | + } | |
| 399 | + } | |
| 400 | + return true; | |
| 401 | + } | |
| 402 | + // at least one array is sparse | |
| 403 | + auto get_item = [](QPDF_Array const& arr, size_t idx) -> BaseHandle const& { | |
| 404 | + if (arr.sp) { | |
| 405 | + auto it = arr.sp->elements.find(idx); | |
| 406 | + if (it == arr.sp->elements.end()) { | |
| 407 | + static QPDFObjectHandle null_oh = Null(); | |
| 408 | + return null_oh; | |
| 409 | + } | |
| 410 | + return it->second; | |
| 411 | + } | |
| 412 | + return arr.elements[idx]; | |
| 413 | + }; | |
| 414 | + for (size_t i = 0; i < size1; ++i) { | |
| 415 | + if (!get_item(a1, i).equivalent_to(get_item(a2, i), depth - 1)) { | |
| 416 | + return false; | |
| 417 | + } | |
| 418 | + } | |
| 419 | + return true; | |
| 420 | + } | |
| 421 | + case ::ot_dictionary: | |
| 422 | + { | |
| 423 | + auto const& map1 = std::get<QPDF_Dictionary>(obj->value).items; | |
| 424 | + auto const& map2 = std::get<QPDF_Dictionary>(other.obj->value).items; | |
| 425 | + auto it2 = map2.begin(); | |
| 426 | + auto end2 = map2.end(); | |
| 427 | + for (auto const& [key1, value1]: map1) { | |
| 428 | + if (value1.null()) { | |
| 429 | + continue; | |
| 430 | + } | |
| 431 | + while (it2 != end2 && it2->second.null()) { | |
| 432 | + ++it2; | |
| 433 | + } | |
| 434 | + if (it2 == end2 || key1 != it2->first || | |
| 435 | + !value1.equivalent_to(it2->second, depth - 1)) { | |
| 436 | + return false; | |
| 437 | + } | |
| 438 | + ++it2; | |
| 439 | + } | |
| 440 | + while (it2 != end2 && it2->second.null()) { | |
| 441 | + ++it2; | |
| 442 | + } | |
| 443 | + return it2 == end2; | |
| 444 | + } | |
| 445 | + case ::ot_stream: | |
| 446 | + { | |
| 447 | + auto const& s1 = std::get<QPDF_Stream>(obj->value); | |
| 448 | + auto const& s2 = std::get<QPDF_Stream>(other.obj->value); | |
| 449 | + if (!s1.m->stream_dict.equivalent_to(s2.m->stream_dict, depth - 1)) { | |
| 450 | + return false; | |
| 451 | + } | |
| 452 | + return s1.m->stream_data->view() == s2.m->stream_data->view(); | |
| 453 | + } | |
| 454 | + case ::ot_operator: | |
| 455 | + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_operator"); | |
| 456 | + case ::ot_inlineimage: | |
| 457 | + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_inlineimage"); | |
| 458 | + case ::ot_integer: | |
| 459 | + return std::get<QPDF_Integer>(obj->value).val == | |
| 460 | + std::get<QPDF_Integer>(other.obj->value).val; | |
| 461 | + case ::ot_real: | |
| 462 | + return oh().getNumericValue() == other.oh().getNumericValue(); | |
| 463 | + case ::ot_unresolved: // cannot determine equivalence so return false | |
| 464 | + case ::ot_reference: // handled above | |
| 465 | + case ::ot_destroyed: // should not happen | |
| 466 | + case ::ot_reserved: // should not happen | |
| 467 | + return false; | |
| 468 | + } | |
| 469 | + return false; // unreachable | |
| 470 | +} | |
| 471 | + | |
| 330 | 472 | std::string |
| 331 | 473 | BaseHandle::unparse() const |
| 332 | 474 | { | ... | ... |
libtests/objects.cc
| ... | ... | @@ -304,6 +304,458 @@ test_2(QPDF& pdf, char const* arg2) |
| 304 | 304 | assert(!default_limits()); |
| 305 | 305 | } |
| 306 | 306 | |
| 307 | +// test equivalent_to | |
| 308 | +static void | |
| 309 | +test_3(QPDF& pdf, char const* arg2) | |
| 310 | +{ | |
| 311 | + // Scenario 1: Basic Equality: Name, Scalars | |
| 312 | + { | |
| 313 | + auto name = "/Test"_qpdf; | |
| 314 | + auto integer = Integer(42); | |
| 315 | + assert(name.equivalent_to("/Test"_qpdf)); | |
| 316 | + assert(!name.equivalent_to(integer)); | |
| 317 | + } | |
| 318 | + // Scenario 2: Numeric Types (Int vs Real) | |
| 319 | + { | |
| 320 | + auto integer = Integer(1); | |
| 321 | + auto real = QPDFObjectHandle::newReal("1.0"); | |
| 322 | + assert(real.equivalent_to(integer)); | |
| 323 | + assert(integer.equivalent_to(real)); | |
| 324 | + } | |
| 325 | + // Scenario 3: Array Order Sensitivity | |
| 326 | + { | |
| 327 | + auto a1 = "[1 2]"_qpdf; | |
| 328 | + auto a2 = "[2 1]"_qpdf; | |
| 329 | + assert(!a1.equivalent_to(a2)); | |
| 330 | + assert(!a2.equivalent_to(a1)); | |
| 331 | + } | |
| 332 | + // Scenario 4: Dictionary Key Order Insensitivity | |
| 333 | + { | |
| 334 | + auto d1 = "<< /A 1 >>"_qpdf; | |
| 335 | + d1.replaceKey("/B", Integer(2)); | |
| 336 | + auto d2 = "<< /B 2 >>"_qpdf; | |
| 337 | + d2.replaceKey("/A", Integer(1)); | |
| 338 | + assert(d1.equivalent_to(d2)); | |
| 339 | + assert(d2.equivalent_to(d1)); | |
| 340 | + } | |
| 341 | + // Scenario 5: Direct vs Indirect Equality | |
| 342 | + { | |
| 343 | + auto obj = Integer(100); | |
| 344 | + auto indirect = pdf.makeIndirectObject(Integer(100)); | |
| 345 | + assert(obj.equivalent_to(indirect)); | |
| 346 | + assert(indirect.equivalent_to(obj)); | |
| 347 | + } | |
| 348 | + // Scenario 6: Diamond Graph Isomorphism | |
| 349 | + { | |
| 350 | + auto d = pdf.makeIndirectObject(Integer(99)); | |
| 351 | + auto b = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d})); | |
| 352 | + auto c = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d})); | |
| 353 | + assert(Array({b, c}).equivalent_to(Array({b, c}))); | |
| 354 | + } | |
| 355 | + // Scenario 7: Circular References (Self-Loop): Compares as False | |
| 356 | + { | |
| 357 | + auto a1 = pdf.makeIndirectObject("[]"_qpdf); | |
| 358 | + a1.appendItem(a1); | |
| 359 | + auto a2 = pdf.makeIndirectObject("[]"_qpdf); | |
| 360 | + a2.appendItem(a2); | |
| 361 | + // The implementation rejects if there is any cycle, for performance reasons | |
| 362 | + assert(!a1.equivalent_to(a2)); | |
| 363 | + } | |
| 364 | + // Scenario 8: Cross-Document Comparison (Objects from Different QPDF Instances) | |
| 365 | + { | |
| 366 | + QPDF pdf2; | |
| 367 | + pdf2.emptyPDF(); | |
| 368 | + auto a1 = pdf.makeIndirectObject("[1]"_qpdf); | |
| 369 | + auto a2 = pdf2.makeIndirectObject("[1]"_qpdf); | |
| 370 | + auto a3 = pdf2.makeIndirectObject("[2]"_qpdf); | |
| 371 | + assert(a1.equivalent_to(a2)); // Same content, different documents | |
| 372 | + assert(a2.equivalent_to(a1)); // Same content, different documents | |
| 373 | + assert(!a1.equivalent_to(a3)); // Different content, different documents | |
| 374 | + assert(!a3.equivalent_to(a1)); // Different content, different documents | |
| 375 | + } | |
| 376 | + // Scenario 9: Stream Content: Match | |
| 377 | + { | |
| 378 | + assert(pdf.newStream("Stream data").equivalent_to(pdf.newStream("Stream data"))); | |
| 379 | + } | |
| 380 | + // Scenario 10: Stream Content: Mismatch | |
| 381 | + { | |
| 382 | + auto s1 = pdf.newStream("Data A"); | |
| 383 | + auto s2 = pdf.newStream("Data B"); | |
| 384 | + assert(!s1.equivalent_to(s2)); | |
| 385 | + assert(!s2.equivalent_to(s1)); | |
| 386 | + } | |
| 387 | + // Scenario 11: Stream Dictionary Differences | |
| 388 | + { | |
| 389 | + auto s1 = pdf.newStream("same"); | |
| 390 | + auto s2 = pdf.newStream("same"); | |
| 391 | + s2.getDict().replaceKey("/Extra", QPDFObjectHandle::newName("/Value")); | |
| 392 | + assert(!s1.equivalent_to(s2)); | |
| 393 | + assert(!s2.equivalent_to(s1)); | |
| 394 | + } | |
| 395 | + // Scenario 12: J.3.6: Absent Keys vs Null | |
| 396 | + { | |
| 397 | + auto d0 = Dictionary::empty(); | |
| 398 | + auto d1 = "<</Present null>>"_qpdf; | |
| 399 | + auto d2 = "<</Present << >> >>"_qpdf; | |
| 400 | + auto d3 = "<</Present [] >>"_qpdf; | |
| 401 | + assert(d0.equivalent_to(d1)); | |
| 402 | + assert(d1.equivalent_to(d0)); | |
| 403 | + assert(!d0.equivalent_to(d2)); | |
| 404 | + assert(!d2.equivalent_to(d0)); | |
| 405 | + assert(!d0.equivalent_to(d3)); | |
| 406 | + assert(!d3.equivalent_to(d0)); | |
| 407 | + assert(!d1.equivalent_to(d2)); | |
| 408 | + assert(!d2.equivalent_to(d1)); | |
| 409 | + assert(!d1.equivalent_to(d3)); | |
| 410 | + assert(!d3.equivalent_to(d1)); | |
| 411 | + } | |
| 412 | + // Scenario 13: String Syntax: Hex vs Literal (Annex J) | |
| 413 | + { | |
| 414 | + auto literal = "(A)"_qpdf; | |
| 415 | + auto hex = "<41>"_qpdf; | |
| 416 | + assert(literal.equivalent_to(hex)); | |
| 417 | + assert(hex.equivalent_to(literal)); | |
| 418 | + } | |
| 419 | + // Scenario 14: Name Syntax (Parser) vs Distinct Names (Model) | |
| 420 | + { | |
| 421 | + auto name1 = "/Name"_qpdf; | |
| 422 | + auto name2 = "/Na#6d#65"_qpdf; | |
| 423 | + assert(name1.equivalent_to(name2)); | |
| 424 | + assert(name2.equivalent_to(name1)); | |
| 425 | + } | |
| 426 | + // Scenario 15: Annex J Oddities: Keys, Octals, and Zeros | |
| 427 | + { | |
| 428 | + auto key1 = "<< /Key 1 >>"_qpdf; | |
| 429 | + auto key2 = "<< /K#65#79 1 >>"_qpdf; | |
| 430 | + auto lit_A = "(A)"_qpdf; | |
| 431 | + auto oct_A = "(\\101)"_qpdf; | |
| 432 | + auto zero_i = Integer(0); | |
| 433 | + auto zero_r = QPDFObjectHandle::newReal("-0.0"); | |
| 434 | + auto r1 = QPDFObjectHandle::newReal("12.345"); | |
| 435 | + auto r2 = QPDFObjectHandle::newReal("12.345000000000000"); | |
| 436 | + auto i12 = Integer(12); | |
| 437 | + // note: we rely on double rounding here | |
| 438 | + auto r_lo = QPDFObjectHandle::newReal("11.99999999999999999999999999999999"); | |
| 439 | + auto i12b = Integer(12); | |
| 440 | + auto r_hi = QPDFObjectHandle::newReal("12.00000000000000000000000000000000"); | |
| 441 | + auto i1 = Integer(1); | |
| 442 | + auto r_1 = QPDFObjectHandle::newReal("1."); | |
| 443 | + assert(key1.equivalent_to(key2)); | |
| 444 | + assert(key2.equivalent_to(key1)); | |
| 445 | + assert(lit_A.equivalent_to(oct_A)); | |
| 446 | + assert(oct_A.equivalent_to(lit_A)); | |
| 447 | + assert(zero_i.equivalent_to(zero_r)); | |
| 448 | + assert(zero_r.equivalent_to(zero_i)); | |
| 449 | + assert(r1.equivalent_to(r2)); | |
| 450 | + assert(r2.equivalent_to(r1)); | |
| 451 | + assert(i12.equivalent_to(r_lo)); | |
| 452 | + assert(r_lo.equivalent_to(i12)); | |
| 453 | + assert(i12b.equivalent_to(r_hi)); | |
| 454 | + assert(r_hi.equivalent_to(i12b)); | |
| 455 | + assert(i1.equivalent_to(r_1)); | |
| 456 | + assert(r_1.equivalent_to(i1)); | |
| 457 | + } | |
| 458 | + // Scenario 16: Nested Containers | |
| 459 | + { | |
| 460 | + assert(Dictionary({{"/K", "[5]"_qpdf}}).equivalent_to(Dictionary({{"/K", "[5]"_qpdf}}))); | |
| 461 | + } | |
| 462 | + // Scenario 17: Boolean and Null mismatch | |
| 463 | + { | |
| 464 | + auto b_true = QPDFObjectHandle::newBool(true); | |
| 465 | + auto b_false = QPDFObjectHandle::newBool(false); | |
| 466 | + auto null = QPDFObjectHandle::newNull(); | |
| 467 | + auto null2 = QPDFObjectHandle::newNull(); | |
| 468 | + auto one = Integer(1); | |
| 469 | + auto zero = Integer(0); | |
| 470 | + assert(null.equivalent_to(null)); | |
| 471 | + assert(null.equivalent_to(null2)); | |
| 472 | + assert(!b_true.equivalent_to(b_false)); | |
| 473 | + assert(!b_true.equivalent_to(null)); | |
| 474 | + assert(!b_true.equivalent_to(one)); | |
| 475 | + assert(!b_true.equivalent_to(zero)); | |
| 476 | + assert(!b_false.equivalent_to(null)); | |
| 477 | + assert(!b_false.equivalent_to(one)); | |
| 478 | + assert(!b_false.equivalent_to(zero)); | |
| 479 | + assert(!null.equivalent_to(one)); | |
| 480 | + assert(!null.equivalent_to(zero)); | |
| 481 | + assert(!one.equivalent_to(zero)); | |
| 482 | + } | |
| 483 | + // Scenario 18: Stream Semantics (J.3.7) - Strictness Check | |
| 484 | + { | |
| 485 | + auto s1 = pdf.newStream("test stream"); | |
| 486 | + auto s2 = pdf.newStream("DIFFERENT_RAW_BYTES"); | |
| 487 | + auto s3 = pdf.newStream("test stream"); | |
| 488 | + s2.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode")); | |
| 489 | + s3.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode")); | |
| 490 | + assert(!s1.equivalent_to(s2)); | |
| 491 | + assert(!s2.equivalent_to(s1)); | |
| 492 | + assert(!s1.equivalent_to(s3)); | |
| 493 | + assert(!s3.equivalent_to(s1)); | |
| 494 | + assert(!s2.equivalent_to(s3)); | |
| 495 | + assert(!s3.equivalent_to(s2)); | |
| 496 | + } | |
| 497 | + // Scenario 19: Dictionary Value Type Mismatch | |
| 498 | + { | |
| 499 | + auto d1 = "<< /Key 1 >>"_qpdf; | |
| 500 | + auto d2 = "<< /Key (1) >>"_qpdf; | |
| 501 | + assert(!d1.equivalent_to(d2)); | |
| 502 | + assert(!d2.equivalent_to(d1)); | |
| 503 | + } | |
| 504 | + // Scenario 20: Mixed Direct vs Indirect Nesting | |
| 505 | + { | |
| 506 | + assert( | |
| 507 | + QPDFObjectHandle::newArray({Integer(7)}) | |
| 508 | + .equivalent_to(QPDFObjectHandle::newArray({pdf.makeIndirectObject(Integer(7))}))); | |
| 509 | + } | |
| 510 | + // Scenario 21: Dictionary Subset vs Superset | |
| 511 | + { | |
| 512 | + auto d1 = "<< /A 1 /B 2 >>"_qpdf; | |
| 513 | + auto d2 = "<< /A 1 >>"_qpdf; | |
| 514 | + assert(!d1.equivalent_to(d2)); | |
| 515 | + assert(!d2.equivalent_to(d1)); | |
| 516 | + } | |
| 517 | + // Scenario 22: Stream Semantic Decode Equivalence | |
| 518 | + { | |
| 519 | + auto s1 = pdf.newStream("Hello World"); | |
| 520 | + auto s2 = pdf.newStream("HELLO WORLD RAW"); | |
| 521 | + s2.getDict().replaceKey("/Filter", "/FlateDecode"_qpdf); | |
| 522 | + s2.getDict().replaceKey("/DecodeParms", Dictionary::empty()); | |
| 523 | + assert(!s1.equivalent_to(s2)); | |
| 524 | + assert(!s2.equivalent_to(s1)); | |
| 525 | + } | |
| 526 | + // Scenario 23: Indirect Object Identity Independence | |
| 527 | + { | |
| 528 | + auto i1 = pdf.makeIndirectObject(Integer(123)); | |
| 529 | + auto i2 = Integer(123); | |
| 530 | + assert(i1.equivalent_to(pdf.makeIndirectObject(Integer(123)))); | |
| 531 | + assert(i1.equivalent_to(i2)); | |
| 532 | + assert(i2.equivalent_to(i1)); | |
| 533 | + } | |
| 534 | + // Scenario 24: Deep Recursive Structure (Stack Safety) | |
| 535 | + { | |
| 536 | + QPDFObjectHandle a1 = "[]"_qpdf; | |
| 537 | + QPDFObjectHandle a2 = "[]"_qpdf; | |
| 538 | + QPDFObjectHandle cur1 = a1; | |
| 539 | + QPDFObjectHandle cur2 = a2; | |
| 540 | + for (int i = 0; i < 200; ++i) { | |
| 541 | + auto n1 = "[]"_qpdf; | |
| 542 | + auto n2 = "[]"_qpdf; | |
| 543 | + cur1.appendItem(n1); | |
| 544 | + cur2.appendItem(n2); | |
| 545 | + cur1 = n1; | |
| 546 | + cur2 = n2; | |
| 547 | + } | |
| 548 | + assert(!a1.equivalent_to(a2)); // Default depth = 10 -> fails | |
| 549 | + assert(a1.equivalent_to(a2, 500)); // Explicit depth -> passes | |
| 550 | + } | |
| 551 | + // Scenario 25: Wide Graph Fan-out | |
| 552 | + { | |
| 553 | + auto a1 = "[]"_qpdf; | |
| 554 | + auto a2 = "[]"_qpdf; | |
| 555 | + auto a3 = "[]"_qpdf; | |
| 556 | + for (int i = 0; i < 200; ++i) { | |
| 557 | + a1.appendItem(Integer(i)); | |
| 558 | + a2.appendItem(Integer(i)); | |
| 559 | + a3.appendItem(Integer(i)); | |
| 560 | + } | |
| 561 | + a3.appendItem(Integer(200)); | |
| 562 | + assert(a1.equivalent_to(a2)); | |
| 563 | + assert(!a1.equivalent_to(a3)); | |
| 564 | + assert(!a3.equivalent_to(a1)); | |
| 565 | + } | |
| 566 | + // Scenario 26: Two Self-Referential Arrays | |
| 567 | + { | |
| 568 | + auto a1 = pdf.makeIndirectObject("[]"_qpdf); | |
| 569 | + auto a2 = pdf.makeIndirectObject("[]"_qpdf); | |
| 570 | + a1.appendItem(a1); | |
| 571 | + a2.appendItem(a2); | |
| 572 | + assert(!a1.equivalent_to(a2)); | |
| 573 | + assert(!a1.equivalent_to(a2)); // Check idempotency | |
| 574 | + } | |
| 575 | + // Scenario 27: Nested Dictionary Reuse / Shared Indirect Objects | |
| 576 | + { | |
| 577 | + auto shared_array = pdf.makeIndirectObject("[42 99]"_qpdf); | |
| 578 | + auto dict1 = "<< /Unique1 /A >>"_qpdf; | |
| 579 | + dict1.replaceKey("/Shared", shared_array); | |
| 580 | + auto dict2 = "<< /Unique1 /A >>"_qpdf; | |
| 581 | + dict2.replaceKey("/Shared", shared_array); | |
| 582 | + auto dict3 = "<< /Unique1 /B >>"_qpdf; | |
| 583 | + dict3.replaceKey("/Shared", shared_array); | |
| 584 | + assert(dict1.equivalent_to(dict2)); | |
| 585 | + assert(!dict1.equivalent_to(dict3)); | |
| 586 | + assert(!dict3.equivalent_to(dict1)); | |
| 587 | + } | |
| 588 | + // Scenario 28: Shared Indirect Leaves Reached via Two Paths | |
| 589 | + { | |
| 590 | + auto leaf1 = pdf.makeIndirectObject("[1]"_qpdf); | |
| 591 | + auto leaf2 = pdf.makeIndirectObject("[2]"_qpdf); | |
| 592 | + auto mid1 = pdf.makeIndirectObject(Dictionary::empty()); | |
| 593 | + mid1.replaceKey("/Leaf1", leaf1); | |
| 594 | + mid1.replaceKey("/Leaf2", leaf2); | |
| 595 | + auto mid2 = pdf.makeIndirectObject(Dictionary::empty()); | |
| 596 | + mid2.replaceKey("/Leaf1", leaf1); | |
| 597 | + mid2.replaceKey("/Leaf2", leaf2); | |
| 598 | + assert( | |
| 599 | + QPDFObjectHandle::newArray({mid1, mid2}) | |
| 600 | + .equivalent_to(QPDFObjectHandle::newArray({mid1, mid2}))); | |
| 601 | + } | |
| 602 | + // Scenario 29: Direct vs Indirect Integer | |
| 603 | + { | |
| 604 | + assert(Integer(42).equivalent_to(pdf.makeIndirectObject(Integer(42)))); | |
| 605 | + } | |
| 606 | + // Scenario 30: Nested Diamond with Direct & Indirect Objects | |
| 607 | + { | |
| 608 | + assert( | |
| 609 | + QPDFObjectHandle::newArray( | |
| 610 | + {pdf.makeIndirectObject("[42]"_qpdf), pdf.makeIndirectObject("[42]"_qpdf)}) | |
| 611 | + .equivalent_to( | |
| 612 | + QPDFObjectHandle::newArray( | |
| 613 | + {pdf.makeIndirectObject("[42]"_qpdf), | |
| 614 | + pdf.makeIndirectObject("[42]"_qpdf)}))); | |
| 615 | + } | |
| 616 | + // Scenario 31: Image XObjects sharing an SMask | |
| 617 | + { | |
| 618 | + auto smask = pdf.newStream(); | |
| 619 | + smask.replaceStreamData( | |
| 620 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 621 | + auto img1 = pdf.makeIndirectObject(pdf.newStream()); | |
| 622 | + img1.replaceStreamData( | |
| 623 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 624 | + img1.getDict().replaceKey("/SMask", smask); | |
| 625 | + auto img2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 626 | + img2.replaceStreamData( | |
| 627 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 628 | + img2.getDict().replaceKey("/SMask", smask); | |
| 629 | + assert(img1.equivalent_to(img2)); | |
| 630 | + } | |
| 631 | + // Scenario 32: Image XObjects with two distinct but identical SMasks | |
| 632 | + { | |
| 633 | + auto smask1 = pdf.newStream(); | |
| 634 | + smask1.replaceStreamData( | |
| 635 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 636 | + auto smask2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 637 | + smask2.replaceStreamData( | |
| 638 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 639 | + auto img1 = pdf.newStream(); | |
| 640 | + img1.replaceStreamData( | |
| 641 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 642 | + img1.getDict().replaceKey("/SMask", smask1); | |
| 643 | + auto img2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 644 | + img2.replaceStreamData( | |
| 645 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 646 | + img2.getDict().replaceKey("/SMask", smask2); | |
| 647 | + assert(img1.equivalent_to(img2)); | |
| 648 | + } | |
| 649 | + // Scenario 33: Dictionary Key Equivalence with Value Mismatch (Annex J) | |
| 650 | + { | |
| 651 | + assert(!"<< /Key 1 >>"_qpdf.equivalent_to("<< /K#65#79 2 >>"_qpdf)); | |
| 652 | + } | |
| 653 | + // Scenario 34: Uninitialized vs. Uninitialized (!obj) | |
| 654 | + { | |
| 655 | + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle())); | |
| 656 | + } | |
| 657 | + // Scenario 35: Uninitialized vs. PDF Null | |
| 658 | + { | |
| 659 | + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle::newNull())); | |
| 660 | + } | |
| 661 | + // Scenario 36: Distinct Direct Null Objects | |
| 662 | + { | |
| 663 | + assert(QPDFObjectHandle::newNull().equivalent_to(QPDFObjectHandle::newNull())); | |
| 664 | + } | |
| 665 | + // Scenario 37: Distinct Indirect Nulls (Different IDs) | |
| 666 | + { | |
| 667 | + assert(pdf.newIndirectNull().equivalent_to(pdf.newIndirectNull())); | |
| 668 | + } | |
| 669 | + // Scenario 38: Broken References to Different Missing Objects | |
| 670 | + { | |
| 671 | + // Both missing objects resolve to null, so we expect equivalence | |
| 672 | + assert(pdf.getObject(999999, 0).equivalent_to(pdf.getObject(888888, 0))); | |
| 673 | + } | |
| 674 | + // Scenario 39: Uninitialized Handle vs PDF Null | |
| 675 | + { | |
| 676 | + auto h_valid_null = QPDFObjectHandle::newNull(); | |
| 677 | + QPDFObjectHandle h_uninit; | |
| 678 | + assert(h_uninit.equivalent_to(h_valid_null)); | |
| 679 | + assert(h_valid_null.equivalent_to(h_uninit)); | |
| 680 | + } | |
| 681 | + // Scenario 40: Recursion Depth Limit (The Stack Protector) | |
| 682 | + { | |
| 683 | + auto make_deep_array = [](int levels) { | |
| 684 | + QPDFObjectHandle root = Integer(1); | |
| 685 | + for (int i = 0; i < levels; ++i) { | |
| 686 | + QPDFObjectHandle arr = "[]"_qpdf; | |
| 687 | + arr.appendItem(root); | |
| 688 | + root = arr; | |
| 689 | + } | |
| 690 | + return root; | |
| 691 | + }; | |
| 692 | + auto h_pass_1 = make_deep_array(500); | |
| 693 | + auto h_pass_2 = make_deep_array(500); | |
| 694 | + assert(h_pass_1.equivalent_to(h_pass_1)); | |
| 695 | + assert(!h_pass_1.equivalent_to(h_pass_2)); | |
| 696 | + assert(h_pass_1.equivalent_to(h_pass_1, 499)); | |
| 697 | + assert(!h_pass_1.equivalent_to(h_pass_2, 499)); | |
| 698 | + assert(h_pass_1.equivalent_to(h_pass_1, 500)); | |
| 699 | + assert(h_pass_1.equivalent_to(h_pass_2, 500)); | |
| 700 | + assert(h_pass_1.equivalent_to(h_pass_1, 501)); | |
| 701 | + assert(h_pass_1.equivalent_to(h_pass_2, 501)); | |
| 702 | + auto h_fail_1 = make_deep_array(501); | |
| 703 | + auto h_fail_2 = make_deep_array(501); | |
| 704 | + assert(h_fail_1.equivalent_to(h_fail_1, 499)); | |
| 705 | + assert(!h_fail_1.equivalent_to(h_fail_2, 499)); | |
| 706 | + assert(h_fail_1.equivalent_to(h_fail_1, 500)); | |
| 707 | + assert(!h_fail_1.equivalent_to(h_fail_2, 500)); | |
| 708 | + assert(h_fail_1.equivalent_to(h_fail_1, 501)); | |
| 709 | + assert(h_fail_1.equivalent_to(h_fail_2, 501)); | |
| 710 | + } | |
| 711 | + // Scenario 41: Sparse Arrays (null_count > 100 triggers sparse representation) | |
| 712 | + { | |
| 713 | + auto dense1 = "[]"_qpdf; | |
| 714 | + auto null = "null"_qpdf; | |
| 715 | + // Build a parse string with 101 nulls to trigger the sparse path | |
| 716 | + std::string sparse_str = "["; | |
| 717 | + for (int i = 0; i < 101; ++i) { | |
| 718 | + sparse_str += "null "; | |
| 719 | + dense1.appendItem(null); | |
| 720 | + } | |
| 721 | + sparse_str += "]"; | |
| 722 | + auto sparse1 = QPDFObjectHandle::parse(sparse_str); | |
| 723 | + assert(sparse1.equivalent_to(QPDFObjectHandle::parse(sparse_str))); | |
| 724 | + assert(dense1.equivalent_to(sparse1)); | |
| 725 | + assert(sparse1.equivalent_to(dense1)); | |
| 726 | + // Mismatch: replace one null with an integer | |
| 727 | + std::string sparse_diff = "["; | |
| 728 | + for (int i = 0; i < 100; ++i) { | |
| 729 | + sparse_diff += "null "; | |
| 730 | + } | |
| 731 | + sparse_diff += "42]"; | |
| 732 | + auto sparse3 = QPDFObjectHandle::parse(sparse_diff); | |
| 733 | + assert(!sparse1.equivalent_to(sparse3)); | |
| 734 | + assert(!sparse3.equivalent_to(sparse1)); | |
| 735 | + assert(!dense1.equivalent_to(sparse3)); | |
| 736 | + assert(!sparse3.equivalent_to(dense1)); | |
| 737 | + std::string sparse_with_value = "["; | |
| 738 | + for (int i = 0; i < 101; ++i) { | |
| 739 | + sparse_with_value += "null "; | |
| 740 | + } | |
| 741 | + sparse_with_value += "42 ]"; // one non-null element at index 100 | |
| 742 | + assert( | |
| 743 | + QPDFObjectHandle::parse(sparse_with_value) | |
| 744 | + .equivalent_to(QPDFObjectHandle::parse(sparse_with_value))); | |
| 745 | + } | |
| 746 | + // Scenario 42: equivalent_to on ot_reference (post-replaceObject) | |
| 747 | + { | |
| 748 | + auto obj = pdf.makeIndirectObject(Integer(42)); | |
| 749 | + auto replacement = Integer(42); | |
| 750 | + // Hold a handle to replacement before it becomes ot_reference | |
| 751 | + auto stale = replacement; | |
| 752 | + pdf.replaceObject(obj.getObjGen(), replacement); | |
| 753 | + // stale's underlying QPDFObject is now ot_reference | |
| 754 | + assert(stale.raw_type_code() == ::ot_reference); | |
| 755 | + assert(!stale.equivalent_to(Integer(42))); | |
| 756 | + } | |
| 757 | +} | |
| 758 | + | |
| 307 | 759 | void |
| 308 | 760 | runtest(int n, char const* filename1, char const* arg2) |
| 309 | 761 | { |
| ... | ... | @@ -311,7 +763,7 @@ runtest(int n, char const* filename1, char const* arg2) |
| 311 | 763 | // the test suite to see how the test is invoked to find the file |
| 312 | 764 | // that the test is supposed to operate on. |
| 313 | 765 | |
| 314 | - std::set<int> ignore_filename = {1, 2}; | |
| 766 | + std::set<int> ignore_filename = {1, 2, 3}; | |
| 315 | 767 | |
| 316 | 768 | QPDF pdf; |
| 317 | 769 | std::shared_ptr<char> file_buf; |
| ... | ... | @@ -325,7 +777,7 @@ runtest(int n, char const* filename1, char const* arg2) |
| 325 | 777 | } |
| 326 | 778 | |
| 327 | 779 | std::map<int, void (*)(QPDF&, char const*)> test_functions = { |
| 328 | - {0, test_0}, {1, test_1}, {2, test_2}}; | |
| 780 | + {0, test_0}, {1, test_1}, {2, test_2}, {3, test_3}}; | |
| 329 | 781 | |
| 330 | 782 | auto fn = test_functions.find(n); |
| 331 | 783 | if (fn == test_functions.end()) { | ... | ... |
libtests/qtest/objects.test
| ... | ... | @@ -11,7 +11,7 @@ require TestDriver; |
| 11 | 11 | |
| 12 | 12 | my $td = new TestDriver('objects'); |
| 13 | 13 | |
| 14 | -my $n_tests = 3; | |
| 14 | +my $n_tests = 4; | |
| 15 | 15 | |
| 16 | 16 | $td->runtest("integer type checks", |
| 17 | 17 | {$td->COMMAND => "objects 0 minimal.pdf"}, |
| ... | ... | @@ -28,4 +28,9 @@ $td->runtest("global limits", |
| 28 | 28 | {$td->FILE => "test2.out", $td->EXIT_STATUS => 0}, |
| 29 | 29 | $td->NORMALIZE_NEWLINES); |
| 30 | 30 | |
| 31 | +$td->runtest("equivalent_to structural comparisons", | |
| 32 | + {$td->COMMAND => "objects 3 -"}, | |
| 33 | + {$td->STRING => "test 3 done\n", $td->EXIT_STATUS => 0}, | |
| 34 | + $td->NORMALIZE_NEWLINES); | |
| 35 | + | |
| 31 | 36 | $td->report($n_tests); | ... | ... |