Commit 9ac16d036cb92388fd132dd009ba1599671cfc16
1 parent
eef51d0c
libqpdf: implement BaseHandle::equivalent_to for structural PDF object comparison
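Implements structural equivalence following ISO 32000-2 Annex J, using a recursion depth limit (default 10) in place of cycle detection: a comparison that would need to recurse past the limit returns false. Handles numeric promotion (integer vs. real), sparse arrays, streams (raw data is compared without decoding, a deviation from Annex J), indirect references, and null/uninitialized normalization. Includes tests in libtests/objects.cc.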
Showing
4 changed files
with
605 additions
and
3 deletions
include/qpdf/ObjectHandle.hh
| @@ -68,6 +68,9 @@ namespace qpdf | @@ -68,6 +68,9 @@ namespace qpdf | ||
| 68 | return obj == other.obj; | 68 | return obj == other.obj; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | + // Structural equivalence check per ISO 32000-2 Annex J rules. Recursion is limited to `depth` levels; false is returned if the limit is exceeded. | |
| 72 | + bool equivalent_to(BaseHandle const& other, int depth = 10) const; | ||
| 73 | + | ||
| 71 | // For arrays, return the number of items in the array. | 74 | // For arrays, return the number of items in the array. |
| 72 | // For null-like objects, return 0. | 75 | // For null-like objects, return 0. |
| 73 | // For all other objects, return 1. | 76 | // For all other objects, return 1. |
libqpdf/QPDFObjectHandle.cc
| @@ -327,6 +327,148 @@ BaseHandle::copy(bool shallow) const | @@ -327,6 +327,148 @@ BaseHandle::copy(bool shallow) const | ||
| 327 | return {}; // unreachable | 327 | return {}; // unreachable |
| 328 | } | 328 | } |
| 329 | 329 | ||
| 330 | +// Determines whether this object and `other` are structurally equivalent, | |
| 331 | +// descending at most `depth` levels. The default depth is 10. | |
| 332 | +// | |
| 333 | +// Nomenclature note: ISO 32000-2 Annex J uses the term "equal" for this | |
| 334 | +// strict recursive comparison (J.4.1). We use "equivalent_to" here to | |
| 335 | +// implement Annex J's "equality", distinguishing it from C++ shallow | |
| 336 | +// pointer equality. | |
| 337 | +// | |
| 338 | +// Implementation notes: | |
| 339 | +// | |
| 340 | +// (1) We deviate from Annex J by comparing raw streams only, without | |
| 341 | +// decoding. | |
| 342 | +// | |
| 343 | +// (2) Loop detection is expensive and is avoided; `depth` bounds the recursion instead. A comparison that would have to descend more than `depth` levels (following an indirect reference also uses up a level) returns false, whether because of a cycle or simply deep nesting. | |
| 344 | +// Handles that share the same underlying object compare as equivalent immediately, without descending. | |
| 345 | + | |
| 346 | +bool | |
| 347 | +BaseHandle::equivalent_to(BaseHandle const& other, int depth) const | |
| 348 | +{ | |
| 349 | + // A. Fast paths: same underlying object, exhausted depth budget, size mismatch | |
| 350 | + if (obj == other.obj) { | |
| 351 | + return true; | |
| 352 | + } | |
| 353 | + if (depth < 0) { | |
| 354 | + return false; | |
| 355 | + } | |
| 356 | + size_t size1 = size(); | |
| 357 | + size_t size2 = other.size(); | |
| 358 | + if (size1 != size2) { | |
| 359 | + return false; | |
| 360 | + } | |
| 361 | + // B. Structural comparison: follow references first, then compare by type | |
| 362 | + qpdf_object_type_e t1 = resolved_type_code(); | |
| 363 | + qpdf_object_type_e t2 = other.resolved_type_code(); | |
| 364 | + if (t1 == ::ot_reference) { | |
| 365 | + return referenced_object().equivalent_to(other, depth - 1); | |
| 366 | + } | |
| 367 | + if (t2 == ::ot_reference) { | |
| 368 | + return equivalent_to(other.referenced_object(), depth - 1); | |
| 369 | + } | |
| 370 | + if (t1 != t2) { | |
| 371 | + if ((t1 == ::ot_integer || t1 == ::ot_real) && (t2 == ::ot_integer || t2 == ::ot_real)) { | |
| 372 | + // Mixed integer/real: compare by numeric value (numeric equivalence per Annex J) | |
| 373 | + return oh().getNumericValue() == other.oh().getNumericValue(); | |
| 374 | + } | |
| 375 | + // Any other type mismatch is unequal, except uninitialized vs. null, which are normalized | |
| 376 | + return (t1 == ::ot_uninitialized && t2 == ::ot_null) || | |
| 377 | + (t2 == ::ot_uninitialized && t1 == ::ot_null); | |
| 378 | + } | |
| 379 | + switch (t1) { | |
| 380 | + case ::ot_uninitialized: | |
| 381 | + case ::ot_null: | |
| 382 | + return true; | |
| 383 | + case ::ot_boolean: | |
| 384 | + return std::get<QPDF_Bool>(obj->value).val == std::get<QPDF_Bool>(other.obj->value).val; | |
| 385 | + case ::ot_string: | |
| 386 | + return std::get<QPDF_String>(obj->value).val == std::get<QPDF_String>(other.obj->value).val; | |
| 387 | + case ::ot_name: | |
| 388 | + return std::get<QPDF_Name>(obj->value).name == std::get<QPDF_Name>(other.obj->value).name; | |
| 389 | + case ::ot_array: | |
| 390 | + { | |
| 391 | + auto const& a1 = std::get<QPDF_Array>(obj->value); | |
| 392 | + auto const& a2 = std::get<QPDF_Array>(other.obj->value); | |
| 393 | + // sizes size1, size2 were calculated above and checked to be equal | |
| 394 | + if (!a1.sp && !a2.sp) { | |
| 395 | + for (size_t i = 0; i < size1; ++i) { | |
| 396 | + if (!a1.elements[i].equivalent_to(a2.elements[i], depth - 1)) { | |
| 397 | + return false; | |
| 398 | + } | |
| 399 | + } | |
| 400 | + return true; | |
| 401 | + } | |
| 402 | + // at least one array is sparse; an index missing from a sparse array reads as null | |
| 403 | + auto get_item = [](QPDF_Array const& arr, size_t idx) -> BaseHandle const& { | |
| 404 | + if (arr.sp) { | |
| 405 | + auto it = arr.sp->elements.find(idx); | |
| 406 | + if (it == arr.sp->elements.end()) { | |
| 407 | + static QPDFObjectHandle null_oh = Null(); | |
| 408 | + return null_oh; | |
| 409 | + } | |
| 410 | + return it->second; | |
| 411 | + } | |
| 412 | + return arr.elements[idx]; | |
| 413 | + }; | |
| 414 | + for (size_t i = 0; i < size1; ++i) { | |
| 415 | + if (!get_item(a1, i).equivalent_to(get_item(a2, i), depth - 1)) { | |
| 416 | + return false; | |
| 417 | + } | |
| 418 | + } | |
| 419 | + return true; | |
| 420 | + } | |
| 421 | + case ::ot_dictionary: | |
| 422 | + { | |
| 423 | + auto const& map1 = std::get<QPDF_Dictionary>(obj->value).items; | |
| 424 | + auto const& map2 = std::get<QPDF_Dictionary>(other.obj->value).items; | |
| 425 | + auto it2 = map2.begin(); | |
| 426 | + auto end2 = map2.end(); | |
| 427 | + for (auto const& [key1, value1]: map1) { /* walk both containers in step, skipping null-valued entries; NOTE(review): relies on both iterating in the same key order (presumably a sorted map) - confirm */ | |
| 428 | + if (value1.null()) { | |
| 429 | + continue; | |
| 430 | + } | |
| 431 | + while (it2 != end2 && it2->second.null()) { | |
| 432 | + ++it2; | |
| 433 | + } | |
| 434 | + if (it2 == end2 || key1 != it2->first || | |
| 435 | + !value1.equivalent_to(it2->second, depth - 1)) { | |
| 436 | + return false; | |
| 437 | + } | |
| 438 | + ++it2; | |
| 439 | + } | |
| 440 | + while (it2 != end2 && it2->second.null()) { | |
| 441 | + ++it2; | |
| 442 | + } | |
| 443 | + return it2 == end2; | |
| 444 | + } | |
| 445 | + case ::ot_stream: | |
| 446 | + { | |
| 447 | + auto const& s1 = std::get<QPDF_Stream>(obj->value); | |
| 448 | + auto const& s2 = std::get<QPDF_Stream>(other.obj->value); | |
| 449 | + if (!s1.m->stream_dict.equivalent_to(s2.m->stream_dict, depth - 1)) { | |
| 450 | + return false; | |
| 451 | + } | |
| 452 | + return s1.m->stream_data->view() == s2.m->stream_data->view(); | |
| 453 | + } | |
| 454 | + case ::ot_operator: | |
| 455 | + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_operator"); | |
| 456 | + case ::ot_inlineimage: | |
| 457 | + throw std::logic_error("Internal error in BaseHandle::equivalent_to: found ot_inlineimage"); | |
| 458 | + case ::ot_integer: | |
| 459 | + return std::get<QPDF_Integer>(obj->value).val == | |
| 460 | + std::get<QPDF_Integer>(other.obj->value).val; | |
| 461 | + case ::ot_real: | |
| 462 | + return oh().getNumericValue() == other.oh().getNumericValue(); | |
| 463 | + case ::ot_unresolved: // cannot determine equivalence so return false | |
| 464 | + case ::ot_reference: // handled above | |
| 465 | + case ::ot_destroyed: // should not happen | |
| 466 | + case ::ot_reserved: // should not happen | |
| 467 | + return false; | |
| 468 | + } | |
| 469 | + return false; // unreachable | |
| 470 | +} | |
| 471 | + | ||
| 330 | std::string | 472 | std::string |
| 331 | BaseHandle::unparse() const | 473 | BaseHandle::unparse() const |
| 332 | { | 474 | { |
libtests/objects.cc
| @@ -304,6 +304,458 @@ test_2(QPDF& pdf, char const* arg2) | @@ -304,6 +304,458 @@ test_2(QPDF& pdf, char const* arg2) | ||
| 304 | assert(!default_limits()); | 304 | assert(!default_limits()); |
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | +// Tests for equivalent_to: scalars, containers, streams, indirect objects, sparse arrays and the depth limit. | |
| 308 | +static void | |
| 309 | +test_3(QPDF& pdf, char const* arg2) | |
| 310 | +{ | |
| 311 | + // Scenario 1: Basic Equality: Name, Scalars | |
| 312 | + { | |
| 313 | + auto name = "/Test"_qpdf; | |
| 314 | + auto integer = Integer(42); | |
| 315 | + assert(name.equivalent_to("/Test"_qpdf)); | |
| 316 | + assert(!name.equivalent_to(integer)); | |
| 317 | + } | |
| 318 | + // Scenario 2: Numeric Types (Int vs Real) | |
| 319 | + { | |
| 320 | + auto integer = Integer(1); | |
| 321 | + auto real = QPDFObjectHandle::newReal("1.0"); | |
| 322 | + assert(real.equivalent_to(integer)); | |
| 323 | + assert(integer.equivalent_to(real)); | |
| 324 | + } | |
| 325 | + // Scenario 3: Array Order Sensitivity | |
| 326 | + { | |
| 327 | + auto a1 = "[1 2]"_qpdf; | |
| 328 | + auto a2 = "[2 1]"_qpdf; | |
| 329 | + assert(!a1.equivalent_to(a2)); | |
| 330 | + assert(!a2.equivalent_to(a1)); | |
| 331 | + } | |
| 332 | + // Scenario 4: Dictionary Key Order Insensitivity | |
| 333 | + { | |
| 334 | + auto d1 = "<< /A 1 >>"_qpdf; | |
| 335 | + d1.replaceKey("/B", Integer(2)); | |
| 336 | + auto d2 = "<< /B 2 >>"_qpdf; | |
| 337 | + d2.replaceKey("/A", Integer(1)); | |
| 338 | + assert(d1.equivalent_to(d2)); | |
| 339 | + assert(d2.equivalent_to(d1)); | |
| 340 | + } | |
| 341 | + // Scenario 5: Direct vs Indirect Equality | |
| 342 | + { | |
| 343 | + auto obj = Integer(100); | |
| 344 | + auto indirect = pdf.makeIndirectObject(Integer(100)); | |
| 345 | + assert(obj.equivalent_to(indirect)); | |
| 346 | + assert(indirect.equivalent_to(obj)); | |
| 347 | + } | |
| 348 | + // Scenario 6: Diamond Graph Isomorphism | |
| 349 | + { | |
| 350 | + auto d = pdf.makeIndirectObject(Integer(99)); | |
| 351 | + auto b = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d})); | |
| 352 | + auto c = pdf.makeIndirectObject(QPDFObjectHandle::newArray({d})); | |
| 353 | + assert(Array({b, c}).equivalent_to(Array({b, c}))); | |
| 354 | + } | |
| 355 | + // Scenario 7: Circular References (Self-Loop): Compares as False | |
| 356 | + { | |
| 357 | + auto a1 = pdf.makeIndirectObject("[]"_qpdf); | |
| 358 | + a1.appendItem(a1); | |
| 359 | + auto a2 = pdf.makeIndirectObject("[]"_qpdf); | |
| 360 | + a2.appendItem(a2); | |
| 361 | + // There is no cycle detection: comparing two distinct self-referential arrays exhausts the depth limit and yields false | |
| 362 | + assert(!a1.equivalent_to(a2)); | |
| 363 | + } | |
| 364 | + // Scenario 8: Cross-Document Comparison (Objects from Different QPDF Instances) | |
| 365 | + { | |
| 366 | + QPDF pdf2; | |
| 367 | + pdf2.emptyPDF(); | |
| 368 | + auto a1 = pdf.makeIndirectObject("[1]"_qpdf); | |
| 369 | + auto a2 = pdf2.makeIndirectObject("[1]"_qpdf); | |
| 370 | + auto a3 = pdf2.makeIndirectObject("[2]"_qpdf); | |
| 371 | + assert(a1.equivalent_to(a2)); // Same content, different documents | |
| 372 | + assert(a2.equivalent_to(a1)); // Same content, different documents | |
| 373 | + assert(!a1.equivalent_to(a3)); // Different content, different documents | |
| 374 | + assert(!a3.equivalent_to(a1)); // Different content, different documents | |
| 375 | + } | |
| 376 | + // Scenario 9: Stream Content: Match | |
| 377 | + { | |
| 378 | + assert(pdf.newStream("Stream data").equivalent_to(pdf.newStream("Stream data"))); | |
| 379 | + } | |
| 380 | + // Scenario 10: Stream Content: Mismatch | |
| 381 | + { | |
| 382 | + auto s1 = pdf.newStream("Data A"); | |
| 383 | + auto s2 = pdf.newStream("Data B"); | |
| 384 | + assert(!s1.equivalent_to(s2)); | |
| 385 | + assert(!s2.equivalent_to(s1)); | |
| 386 | + } | |
| 387 | + // Scenario 11: Stream Dictionary Differences | |
| 388 | + { | |
| 389 | + auto s1 = pdf.newStream("same"); | |
| 390 | + auto s2 = pdf.newStream("same"); | |
| 391 | + s2.getDict().replaceKey("/Extra", QPDFObjectHandle::newName("/Value")); | |
| 392 | + assert(!s1.equivalent_to(s2)); | |
| 393 | + assert(!s2.equivalent_to(s1)); | |
| 394 | + } | |
| 395 | + // Scenario 12: J.3.6: Absent Keys vs Null | |
| 396 | + { | |
| 397 | + auto d0 = Dictionary::empty(); | |
| 398 | + auto d1 = "<</Present null>>"_qpdf; | |
| 399 | + auto d2 = "<</Present << >> >>"_qpdf; | |
| 400 | + auto d3 = "<</Present [] >>"_qpdf; | |
| 401 | + assert(d0.equivalent_to(d1)); | |
| 402 | + assert(d1.equivalent_to(d0)); | |
| 403 | + assert(!d0.equivalent_to(d2)); | |
| 404 | + assert(!d2.equivalent_to(d0)); | |
| 405 | + assert(!d0.equivalent_to(d3)); | |
| 406 | + assert(!d3.equivalent_to(d0)); | |
| 407 | + assert(!d1.equivalent_to(d2)); | |
| 408 | + assert(!d2.equivalent_to(d1)); | |
| 409 | + assert(!d1.equivalent_to(d3)); | |
| 410 | + assert(!d3.equivalent_to(d1)); | |
| 411 | + } | |
| 412 | + // Scenario 13: String Syntax: Hex vs Literal (Annex J) | |
| 413 | + { | |
| 414 | + auto literal = "(A)"_qpdf; | |
| 415 | + auto hex = "<41>"_qpdf; | |
| 416 | + assert(literal.equivalent_to(hex)); | |
| 417 | + assert(hex.equivalent_to(literal)); | |
| 418 | + } | |
| 419 | + // Scenario 14: Name Syntax (Parser) vs Distinct Names (Model) | |
| 420 | + { | |
| 421 | + auto name1 = "/Name"_qpdf; | |
| 422 | + auto name2 = "/Na#6d#65"_qpdf; | |
| 423 | + assert(name1.equivalent_to(name2)); | |
| 424 | + assert(name2.equivalent_to(name1)); | |
| 425 | + } | |
| 426 | + // Scenario 15: Annex J Oddities: Keys, Octals, and Zeros | |
| 427 | + { | |
| 428 | + auto key1 = "<< /Key 1 >>"_qpdf; | |
| 429 | + auto key2 = "<< /K#65#79 1 >>"_qpdf; | |
| 430 | + auto lit_A = "(A)"_qpdf; | |
| 431 | + auto oct_A = "(\\101)"_qpdf; | |
| 432 | + auto zero_i = Integer(0); | |
| 433 | + auto zero_r = QPDFObjectHandle::newReal("-0.0"); | |
| 434 | + auto r1 = QPDFObjectHandle::newReal("12.345"); | |
| 435 | + auto r2 = QPDFObjectHandle::newReal("12.345000000000000"); | |
| 436 | + auto i12 = Integer(12); | |
| 437 | + // note: we rely on double rounding here | |
| 438 | + auto r_lo = QPDFObjectHandle::newReal("11.99999999999999999999999999999999"); | |
| 439 | + auto i12b = Integer(12); | |
| 440 | + auto r_hi = QPDFObjectHandle::newReal("12.00000000000000000000000000000000"); | |
| 441 | + auto i1 = Integer(1); | |
| 442 | + auto r_1 = QPDFObjectHandle::newReal("1."); | |
| 443 | + assert(key1.equivalent_to(key2)); | |
| 444 | + assert(key2.equivalent_to(key1)); | |
| 445 | + assert(lit_A.equivalent_to(oct_A)); | |
| 446 | + assert(oct_A.equivalent_to(lit_A)); | |
| 447 | + assert(zero_i.equivalent_to(zero_r)); | |
| 448 | + assert(zero_r.equivalent_to(zero_i)); | |
| 449 | + assert(r1.equivalent_to(r2)); | |
| 450 | + assert(r2.equivalent_to(r1)); | |
| 451 | + assert(i12.equivalent_to(r_lo)); | |
| 452 | + assert(r_lo.equivalent_to(i12)); | |
| 453 | + assert(i12b.equivalent_to(r_hi)); | |
| 454 | + assert(r_hi.equivalent_to(i12b)); | |
| 455 | + assert(i1.equivalent_to(r_1)); | |
| 456 | + assert(r_1.equivalent_to(i1)); | |
| 457 | + } | |
| 458 | + // Scenario 16: Nested Containers | |
| 459 | + { | |
| 460 | + assert(Dictionary({{"/K", "[5]"_qpdf}}).equivalent_to(Dictionary({{"/K", "[5]"_qpdf}}))); | |
| 461 | + } | |
| 462 | + // Scenario 17: Boolean and Null mismatch | |
| 463 | + { | |
| 464 | + auto b_true = QPDFObjectHandle::newBool(true); | |
| 465 | + auto b_false = QPDFObjectHandle::newBool(false); | |
| 466 | + auto null = QPDFObjectHandle::newNull(); | |
| 467 | + auto null2 = QPDFObjectHandle::newNull(); | |
| 468 | + auto one = Integer(1); | |
| 469 | + auto zero = Integer(0); | |
| 470 | + assert(null.equivalent_to(null)); | |
| 471 | + assert(null.equivalent_to(null2)); | |
| 472 | + assert(!b_true.equivalent_to(b_false)); | |
| 473 | + assert(!b_true.equivalent_to(null)); | |
| 474 | + assert(!b_true.equivalent_to(one)); | |
| 475 | + assert(!b_true.equivalent_to(zero)); | |
| 476 | + assert(!b_false.equivalent_to(null)); | |
| 477 | + assert(!b_false.equivalent_to(one)); | |
| 478 | + assert(!b_false.equivalent_to(zero)); | |
| 479 | + assert(!null.equivalent_to(one)); | |
| 480 | + assert(!null.equivalent_to(zero)); | |
| 481 | + assert(!one.equivalent_to(zero)); | |
| 482 | + } | |
| 483 | + // Scenario 18: Stream Semantics (J.3.7) - Strictness Check | |
| 484 | + { | |
| 485 | + auto s1 = pdf.newStream("test stream"); | |
| 486 | + auto s2 = pdf.newStream("DIFFERENT_RAW_BYTES"); | |
| 487 | + auto s3 = pdf.newStream("test stream"); | |
| 488 | + s2.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode")); | |
| 489 | + s3.getDict().replaceKey("/Filter", QPDFObjectHandle::newName("/FlateDecode")); | |
| 490 | + assert(!s1.equivalent_to(s2)); | |
| 491 | + assert(!s2.equivalent_to(s1)); | |
| 492 | + assert(!s1.equivalent_to(s3)); | |
| 493 | + assert(!s3.equivalent_to(s1)); | |
| 494 | + assert(!s2.equivalent_to(s3)); | |
| 495 | + assert(!s3.equivalent_to(s2)); | |
| 496 | + } | |
| 497 | + // Scenario 19: Dictionary Value Type Mismatch | |
| 498 | + { | |
| 499 | + auto d1 = "<< /Key 1 >>"_qpdf; | |
| 500 | + auto d2 = "<< /Key (1) >>"_qpdf; | |
| 501 | + assert(!d1.equivalent_to(d2)); | |
| 502 | + assert(!d2.equivalent_to(d1)); | |
| 503 | + } | |
| 504 | + // Scenario 20: Mixed Direct vs Indirect Nesting | |
| 505 | + { | |
| 506 | + assert( | |
| 507 | + QPDFObjectHandle::newArray({Integer(7)}) | |
| 508 | + .equivalent_to(QPDFObjectHandle::newArray({pdf.makeIndirectObject(Integer(7))}))); | |
| 509 | + } | |
| 510 | + // Scenario 21: Dictionary Subset vs Superset | |
| 511 | + { | |
| 512 | + auto d1 = "<< /A 1 /B 2 >>"_qpdf; | |
| 513 | + auto d2 = "<< /A 1 >>"_qpdf; | |
| 514 | + assert(!d1.equivalent_to(d2)); | |
| 515 | + assert(!d2.equivalent_to(d1)); | |
| 516 | + } | |
| 517 | + // Scenario 22: Stream Semantic Decode Equivalence (not implemented: raw data and dictionaries are compared, so these differ) | |
| 518 | + { | |
| 519 | + auto s1 = pdf.newStream("Hello World"); | |
| 520 | + auto s2 = pdf.newStream("HELLO WORLD RAW"); | |
| 521 | + s2.getDict().replaceKey("/Filter", "/FlateDecode"_qpdf); | |
| 522 | + s2.getDict().replaceKey("/DecodeParms", Dictionary::empty()); | |
| 523 | + assert(!s1.equivalent_to(s2)); | |
| 524 | + assert(!s2.equivalent_to(s1)); | |
| 525 | + } | |
| 526 | + // Scenario 23: Indirect Object Identity Independence | |
| 527 | + { | |
| 528 | + auto i1 = pdf.makeIndirectObject(Integer(123)); | |
| 529 | + auto i2 = Integer(123); | |
| 530 | + assert(i1.equivalent_to(pdf.makeIndirectObject(Integer(123)))); | |
| 531 | + assert(i1.equivalent_to(i2)); | |
| 532 | + assert(i2.equivalent_to(i1)); | |
| 533 | + } | |
| 534 | + // Scenario 24: Deep Recursive Structure (Stack Safety) | |
| 535 | + { | |
| 536 | + QPDFObjectHandle a1 = "[]"_qpdf; | |
| 537 | + QPDFObjectHandle a2 = "[]"_qpdf; | |
| 538 | + QPDFObjectHandle cur1 = a1; | |
| 539 | + QPDFObjectHandle cur2 = a2; | |
| 540 | + for (int i = 0; i < 200; ++i) { | |
| 541 | + auto n1 = "[]"_qpdf; | |
| 542 | + auto n2 = "[]"_qpdf; | |
| 543 | + cur1.appendItem(n1); | |
| 544 | + cur2.appendItem(n2); | |
| 545 | + cur1 = n1; | |
| 546 | + cur2 = n2; | |
| 547 | + } | |
| 548 | + assert(!a1.equivalent_to(a2)); // Default depth = 10 -> fails | |
| 549 | + assert(a1.equivalent_to(a2, 500)); // Explicit depth -> passes | |
| 550 | + } | |
| 551 | + // Scenario 25: Wide Graph Fan-out | |
| 552 | + { | |
| 553 | + auto a1 = "[]"_qpdf; | |
| 554 | + auto a2 = "[]"_qpdf; | |
| 555 | + auto a3 = "[]"_qpdf; | |
| 556 | + for (int i = 0; i < 200; ++i) { | |
| 557 | + a1.appendItem(Integer(i)); | |
| 558 | + a2.appendItem(Integer(i)); | |
| 559 | + a3.appendItem(Integer(i)); | |
| 560 | + } | |
| 561 | + a3.appendItem(Integer(200)); | |
| 562 | + assert(a1.equivalent_to(a2)); | |
| 563 | + assert(!a1.equivalent_to(a3)); | |
| 564 | + assert(!a3.equivalent_to(a1)); | |
| 565 | + } | |
| 566 | + // Scenario 26: Two Self-Referential Arrays | |
| 567 | + { | |
| 568 | + auto a1 = pdf.makeIndirectObject("[]"_qpdf); | |
| 569 | + auto a2 = pdf.makeIndirectObject("[]"_qpdf); | |
| 570 | + a1.appendItem(a1); | |
| 571 | + a2.appendItem(a2); | |
| 572 | + assert(!a1.equivalent_to(a2)); | |
| 573 | + assert(!a1.equivalent_to(a2)); // Repeat the call: the result must be the same the second time | |
| 574 | + } | |
| 575 | + // Scenario 27: Nested Dictionary Reuse / Shared Indirect Objects | |
| 576 | + { | |
| 577 | + auto shared_array = pdf.makeIndirectObject("[42 99]"_qpdf); | |
| 578 | + auto dict1 = "<< /Unique1 /A >>"_qpdf; | |
| 579 | + dict1.replaceKey("/Shared", shared_array); | |
| 580 | + auto dict2 = "<< /Unique1 /A >>"_qpdf; | |
| 581 | + dict2.replaceKey("/Shared", shared_array); | |
| 582 | + auto dict3 = "<< /Unique1 /B >>"_qpdf; | |
| 583 | + dict3.replaceKey("/Shared", shared_array); | |
| 584 | + assert(dict1.equivalent_to(dict2)); | |
| 585 | + assert(!dict1.equivalent_to(dict3)); | |
| 586 | + assert(!dict3.equivalent_to(dict1)); | |
| 587 | + } | |
| 588 | + // Scenario 28: Shared Indirect Leaves Reached via Two Paths | |
| 589 | + { | |
| 590 | + auto leaf1 = pdf.makeIndirectObject("[1]"_qpdf); | |
| 591 | + auto leaf2 = pdf.makeIndirectObject("[2]"_qpdf); | |
| 592 | + auto mid1 = pdf.makeIndirectObject(Dictionary::empty()); | |
| 593 | + mid1.replaceKey("/Leaf1", leaf1); | |
| 594 | + mid1.replaceKey("/Leaf2", leaf2); | |
| 595 | + auto mid2 = pdf.makeIndirectObject(Dictionary::empty()); | |
| 596 | + mid2.replaceKey("/Leaf1", leaf1); | |
| 597 | + mid2.replaceKey("/Leaf2", leaf2); | |
| 598 | + assert( | |
| 599 | + QPDFObjectHandle::newArray({mid1, mid2}) | |
| 600 | + .equivalent_to(QPDFObjectHandle::newArray({mid1, mid2}))); | |
| 601 | + } | |
| 602 | + // Scenario 29: Direct vs Indirect Integer | |
| 603 | + { | |
| 604 | + assert(Integer(42).equivalent_to(pdf.makeIndirectObject(Integer(42)))); | |
| 605 | + } | |
| 606 | + // Scenario 30: Nested Diamond with Direct & Indirect Objects | |
| 607 | + { | |
| 608 | + assert( | |
| 609 | + QPDFObjectHandle::newArray( | |
| 610 | + {pdf.makeIndirectObject("[42]"_qpdf), pdf.makeIndirectObject("[42]"_qpdf)}) | |
| 611 | + .equivalent_to( | |
| 612 | + QPDFObjectHandle::newArray( | |
| 613 | + {pdf.makeIndirectObject("[42]"_qpdf), | |
| 614 | + pdf.makeIndirectObject("[42]"_qpdf)}))); | |
| 615 | + } | |
| 616 | + // Scenario 31: Image XObjects sharing an SMask | |
| 617 | + { | |
| 618 | + auto smask = pdf.newStream(); | |
| 619 | + smask.replaceStreamData( | |
| 620 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 621 | + auto img1 = pdf.makeIndirectObject(pdf.newStream()); | |
| 622 | + img1.replaceStreamData( | |
| 623 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 624 | + img1.getDict().replaceKey("/SMask", smask); | |
| 625 | + auto img2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 626 | + img2.replaceStreamData( | |
| 627 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 628 | + img2.getDict().replaceKey("/SMask", smask); | |
| 629 | + assert(img1.equivalent_to(img2)); | |
| 630 | + } | |
| 631 | + // Scenario 32: Image XObjects with two distinct but identical SMasks | |
| 632 | + { | |
| 633 | + auto smask1 = pdf.newStream(); | |
| 634 | + smask1.replaceStreamData( | |
| 635 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 636 | + auto smask2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 637 | + smask2.replaceStreamData( | |
| 638 | + "mask data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 639 | + auto img1 = pdf.newStream(); | |
| 640 | + img1.replaceStreamData( | |
| 641 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 642 | + img1.getDict().replaceKey("/SMask", smask1); | |
| 643 | + auto img2 = pdf.makeIndirectObject(pdf.newStream()); | |
| 644 | + img2.replaceStreamData( | |
| 645 | + "image1 data", QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); | |
| 646 | + img2.getDict().replaceKey("/SMask", smask2); | |
| 647 | + assert(img1.equivalent_to(img2)); | |
| 648 | + } | |
| 649 | + // Scenario 33: Dictionary Key Equivalence with Value Mismatch (Annex J) | |
| 650 | + { | |
| 651 | + assert(!"<< /Key 1 >>"_qpdf.equivalent_to("<< /K#65#79 2 >>"_qpdf)); | |
| 652 | + } | |
| 653 | + // Scenario 34: Uninitialized vs. Uninitialized (!obj) | |
| 654 | + { | |
| 655 | + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle())); | |
| 656 | + } | |
| 657 | + // Scenario 35: Uninitialized vs. PDF Null | |
| 658 | + { | |
| 659 | + assert(QPDFObjectHandle().equivalent_to(QPDFObjectHandle::newNull())); | |
| 660 | + } | |
| 661 | + // Scenario 36: Distinct Direct Null Objects | |
| 662 | + { | |
| 663 | + assert(QPDFObjectHandle::newNull().equivalent_to(QPDFObjectHandle::newNull())); | |
| 664 | + } | |
| 665 | + // Scenario 37: Distinct Indirect Nulls (Different IDs) | |
| 666 | + { | |
| 667 | + assert(pdf.newIndirectNull().equivalent_to(pdf.newIndirectNull())); | |
| 668 | + } | |
| 669 | + // Scenario 38: Broken References to Different Missing Objects | |
| 670 | + { | |
| 671 | + // Both missing objects resolve to null, so we expect equivalence | |
| 672 | + assert(pdf.getObject(999999, 0).equivalent_to(pdf.getObject(888888, 0))); | |
| 673 | + } | |
| 674 | + // Scenario 39: Uninitialized Handle vs PDF Null | |
| 675 | + { | |
| 676 | + auto h_valid_null = QPDFObjectHandle::newNull(); | |
| 677 | + QPDFObjectHandle h_uninit; | |
| 678 | + assert(h_uninit.equivalent_to(h_valid_null)); | |
| 679 | + assert(h_valid_null.equivalent_to(h_uninit)); | |
| 680 | + } | |
| 681 | + // Scenario 40: Recursion Depth Limit (The Stack Protector) | |
| 682 | + { | |
| 683 | + auto make_deep_array = [](int levels) { | |
| 684 | + QPDFObjectHandle root = Integer(1); | |
| 685 | + for (int i = 0; i < levels; ++i) { | |
| 686 | + QPDFObjectHandle arr = "[]"_qpdf; | |
| 687 | + arr.appendItem(root); | |
| 688 | + root = arr; | |
| 689 | + } | |
| 690 | + return root; | |
| 691 | + }; | |
| 692 | + auto h_pass_1 = make_deep_array(500); | |
| 693 | + auto h_pass_2 = make_deep_array(500); | |
| 694 | + assert(h_pass_1.equivalent_to(h_pass_1)); | |
| 695 | + assert(!h_pass_1.equivalent_to(h_pass_2)); | |
| 696 | + assert(h_pass_1.equivalent_to(h_pass_1, 499)); | |
| 697 | + assert(!h_pass_1.equivalent_to(h_pass_2, 499)); | |
| 698 | + assert(h_pass_1.equivalent_to(h_pass_1, 500)); | |
| 699 | + assert(h_pass_1.equivalent_to(h_pass_2, 500)); | |
| 700 | + assert(h_pass_1.equivalent_to(h_pass_1, 501)); | |
| 701 | + assert(h_pass_1.equivalent_to(h_pass_2, 501)); | |
| 702 | + auto h_fail_1 = make_deep_array(501); | |
| 703 | + auto h_fail_2 = make_deep_array(501); | |
| 704 | + assert(h_fail_1.equivalent_to(h_fail_1, 499)); | |
| 705 | + assert(!h_fail_1.equivalent_to(h_fail_2, 499)); | |
| 706 | + assert(h_fail_1.equivalent_to(h_fail_1, 500)); | |
| 707 | + assert(!h_fail_1.equivalent_to(h_fail_2, 500)); | |
| 708 | + assert(h_fail_1.equivalent_to(h_fail_1, 501)); | |
| 709 | + assert(h_fail_1.equivalent_to(h_fail_2, 501)); | |
| 710 | + } | |
| 711 | + // Scenario 41: Sparse Arrays (null_count > 100 triggers sparse representation) | |
| 712 | + { | |
| 713 | + auto dense1 = "[]"_qpdf; | |
| 714 | + auto null = "null"_qpdf; | |
| 715 | + // Build a parse string with 101 nulls to trigger the sparse path | |
| 716 | + std::string sparse_str = "["; | |
| 717 | + for (int i = 0; i < 101; ++i) { | |
| 718 | + sparse_str += "null "; | |
| 719 | + dense1.appendItem(null); | |
| 720 | + } | |
| 721 | + sparse_str += "]"; | |
| 722 | + auto sparse1 = QPDFObjectHandle::parse(sparse_str); | |
| 723 | + assert(sparse1.equivalent_to(QPDFObjectHandle::parse(sparse_str))); | |
| 724 | + assert(dense1.equivalent_to(sparse1)); | |
| 725 | + assert(sparse1.equivalent_to(dense1)); | |
| 726 | + // Mismatch: same length (101 items), but the last item is an integer instead of null | |
| 727 | + std::string sparse_diff = "["; | |
| 728 | + for (int i = 0; i < 100; ++i) { | |
| 729 | + sparse_diff += "null "; | |
| 730 | + } | |
| 731 | + sparse_diff += "42]"; | |
| 732 | + auto sparse3 = QPDFObjectHandle::parse(sparse_diff); | |
| 733 | + assert(!sparse1.equivalent_to(sparse3)); | |
| 734 | + assert(!sparse3.equivalent_to(sparse1)); | |
| 735 | + assert(!dense1.equivalent_to(sparse3)); | |
| 736 | + assert(!sparse3.equivalent_to(dense1)); | |
| 737 | + std::string sparse_with_value = "["; | |
| 738 | + for (int i = 0; i < 101; ++i) { | |
| 739 | + sparse_with_value += "null "; | |
| 740 | + } | |
| 741 | + sparse_with_value += "42 ]"; // one non-null element at index 101 (after 101 nulls) | |
| 742 | + assert( | |
| 743 | + QPDFObjectHandle::parse(sparse_with_value) | |
| 744 | + .equivalent_to(QPDFObjectHandle::parse(sparse_with_value))); | |
| 745 | + } | |
| 746 | + // Scenario 42: equivalent_to on ot_reference (post-replaceObject) | |
| 747 | + { | |
| 748 | + auto obj = pdf.makeIndirectObject(Integer(42)); | |
| 749 | + auto replacement = Integer(42); | |
| 750 | + // Hold a handle to replacement before it becomes ot_reference | |
| 751 | + auto stale = replacement; | |
| 752 | + pdf.replaceObject(obj.getObjGen(), replacement); | |
| 753 | + // stale's underlying QPDFObject is now ot_reference | |
| 754 | + assert(stale.raw_type_code() == ::ot_reference); | |
| 755 | + assert(!stale.equivalent_to(Integer(42))); | |
| 756 | + } | |
| 757 | +} | |
| 758 | + | ||
| 307 | void | 759 | void |
| 308 | runtest(int n, char const* filename1, char const* arg2) | 760 | runtest(int n, char const* filename1, char const* arg2) |
| 309 | { | 761 | { |
| @@ -311,7 +763,7 @@ runtest(int n, char const* filename1, char const* arg2) | @@ -311,7 +763,7 @@ runtest(int n, char const* filename1, char const* arg2) | ||
| 311 | // the test suite to see how the test is invoked to find the file | 763 | // the test suite to see how the test is invoked to find the file |
| 312 | // that the test is supposed to operate on. | 764 | // that the test is supposed to operate on. |
| 313 | 765 | ||
| 314 | - std::set<int> ignore_filename = {1, 2}; | 766 | + std::set<int> ignore_filename = {1, 2, 3}; |
| 315 | 767 | ||
| 316 | QPDF pdf; | 768 | QPDF pdf; |
| 317 | std::shared_ptr<char> file_buf; | 769 | std::shared_ptr<char> file_buf; |
| @@ -325,7 +777,7 @@ runtest(int n, char const* filename1, char const* arg2) | @@ -325,7 +777,7 @@ runtest(int n, char const* filename1, char const* arg2) | ||
| 325 | } | 777 | } |
| 326 | 778 | ||
| 327 | std::map<int, void (*)(QPDF&, char const*)> test_functions = { | 779 | std::map<int, void (*)(QPDF&, char const*)> test_functions = { |
| 328 | - {0, test_0}, {1, test_1}, {2, test_2}}; | 780 | + {0, test_0}, {1, test_1}, {2, test_2}, {3, test_3}}; |
| 329 | 781 | ||
| 330 | auto fn = test_functions.find(n); | 782 | auto fn = test_functions.find(n); |
| 331 | if (fn == test_functions.end()) { | 783 | if (fn == test_functions.end()) { |
libtests/qtest/objects.test
| @@ -11,7 +11,7 @@ require TestDriver; | @@ -11,7 +11,7 @@ require TestDriver; | ||
| 11 | 11 | ||
| 12 | my $td = new TestDriver('objects'); | 12 | my $td = new TestDriver('objects'); |
| 13 | 13 | ||
| 14 | -my $n_tests = 3; | 14 | +my $n_tests = 4; |
| 15 | 15 | ||
| 16 | $td->runtest("integer type checks", | 16 | $td->runtest("integer type checks", |
| 17 | {$td->COMMAND => "objects 0 minimal.pdf"}, | 17 | {$td->COMMAND => "objects 0 minimal.pdf"}, |
| @@ -28,4 +28,9 @@ $td->runtest("global limits", | @@ -28,4 +28,9 @@ $td->runtest("global limits", | ||
| 28 | {$td->FILE => "test2.out", $td->EXIT_STATUS => 0}, | 28 | {$td->FILE => "test2.out", $td->EXIT_STATUS => 0}, |
| 29 | $td->NORMALIZE_NEWLINES); | 29 | $td->NORMALIZE_NEWLINES); |
| 30 | 30 | ||
| 31 | +$td->runtest("equivalent_to structural comparisons", | ||
| 32 | + {$td->COMMAND => "objects 3 -"}, | ||
| 33 | + {$td->STRING => "test 3 done\n", $td->EXIT_STATUS => 0}, | ||
| 34 | + $td->NORMALIZE_NEWLINES); | ||
| 35 | + | ||
| 31 | $td->report($n_tests); | 36 | $td->report($n_tests); |