Commit 24f28f0768b655b130626091ac23cf67d9e39630

Authored by Jay Berkenbilt
1 parent c88eaae2

Split qpdf.cc's main into reasonably sized functions

main() had gotten absurdly long. Split it into reasonable chunks. This
refactoring is in preparation for handling splitting output into
single pages.
Showing 1 changed file with 979 additions and 881 deletions
qpdf/qpdf.cc
... ... @@ -37,6 +37,121 @@ struct PageSpec
37 37 char const* range;
38 38 };
39 39  
  // Options gathers, in one object, the settings that the command-line
  // parser fills in (parse_options writes them through its Options& argument).
  // The constructor supplies the default for every scalar member; the
  // std::string and std::vector members are not named in the initializer
  // list and are therefore default-constructed (empty).
  40 +struct Options
  41 +{
  42 + Options() :
  43 + password(0),
  44 + linearize(false),
  45 + decrypt(false),
  46 + copy_encryption(false),
  47 + encryption_file(0),
  48 + encryption_file_password(0),
  49 + encrypt(false),
  50 + keylen(0),
  51 + r2_print(true),
  52 + r2_modify(true),
  53 + r2_extract(true),
  54 + r2_annotate(true),
  55 + r3_accessibility(true),
  56 + r3_extract(true),
  57 + r3_print(qpdf_r3p_full),
  58 + r3_modify(qpdf_r3m_all),
  59 + force_V4(false),
  60 + force_R5(false),
  61 + cleartext_metadata(false),
  62 + use_aes(false),
  63 + stream_data_set(false),
  64 + stream_data_mode(qpdf_s_compress),
  65 + normalize_set(false),
  66 + normalize(false),
  67 + suppress_recovery(false),
  68 + object_stream_set(false),
  69 + object_stream_mode(qpdf_o_preserve),
  70 + ignore_xref_streams(false),
  71 + qdf_mode(false),
  72 + precheck_streams(false),
  73 + preserve_unreferenced_objects(false),
  74 + newline_before_endstream(false),
  75 + show_npages(false),
  76 + deterministic_id(false),
  77 + static_id(false),
  78 + static_aes_iv(false),
  79 + suppress_original_object_id(false),
  80 + show_encryption(false),
  81 + check_linearization(false),
  82 + show_linearization(false),
  83 + show_xref(false),
  84 + show_obj(0),
  85 + show_gen(0),
  86 + show_raw_stream_data(false),
  87 + show_filtered_stream_data(false),
  88 + show_pages(false),
  89 + show_page_images(false),
  90 + check(false),
  91 + require_outfile(true),
  92 + infilename(0),
  93 + outfilename(0)
  94 + {
  95 + }
  96 +
  // password: value of --password=pass; stays 0 when the option is absent.
  97 + char const* password;
  98 + bool linearize;
  // decrypt / copy_encryption / encrypt: the handlers for --decrypt,
  // --copy-encryption and --encrypt each set their own flag and clear the
  // other two, so at most one of the three is true after parsing.
  // encryption_file and encryption_file_password hold the parameters of
  // --copy-encryption=file and --encryption-file-password=password.
  99 + bool decrypt;
  100 + bool copy_encryption;
  101 + char const* encryption_file;
  102 + char const* encryption_file_password;
  103 + bool encrypt;
  // user_password through use_aes: the encryption settings handed to
  // parse_encrypt_options() when --encrypt is seen.
  104 + std::string user_password;
  105 + std::string owner_password;
  106 + int keylen;
  107 + bool r2_print;
  108 + bool r2_modify;
  109 + bool r2_extract;
  110 + bool r2_annotate;
  111 + bool r3_accessibility;
  112 + bool r3_extract;
  113 + qpdf_r3_print_e r3_print;
  114 + qpdf_r3_modify_e r3_modify;
  115 + bool force_V4;
  116 + bool force_R5;
  117 + bool cleartext_metadata;
  118 + bool use_aes;
  // The *_set flags record that the matching option appeared on the command
  // line at all, separately from the value it selected: --stream-data,
  // --normalize-content=[yn] (normalize is true when the parameter starts
  // with 'y') and --object-streams.
  119 + bool stream_data_set;
  120 + qpdf_stream_data_e stream_data_mode;
  121 + bool normalize_set;
  122 + bool normalize;
  123 + bool suppress_recovery;
  124 + bool object_stream_set;
  125 + qpdf_object_stream_e object_stream_mode;
  126 + bool ignore_xref_streams;
  127 + bool qdf_mode;
  128 + bool precheck_streams;
  129 + bool preserve_unreferenced_objects;
  130 + bool newline_before_endstream;
  131 + std::string min_version;
  132 + std::string force_version;
  133 + bool show_npages;
  134 + bool deterministic_id;
  135 + bool static_id;
  136 + bool static_aes_iv;
  137 + bool suppress_original_object_id;
  138 + bool show_encryption;
  139 + bool check_linearization;
  140 + bool show_linearization;
  141 + bool show_xref;
  // show_obj / show_gen: object and generation numbers taken from
  // --show-object=obj[,gen]; both default to 0.
  142 + int show_obj;
  143 + int show_gen;
  144 + bool show_raw_stream_data;
  145 + bool show_filtered_stream_data;
  146 + bool show_pages;
  147 + bool show_page_images;
  148 + bool check;
  // page_specs: result of parse_pages_options() for --pages.
  149 + std::vector<PageSpec> page_specs;
  // require_outfile: whether an output file name must be supplied; defaults
  // to true. infilename is set to "" (not 0) when --empty is given.
  150 + bool require_outfile;
  151 + char const* infilename;
  152 + char const* outfilename;
  153 +};
  154 +
40 155 struct QPDFPageData
41 156 {
42 157 QPDFPageData(QPDF* qpdf, char const* range);
... ... @@ -979,156 +1094,60 @@ static void read_args_from_file(char const* filename,
979 1094 }
980 1095 }
981 1096  
982   -int main(int argc, char* argv[])
  1097 +static void handle_help_verison(int argc, char* argv[])
983 1098 {
984   - whoami = QUtil::getWhoami(argv[0]);
985   - QUtil::setLineBuf(stdout);
986   -
987   - // For libtool's sake....
988   - if (strncmp(whoami, "lt-", 3) == 0)
989   - {
990   - whoami += 3;
991   - }
992   -
993 1099 if ((argc == 2) &&
994   - ((strcmp(argv[1], "--version") == 0) ||
995   - (strcmp(argv[1], "-version") == 0)))
996   - {
997   - // make_dist looks for the line of code here that actually
998   - // prints the version number, so read make_dist if you change
999   - // anything other than the version number. Don't worry about
1000   - // the numbers. That's just a guide to 80 columns so that the
1001   - // help message looks right on an 80-column display.
  1100 + ((strcmp(argv[1], "--version") == 0) ||
  1101 + (strcmp(argv[1], "-version") == 0)))
  1102 + {
  1103 + // make_dist looks for the line of code here that actually
  1104 + // prints the version number, so read make_dist if you change
  1105 + // anything other than the version number. Don't worry about
  1106 + // the numbers. That's just a guide to 80 columns so that the
  1107 + // help message looks right on an 80-column display.
1002 1108  
1003   - // 1 2 3 4 5 6 7 8
1004   - // 12345678901234567890123456789012345678901234567890123456789012345678901234567890
1005   - std::cout
1006   - << whoami << " version " << QPDF::QPDFVersion() << std::endl
1007   - << "Copyright (c) 2005-2015 Jay Berkenbilt"
1008   - << std::endl
1009   - << "This software may be distributed under the terms of version 2 of the"
1010   - << std::endl
1011   - << "Artistic License which may be found in the source distribution. It is"
1012   - << std::endl
1013   - << "provided \"as is\" without express or implied warranty."
1014   - << std::endl;
1015   - exit(0);
  1109 + // 1 2 3 4 5 6 7 8
  1110 + // 12345678901234567890123456789012345678901234567890123456789012345678901234567890
  1111 + std::cout
  1112 + << whoami << " version " << QPDF::QPDFVersion() << std::endl
  1113 + << "Copyright (c) 2005-2015 Jay Berkenbilt"
  1114 + << std::endl
  1115 + << "This software may be distributed under the terms of version 2 of the"
  1116 + << std::endl
  1117 + << "Artistic License which may be found in the source distribution. It is"
  1118 + << std::endl
  1119 + << "provided \"as is\" without express or implied warranty."
  1120 + << std::endl;
  1121 + exit(0);
1016 1122 }
1017 1123  
1018 1124 if ((argc == 2) &&
1019   - ((strcmp(argv[1], "--help") == 0) ||
1020   - (strcmp(argv[1], "-help") == 0)))
  1125 + ((strcmp(argv[1], "--help") == 0) ||
  1126 + (strcmp(argv[1], "-help") == 0)))
1021 1127 {
1022   - std::cout << help;
1023   - exit(0);
  1128 + std::cout << help;
  1129 + exit(0);
1024 1130 }
  1131 +}
1025 1132  
1026   - char const* password = 0;
1027   - bool linearize = false;
1028   - bool decrypt = false;
1029   -
1030   - bool copy_encryption = false;
1031   - char const* encryption_file = 0;
1032   - char const* encryption_file_password = 0;
1033   -
1034   - bool encrypt = false;
1035   - std::string user_password;
1036   - std::string owner_password;
1037   - int keylen = 0;
1038   - bool r2_print = true;
1039   - bool r2_modify = true;
1040   - bool r2_extract = true;
1041   - bool r2_annotate = true;
1042   - bool r3_accessibility = true;
1043   - bool r3_extract = true;
1044   - qpdf_r3_print_e r3_print = qpdf_r3p_full;
1045   - qpdf_r3_modify_e r3_modify = qpdf_r3m_all;
1046   - bool force_V4 = false;
1047   - bool force_R5 = false;
1048   - bool cleartext_metadata = false;
1049   - bool use_aes = false;
1050   -
1051   - bool stream_data_set = false;
1052   - qpdf_stream_data_e stream_data_mode = qpdf_s_compress;
1053   - bool normalize_set = false;
1054   - bool normalize = false;
1055   - bool suppress_recovery = false;
1056   - bool object_stream_set = false;
1057   - qpdf_object_stream_e object_stream_mode = qpdf_o_preserve;
1058   - bool ignore_xref_streams = false;
1059   - bool qdf_mode = false;
1060   - bool precheck_streams = false;
1061   - bool preserve_unreferenced_objects = false;
1062   - bool newline_before_endstream = false;
1063   - std::string min_version;
1064   - std::string force_version;
1065   -
1066   - bool show_npages = false;
1067   - bool deterministic_id = false;
1068   - bool static_id = false;
1069   - bool static_aes_iv = false;
1070   - bool suppress_original_object_id = false;
1071   - bool show_encryption = false;
1072   - bool check_linearization = false;
1073   - bool show_linearization = false;
1074   - bool show_xref = false;
1075   - int show_obj = 0;
1076   - int show_gen = 0;
1077   - bool show_raw_stream_data = false;
1078   - bool show_filtered_stream_data = false;
1079   - bool show_pages = false;
1080   - bool show_page_images = false;
1081   - bool check = false;
1082   -
1083   - std::vector<PageSpec> page_specs;
1084   -
1085   - bool require_outfile = true;
1086   - char const* infilename = 0;
1087   - char const* outfilename = 0;
1088   -
1089   - // Support reading arguments from files. Create a new argv. Ensure
1090   - // that argv itself as well as all its contents are automatically
1091   - // deleted by using PointerHolder objects to back the pointers in
1092   - // argv.
1093   - std::vector<PointerHolder<char> > new_argv;
1094   - new_argv.push_back(PointerHolder<char>(QUtil::copy_string(argv[0]), true));
  1133 +static void parse_options(int argc, char* argv[], Options& o)
  1134 +{
1095 1135 for (int i = 1; i < argc; ++i)
1096 1136 {
1097   - if ((strlen(argv[i]) > 1) && (argv[i][0] == '@'))
1098   - {
1099   - read_args_from_file(1+argv[i], new_argv);
1100   - }
1101   - else
  1137 + char const* arg = argv[i];
  1138 + if ((arg[0] == '-') && (strcmp(arg, "-") != 0))
1102 1139 {
1103   - new_argv.push_back(
1104   - PointerHolder<char>(QUtil::copy_string(argv[i]), true));
1105   - }
1106   - }
1107   - PointerHolder<char*> argv_ph(new char*[1+new_argv.size()], true);
1108   - argv = argv_ph.getPointer();
1109   - for (size_t i = 0; i < new_argv.size(); ++i)
1110   - {
1111   - argv[i] = new_argv.at(i).getPointer();
1112   - }
1113   - argc = static_cast<int>(new_argv.size());
1114   - argv[argc] = 0;
1115   -
1116   - for (int i = 1; i < argc; ++i)
1117   - {
1118   - char const* arg = argv[i];
1119   - if ((arg[0] == '-') && (strcmp(arg, "-") != 0))
1120   - {
1121   - ++arg;
1122   - if (arg[0] == '-')
1123   - {
1124   - // Be lax about -arg vs --arg
1125   - ++arg;
1126   - }
1127   - char* parameter = const_cast<char*>(strchr(arg, '='));
1128   - if (parameter)
1129   - {
1130   - *parameter++ = 0;
1131   - }
  1140 + ++arg;
  1141 + if (arg[0] == '-')
  1142 + {
  1143 + // Be lax about -arg vs --arg
  1144 + ++arg;
  1145 + }
  1146 + char* parameter = const_cast<char*>(strchr(arg, '='));
  1147 + if (parameter)
  1148 + {
  1149 + *parameter++ = 0;
  1150 + }
1132 1151  
1133 1152 // Arguments that start with space are undocumented and
1134 1153 // are for use by the test suite.
... ... @@ -1138,797 +1157,876 @@ int main(int argc, char* argv[])
1138 1157 exit(0);
1139 1158 }
1140 1159 else if (strcmp(arg, "password") == 0)
1141   - {
1142   - if (parameter == 0)
1143   - {
1144   - usage("--password must be given as --password=pass");
1145   - }
1146   - password = parameter;
1147   - }
  1160 + {
  1161 + if (parameter == 0)
  1162 + {
  1163 + usage("--password must be given as --password=pass");
  1164 + }
  1165 + o.password = parameter;
  1166 + }
1148 1167 else if (strcmp(arg, "empty") == 0)
1149 1168 {
1150   - infilename = "";
  1169 + o.infilename = "";
  1170 + }
  1171 + else if (strcmp(arg, "linearize") == 0)
  1172 + {
  1173 + o.linearize = true;
  1174 + }
  1175 + else if (strcmp(arg, "encrypt") == 0)
  1176 + {
  1177 + parse_encrypt_options(
  1178 + argc, argv, ++i,
  1179 + o.user_password, o.owner_password, o.keylen,
  1180 + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate,
  1181 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1182 + o.force_V4, o.cleartext_metadata, o.use_aes, o.force_R5);
  1183 + o.encrypt = true;
  1184 + o.decrypt = false;
  1185 + o.copy_encryption = false;
  1186 + }
  1187 + else if (strcmp(arg, "decrypt") == 0)
  1188 + {
  1189 + o.decrypt = true;
  1190 + o.encrypt = false;
  1191 + o.copy_encryption = false;
1151 1192 }
1152   - else if (strcmp(arg, "linearize") == 0)
1153   - {
1154   - linearize = true;
1155   - }
1156   - else if (strcmp(arg, "encrypt") == 0)
1157   - {
1158   - parse_encrypt_options(
1159   - argc, argv, ++i,
1160   - user_password, owner_password, keylen,
1161   - r2_print, r2_modify, r2_extract, r2_annotate,
1162   - r3_accessibility, r3_extract, r3_print, r3_modify,
1163   - force_V4, cleartext_metadata, use_aes, force_R5);
1164   - encrypt = true;
1165   - decrypt = false;
1166   - copy_encryption = false;
1167   - }
1168   - else if (strcmp(arg, "decrypt") == 0)
1169   - {
1170   - decrypt = true;
1171   - encrypt = false;
1172   - copy_encryption = false;
1173   - }
1174 1193 else if (strcmp(arg, "copy-encryption") == 0)
1175 1194 {
1176   - if (parameter == 0)
1177   - {
1178   - usage("--copy-encryption must be given as"
1179   - "--copy_encryption=file");
1180   - }
1181   - encryption_file = parameter;
1182   - copy_encryption = true;
1183   - encrypt = false;
1184   - decrypt = false;
  1195 + if (parameter == 0)
  1196 + {
  1197 + usage("--copy-encryption must be given as"
  1198 + "--copy_encryption=file");
  1199 + }
  1200 + o.encryption_file = parameter;
  1201 + o.copy_encryption = true;
  1202 + o.encrypt = false;
  1203 + o.decrypt = false;
1185 1204 }
1186 1205 else if (strcmp(arg, "encryption-file-password") == 0)
1187 1206 {
1188   - if (parameter == 0)
1189   - {
1190   - usage("--encryption-file-password must be given as"
1191   - "--encryption-file-password=password");
1192   - }
1193   - encryption_file_password = parameter;
  1207 + if (parameter == 0)
  1208 + {
  1209 + usage("--encryption-file-password must be given as"
  1210 + "--encryption-file-password=password");
  1211 + }
  1212 + o.encryption_file_password = parameter;
1194 1213 }
1195 1214 else if (strcmp(arg, "pages") == 0)
1196 1215 {
1197   - page_specs = parse_pages_options(argc, argv, ++i);
1198   - if (page_specs.empty())
  1216 + o.page_specs = parse_pages_options(argc, argv, ++i);
  1217 + if (o.page_specs.empty())
1199 1218 {
1200 1219 usage("--pages: no page specifications given");
1201 1220 }
1202 1221 }
1203   - else if (strcmp(arg, "stream-data") == 0)
1204   - {
1205   - if (parameter == 0)
1206   - {
1207   - usage("--stream-data must be given as"
1208   - "--stream-data=option");
1209   - }
1210   - stream_data_set = true;
1211   - if (strcmp(parameter, "compress") == 0)
1212   - {
1213   - stream_data_mode = qpdf_s_compress;
1214   - }
1215   - else if (strcmp(parameter, "preserve") == 0)
1216   - {
1217   - stream_data_mode = qpdf_s_preserve;
1218   - }
1219   - else if (strcmp(parameter, "uncompress") == 0)
1220   - {
1221   - stream_data_mode = qpdf_s_uncompress;
1222   - }
1223   - else
1224   - {
1225   - usage("invalid stream-data option");
1226   - }
1227   - }
1228   - else if (strcmp(arg, "normalize-content") == 0)
1229   - {
1230   - if ((parameter == 0) || (*parameter == '\0'))
1231   - {
1232   - usage("--normalize-content must be given as"
1233   - " --normalize-content=[yn]");
1234   - }
1235   - normalize_set = true;
1236   - normalize = (parameter[0] == 'y');
1237   - }
1238   - else if (strcmp(arg, "suppress-recovery") == 0)
1239   - {
1240   - suppress_recovery = true;
1241   - }
1242   - else if (strcmp(arg, "object-streams") == 0)
1243   - {
1244   - if (parameter == 0)
1245   - {
1246   - usage("--object-streams must be given as"
1247   - " --object-streams=option");
1248   - }
1249   - object_stream_set = true;
1250   - if (strcmp(parameter, "disable") == 0)
1251   - {
1252   - object_stream_mode = qpdf_o_disable;
1253   - }
1254   - else if (strcmp(parameter, "preserve") == 0)
1255   - {
1256   - object_stream_mode = qpdf_o_preserve;
1257   - }
1258   - else if (strcmp(parameter, "generate") == 0)
1259   - {
1260   - object_stream_mode = qpdf_o_generate;
1261   - }
1262   - else
1263   - {
1264   - usage("invalid object stream mode");
1265   - }
1266   - }
1267   - else if (strcmp(arg, "ignore-xref-streams") == 0)
1268   - {
1269   - ignore_xref_streams = true;
1270   - }
1271   - else if (strcmp(arg, "qdf") == 0)
1272   - {
1273   - qdf_mode = true;
1274   - }
1275   - else if (strcmp(arg, "precheck-streams") == 0)
1276   - {
1277   - precheck_streams = true;
1278   - }
1279   - else if (strcmp(arg, "preserve-unreferenced") == 0)
1280   - {
1281   - preserve_unreferenced_objects = true;
1282   - }
1283   - else if (strcmp(arg, "newline-before-endstream") == 0)
1284   - {
1285   - newline_before_endstream = true;
1286   - }
1287   - else if (strcmp(arg, "min-version") == 0)
1288   - {
1289   - if (parameter == 0)
1290   - {
1291   - usage("--min-version be given as"
1292   - "--min-version=version");
1293   - }
1294   - min_version = parameter;
1295   - }
1296   - else if (strcmp(arg, "force-version") == 0)
1297   - {
1298   - if (parameter == 0)
1299   - {
1300   - usage("--force-version be given as"
1301   - "--force-version=version");
1302   - }
1303   - force_version = parameter;
1304   - }
1305   - else if (strcmp(arg, "deterministic-id") == 0)
1306   - {
1307   - deterministic_id = true;
1308   - }
1309   - else if (strcmp(arg, "static-id") == 0)
1310   - {
1311   - static_id = true;
1312   - }
1313   - else if (strcmp(arg, "static-aes-iv") == 0)
1314   - {
1315   - static_aes_iv = true;
1316   - }
1317   - else if (strcmp(arg, "no-original-object-ids") == 0)
1318   - {
1319   - suppress_original_object_id = true;
1320   - }
1321   - else if (strcmp(arg, "show-encryption") == 0)
1322   - {
1323   - show_encryption = true;
1324   - require_outfile = false;
1325   - }
1326   - else if (strcmp(arg, "check-linearization") == 0)
1327   - {
1328   - check_linearization = true;
1329   - require_outfile = false;
1330   - }
1331   - else if (strcmp(arg, "show-linearization") == 0)
1332   - {
1333   - show_linearization = true;
1334   - require_outfile = false;
1335   - }
1336   - else if (strcmp(arg, "show-xref") == 0)
1337   - {
1338   - show_xref = true;
1339   - require_outfile = false;
1340   - }
1341   - else if (strcmp(arg, "show-object") == 0)
1342   - {
1343   - if (parameter == 0)
1344   - {
1345   - usage("--show-object must be given as"
1346   - " --show-object=obj[,gen]");
1347   - }
1348   - char* obj = parameter;
1349   - char* gen = obj;
1350   - if ((gen = strchr(obj, ',')) != 0)
1351   - {
1352   - *gen++ = 0;
1353   - show_gen = atoi(gen);
1354   - }
1355   - show_obj = atoi(obj);
1356   - require_outfile = false;
1357   - }
1358   - else if (strcmp(arg, "raw-stream-data") == 0)
1359   - {
1360   - show_raw_stream_data = true;
1361   - }
1362   - else if (strcmp(arg, "filtered-stream-data") == 0)
1363   - {
1364   - show_filtered_stream_data = true;
1365   - }
1366   - else if (strcmp(arg, "show-npages") == 0)
  1222 + else if (strcmp(arg, "stream-data") == 0)
1367 1223 {
1368   - show_npages = true;
1369   - require_outfile = false;
1370   - }
1371   - else if (strcmp(arg, "show-pages") == 0)
1372   - {
1373   - show_pages = true;
1374   - require_outfile = false;
1375   - }
1376   - else if (strcmp(arg, "with-images") == 0)
1377   - {
1378   - show_page_images = true;
1379   - }
1380   - else if (strcmp(arg, "check") == 0)
1381   - {
1382   - check = true;
1383   - require_outfile = false;
1384   - }
1385   - else
1386   - {
1387   - usage(std::string("unknown option --") + arg);
1388   - }
1389   - }
1390   - else if (infilename == 0)
1391   - {
1392   - infilename = arg;
1393   - }
1394   - else if (outfilename == 0)
1395   - {
1396   - outfilename = arg;
1397   - }
1398   - else
1399   - {
1400   - usage(std::string("unknown argument ") + arg);
1401   - }
1402   - }
1403   -
1404   - if (infilename == 0)
1405   - {
1406   - usage("an input file name is required");
1407   - }
1408   - else if (require_outfile && (outfilename == 0))
1409   - {
1410   - usage("an output file name is required; use - for standard output");
1411   - }
1412   - else if ((! require_outfile) && (outfilename != 0))
1413   - {
1414   - usage("no output file may be given for this option");
1415   - }
1416   -
1417   - if (QUtil::same_file(infilename, outfilename))
1418   - {
1419   - QTC::TC("qpdf", "qpdf same file error");
1420   - usage("input file and output file are the same; this would cause input file to be lost");
1421   - }
1422   -
1423   - try
1424   - {
1425   - QPDF pdf;
1426   - QPDF encryption_pdf;
1427   - if (ignore_xref_streams)
1428   - {
1429   - pdf.setIgnoreXRefStreams(true);
1430   - }
1431   - if (suppress_recovery)
1432   - {
1433   - pdf.setAttemptRecovery(false);
1434   - }
1435   - if (strcmp(infilename, "") == 0)
1436   - {
1437   - pdf.emptyPDF();
1438   - }
1439   - else
1440   - {
1441   - pdf.processFile(infilename, password);
1442   - }
1443   - if (outfilename == 0)
1444   - {
1445   - int exit_code = 0;
1446   - if (check)
1447   - {
1448   - // Code below may set okay to false but not to true.
1449   - // We assume okay until we prove otherwise but may
1450   - // continue to perform additional checks after finding
1451   - // errors.
1452   - bool okay = true;
1453   - std::cout << "checking " << infilename << std::endl;
1454   - try
1455   - {
1456   - int extension_level = pdf.getExtensionLevel();
1457   - std::cout << "PDF Version: " << pdf.getPDFVersion();
1458   - if (extension_level > 0)
1459   - {
1460   - std::cout << " extension level "
1461   - << pdf.getExtensionLevel();
1462   - }
1463   - std::cout << std::endl;
1464   - ::show_encryption(pdf);
1465   - if (pdf.isLinearized())
1466   - {
1467   - std::cout << "File is linearized\n";
1468   - if (! pdf.checkLinearization())
1469   - {
1470   - // any errors are reported by checkLinearization()
1471   - okay = false;
1472   - }
1473   - }
1474   - else
1475   - {
1476   - std::cout << "File is not linearized\n";
1477   - }
1478   -
1479   - // Write the file to nowhere, uncompressing
1480   - // streams. This causes full file traversal and
1481   - // decoding of all streams we can decode.
1482   - QPDFWriter w(pdf);
1483   - Pl_Discard discard;
1484   - w.setOutputPipeline(&discard);
1485   - w.setStreamDataMode(qpdf_s_uncompress);
1486   - w.write();
1487   -
1488   - // Parse all content streams
1489   - std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
1490   - DiscardContents discard_contents;
1491   - int pageno = 0;
1492   - for (std::vector<QPDFObjectHandle>::iterator iter =
1493   - pages.begin();
1494   - iter != pages.end(); ++iter)
1495   - {
1496   - ++pageno;
1497   - try
1498   - {
1499   - QPDFObjectHandle::parseContentStream(
1500   - (*iter).getKey("/Contents"),
1501   - &discard_contents);
1502   - }
1503   - catch (QPDFExc& e)
1504   - {
1505   - okay = false;
1506   - std::cout << "page " << pageno << ": "
1507   - << e.what() << std::endl;
1508   - }
1509   - }
1510   - }
1511   - catch (std::exception& e)
1512   - {
1513   - std::cout << e.what() << std::endl;
1514   - okay = false;
1515   - }
1516   - if (okay)
1517   - {
1518   - if (! pdf.getWarnings().empty())
1519   - {
1520   - exit_code = EXIT_WARNING;
1521   - }
1522   - else
1523   - {
1524   - std::cout << "No syntax or stream encoding errors"
1525   - << " found; the file may still contain"
1526   - << std::endl
1527   - << "errors that qpdf cannot detect"
1528   - << std::endl;
1529   - }
1530   - }
  1224 + if (parameter == 0)
  1225 + {
  1226 + usage("--stream-data must be given as"
  1227 + "--stream-data=option");
  1228 + }
  1229 + o.stream_data_set = true;
  1230 + if (strcmp(parameter, "compress") == 0)
  1231 + {
  1232 + o.stream_data_mode = qpdf_s_compress;
  1233 + }
  1234 + else if (strcmp(parameter, "preserve") == 0)
  1235 + {
  1236 + o.stream_data_mode = qpdf_s_preserve;
  1237 + }
  1238 + else if (strcmp(parameter, "uncompress") == 0)
  1239 + {
  1240 + o.stream_data_mode = qpdf_s_uncompress;
  1241 + }
1531 1242 else
1532 1243 {
1533   - exit_code = EXIT_ERROR;
  1244 + usage("invalid stream-data option");
1534 1245 }
1535   - }
1536   - if (show_npages)
1537   - {
1538   - QTC::TC("qpdf", "qpdf npages");
1539   - std::cout << pdf.getRoot().getKey("/Pages").
1540   - getKey("/Count").getIntValue() << std::endl;
1541 1246 }
1542   - if (show_encryption)
1543   - {
1544   - ::show_encryption(pdf);
1545   - }
1546   - if (check_linearization)
1547   - {
1548   - if (pdf.checkLinearization())
1549   - {
1550   - std::cout << infilename << ": no linearization errors"
1551   - << std::endl;
1552   - }
1553   - else
1554   - {
1555   - exit_code = EXIT_ERROR;
1556   - }
1557   - }
1558   - if (show_linearization)
1559   - {
1560   - if (pdf.isLinearized())
1561   - {
1562   - pdf.showLinearizationData();
1563   - }
1564   - else
1565   - {
1566   - std::cout << infilename << " is not linearized"
1567   - << std::endl;
1568   - }
1569   - }
1570   - if (show_xref)
1571   - {
1572   - pdf.showXRefTable();
1573   - }
1574   - if (show_obj > 0)
1575   - {
1576   - QPDFObjectHandle obj = pdf.getObjectByID(show_obj, show_gen);
1577   - if (obj.isStream())
1578   - {
1579   - if (show_raw_stream_data || show_filtered_stream_data)
1580   - {
1581   - bool filter = show_filtered_stream_data;
1582   - if (filter &&
1583   - (! obj.pipeStreamData(0, true, false, false)))
1584   - {
1585   - QTC::TC("qpdf", "qpdf unable to filter");
1586   - std::cerr << "Unable to filter stream data."
1587   - << std::endl;
1588   - exit_code = EXIT_ERROR;
1589   - }
1590   - else
1591   - {
1592   - QUtil::binary_stdout();
1593   - Pl_StdioFile out("stdout", stdout);
1594   - obj.pipeStreamData(&out, filter, normalize, false);
1595   - }
1596   - }
1597   - else
1598   - {
1599   - std::cout
1600   - << "Object is stream. Dictionary:" << std::endl
1601   - << obj.getDict().unparseResolved() << std::endl;
1602   - }
1603   - }
1604   - else
1605   - {
1606   - std::cout << obj.unparseResolved() << std::endl;
1607   - }
1608   - }
1609   - if (show_pages)
1610   - {
1611   - if (show_page_images)
  1247 + else if (strcmp(arg, "normalize-content") == 0)
  1248 + {
  1249 + if ((parameter == 0) || (*parameter == '\0'))
1612 1250 {
1613   - pdf.pushInheritedAttributesToPage();
  1251 + usage("--normalize-content must be given as"
  1252 + " --normalize-content=[yn]");
1614 1253 }
1615   - std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
1616   - int pageno = 0;
1617   - for (std::vector<QPDFObjectHandle>::iterator iter =
1618   - pages.begin();
1619   - iter != pages.end(); ++iter)
1620   - {
1621   - QPDFObjectHandle& page = *iter;
1622   - ++pageno;
1623   -
1624   - std::cout << "page " << pageno << ": "
1625   - << page.getObjectID() << " "
1626   - << page.getGeneration() << " R" << std::endl;
1627   - if (show_page_images)
1628   - {
1629   - std::map<std::string, QPDFObjectHandle> images =
1630   - page.getPageImages();
1631   - if (! images.empty())
1632   - {
1633   - std::cout << " images:" << std::endl;
1634   - for (std::map<std::string,
1635   - QPDFObjectHandle>::iterator
1636   - iter = images.begin();
1637   - iter != images.end(); ++iter)
1638   - {
1639   - std::string const& name = (*iter).first;
1640   - QPDFObjectHandle image = (*iter).second;
1641   - QPDFObjectHandle dict = image.getDict();
1642   - int width =
1643   - dict.getKey("/Width").getIntValue();
1644   - int height =
1645   - dict.getKey("/Height").getIntValue();
1646   - std::cout << " " << name << ": "
1647   - << image.unparse()
1648   - << ", " << width << " x " << height
1649   - << std::endl;
1650   - }
1651   - }
1652   - }
1653   -
1654   - std::cout << " content:" << std::endl;
1655   - std::vector<QPDFObjectHandle> content =
1656   - page.getPageContents();
1657   - for (std::vector<QPDFObjectHandle>::iterator iter =
1658   - content.begin();
1659   - iter != content.end(); ++iter)
1660   - {
1661   - std::cout << " " << (*iter).unparse() << std::endl;
1662   - }
1663   - }
1664   - }
1665   - if (exit_code)
  1254 + o.normalize_set = true;
  1255 + o.normalize = (parameter[0] == 'y');
  1256 + }
  1257 + else if (strcmp(arg, "suppress-recovery") == 0)
1666 1258 {
1667   - exit(exit_code);
  1259 + o.suppress_recovery = true;
1668 1260 }
1669   - }
1670   - else
1671   - {
1672   - std::vector<PointerHolder<QPDF> > page_heap;
1673   - if (! page_specs.empty())
  1261 + else if (strcmp(arg, "object-streams") == 0)
1674 1262 {
1675   - // Parse all page specifications and translate them
1676   - // into lists of actual pages.
1677   -
1678   - // Create a QPDF object for each file that we may take
1679   - // pages from.
1680   - std::map<std::string, QPDF*> page_spec_qpdfs;
1681   - page_spec_qpdfs[infilename] = &pdf;
1682   - std::vector<QPDFPageData> parsed_specs;
1683   - for (std::vector<PageSpec>::iterator iter = page_specs.begin();
1684   - iter != page_specs.end(); ++iter)
  1263 + if (parameter == 0)
1685 1264 {
1686   - PageSpec& page_spec = *iter;
1687   - if (page_spec_qpdfs.count(page_spec.filename) == 0)
1688   - {
1689   - // Open the PDF file and store the QPDF
1690   - // object. Throw a PointerHolder to the qpdf
1691   - // into a heap so that it survives through
1692   - // writing the output but gets cleaned up
1693   - // automatically at the end. Do not
1694   - // canonicalize the file name. Using two
1695   - // different paths to refer to the same file
1696   - // is a documented workaround for duplicating a
1697   - // page. If you are using this as an example of
1698   - // how to do this with the API, you can just
1699   - // create two different QPDF objects to the
1700   - // same underlying file with the same path to
1701   - // achieve the same effect.
1702   - PointerHolder<QPDF> qpdf_ph = new QPDF();
1703   - page_heap.push_back(qpdf_ph);
1704   - QPDF* qpdf = qpdf_ph.getPointer();
1705   - char const* password = page_spec.password;
1706   - if (encryption_file && (password == 0) &&
1707   - (page_spec.filename == encryption_file))
1708   - {
1709   - QTC::TC("qpdf", "qpdf pages encryption password");
1710   - password = encryption_file_password;
1711   - }
1712   - qpdf->processFile(
1713   - page_spec.filename.c_str(), password);
1714   - page_spec_qpdfs[page_spec.filename] = qpdf;
1715   - }
1716   -
1717   - // Read original pages from the PDF, and parse the
1718   - // page range associated with this occurrence of
1719   - // the file.
1720   - parsed_specs.push_back(
1721   - QPDFPageData(page_spec_qpdfs[page_spec.filename],
1722   - page_spec.range));
  1265 + usage("--object-streams must be given as"
  1266 + " --object-streams=option");
1723 1267 }
1724   -
1725   - // Clear all pages out of the primary QPDF's pages
1726   - // tree but leave the objects in place in the file so
1727   - // they can be re-added without changing their object
1728   - // numbers. This enables other things in the original
1729   - // file, such as outlines, to continue to work.
1730   - std::vector<QPDFObjectHandle> orig_pages = pdf.getAllPages();
1731   - for (std::vector<QPDFObjectHandle>::iterator iter =
1732   - orig_pages.begin();
1733   - iter != orig_pages.end(); ++iter)
  1268 + o.object_stream_set = true;
  1269 + if (strcmp(parameter, "disable") == 0)
1734 1270 {
1735   - pdf.removePage(*iter);
  1271 + o.object_stream_mode = qpdf_o_disable;
1736 1272 }
1737   -
1738   - // Add all the pages from all the files in the order
1739   - // specified. Keep track of any pages from the
1740   - // original file that we are selecting.
1741   - std::set<int> selected_from_orig;
1742   - for (std::vector<QPDFPageData>::iterator iter =
1743   - parsed_specs.begin();
1744   - iter != parsed_specs.end(); ++iter)
  1273 + else if (strcmp(parameter, "preserve") == 0)
1745 1274 {
1746   - QPDFPageData& page_data = *iter;
1747   - for (std::vector<int>::iterator pageno_iter =
1748   - page_data.selected_pages.begin();
1749   - pageno_iter != page_data.selected_pages.end();
1750   - ++pageno_iter)
1751   - {
1752   - // Pages are specified from 1 but numbered
1753   - // from 0 in the vector
1754   - int pageno = *pageno_iter - 1;
1755   - pdf.addPage(page_data.orig_pages.at(pageno), false);
1756   - if (page_data.qpdf == &pdf)
1757   - {
1758   - // This is a page from the original file.
1759   - // Keep track of the fact that we are
1760   - // using it.
1761   - selected_from_orig.insert(pageno);
1762   - }
1763   - }
  1275 + o.object_stream_mode = qpdf_o_preserve;
1764 1276 }
1765   -
1766   - // Delete page objects for unused page in primary.
1767   - // This prevents those objects from being preserved by
1768   - // being referred to from other places, such as the
1769   - // outlines dictionary.
1770   - for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
  1277 + else if (strcmp(parameter, "generate") == 0)
1771 1278 {
1772   - if (selected_from_orig.count(pageno) == 0)
1773   - {
1774   - pdf.replaceObject(orig_pages.at(pageno).getObjGen(),
1775   - QPDFObjectHandle::newNull());
1776   - }
  1279 + o.object_stream_mode = qpdf_o_generate;
  1280 + }
  1281 + else
  1282 + {
  1283 + usage("invalid object stream mode");
1777 1284 }
1778 1285 }
1779   -
1780   - if (strcmp(outfilename, "-") == 0)
1781   - {
1782   - outfilename = 0;
1783   - }
1784   - QPDFWriter w(pdf, outfilename);
1785   - if (qdf_mode)
1786   - {
1787   - w.setQDFMode(true);
1788   - }
1789   - if (precheck_streams)
  1286 + else if (strcmp(arg, "ignore-xref-streams") == 0)
1790 1287 {
1791   - w.setPrecheckStreams(true);
  1288 + o.ignore_xref_streams = true;
1792 1289 }
1793   - if (preserve_unreferenced_objects)
  1290 + else if (strcmp(arg, "qdf") == 0)
1794 1291 {
1795   - w.setPreserveUnreferencedObjects(true);
  1292 + o.qdf_mode = true;
1796 1293 }
1797   - if (newline_before_endstream)
  1294 + else if (strcmp(arg, "precheck-streams") == 0)
1798 1295 {
1799   - w.setNewlineBeforeEndstream(true);
  1296 + o.precheck_streams = true;
1800 1297 }
1801   - if (normalize_set)
1802   - {
1803   - w.setContentNormalization(normalize);
1804   - }
1805   - if (stream_data_set)
1806   - {
1807   - w.setStreamDataMode(stream_data_mode);
1808   - }
1809   - if (decrypt)
1810   - {
1811   - w.setPreserveEncryption(false);
1812   - }
1813   - if (deterministic_id)
  1298 + else if (strcmp(arg, "preserve-unreferenced") == 0)
1814 1299 {
1815   - w.setDeterministicID(true);
  1300 + o.preserve_unreferenced_objects = true;
1816 1301 }
1817   - if (static_id)
1818   - {
1819   - w.setStaticID(true);
1820   - }
1821   - if (static_aes_iv)
1822   - {
1823   - w.setStaticAesIV(true);
1824   - }
1825   - if (suppress_original_object_id)
1826   - {
1827   - w.setSuppressOriginalObjectIDs(true);
1828   - }
1829   - if (copy_encryption)
  1302 + else if (strcmp(arg, "newline-before-endstream") == 0)
1830 1303 {
1831   - encryption_pdf.processFile(
1832   - encryption_file, encryption_file_password);
1833   - w.copyEncryptionParameters(encryption_pdf);
  1304 + o.newline_before_endstream = true;
1834 1305 }
1835   - if (encrypt)
1836   - {
1837   - int R = 0;
1838   - if (keylen == 40)
1839   - {
1840   - R = 2;
1841   - }
1842   - else if (keylen == 128)
1843   - {
1844   - if (force_V4 || cleartext_metadata || use_aes)
1845   - {
1846   - R = 4;
1847   - }
1848   - else
1849   - {
1850   - R = 3;
1851   - }
1852   - }
1853   - else if (keylen == 256)
1854   - {
1855   - if (force_R5)
1856   - {
1857   - R = 5;
1858   - }
1859   - else
1860   - {
1861   - R = 6;
1862   - }
1863   - }
1864   - else
1865   - {
1866   - throw std::logic_error("bad encryption keylen");
1867   - }
1868   - if ((R > 3) && (r3_accessibility == false))
  1306 + else if (strcmp(arg, "min-version") == 0)
  1307 + {
  1308 + if (parameter == 0)
1869 1309 {
1870   - std::cerr << whoami
1871   - << ": -accessibility=n is ignored for modern"
1872   - << " encryption formats" << std::endl;
  1310 + usage("--min-version be given as"
  1311 + "--min-version=version");
1873 1312 }
1874   - switch (R)
  1313 + o.min_version = parameter;
  1314 + }
  1315 + else if (strcmp(arg, "force-version") == 0)
  1316 + {
  1317 + if (parameter == 0)
1875 1318 {
1876   - case 2:
1877   - w.setR2EncryptionParameters(
1878   - user_password.c_str(), owner_password.c_str(),
1879   - r2_print, r2_modify, r2_extract, r2_annotate);
1880   - break;
1881   - case 3:
1882   - w.setR3EncryptionParameters(
1883   - user_password.c_str(), owner_password.c_str(),
1884   - r3_accessibility, r3_extract, r3_print, r3_modify);
1885   - break;
1886   - case 4:
1887   - w.setR4EncryptionParameters(
1888   - user_password.c_str(), owner_password.c_str(),
1889   - r3_accessibility, r3_extract, r3_print, r3_modify,
1890   - !cleartext_metadata, use_aes);
1891   - break;
1892   - case 5:
1893   - w.setR5EncryptionParameters(
1894   - user_password.c_str(), owner_password.c_str(),
1895   - r3_accessibility, r3_extract, r3_print, r3_modify,
1896   - !cleartext_metadata);
1897   - break;
1898   - case 6:
1899   - w.setR6EncryptionParameters(
1900   - user_password.c_str(), owner_password.c_str(),
1901   - r3_accessibility, r3_extract, r3_print, r3_modify,
1902   - !cleartext_metadata);
1903   - break;
1904   - default:
1905   - throw std::logic_error("bad encryption R value");
1906   - break;
  1319 + usage("--force-version be given as"
  1320 + "--force-version=version");
1907 1321 }
1908   - }
1909   - if (linearize)
1910   - {
1911   - w.setLinearization(true);
1912   - }
1913   - if (object_stream_set)
1914   - {
1915   - w.setObjectStreamMode(object_stream_mode);
1916   - }
1917   - if (! min_version.empty())
1918   - {
1919   - std::string version;
1920   - int extension_level = 0;
1921   - parse_version(min_version, version, extension_level);
1922   - w.setMinimumPDFVersion(version, extension_level);
1923   - }
1924   - if (! force_version.empty())
1925   - {
1926   - std::string version;
1927   - int extension_level = 0;
1928   - parse_version(force_version, version, extension_level);
1929   - w.forcePDFVersion(version, extension_level);
1930   - }
1931   - w.write();
  1322 + o.force_version = parameter;
  1323 + }
  1324 + else if (strcmp(arg, "deterministic-id") == 0)
  1325 + {
  1326 + o.deterministic_id = true;
  1327 + }
  1328 + else if (strcmp(arg, "static-id") == 0)
  1329 + {
  1330 + o.static_id = true;
  1331 + }
  1332 + else if (strcmp(arg, "static-aes-iv") == 0)
  1333 + {
  1334 + o.static_aes_iv = true;
  1335 + }
  1336 + else if (strcmp(arg, "no-original-object-ids") == 0)
  1337 + {
  1338 + o.suppress_original_object_id = true;
  1339 + }
  1340 + else if (strcmp(arg, "show-encryption") == 0)
  1341 + {
  1342 + o.show_encryption = true;
  1343 + o.require_outfile = false;
  1344 + }
  1345 + else if (strcmp(arg, "check-linearization") == 0)
  1346 + {
  1347 + o.check_linearization = true;
  1348 + o.require_outfile = false;
  1349 + }
  1350 + else if (strcmp(arg, "show-linearization") == 0)
  1351 + {
  1352 + o.show_linearization = true;
  1353 + o.require_outfile = false;
  1354 + }
  1355 + else if (strcmp(arg, "show-xref") == 0)
  1356 + {
  1357 + o.show_xref = true;
  1358 + o.require_outfile = false;
  1359 + }
  1360 + else if (strcmp(arg, "show-object") == 0)
  1361 + {
  1362 + if (parameter == 0)
  1363 + {
  1364 + usage("--show-object must be given as"
  1365 + " --show-object=obj[,gen]");
  1366 + }
  1367 + char* obj = parameter;
  1368 + char* gen = obj;
  1369 + if ((gen = strchr(obj, ',')) != 0)
  1370 + {
  1371 + *gen++ = 0;
  1372 + o.show_gen = atoi(gen);
  1373 + }
  1374 + o.show_obj = atoi(obj);
  1375 + o.require_outfile = false;
  1376 + }
  1377 + else if (strcmp(arg, "raw-stream-data") == 0)
  1378 + {
  1379 + o.show_raw_stream_data = true;
  1380 + }
  1381 + else if (strcmp(arg, "filtered-stream-data") == 0)
  1382 + {
  1383 + o.show_filtered_stream_data = true;
  1384 + }
  1385 + else if (strcmp(arg, "show-npages") == 0)
  1386 + {
  1387 + o.show_npages = true;
  1388 + o.require_outfile = false;
  1389 + }
  1390 + else if (strcmp(arg, "show-pages") == 0)
  1391 + {
  1392 + o.show_pages = true;
  1393 + o.require_outfile = false;
  1394 + }
  1395 + else if (strcmp(arg, "with-images") == 0)
  1396 + {
  1397 + o.show_page_images = true;
  1398 + }
  1399 + else if (strcmp(arg, "check") == 0)
  1400 + {
  1401 + o.check = true;
  1402 + o.require_outfile = false;
  1403 + }
  1404 + else
  1405 + {
  1406 + usage(std::string("unknown option --") + arg);
  1407 + }
  1408 + }
  1409 + else if (o.infilename == 0)
  1410 + {
  1411 + o.infilename = arg;
  1412 + }
  1413 + else if (o.outfilename == 0)
  1414 + {
  1415 + o.outfilename = arg;
  1416 + }
  1417 + else
  1418 + {
  1419 + usage(std::string("unknown argument ") + arg);
  1420 + }
  1421 + }
  1422 +
  1423 + if (o.infilename == 0)
  1424 + {
  1425 + usage("an input file name is required");
  1426 + }
  1427 + else if (o.require_outfile && (o.outfilename == 0))
  1428 + {
  1429 + usage("an output file name is required; use - for standard output");
  1430 + }
  1431 + else if ((! o.require_outfile) && (o.outfilename != 0))
  1432 + {
  1433 + usage("no output file may be given for this option");
  1434 + }
  1435 +
  1436 + if (QUtil::same_file(o.infilename, o.outfilename))
  1437 + {
  1438 + QTC::TC("qpdf", "qpdf same file error");
  1439 + usage("input file and output file are the same; this would cause input file to be lost");
  1440 + }
  1441 +}
  1442 +
  1443 +static void set_qpdf_options(QPDF& pdf, Options& o)
  1444 +{
  1445 + if (o.ignore_xref_streams)
  1446 + {
  1447 + pdf.setIgnoreXRefStreams(true);
  1448 + }
  1449 + if (o.suppress_recovery)
  1450 + {
  1451 + pdf.setAttemptRecovery(false);
  1452 + }
  1453 +}
  1454 +
  1455 +static void do_check(QPDF& pdf, Options& o, int& exit_code)
  1456 +{
  1457 + // Code below may set okay to false but not to true.
  1458 + // We assume okay until we prove otherwise but may
  1459 + // continue to perform additional checks after finding
  1460 + // errors.
  1461 + bool okay = true;
  1462 + std::cout << "checking " << o.infilename << std::endl;
  1463 + try
  1464 + {
  1465 + int extension_level = pdf.getExtensionLevel();
  1466 + std::cout << "PDF Version: " << pdf.getPDFVersion();
  1467 + if (extension_level > 0)
  1468 + {
  1469 + std::cout << " extension level "
  1470 + << pdf.getExtensionLevel();
  1471 + }
  1472 + std::cout << std::endl;
  1473 + show_encryption(pdf);
  1474 + if (pdf.isLinearized())
  1475 + {
  1476 + std::cout << "File is linearized\n";
  1477 + if (! pdf.checkLinearization())
  1478 + {
  1479 + // any errors are reported by checkLinearization()
  1480 + okay = false;
  1481 + }
  1482 + }
  1483 + else
  1484 + {
  1485 + std::cout << "File is not linearized\n";
  1486 + }
  1487 +
  1488 + // Write the file no nowhere, uncompressing
  1489 + // streams. This causes full file traversal and
  1490 + // decoding of all streams we can decode.
  1491 + QPDFWriter w(pdf);
  1492 + Pl_Discard discard;
  1493 + w.setOutputPipeline(&discard);
  1494 + w.setStreamDataMode(qpdf_s_uncompress);
  1495 + w.write();
  1496 +
  1497 + // Parse all content streams
  1498 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1499 + DiscardContents discard_contents;
  1500 + int pageno = 0;
  1501 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1502 + pages.begin();
  1503 + iter != pages.end(); ++iter)
  1504 + {
  1505 + ++pageno;
  1506 + try
  1507 + {
  1508 + QPDFObjectHandle::parseContentStream(
  1509 + (*iter).getKey("/Contents"),
  1510 + &discard_contents);
  1511 + }
  1512 + catch (QPDFExc& e)
  1513 + {
  1514 + okay = false;
  1515 + std::cout << "page " << pageno << ": "
  1516 + << e.what() << std::endl;
  1517 + }
  1518 + }
  1519 + }
  1520 + catch (std::exception& e)
  1521 + {
  1522 + std::cout << e.what() << std::endl;
  1523 + okay = false;
  1524 + }
  1525 + if (okay)
  1526 + {
  1527 + if (! pdf.getWarnings().empty())
  1528 + {
  1529 + exit_code = EXIT_WARNING;
  1530 + }
  1531 + else
  1532 + {
  1533 + std::cout << "No syntax or stream encoding errors"
  1534 + << " found; the file may still contain"
  1535 + << std::endl
  1536 + << "errors that qpdf cannot detect"
  1537 + << std::endl;
  1538 + }
  1539 + }
  1540 + else
  1541 + {
  1542 + exit_code = EXIT_ERROR;
  1543 + }
  1544 +}
  1545 +
  1546 +static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
  1547 +{
  1548 + QPDFObjectHandle obj = pdf.getObjectByID(o.show_obj, o.show_gen);
  1549 + if (obj.isStream())
  1550 + {
  1551 + if (o.show_raw_stream_data || o.show_filtered_stream_data)
  1552 + {
  1553 + bool filter = o.show_filtered_stream_data;
  1554 + if (filter &&
  1555 + (! obj.pipeStreamData(0, true, false, false)))
  1556 + {
  1557 + QTC::TC("qpdf", "qpdf unable to filter");
  1558 + std::cerr << "Unable to filter stream data."
  1559 + << std::endl;
  1560 + exit_code = EXIT_ERROR;
  1561 + }
  1562 + else
  1563 + {
  1564 + QUtil::binary_stdout();
  1565 + Pl_StdioFile out("stdout", stdout);
  1566 + obj.pipeStreamData(&out, filter, o.normalize, false);
  1567 + }
  1568 + }
  1569 + else
  1570 + {
  1571 + std::cout
  1572 + << "Object is stream. Dictionary:" << std::endl
  1573 + << obj.getDict().unparseResolved() << std::endl;
  1574 + }
  1575 + }
  1576 + else
  1577 + {
  1578 + std::cout << obj.unparseResolved() << std::endl;
  1579 + }
  1580 +}
  1581 +
  1582 +static void do_show_pages(QPDF& pdf, Options& o)
  1583 +{
  1584 + if (o.show_page_images)
  1585 + {
  1586 + pdf.pushInheritedAttributesToPage();
  1587 + }
  1588 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1589 + int pageno = 0;
  1590 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1591 + pages.begin();
  1592 + iter != pages.end(); ++iter)
  1593 + {
  1594 + QPDFObjectHandle& page = *iter;
  1595 + ++pageno;
  1596 +
  1597 + std::cout << "page " << pageno << ": "
  1598 + << page.getObjectID() << " "
  1599 + << page.getGeneration() << " R" << std::endl;
  1600 + if (o.show_page_images)
  1601 + {
  1602 + std::map<std::string, QPDFObjectHandle> images =
  1603 + page.getPageImages();
  1604 + if (! images.empty())
  1605 + {
  1606 + std::cout << " images:" << std::endl;
  1607 + for (std::map<std::string,
  1608 + QPDFObjectHandle>::iterator
  1609 + iter = images.begin();
  1610 + iter != images.end(); ++iter)
  1611 + {
  1612 + std::string const& name = (*iter).first;
  1613 + QPDFObjectHandle image = (*iter).second;
  1614 + QPDFObjectHandle dict = image.getDict();
  1615 + int width =
  1616 + dict.getKey("/Width").getIntValue();
  1617 + int height =
  1618 + dict.getKey("/Height").getIntValue();
  1619 + std::cout << " " << name << ": "
  1620 + << image.unparse()
  1621 + << ", " << width << " x " << height
  1622 + << std::endl;
  1623 + }
  1624 + }
  1625 + }
  1626 +
  1627 + std::cout << " content:" << std::endl;
  1628 + std::vector<QPDFObjectHandle> content =
  1629 + page.getPageContents();
  1630 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1631 + content.begin();
  1632 + iter != content.end(); ++iter)
  1633 + {
  1634 + std::cout << " " << (*iter).unparse() << std::endl;
  1635 + }
  1636 + }
  1637 +}
  1638 +
  1639 +static void do_inspection(QPDF& pdf, Options& o)
  1640 +{
  1641 + int exit_code = 0;
  1642 + if (o.check)
  1643 + {
  1644 + do_check(pdf, o, exit_code);
  1645 + }
  1646 + if (o.show_npages)
  1647 + {
  1648 + QTC::TC("qpdf", "qpdf npages");
  1649 + std::cout << pdf.getRoot().getKey("/Pages").
  1650 + getKey("/Count").getIntValue() << std::endl;
  1651 + }
  1652 + if (o.show_encryption)
  1653 + {
  1654 + show_encryption(pdf);
  1655 + }
  1656 + if (o.check_linearization)
  1657 + {
  1658 + if (pdf.checkLinearization())
  1659 + {
  1660 + std::cout << o.infilename << ": no linearization errors"
  1661 + << std::endl;
  1662 + }
  1663 + else
  1664 + {
  1665 + exit_code = EXIT_ERROR;
  1666 + }
  1667 + }
  1668 + if (o.show_linearization)
  1669 + {
  1670 + if (pdf.isLinearized())
  1671 + {
  1672 + pdf.showLinearizationData();
  1673 + }
  1674 + else
  1675 + {
  1676 + std::cout << o.infilename << " is not linearized"
  1677 + << std::endl;
  1678 + }
  1679 + }
  1680 + if (o.show_xref)
  1681 + {
  1682 + pdf.showXRefTable();
  1683 + }
  1684 + if (o.show_obj > 0)
  1685 + {
  1686 + do_show_obj(pdf, o, exit_code);
  1687 + }
  1688 + if (o.show_pages)
  1689 + {
  1690 + do_show_pages(pdf, o);
  1691 + }
  1692 + if (exit_code)
  1693 + {
  1694 + exit(exit_code);
  1695 + }
  1696 +}
  1697 +
  1698 +static void handle_page_specs(QPDF& pdf, Options& o,
  1699 + std::vector<PointerHolder<QPDF> >& page_heap)
  1700 +{
  1701 + // Parse all page specifications and translate them into lists of
  1702 + // actual pages.
  1703 +
  1704 + // Create a QPDF object for each file that we may take pages from.
  1705 + std::map<std::string, QPDF*> page_spec_qpdfs;
  1706 + page_spec_qpdfs[o.infilename] = &pdf;
  1707 + std::vector<QPDFPageData> parsed_specs;
  1708 + for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
  1709 + iter != o.page_specs.end(); ++iter)
  1710 + {
  1711 + PageSpec& page_spec = *iter;
  1712 + if (page_spec_qpdfs.count(page_spec.filename) == 0)
  1713 + {
  1714 + // Open the PDF file and store the QPDF object. Throw a
  1715 + // PointerHolder to the qpdf into a heap so that it
  1716 + // survives through writing the output but gets cleaned up
  1717 + // automatically at the end. Do not canonicalize the file
  1718 + // name. Using two different paths to refer to the same
  1719 + // file is a document workaround for duplicating a page.
  1720 + // If you are using this an example of how to do this with
  1721 + // the API, you can just create two different QPDF objects
  1722 + // to the same underlying file with the same path to
  1723 + // achieve the same affect.
  1724 + PointerHolder<QPDF> qpdf_ph = new QPDF();
  1725 + page_heap.push_back(qpdf_ph);
  1726 + QPDF* qpdf = qpdf_ph.getPointer();
  1727 + char const* password = page_spec.password;
  1728 + if (o.encryption_file && (password == 0) &&
  1729 + (page_spec.filename == o.encryption_file))
  1730 + {
  1731 + QTC::TC("qpdf", "qpdf pages encryption password");
  1732 + password = o.encryption_file_password;
  1733 + }
  1734 + qpdf->processFile(
  1735 + page_spec.filename.c_str(), password);
  1736 + page_spec_qpdfs[page_spec.filename] = qpdf;
  1737 + }
  1738 +
  1739 + // Read original pages from the PDF, and parse the page range
  1740 + // associated with this occurrence of the file.
  1741 + parsed_specs.push_back(
  1742 + QPDFPageData(page_spec_qpdfs[page_spec.filename],
  1743 + page_spec.range));
  1744 + }
  1745 +
  1746 + // Clear all pages out of the primary QPDF's pages tree but leave
  1747 + // the objects in place in the file so they can be re-added
  1748 + // without changing their object numbers. This enables other
  1749 + // things in the original file, such as outlines, to continue to
  1750 + // work.
  1751 + std::vector<QPDFObjectHandle> orig_pages = pdf.getAllPages();
  1752 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1753 + orig_pages.begin();
  1754 + iter != orig_pages.end(); ++iter)
  1755 + {
  1756 + pdf.removePage(*iter);
  1757 + }
  1758 +
  1759 + // Add all the pages from all the files in the order specified.
  1760 + // Keep track of any pages from the original file that we are
  1761 + // selecting.
  1762 + std::set<int> selected_from_orig;
  1763 + for (std::vector<QPDFPageData>::iterator iter =
  1764 + parsed_specs.begin();
  1765 + iter != parsed_specs.end(); ++iter)
  1766 + {
  1767 + QPDFPageData& page_data = *iter;
  1768 + for (std::vector<int>::iterator pageno_iter =
  1769 + page_data.selected_pages.begin();
  1770 + pageno_iter != page_data.selected_pages.end();
  1771 + ++pageno_iter)
  1772 + {
  1773 + // Pages are specified from 1 but numbered from 0 in the
  1774 + // vector
  1775 + int pageno = *pageno_iter - 1;
  1776 + pdf.addPage(page_data.orig_pages.at(pageno), false);
  1777 + if (page_data.qpdf == &pdf)
  1778 + {
  1779 + // This is a page from the original file. Keep track
  1780 + // of the fact that we are using it.
  1781 + selected_from_orig.insert(pageno);
  1782 + }
  1783 + }
  1784 + }
  1785 +
  1786 + // Delete page objects for unused page in primary. This prevents
  1787 + // those objects from being preserved by being referred to from
  1788 + // other places, such as the outlines dictionary.
  1789 + for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
  1790 + {
  1791 + if (selected_from_orig.count(pageno) == 0)
  1792 + {
  1793 + pdf.replaceObject(orig_pages.at(pageno).getObjGen(),
  1794 + QPDFObjectHandle::newNull());
  1795 + }
  1796 + }
  1797 +}
  1798 +
  1799 +static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
  1800 +{
  1801 + int R = 0;
  1802 + if (o.keylen == 40)
  1803 + {
  1804 + R = 2;
  1805 + }
  1806 + else if (o.keylen == 128)
  1807 + {
  1808 + if (o.force_V4 || o.cleartext_metadata || o.use_aes)
  1809 + {
  1810 + R = 4;
  1811 + }
  1812 + else
  1813 + {
  1814 + R = 3;
  1815 + }
  1816 + }
  1817 + else if (o.keylen == 256)
  1818 + {
  1819 + if (o.force_R5)
  1820 + {
  1821 + R = 5;
  1822 + }
  1823 + else
  1824 + {
  1825 + R = 6;
  1826 + }
  1827 + }
  1828 + else
  1829 + {
  1830 + throw std::logic_error("bad encryption keylen");
  1831 + }
  1832 + if ((R > 3) && (o.r3_accessibility == false))
  1833 + {
  1834 + std::cerr << whoami
  1835 + << ": -accessibility=n is ignored for modern"
  1836 + << " encryption formats" << std::endl;
  1837 + }
  1838 + switch (R)
  1839 + {
  1840 + case 2:
  1841 + w.setR2EncryptionParameters(
  1842 + o.user_password.c_str(), o.owner_password.c_str(),
  1843 + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate);
  1844 + break;
  1845 + case 3:
  1846 + w.setR3EncryptionParameters(
  1847 + o.user_password.c_str(), o.owner_password.c_str(),
  1848 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify);
  1849 + break;
  1850 + case 4:
  1851 + w.setR4EncryptionParameters(
  1852 + o.user_password.c_str(), o.owner_password.c_str(),
  1853 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1854 + !o.cleartext_metadata, o.use_aes);
  1855 + break;
  1856 + case 5:
  1857 + w.setR5EncryptionParameters(
  1858 + o.user_password.c_str(), o.owner_password.c_str(),
  1859 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1860 + !o.cleartext_metadata);
  1861 + break;
  1862 + case 6:
  1863 + w.setR6EncryptionParameters(
  1864 + o.user_password.c_str(), o.owner_password.c_str(),
  1865 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1866 + !o.cleartext_metadata);
  1867 + break;
  1868 + default:
  1869 + throw std::logic_error("bad encryption R value");
  1870 + break;
  1871 + }
  1872 +}
  1873 +
  1874 +static void write_outfile(QPDF& pdf, Options& o)
  1875 +{
  1876 + QPDF encryption_pdf;
  1877 + std::vector<PointerHolder<QPDF> > page_heap;
  1878 + if (! o.page_specs.empty())
  1879 + {
  1880 + handle_page_specs(pdf, o, page_heap);
  1881 + }
  1882 +
  1883 + if (strcmp(o.outfilename, "-") == 0)
  1884 + {
  1885 + o.outfilename = 0;
  1886 + }
  1887 + QPDFWriter w(pdf, o.outfilename);
  1888 + if (o.qdf_mode)
  1889 + {
  1890 + w.setQDFMode(true);
  1891 + }
  1892 + if (o.precheck_streams)
  1893 + {
  1894 + w.setPrecheckStreams(true);
  1895 + }
  1896 + if (o.preserve_unreferenced_objects)
  1897 + {
  1898 + w.setPreserveUnreferencedObjects(true);
  1899 + }
  1900 + if (o.newline_before_endstream)
  1901 + {
  1902 + w.setNewlineBeforeEndstream(true);
  1903 + }
  1904 + if (o.normalize_set)
  1905 + {
  1906 + w.setContentNormalization(o.normalize);
  1907 + }
  1908 + if (o.stream_data_set)
  1909 + {
  1910 + w.setStreamDataMode(o.stream_data_mode);
  1911 + }
  1912 + if (o.decrypt)
  1913 + {
  1914 + w.setPreserveEncryption(false);
  1915 + }
  1916 + if (o.deterministic_id)
  1917 + {
  1918 + w.setDeterministicID(true);
  1919 + }
  1920 + if (o.static_id)
  1921 + {
  1922 + w.setStaticID(true);
  1923 + }
  1924 + if (o.static_aes_iv)
  1925 + {
  1926 + w.setStaticAesIV(true);
  1927 + }
  1928 + if (o.suppress_original_object_id)
  1929 + {
  1930 + w.setSuppressOriginalObjectIDs(true);
  1931 + }
  1932 + if (o.copy_encryption)
  1933 + {
  1934 + encryption_pdf.processFile(
  1935 + o.encryption_file, o.encryption_file_password);
  1936 + w.copyEncryptionParameters(encryption_pdf);
  1937 + }
  1938 + if (o.encrypt)
  1939 + {
  1940 + set_encryption_options(pdf, o, w);
  1941 + }
  1942 + if (o.linearize)
  1943 + {
  1944 + w.setLinearization(true);
  1945 + }
  1946 + if (o.object_stream_set)
  1947 + {
  1948 + w.setObjectStreamMode(o.object_stream_mode);
  1949 + }
  1950 + if (! o.min_version.empty())
  1951 + {
  1952 + std::string version;
  1953 + int extension_level = 0;
  1954 + parse_version(o.min_version, version, extension_level);
  1955 + w.setMinimumPDFVersion(version, extension_level);
  1956 + }
  1957 + if (! o.force_version.empty())
  1958 + {
  1959 + std::string version;
  1960 + int extension_level = 0;
  1961 + parse_version(o.force_version, version, extension_level);
  1962 + w.forcePDFVersion(version, extension_level);
  1963 + }
  1964 + w.write();
  1965 +}
  1966 +
  1967 +int main(int argc, char* argv[])
  1968 +{
  1969 + whoami = QUtil::getWhoami(argv[0]);
  1970 + QUtil::setLineBuf(stdout);
  1971 +
  1972 + // For libtool's sake....
  1973 + if (strncmp(whoami, "lt-", 3) == 0)
  1974 + {
  1975 + whoami += 3;
  1976 + }
  1977 +
  1978 + handle_help_verison(argc, argv);
  1979 +
  1980 + // Support reading arguments from files. Create a new argv. Ensure
  1981 + // that argv itself as well as all its contents are automatically
  1982 + // deleted by using PointerHolder objects to back the pointers in
  1983 + // argv.
  1984 + std::vector<PointerHolder<char> > new_argv;
  1985 + new_argv.push_back(PointerHolder<char>(QUtil::copy_string(argv[0]), true));
  1986 + for (int i = 1; i < argc; ++i)
  1987 + {
  1988 + if ((strlen(argv[i]) > 1) && (argv[i][0] == '@'))
  1989 + {
  1990 + read_args_from_file(1+argv[i], new_argv);
  1991 + }
  1992 + else
  1993 + {
  1994 + new_argv.push_back(
  1995 + PointerHolder<char>(QUtil::copy_string(argv[i]), true));
  1996 + }
  1997 + }
  1998 + PointerHolder<char*> argv_ph(new char*[1+new_argv.size()], true);
  1999 + argv = argv_ph.getPointer();
  2000 + for (size_t i = 0; i < new_argv.size(); ++i)
  2001 + {
  2002 + argv[i] = new_argv.at(i).getPointer();
  2003 + }
  2004 + argc = static_cast<int>(new_argv.size());
  2005 + argv[argc] = 0;
  2006 +
  2007 + Options o;
  2008 + parse_options(argc, argv, o);
  2009 +
  2010 + try
  2011 + {
  2012 + QPDF pdf;
  2013 + set_qpdf_options(pdf, o);
  2014 + if (strcmp(o.infilename, "") == 0)
  2015 + {
  2016 + pdf.emptyPDF();
  2017 + }
  2018 + else
  2019 + {
  2020 + pdf.processFile(o.infilename, o.password);
  2021 + }
  2022 +
  2023 + if (o.outfilename == 0)
  2024 + {
  2025 + do_inspection(pdf, o);
  2026 + }
  2027 + else
  2028 + {
  2029 + write_outfile(pdf, o);
1932 2030 }
1933 2031 if (! pdf.getWarnings().empty())
1934 2032 {
... ...