Commit 24f28f0768b655b130626091ac23cf67d9e39630

Authored by Jay Berkenbilt
1 parent c88eaae2

Split qpdf.cc's main into reasonably sized functions

main() had gotten absurdly long. Split it into reasonable chunks. This
refactoring is in preparation for adding support for splitting output
into single pages.
Showing 1 changed file with 979 additions and 881 deletions
qpdf/qpdf.cc
@@ -37,6 +37,121 @@ struct PageSpec @@ -37,6 +37,121 @@ struct PageSpec
37 char const* range; 37 char const* range;
38 }; 38 };
39 39
  40 +struct Options
  41 +{
  42 + Options() :
  43 + password(0),
  44 + linearize(false),
  45 + decrypt(false),
  46 + copy_encryption(false),
  47 + encryption_file(0),
  48 + encryption_file_password(0),
  49 + encrypt(false),
  50 + keylen(0),
  51 + r2_print(true),
  52 + r2_modify(true),
  53 + r2_extract(true),
  54 + r2_annotate(true),
  55 + r3_accessibility(true),
  56 + r3_extract(true),
  57 + r3_print(qpdf_r3p_full),
  58 + r3_modify(qpdf_r3m_all),
  59 + force_V4(false),
  60 + force_R5(false),
  61 + cleartext_metadata(false),
  62 + use_aes(false),
  63 + stream_data_set(false),
  64 + stream_data_mode(qpdf_s_compress),
  65 + normalize_set(false),
  66 + normalize(false),
  67 + suppress_recovery(false),
  68 + object_stream_set(false),
  69 + object_stream_mode(qpdf_o_preserve),
  70 + ignore_xref_streams(false),
  71 + qdf_mode(false),
  72 + precheck_streams(false),
  73 + preserve_unreferenced_objects(false),
  74 + newline_before_endstream(false),
  75 + show_npages(false),
  76 + deterministic_id(false),
  77 + static_id(false),
  78 + static_aes_iv(false),
  79 + suppress_original_object_id(false),
  80 + show_encryption(false),
  81 + check_linearization(false),
  82 + show_linearization(false),
  83 + show_xref(false),
  84 + show_obj(0),
  85 + show_gen(0),
  86 + show_raw_stream_data(false),
  87 + show_filtered_stream_data(false),
  88 + show_pages(false),
  89 + show_page_images(false),
  90 + check(false),
  91 + require_outfile(true),
  92 + infilename(0),
  93 + outfilename(0)
  94 + {
  95 + }
  96 +
  97 + char const* password;
  98 + bool linearize;
  99 + bool decrypt;
  100 + bool copy_encryption;
  101 + char const* encryption_file;
  102 + char const* encryption_file_password;
  103 + bool encrypt;
  104 + std::string user_password;
  105 + std::string owner_password;
  106 + int keylen;
  107 + bool r2_print;
  108 + bool r2_modify;
  109 + bool r2_extract;
  110 + bool r2_annotate;
  111 + bool r3_accessibility;
  112 + bool r3_extract;
  113 + qpdf_r3_print_e r3_print;
  114 + qpdf_r3_modify_e r3_modify;
  115 + bool force_V4;
  116 + bool force_R5;
  117 + bool cleartext_metadata;
  118 + bool use_aes;
  119 + bool stream_data_set;
  120 + qpdf_stream_data_e stream_data_mode;
  121 + bool normalize_set;
  122 + bool normalize;
  123 + bool suppress_recovery;
  124 + bool object_stream_set;
  125 + qpdf_object_stream_e object_stream_mode;
  126 + bool ignore_xref_streams;
  127 + bool qdf_mode;
  128 + bool precheck_streams;
  129 + bool preserve_unreferenced_objects;
  130 + bool newline_before_endstream;
  131 + std::string min_version;
  132 + std::string force_version;
  133 + bool show_npages;
  134 + bool deterministic_id;
  135 + bool static_id;
  136 + bool static_aes_iv;
  137 + bool suppress_original_object_id;
  138 + bool show_encryption;
  139 + bool check_linearization;
  140 + bool show_linearization;
  141 + bool show_xref;
  142 + int show_obj;
  143 + int show_gen;
  144 + bool show_raw_stream_data;
  145 + bool show_filtered_stream_data;
  146 + bool show_pages;
  147 + bool show_page_images;
  148 + bool check;
  149 + std::vector<PageSpec> page_specs;
  150 + bool require_outfile;
  151 + char const* infilename;
  152 + char const* outfilename;
  153 +};
  154 +
40 struct QPDFPageData 155 struct QPDFPageData
41 { 156 {
42 QPDFPageData(QPDF* qpdf, char const* range); 157 QPDFPageData(QPDF* qpdf, char const* range);
@@ -979,156 +1094,60 @@ static void read_args_from_file(char const* filename, @@ -979,156 +1094,60 @@ static void read_args_from_file(char const* filename,
979 } 1094 }
980 } 1095 }
981 1096
982 -int main(int argc, char* argv[]) 1097 +static void handle_help_verison(int argc, char* argv[])
983 { 1098 {
984 - whoami = QUtil::getWhoami(argv[0]);  
985 - QUtil::setLineBuf(stdout);  
986 -  
987 - // For libtool's sake....  
988 - if (strncmp(whoami, "lt-", 3) == 0)  
989 - {  
990 - whoami += 3;  
991 - }  
992 -  
993 if ((argc == 2) && 1099 if ((argc == 2) &&
994 - ((strcmp(argv[1], "--version") == 0) ||  
995 - (strcmp(argv[1], "-version") == 0)))  
996 - {  
997 - // make_dist looks for the line of code here that actually  
998 - // prints the version number, so read make_dist if you change  
999 - // anything other than the version number. Don't worry about  
1000 - // the numbers. That's just a guide to 80 columns so that the  
1001 - // help message looks right on an 80-column display. 1100 + ((strcmp(argv[1], "--version") == 0) ||
  1101 + (strcmp(argv[1], "-version") == 0)))
  1102 + {
  1103 + // make_dist looks for the line of code here that actually
  1104 + // prints the version number, so read make_dist if you change
  1105 + // anything other than the version number. Don't worry about
  1106 + // the numbers. That's just a guide to 80 columns so that the
  1107 + // help message looks right on an 80-column display.
1002 1108
1003 - // 1 2 3 4 5 6 7 8  
1004 - // 12345678901234567890123456789012345678901234567890123456789012345678901234567890  
1005 - std::cout  
1006 - << whoami << " version " << QPDF::QPDFVersion() << std::endl  
1007 - << "Copyright (c) 2005-2015 Jay Berkenbilt"  
1008 - << std::endl  
1009 - << "This software may be distributed under the terms of version 2 of the"  
1010 - << std::endl  
1011 - << "Artistic License which may be found in the source distribution. It is"  
1012 - << std::endl  
1013 - << "provided \"as is\" without express or implied warranty."  
1014 - << std::endl;  
1015 - exit(0); 1109 + // 1 2 3 4 5 6 7 8
  1110 + // 12345678901234567890123456789012345678901234567890123456789012345678901234567890
  1111 + std::cout
  1112 + << whoami << " version " << QPDF::QPDFVersion() << std::endl
  1113 + << "Copyright (c) 2005-2015 Jay Berkenbilt"
  1114 + << std::endl
  1115 + << "This software may be distributed under the terms of version 2 of the"
  1116 + << std::endl
  1117 + << "Artistic License which may be found in the source distribution. It is"
  1118 + << std::endl
  1119 + << "provided \"as is\" without express or implied warranty."
  1120 + << std::endl;
  1121 + exit(0);
1016 } 1122 }
1017 1123
1018 if ((argc == 2) && 1124 if ((argc == 2) &&
1019 - ((strcmp(argv[1], "--help") == 0) ||  
1020 - (strcmp(argv[1], "-help") == 0))) 1125 + ((strcmp(argv[1], "--help") == 0) ||
  1126 + (strcmp(argv[1], "-help") == 0)))
1021 { 1127 {
1022 - std::cout << help;  
1023 - exit(0); 1128 + std::cout << help;
  1129 + exit(0);
1024 } 1130 }
  1131 +}
1025 1132
1026 - char const* password = 0;  
1027 - bool linearize = false;  
1028 - bool decrypt = false;  
1029 -  
1030 - bool copy_encryption = false;  
1031 - char const* encryption_file = 0;  
1032 - char const* encryption_file_password = 0;  
1033 -  
1034 - bool encrypt = false;  
1035 - std::string user_password;  
1036 - std::string owner_password;  
1037 - int keylen = 0;  
1038 - bool r2_print = true;  
1039 - bool r2_modify = true;  
1040 - bool r2_extract = true;  
1041 - bool r2_annotate = true;  
1042 - bool r3_accessibility = true;  
1043 - bool r3_extract = true;  
1044 - qpdf_r3_print_e r3_print = qpdf_r3p_full;  
1045 - qpdf_r3_modify_e r3_modify = qpdf_r3m_all;  
1046 - bool force_V4 = false;  
1047 - bool force_R5 = false;  
1048 - bool cleartext_metadata = false;  
1049 - bool use_aes = false;  
1050 -  
1051 - bool stream_data_set = false;  
1052 - qpdf_stream_data_e stream_data_mode = qpdf_s_compress;  
1053 - bool normalize_set = false;  
1054 - bool normalize = false;  
1055 - bool suppress_recovery = false;  
1056 - bool object_stream_set = false;  
1057 - qpdf_object_stream_e object_stream_mode = qpdf_o_preserve;  
1058 - bool ignore_xref_streams = false;  
1059 - bool qdf_mode = false;  
1060 - bool precheck_streams = false;  
1061 - bool preserve_unreferenced_objects = false;  
1062 - bool newline_before_endstream = false;  
1063 - std::string min_version;  
1064 - std::string force_version;  
1065 -  
1066 - bool show_npages = false;  
1067 - bool deterministic_id = false;  
1068 - bool static_id = false;  
1069 - bool static_aes_iv = false;  
1070 - bool suppress_original_object_id = false;  
1071 - bool show_encryption = false;  
1072 - bool check_linearization = false;  
1073 - bool show_linearization = false;  
1074 - bool show_xref = false;  
1075 - int show_obj = 0;  
1076 - int show_gen = 0;  
1077 - bool show_raw_stream_data = false;  
1078 - bool show_filtered_stream_data = false;  
1079 - bool show_pages = false;  
1080 - bool show_page_images = false;  
1081 - bool check = false;  
1082 -  
1083 - std::vector<PageSpec> page_specs;  
1084 -  
1085 - bool require_outfile = true;  
1086 - char const* infilename = 0;  
1087 - char const* outfilename = 0;  
1088 -  
1089 - // Support reading arguments from files. Create a new argv. Ensure  
1090 - // that argv itself as well as all its contents are automatically  
1091 - // deleted by using PointerHolder objects to back the pointers in  
1092 - // argv.  
1093 - std::vector<PointerHolder<char> > new_argv;  
1094 - new_argv.push_back(PointerHolder<char>(QUtil::copy_string(argv[0]), true)); 1133 +static void parse_options(int argc, char* argv[], Options& o)
  1134 +{
1095 for (int i = 1; i < argc; ++i) 1135 for (int i = 1; i < argc; ++i)
1096 { 1136 {
1097 - if ((strlen(argv[i]) > 1) && (argv[i][0] == '@'))  
1098 - {  
1099 - read_args_from_file(1+argv[i], new_argv);  
1100 - }  
1101 - else 1137 + char const* arg = argv[i];
  1138 + if ((arg[0] == '-') && (strcmp(arg, "-") != 0))
1102 { 1139 {
1103 - new_argv.push_back(  
1104 - PointerHolder<char>(QUtil::copy_string(argv[i]), true));  
1105 - }  
1106 - }  
1107 - PointerHolder<char*> argv_ph(new char*[1+new_argv.size()], true);  
1108 - argv = argv_ph.getPointer();  
1109 - for (size_t i = 0; i < new_argv.size(); ++i)  
1110 - {  
1111 - argv[i] = new_argv.at(i).getPointer();  
1112 - }  
1113 - argc = static_cast<int>(new_argv.size());  
1114 - argv[argc] = 0;  
1115 -  
1116 - for (int i = 1; i < argc; ++i)  
1117 - {  
1118 - char const* arg = argv[i];  
1119 - if ((arg[0] == '-') && (strcmp(arg, "-") != 0))  
1120 - {  
1121 - ++arg;  
1122 - if (arg[0] == '-')  
1123 - {  
1124 - // Be lax about -arg vs --arg  
1125 - ++arg;  
1126 - }  
1127 - char* parameter = const_cast<char*>(strchr(arg, '='));  
1128 - if (parameter)  
1129 - {  
1130 - *parameter++ = 0;  
1131 - } 1140 + ++arg;
  1141 + if (arg[0] == '-')
  1142 + {
  1143 + // Be lax about -arg vs --arg
  1144 + ++arg;
  1145 + }
  1146 + char* parameter = const_cast<char*>(strchr(arg, '='));
  1147 + if (parameter)
  1148 + {
  1149 + *parameter++ = 0;
  1150 + }
1132 1151
1133 // Arguments that start with space are undocumented and 1152 // Arguments that start with space are undocumented and
1134 // are for use by the test suite. 1153 // are for use by the test suite.
@@ -1138,797 +1157,876 @@ int main(int argc, char* argv[]) @@ -1138,797 +1157,876 @@ int main(int argc, char* argv[])
1138 exit(0); 1157 exit(0);
1139 } 1158 }
1140 else if (strcmp(arg, "password") == 0) 1159 else if (strcmp(arg, "password") == 0)
1141 - {  
1142 - if (parameter == 0)  
1143 - {  
1144 - usage("--password must be given as --password=pass");  
1145 - }  
1146 - password = parameter;  
1147 - } 1160 + {
  1161 + if (parameter == 0)
  1162 + {
  1163 + usage("--password must be given as --password=pass");
  1164 + }
  1165 + o.password = parameter;
  1166 + }
1148 else if (strcmp(arg, "empty") == 0) 1167 else if (strcmp(arg, "empty") == 0)
1149 { 1168 {
1150 - infilename = ""; 1169 + o.infilename = "";
  1170 + }
  1171 + else if (strcmp(arg, "linearize") == 0)
  1172 + {
  1173 + o.linearize = true;
  1174 + }
  1175 + else if (strcmp(arg, "encrypt") == 0)
  1176 + {
  1177 + parse_encrypt_options(
  1178 + argc, argv, ++i,
  1179 + o.user_password, o.owner_password, o.keylen,
  1180 + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate,
  1181 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1182 + o.force_V4, o.cleartext_metadata, o.use_aes, o.force_R5);
  1183 + o.encrypt = true;
  1184 + o.decrypt = false;
  1185 + o.copy_encryption = false;
  1186 + }
  1187 + else if (strcmp(arg, "decrypt") == 0)
  1188 + {
  1189 + o.decrypt = true;
  1190 + o.encrypt = false;
  1191 + o.copy_encryption = false;
1151 } 1192 }
1152 - else if (strcmp(arg, "linearize") == 0)  
1153 - {  
1154 - linearize = true;  
1155 - }  
1156 - else if (strcmp(arg, "encrypt") == 0)  
1157 - {  
1158 - parse_encrypt_options(  
1159 - argc, argv, ++i,  
1160 - user_password, owner_password, keylen,  
1161 - r2_print, r2_modify, r2_extract, r2_annotate,  
1162 - r3_accessibility, r3_extract, r3_print, r3_modify,  
1163 - force_V4, cleartext_metadata, use_aes, force_R5);  
1164 - encrypt = true;  
1165 - decrypt = false;  
1166 - copy_encryption = false;  
1167 - }  
1168 - else if (strcmp(arg, "decrypt") == 0)  
1169 - {  
1170 - decrypt = true;  
1171 - encrypt = false;  
1172 - copy_encryption = false;  
1173 - }  
1174 else if (strcmp(arg, "copy-encryption") == 0) 1193 else if (strcmp(arg, "copy-encryption") == 0)
1175 { 1194 {
1176 - if (parameter == 0)  
1177 - {  
1178 - usage("--copy-encryption must be given as"  
1179 - "--copy_encryption=file");  
1180 - }  
1181 - encryption_file = parameter;  
1182 - copy_encryption = true;  
1183 - encrypt = false;  
1184 - decrypt = false; 1195 + if (parameter == 0)
  1196 + {
  1197 + usage("--copy-encryption must be given as"
  1198 + "--copy_encryption=file");
  1199 + }
  1200 + o.encryption_file = parameter;
  1201 + o.copy_encryption = true;
  1202 + o.encrypt = false;
  1203 + o.decrypt = false;
1185 } 1204 }
1186 else if (strcmp(arg, "encryption-file-password") == 0) 1205 else if (strcmp(arg, "encryption-file-password") == 0)
1187 { 1206 {
1188 - if (parameter == 0)  
1189 - {  
1190 - usage("--encryption-file-password must be given as"  
1191 - "--encryption-file-password=password");  
1192 - }  
1193 - encryption_file_password = parameter; 1207 + if (parameter == 0)
  1208 + {
  1209 + usage("--encryption-file-password must be given as"
  1210 + "--encryption-file-password=password");
  1211 + }
  1212 + o.encryption_file_password = parameter;
1194 } 1213 }
1195 else if (strcmp(arg, "pages") == 0) 1214 else if (strcmp(arg, "pages") == 0)
1196 { 1215 {
1197 - page_specs = parse_pages_options(argc, argv, ++i);  
1198 - if (page_specs.empty()) 1216 + o.page_specs = parse_pages_options(argc, argv, ++i);
  1217 + if (o.page_specs.empty())
1199 { 1218 {
1200 usage("--pages: no page specifications given"); 1219 usage("--pages: no page specifications given");
1201 } 1220 }
1202 } 1221 }
1203 - else if (strcmp(arg, "stream-data") == 0)  
1204 - {  
1205 - if (parameter == 0)  
1206 - {  
1207 - usage("--stream-data must be given as"  
1208 - "--stream-data=option");  
1209 - }  
1210 - stream_data_set = true;  
1211 - if (strcmp(parameter, "compress") == 0)  
1212 - {  
1213 - stream_data_mode = qpdf_s_compress;  
1214 - }  
1215 - else if (strcmp(parameter, "preserve") == 0)  
1216 - {  
1217 - stream_data_mode = qpdf_s_preserve;  
1218 - }  
1219 - else if (strcmp(parameter, "uncompress") == 0)  
1220 - {  
1221 - stream_data_mode = qpdf_s_uncompress;  
1222 - }  
1223 - else  
1224 - {  
1225 - usage("invalid stream-data option");  
1226 - }  
1227 - }  
1228 - else if (strcmp(arg, "normalize-content") == 0)  
1229 - {  
1230 - if ((parameter == 0) || (*parameter == '\0'))  
1231 - {  
1232 - usage("--normalize-content must be given as"  
1233 - " --normalize-content=[yn]");  
1234 - }  
1235 - normalize_set = true;  
1236 - normalize = (parameter[0] == 'y');  
1237 - }  
1238 - else if (strcmp(arg, "suppress-recovery") == 0)  
1239 - {  
1240 - suppress_recovery = true;  
1241 - }  
1242 - else if (strcmp(arg, "object-streams") == 0)  
1243 - {  
1244 - if (parameter == 0)  
1245 - {  
1246 - usage("--object-streams must be given as"  
1247 - " --object-streams=option");  
1248 - }  
1249 - object_stream_set = true;  
1250 - if (strcmp(parameter, "disable") == 0)  
1251 - {  
1252 - object_stream_mode = qpdf_o_disable;  
1253 - }  
1254 - else if (strcmp(parameter, "preserve") == 0)  
1255 - {  
1256 - object_stream_mode = qpdf_o_preserve;  
1257 - }  
1258 - else if (strcmp(parameter, "generate") == 0)  
1259 - {  
1260 - object_stream_mode = qpdf_o_generate;  
1261 - }  
1262 - else  
1263 - {  
1264 - usage("invalid object stream mode");  
1265 - }  
1266 - }  
1267 - else if (strcmp(arg, "ignore-xref-streams") == 0)  
1268 - {  
1269 - ignore_xref_streams = true;  
1270 - }  
1271 - else if (strcmp(arg, "qdf") == 0)  
1272 - {  
1273 - qdf_mode = true;  
1274 - }  
1275 - else if (strcmp(arg, "precheck-streams") == 0)  
1276 - {  
1277 - precheck_streams = true;  
1278 - }  
1279 - else if (strcmp(arg, "preserve-unreferenced") == 0)  
1280 - {  
1281 - preserve_unreferenced_objects = true;  
1282 - }  
1283 - else if (strcmp(arg, "newline-before-endstream") == 0)  
1284 - {  
1285 - newline_before_endstream = true;  
1286 - }  
1287 - else if (strcmp(arg, "min-version") == 0)  
1288 - {  
1289 - if (parameter == 0)  
1290 - {  
1291 - usage("--min-version be given as"  
1292 - "--min-version=version");  
1293 - }  
1294 - min_version = parameter;  
1295 - }  
1296 - else if (strcmp(arg, "force-version") == 0)  
1297 - {  
1298 - if (parameter == 0)  
1299 - {  
1300 - usage("--force-version be given as"  
1301 - "--force-version=version");  
1302 - }  
1303 - force_version = parameter;  
1304 - }  
1305 - else if (strcmp(arg, "deterministic-id") == 0)  
1306 - {  
1307 - deterministic_id = true;  
1308 - }  
1309 - else if (strcmp(arg, "static-id") == 0)  
1310 - {  
1311 - static_id = true;  
1312 - }  
1313 - else if (strcmp(arg, "static-aes-iv") == 0)  
1314 - {  
1315 - static_aes_iv = true;  
1316 - }  
1317 - else if (strcmp(arg, "no-original-object-ids") == 0)  
1318 - {  
1319 - suppress_original_object_id = true;  
1320 - }  
1321 - else if (strcmp(arg, "show-encryption") == 0)  
1322 - {  
1323 - show_encryption = true;  
1324 - require_outfile = false;  
1325 - }  
1326 - else if (strcmp(arg, "check-linearization") == 0)  
1327 - {  
1328 - check_linearization = true;  
1329 - require_outfile = false;  
1330 - }  
1331 - else if (strcmp(arg, "show-linearization") == 0)  
1332 - {  
1333 - show_linearization = true;  
1334 - require_outfile = false;  
1335 - }  
1336 - else if (strcmp(arg, "show-xref") == 0)  
1337 - {  
1338 - show_xref = true;  
1339 - require_outfile = false;  
1340 - }  
1341 - else if (strcmp(arg, "show-object") == 0)  
1342 - {  
1343 - if (parameter == 0)  
1344 - {  
1345 - usage("--show-object must be given as"  
1346 - " --show-object=obj[,gen]");  
1347 - }  
1348 - char* obj = parameter;  
1349 - char* gen = obj;  
1350 - if ((gen = strchr(obj, ',')) != 0)  
1351 - {  
1352 - *gen++ = 0;  
1353 - show_gen = atoi(gen);  
1354 - }  
1355 - show_obj = atoi(obj);  
1356 - require_outfile = false;  
1357 - }  
1358 - else if (strcmp(arg, "raw-stream-data") == 0)  
1359 - {  
1360 - show_raw_stream_data = true;  
1361 - }  
1362 - else if (strcmp(arg, "filtered-stream-data") == 0)  
1363 - {  
1364 - show_filtered_stream_data = true;  
1365 - }  
1366 - else if (strcmp(arg, "show-npages") == 0) 1222 + else if (strcmp(arg, "stream-data") == 0)
1367 { 1223 {
1368 - show_npages = true;  
1369 - require_outfile = false;  
1370 - }  
1371 - else if (strcmp(arg, "show-pages") == 0)  
1372 - {  
1373 - show_pages = true;  
1374 - require_outfile = false;  
1375 - }  
1376 - else if (strcmp(arg, "with-images") == 0)  
1377 - {  
1378 - show_page_images = true;  
1379 - }  
1380 - else if (strcmp(arg, "check") == 0)  
1381 - {  
1382 - check = true;  
1383 - require_outfile = false;  
1384 - }  
1385 - else  
1386 - {  
1387 - usage(std::string("unknown option --") + arg);  
1388 - }  
1389 - }  
1390 - else if (infilename == 0)  
1391 - {  
1392 - infilename = arg;  
1393 - }  
1394 - else if (outfilename == 0)  
1395 - {  
1396 - outfilename = arg;  
1397 - }  
1398 - else  
1399 - {  
1400 - usage(std::string("unknown argument ") + arg);  
1401 - }  
1402 - }  
1403 -  
1404 - if (infilename == 0)  
1405 - {  
1406 - usage("an input file name is required");  
1407 - }  
1408 - else if (require_outfile && (outfilename == 0))  
1409 - {  
1410 - usage("an output file name is required; use - for standard output");  
1411 - }  
1412 - else if ((! require_outfile) && (outfilename != 0))  
1413 - {  
1414 - usage("no output file may be given for this option");  
1415 - }  
1416 -  
1417 - if (QUtil::same_file(infilename, outfilename))  
1418 - {  
1419 - QTC::TC("qpdf", "qpdf same file error");  
1420 - usage("input file and output file are the same; this would cause input file to be lost");  
1421 - }  
1422 -  
1423 - try  
1424 - {  
1425 - QPDF pdf;  
1426 - QPDF encryption_pdf;  
1427 - if (ignore_xref_streams)  
1428 - {  
1429 - pdf.setIgnoreXRefStreams(true);  
1430 - }  
1431 - if (suppress_recovery)  
1432 - {  
1433 - pdf.setAttemptRecovery(false);  
1434 - }  
1435 - if (strcmp(infilename, "") == 0)  
1436 - {  
1437 - pdf.emptyPDF();  
1438 - }  
1439 - else  
1440 - {  
1441 - pdf.processFile(infilename, password);  
1442 - }  
1443 - if (outfilename == 0)  
1444 - {  
1445 - int exit_code = 0;  
1446 - if (check)  
1447 - {  
1448 - // Code below may set okay to false but not to true.  
1449 - // We assume okay until we prove otherwise but may  
1450 - // continue to perform additional checks after finding  
1451 - // errors.  
1452 - bool okay = true;  
1453 - std::cout << "checking " << infilename << std::endl;  
1454 - try  
1455 - {  
1456 - int extension_level = pdf.getExtensionLevel();  
1457 - std::cout << "PDF Version: " << pdf.getPDFVersion();  
1458 - if (extension_level > 0)  
1459 - {  
1460 - std::cout << " extension level "  
1461 - << pdf.getExtensionLevel();  
1462 - }  
1463 - std::cout << std::endl;  
1464 - ::show_encryption(pdf);  
1465 - if (pdf.isLinearized())  
1466 - {  
1467 - std::cout << "File is linearized\n";  
1468 - if (! pdf.checkLinearization())  
1469 - {  
1470 - // any errors are reported by checkLinearization()  
1471 - okay = false;  
1472 - }  
1473 - }  
1474 - else  
1475 - {  
1476 - std::cout << "File is not linearized\n";  
1477 - }  
1478 -  
1479 - // Write the file no nowhere, uncompressing  
1480 - // streams. This causes full file traversal and  
1481 - // decoding of all streams we can decode.  
1482 - QPDFWriter w(pdf);  
1483 - Pl_Discard discard;  
1484 - w.setOutputPipeline(&discard);  
1485 - w.setStreamDataMode(qpdf_s_uncompress);  
1486 - w.write();  
1487 -  
1488 - // Parse all content streams  
1489 - std::vector<QPDFObjectHandle> pages = pdf.getAllPages();  
1490 - DiscardContents discard_contents;  
1491 - int pageno = 0;  
1492 - for (std::vector<QPDFObjectHandle>::iterator iter =  
1493 - pages.begin();  
1494 - iter != pages.end(); ++iter)  
1495 - {  
1496 - ++pageno;  
1497 - try  
1498 - {  
1499 - QPDFObjectHandle::parseContentStream(  
1500 - (*iter).getKey("/Contents"),  
1501 - &discard_contents);  
1502 - }  
1503 - catch (QPDFExc& e)  
1504 - {  
1505 - okay = false;  
1506 - std::cout << "page " << pageno << ": "  
1507 - << e.what() << std::endl;  
1508 - }  
1509 - }  
1510 - }  
1511 - catch (std::exception& e)  
1512 - {  
1513 - std::cout << e.what() << std::endl;  
1514 - okay = false;  
1515 - }  
1516 - if (okay)  
1517 - {  
1518 - if (! pdf.getWarnings().empty())  
1519 - {  
1520 - exit_code = EXIT_WARNING;  
1521 - }  
1522 - else  
1523 - {  
1524 - std::cout << "No syntax or stream encoding errors"  
1525 - << " found; the file may still contain"  
1526 - << std::endl  
1527 - << "errors that qpdf cannot detect"  
1528 - << std::endl;  
1529 - }  
1530 - } 1224 + if (parameter == 0)
  1225 + {
  1226 + usage("--stream-data must be given as"
  1227 + "--stream-data=option");
  1228 + }
  1229 + o.stream_data_set = true;
  1230 + if (strcmp(parameter, "compress") == 0)
  1231 + {
  1232 + o.stream_data_mode = qpdf_s_compress;
  1233 + }
  1234 + else if (strcmp(parameter, "preserve") == 0)
  1235 + {
  1236 + o.stream_data_mode = qpdf_s_preserve;
  1237 + }
  1238 + else if (strcmp(parameter, "uncompress") == 0)
  1239 + {
  1240 + o.stream_data_mode = qpdf_s_uncompress;
  1241 + }
1531 else 1242 else
1532 { 1243 {
1533 - exit_code = EXIT_ERROR; 1244 + usage("invalid stream-data option");
1534 } 1245 }
1535 - }  
1536 - if (show_npages)  
1537 - {  
1538 - QTC::TC("qpdf", "qpdf npages");  
1539 - std::cout << pdf.getRoot().getKey("/Pages").  
1540 - getKey("/Count").getIntValue() << std::endl;  
1541 } 1246 }
1542 - if (show_encryption)  
1543 - {  
1544 - ::show_encryption(pdf);  
1545 - }  
1546 - if (check_linearization)  
1547 - {  
1548 - if (pdf.checkLinearization())  
1549 - {  
1550 - std::cout << infilename << ": no linearization errors"  
1551 - << std::endl;  
1552 - }  
1553 - else  
1554 - {  
1555 - exit_code = EXIT_ERROR;  
1556 - }  
1557 - }  
1558 - if (show_linearization)  
1559 - {  
1560 - if (pdf.isLinearized())  
1561 - {  
1562 - pdf.showLinearizationData();  
1563 - }  
1564 - else  
1565 - {  
1566 - std::cout << infilename << " is not linearized"  
1567 - << std::endl;  
1568 - }  
1569 - }  
1570 - if (show_xref)  
1571 - {  
1572 - pdf.showXRefTable();  
1573 - }  
1574 - if (show_obj > 0)  
1575 - {  
1576 - QPDFObjectHandle obj = pdf.getObjectByID(show_obj, show_gen);  
1577 - if (obj.isStream())  
1578 - {  
1579 - if (show_raw_stream_data || show_filtered_stream_data)  
1580 - {  
1581 - bool filter = show_filtered_stream_data;  
1582 - if (filter &&  
1583 - (! obj.pipeStreamData(0, true, false, false)))  
1584 - {  
1585 - QTC::TC("qpdf", "qpdf unable to filter");  
1586 - std::cerr << "Unable to filter stream data."  
1587 - << std::endl;  
1588 - exit_code = EXIT_ERROR;  
1589 - }  
1590 - else  
1591 - {  
1592 - QUtil::binary_stdout();  
1593 - Pl_StdioFile out("stdout", stdout);  
1594 - obj.pipeStreamData(&out, filter, normalize, false);  
1595 - }  
1596 - }  
1597 - else  
1598 - {  
1599 - std::cout  
1600 - << "Object is stream. Dictionary:" << std::endl  
1601 - << obj.getDict().unparseResolved() << std::endl;  
1602 - }  
1603 - }  
1604 - else  
1605 - {  
1606 - std::cout << obj.unparseResolved() << std::endl;  
1607 - }  
1608 - }  
1609 - if (show_pages)  
1610 - {  
1611 - if (show_page_images) 1247 + else if (strcmp(arg, "normalize-content") == 0)
  1248 + {
  1249 + if ((parameter == 0) || (*parameter == '\0'))
1612 { 1250 {
1613 - pdf.pushInheritedAttributesToPage(); 1251 + usage("--normalize-content must be given as"
  1252 + " --normalize-content=[yn]");
1614 } 1253 }
1615 - std::vector<QPDFObjectHandle> pages = pdf.getAllPages();  
1616 - int pageno = 0;  
1617 - for (std::vector<QPDFObjectHandle>::iterator iter =  
1618 - pages.begin();  
1619 - iter != pages.end(); ++iter)  
1620 - {  
1621 - QPDFObjectHandle& page = *iter;  
1622 - ++pageno;  
1623 -  
1624 - std::cout << "page " << pageno << ": "  
1625 - << page.getObjectID() << " "  
1626 - << page.getGeneration() << " R" << std::endl;  
1627 - if (show_page_images)  
1628 - {  
1629 - std::map<std::string, QPDFObjectHandle> images =  
1630 - page.getPageImages();  
1631 - if (! images.empty())  
1632 - {  
1633 - std::cout << " images:" << std::endl;  
1634 - for (std::map<std::string,  
1635 - QPDFObjectHandle>::iterator  
1636 - iter = images.begin();  
1637 - iter != images.end(); ++iter)  
1638 - {  
1639 - std::string const& name = (*iter).first;  
1640 - QPDFObjectHandle image = (*iter).second;  
1641 - QPDFObjectHandle dict = image.getDict();  
1642 - int width =  
1643 - dict.getKey("/Width").getIntValue();  
1644 - int height =  
1645 - dict.getKey("/Height").getIntValue();  
1646 - std::cout << " " << name << ": "  
1647 - << image.unparse()  
1648 - << ", " << width << " x " << height  
1649 - << std::endl;  
1650 - }  
1651 - }  
1652 - }  
1653 -  
1654 - std::cout << " content:" << std::endl;  
1655 - std::vector<QPDFObjectHandle> content =  
1656 - page.getPageContents();  
1657 - for (std::vector<QPDFObjectHandle>::iterator iter =  
1658 - content.begin();  
1659 - iter != content.end(); ++iter)  
1660 - {  
1661 - std::cout << " " << (*iter).unparse() << std::endl;  
1662 - }  
1663 - }  
1664 - }  
1665 - if (exit_code) 1254 + o.normalize_set = true;
  1255 + o.normalize = (parameter[0] == 'y');
  1256 + }
  1257 + else if (strcmp(arg, "suppress-recovery") == 0)
1666 { 1258 {
1667 - exit(exit_code); 1259 + o.suppress_recovery = true;
1668 } 1260 }
1669 - }  
1670 - else  
1671 - {  
1672 - std::vector<PointerHolder<QPDF> > page_heap;  
1673 - if (! page_specs.empty()) 1261 + else if (strcmp(arg, "object-streams") == 0)
1674 { 1262 {
1675 - // Parse all page specifications and translate them  
1676 - // into lists of actual pages.  
1677 -  
1678 - // Create a QPDF object for each file that we may take  
1679 - // pages from.  
1680 - std::map<std::string, QPDF*> page_spec_qpdfs;  
1681 - page_spec_qpdfs[infilename] = &pdf;  
1682 - std::vector<QPDFPageData> parsed_specs;  
1683 - for (std::vector<PageSpec>::iterator iter = page_specs.begin();  
1684 - iter != page_specs.end(); ++iter) 1263 + if (parameter == 0)
1685 { 1264 {
1686 - PageSpec& page_spec = *iter;  
1687 - if (page_spec_qpdfs.count(page_spec.filename) == 0)  
1688 - {  
1689 - // Open the PDF file and store the QPDF  
1690 - // object. Throw a PointerHolder to the qpdf  
1691 - // into a heap so that it survives through  
1692 - // writing the output but gets cleaned up  
1693 - // automatically at the end. Do not  
1694 - // canonicalize the file name. Using two  
1695 - // different paths to refer to the same file  
1696 - // is a document workaround for duplicating a  
1697 - // page. If you are using this an example of  
1698 - // how to do this with the API, you can just  
1699 - // create two different QPDF objects to the  
1700 - // same underlying file with the same path to  
1701 - // achieve the same affect.  
1702 - PointerHolder<QPDF> qpdf_ph = new QPDF();  
1703 - page_heap.push_back(qpdf_ph);  
1704 - QPDF* qpdf = qpdf_ph.getPointer();  
1705 - char const* password = page_spec.password;  
1706 - if (encryption_file && (password == 0) &&  
1707 - (page_spec.filename == encryption_file))  
1708 - {  
1709 - QTC::TC("qpdf", "qpdf pages encryption password");  
1710 - password = encryption_file_password;  
1711 - }  
1712 - qpdf->processFile(  
1713 - page_spec.filename.c_str(), password);  
1714 - page_spec_qpdfs[page_spec.filename] = qpdf;  
1715 - }  
1716 -  
1717 - // Read original pages from the PDF, and parse the  
1718 - // page range associated with this occurrence of  
1719 - // the file.  
1720 - parsed_specs.push_back(  
1721 - QPDFPageData(page_spec_qpdfs[page_spec.filename],  
1722 - page_spec.range)); 1265 + usage("--object-streams must be given as"
  1266 + " --object-streams=option");
1723 } 1267 }
1724 -  
1725 - // Clear all pages out of the primary QPDF's pages  
1726 - // tree but leave the objects in place in the file so  
1727 - // they can be re-added without changing their object  
1728 - // numbers. This enables other things in the original  
1729 - // file, such as outlines, to continue to work.  
1730 - std::vector<QPDFObjectHandle> orig_pages = pdf.getAllPages();  
1731 - for (std::vector<QPDFObjectHandle>::iterator iter =  
1732 - orig_pages.begin();  
1733 - iter != orig_pages.end(); ++iter) 1268 + o.object_stream_set = true;
  1269 + if (strcmp(parameter, "disable") == 0)
1734 { 1270 {
1735 - pdf.removePage(*iter); 1271 + o.object_stream_mode = qpdf_o_disable;
1736 } 1272 }
1737 -  
1738 - // Add all the pages from all the files in the order  
1739 - // specified. Keep track of any pages from the  
1740 - // original file that we are selecting.  
1741 - std::set<int> selected_from_orig;  
1742 - for (std::vector<QPDFPageData>::iterator iter =  
1743 - parsed_specs.begin();  
1744 - iter != parsed_specs.end(); ++iter) 1273 + else if (strcmp(parameter, "preserve") == 0)
1745 { 1274 {
1746 - QPDFPageData& page_data = *iter;  
1747 - for (std::vector<int>::iterator pageno_iter =  
1748 - page_data.selected_pages.begin();  
1749 - pageno_iter != page_data.selected_pages.end();  
1750 - ++pageno_iter)  
1751 - {  
1752 - // Pages are specified from 1 but numbered  
1753 - // from 0 in the vector  
1754 - int pageno = *pageno_iter - 1;  
1755 - pdf.addPage(page_data.orig_pages.at(pageno), false);  
1756 - if (page_data.qpdf == &pdf)  
1757 - {  
1758 - // This is a page from the original file.  
1759 - // Keep track of the fact that we are  
1760 - // using it.  
1761 - selected_from_orig.insert(pageno);  
1762 - }  
1763 - } 1275 + o.object_stream_mode = qpdf_o_preserve;
1764 } 1276 }
1765 -  
1766 - // Delete page objects for unused page in primary.  
1767 - // This prevents those objects from being preserved by  
1768 - // being referred to from other places, such as the  
1769 - // outlines dictionary.  
1770 - for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) 1277 + else if (strcmp(parameter, "generate") == 0)
1771 { 1278 {
1772 - if (selected_from_orig.count(pageno) == 0)  
1773 - {  
1774 - pdf.replaceObject(orig_pages.at(pageno).getObjGen(),  
1775 - QPDFObjectHandle::newNull());  
1776 - } 1279 + o.object_stream_mode = qpdf_o_generate;
  1280 + }
  1281 + else
  1282 + {
  1283 + usage("invalid object stream mode");
1777 } 1284 }
1778 } 1285 }
1779 -  
1780 - if (strcmp(outfilename, "-") == 0)  
1781 - {  
1782 - outfilename = 0;  
1783 - }  
1784 - QPDFWriter w(pdf, outfilename);  
1785 - if (qdf_mode)  
1786 - {  
1787 - w.setQDFMode(true);  
1788 - }  
1789 - if (precheck_streams) 1286 + else if (strcmp(arg, "ignore-xref-streams") == 0)
1790 { 1287 {
1791 - w.setPrecheckStreams(true); 1288 + o.ignore_xref_streams = true;
1792 } 1289 }
1793 - if (preserve_unreferenced_objects) 1290 + else if (strcmp(arg, "qdf") == 0)
1794 { 1291 {
1795 - w.setPreserveUnreferencedObjects(true); 1292 + o.qdf_mode = true;
1796 } 1293 }
1797 - if (newline_before_endstream) 1294 + else if (strcmp(arg, "precheck-streams") == 0)
1798 { 1295 {
1799 - w.setNewlineBeforeEndstream(true); 1296 + o.precheck_streams = true;
1800 } 1297 }
1801 - if (normalize_set)  
1802 - {  
1803 - w.setContentNormalization(normalize);  
1804 - }  
1805 - if (stream_data_set)  
1806 - {  
1807 - w.setStreamDataMode(stream_data_mode);  
1808 - }  
1809 - if (decrypt)  
1810 - {  
1811 - w.setPreserveEncryption(false);  
1812 - }  
1813 - if (deterministic_id) 1298 + else if (strcmp(arg, "preserve-unreferenced") == 0)
1814 { 1299 {
1815 - w.setDeterministicID(true); 1300 + o.preserve_unreferenced_objects = true;
1816 } 1301 }
1817 - if (static_id)  
1818 - {  
1819 - w.setStaticID(true);  
1820 - }  
1821 - if (static_aes_iv)  
1822 - {  
1823 - w.setStaticAesIV(true);  
1824 - }  
1825 - if (suppress_original_object_id)  
1826 - {  
1827 - w.setSuppressOriginalObjectIDs(true);  
1828 - }  
1829 - if (copy_encryption) 1302 + else if (strcmp(arg, "newline-before-endstream") == 0)
1830 { 1303 {
1831 - encryption_pdf.processFile(  
1832 - encryption_file, encryption_file_password);  
1833 - w.copyEncryptionParameters(encryption_pdf); 1304 + o.newline_before_endstream = true;
1834 } 1305 }
1835 - if (encrypt)  
1836 - {  
1837 - int R = 0;  
1838 - if (keylen == 40)  
1839 - {  
1840 - R = 2;  
1841 - }  
1842 - else if (keylen == 128)  
1843 - {  
1844 - if (force_V4 || cleartext_metadata || use_aes)  
1845 - {  
1846 - R = 4;  
1847 - }  
1848 - else  
1849 - {  
1850 - R = 3;  
1851 - }  
1852 - }  
1853 - else if (keylen == 256)  
1854 - {  
1855 - if (force_R5)  
1856 - {  
1857 - R = 5;  
1858 - }  
1859 - else  
1860 - {  
1861 - R = 6;  
1862 - }  
1863 - }  
1864 - else  
1865 - {  
1866 - throw std::logic_error("bad encryption keylen");  
1867 - }  
1868 - if ((R > 3) && (r3_accessibility == false)) 1306 + else if (strcmp(arg, "min-version") == 0)
  1307 + {
  1308 + if (parameter == 0)
1869 { 1309 {
1870 - std::cerr << whoami  
1871 - << ": -accessibility=n is ignored for modern"  
1872 - << " encryption formats" << std::endl; 1310 + usage("--min-version be given as"
  1311 + "--min-version=version");
1873 } 1312 }
1874 - switch (R) 1313 + o.min_version = parameter;
  1314 + }
  1315 + else if (strcmp(arg, "force-version") == 0)
  1316 + {
  1317 + if (parameter == 0)
1875 { 1318 {
1876 - case 2:  
1877 - w.setR2EncryptionParameters(  
1878 - user_password.c_str(), owner_password.c_str(),  
1879 - r2_print, r2_modify, r2_extract, r2_annotate);  
1880 - break;  
1881 - case 3:  
1882 - w.setR3EncryptionParameters(  
1883 - user_password.c_str(), owner_password.c_str(),  
1884 - r3_accessibility, r3_extract, r3_print, r3_modify);  
1885 - break;  
1886 - case 4:  
1887 - w.setR4EncryptionParameters(  
1888 - user_password.c_str(), owner_password.c_str(),  
1889 - r3_accessibility, r3_extract, r3_print, r3_modify,  
1890 - !cleartext_metadata, use_aes);  
1891 - break;  
1892 - case 5:  
1893 - w.setR5EncryptionParameters(  
1894 - user_password.c_str(), owner_password.c_str(),  
1895 - r3_accessibility, r3_extract, r3_print, r3_modify,  
1896 - !cleartext_metadata);  
1897 - break;  
1898 - case 6:  
1899 - w.setR6EncryptionParameters(  
1900 - user_password.c_str(), owner_password.c_str(),  
1901 - r3_accessibility, r3_extract, r3_print, r3_modify,  
1902 - !cleartext_metadata);  
1903 - break;  
1904 - default:  
1905 - throw std::logic_error("bad encryption R value");  
1906 - break; 1319 + usage("--force-version be given as"
  1320 + "--force-version=version");
1907 } 1321 }
1908 - }  
1909 - if (linearize)  
1910 - {  
1911 - w.setLinearization(true);  
1912 - }  
1913 - if (object_stream_set)  
1914 - {  
1915 - w.setObjectStreamMode(object_stream_mode);  
1916 - }  
1917 - if (! min_version.empty())  
1918 - {  
1919 - std::string version;  
1920 - int extension_level = 0;  
1921 - parse_version(min_version, version, extension_level);  
1922 - w.setMinimumPDFVersion(version, extension_level);  
1923 - }  
1924 - if (! force_version.empty())  
1925 - {  
1926 - std::string version;  
1927 - int extension_level = 0;  
1928 - parse_version(force_version, version, extension_level);  
1929 - w.forcePDFVersion(version, extension_level);  
1930 - }  
1931 - w.write(); 1322 + o.force_version = parameter;
  1323 + }
  1324 + else if (strcmp(arg, "deterministic-id") == 0)
  1325 + {
  1326 + o.deterministic_id = true;
  1327 + }
  1328 + else if (strcmp(arg, "static-id") == 0)
  1329 + {
  1330 + o.static_id = true;
  1331 + }
  1332 + else if (strcmp(arg, "static-aes-iv") == 0)
  1333 + {
  1334 + o.static_aes_iv = true;
  1335 + }
  1336 + else if (strcmp(arg, "no-original-object-ids") == 0)
  1337 + {
  1338 + o.suppress_original_object_id = true;
  1339 + }
  1340 + else if (strcmp(arg, "show-encryption") == 0)
  1341 + {
  1342 + o.show_encryption = true;
  1343 + o.require_outfile = false;
  1344 + }
  1345 + else if (strcmp(arg, "check-linearization") == 0)
  1346 + {
  1347 + o.check_linearization = true;
  1348 + o.require_outfile = false;
  1349 + }
  1350 + else if (strcmp(arg, "show-linearization") == 0)
  1351 + {
  1352 + o.show_linearization = true;
  1353 + o.require_outfile = false;
  1354 + }
  1355 + else if (strcmp(arg, "show-xref") == 0)
  1356 + {
  1357 + o.show_xref = true;
  1358 + o.require_outfile = false;
  1359 + }
  1360 + else if (strcmp(arg, "show-object") == 0)
  1361 + {
  1362 + if (parameter == 0)
  1363 + {
  1364 + usage("--show-object must be given as"
  1365 + " --show-object=obj[,gen]");
  1366 + }
  1367 + char* obj = parameter;
  1368 + char* gen = obj;
  1369 + if ((gen = strchr(obj, ',')) != 0)
  1370 + {
  1371 + *gen++ = 0;
  1372 + o.show_gen = atoi(gen);
  1373 + }
  1374 + o.show_obj = atoi(obj);
  1375 + o.require_outfile = false;
  1376 + }
  1377 + else if (strcmp(arg, "raw-stream-data") == 0)
  1378 + {
  1379 + o.show_raw_stream_data = true;
  1380 + }
  1381 + else if (strcmp(arg, "filtered-stream-data") == 0)
  1382 + {
  1383 + o.show_filtered_stream_data = true;
  1384 + }
  1385 + else if (strcmp(arg, "show-npages") == 0)
  1386 + {
  1387 + o.show_npages = true;
  1388 + o.require_outfile = false;
  1389 + }
  1390 + else if (strcmp(arg, "show-pages") == 0)
  1391 + {
  1392 + o.show_pages = true;
  1393 + o.require_outfile = false;
  1394 + }
  1395 + else if (strcmp(arg, "with-images") == 0)
  1396 + {
  1397 + o.show_page_images = true;
  1398 + }
  1399 + else if (strcmp(arg, "check") == 0)
  1400 + {
  1401 + o.check = true;
  1402 + o.require_outfile = false;
  1403 + }
  1404 + else
  1405 + {
  1406 + usage(std::string("unknown option --") + arg);
  1407 + }
  1408 + }
  1409 + else if (o.infilename == 0)
  1410 + {
  1411 + o.infilename = arg;
  1412 + }
  1413 + else if (o.outfilename == 0)
  1414 + {
  1415 + o.outfilename = arg;
  1416 + }
  1417 + else
  1418 + {
  1419 + usage(std::string("unknown argument ") + arg);
  1420 + }
  1421 + }
  1422 +
  1423 + if (o.infilename == 0)
  1424 + {
  1425 + usage("an input file name is required");
  1426 + }
  1427 + else if (o.require_outfile && (o.outfilename == 0))
  1428 + {
  1429 + usage("an output file name is required; use - for standard output");
  1430 + }
  1431 + else if ((! o.require_outfile) && (o.outfilename != 0))
  1432 + {
  1433 + usage("no output file may be given for this option");
  1434 + }
  1435 +
  1436 + if (QUtil::same_file(o.infilename, o.outfilename))
  1437 + {
  1438 + QTC::TC("qpdf", "qpdf same file error");
  1439 + usage("input file and output file are the same; this would cause input file to be lost");
  1440 + }
  1441 +}
  1442 +
  1443 +static void set_qpdf_options(QPDF& pdf, Options& o)
  1444 +{
  1445 + if (o.ignore_xref_streams)
  1446 + {
  1447 + pdf.setIgnoreXRefStreams(true);
  1448 + }
  1449 + if (o.suppress_recovery)
  1450 + {
  1451 + pdf.setAttemptRecovery(false);
  1452 + }
  1453 +}
  1454 +
  1455 +static void do_check(QPDF& pdf, Options& o, int& exit_code)
  1456 +{
  1457 + // Code below may set okay to false but not to true.
  1458 + // We assume okay until we prove otherwise but may
  1459 + // continue to perform additional checks after finding
  1460 + // errors.
  1461 + bool okay = true;
  1462 + std::cout << "checking " << o.infilename << std::endl;
  1463 + try
  1464 + {
  1465 + int extension_level = pdf.getExtensionLevel();
  1466 + std::cout << "PDF Version: " << pdf.getPDFVersion();
  1467 + if (extension_level > 0)
  1468 + {
  1469 + std::cout << " extension level "
  1470 + << pdf.getExtensionLevel();
  1471 + }
  1472 + std::cout << std::endl;
  1473 + show_encryption(pdf);
  1474 + if (pdf.isLinearized())
  1475 + {
  1476 + std::cout << "File is linearized\n";
  1477 + if (! pdf.checkLinearization())
  1478 + {
  1479 + // any errors are reported by checkLinearization()
  1480 + okay = false;
  1481 + }
  1482 + }
  1483 + else
  1484 + {
  1485 + std::cout << "File is not linearized\n";
  1486 + }
  1487 +
  1488 + // Write the file no nowhere, uncompressing
  1489 + // streams. This causes full file traversal and
  1490 + // decoding of all streams we can decode.
  1491 + QPDFWriter w(pdf);
  1492 + Pl_Discard discard;
  1493 + w.setOutputPipeline(&discard);
  1494 + w.setStreamDataMode(qpdf_s_uncompress);
  1495 + w.write();
  1496 +
  1497 + // Parse all content streams
  1498 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1499 + DiscardContents discard_contents;
  1500 + int pageno = 0;
  1501 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1502 + pages.begin();
  1503 + iter != pages.end(); ++iter)
  1504 + {
  1505 + ++pageno;
  1506 + try
  1507 + {
  1508 + QPDFObjectHandle::parseContentStream(
  1509 + (*iter).getKey("/Contents"),
  1510 + &discard_contents);
  1511 + }
  1512 + catch (QPDFExc& e)
  1513 + {
  1514 + okay = false;
  1515 + std::cout << "page " << pageno << ": "
  1516 + << e.what() << std::endl;
  1517 + }
  1518 + }
  1519 + }
  1520 + catch (std::exception& e)
  1521 + {
  1522 + std::cout << e.what() << std::endl;
  1523 + okay = false;
  1524 + }
  1525 + if (okay)
  1526 + {
  1527 + if (! pdf.getWarnings().empty())
  1528 + {
  1529 + exit_code = EXIT_WARNING;
  1530 + }
  1531 + else
  1532 + {
  1533 + std::cout << "No syntax or stream encoding errors"
  1534 + << " found; the file may still contain"
  1535 + << std::endl
  1536 + << "errors that qpdf cannot detect"
  1537 + << std::endl;
  1538 + }
  1539 + }
  1540 + else
  1541 + {
  1542 + exit_code = EXIT_ERROR;
  1543 + }
  1544 +}
  1545 +
  1546 +static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
  1547 +{
  1548 + QPDFObjectHandle obj = pdf.getObjectByID(o.show_obj, o.show_gen);
  1549 + if (obj.isStream())
  1550 + {
  1551 + if (o.show_raw_stream_data || o.show_filtered_stream_data)
  1552 + {
  1553 + bool filter = o.show_filtered_stream_data;
  1554 + if (filter &&
  1555 + (! obj.pipeStreamData(0, true, false, false)))
  1556 + {
  1557 + QTC::TC("qpdf", "qpdf unable to filter");
  1558 + std::cerr << "Unable to filter stream data."
  1559 + << std::endl;
  1560 + exit_code = EXIT_ERROR;
  1561 + }
  1562 + else
  1563 + {
  1564 + QUtil::binary_stdout();
  1565 + Pl_StdioFile out("stdout", stdout);
  1566 + obj.pipeStreamData(&out, filter, o.normalize, false);
  1567 + }
  1568 + }
  1569 + else
  1570 + {
  1571 + std::cout
  1572 + << "Object is stream. Dictionary:" << std::endl
  1573 + << obj.getDict().unparseResolved() << std::endl;
  1574 + }
  1575 + }
  1576 + else
  1577 + {
  1578 + std::cout << obj.unparseResolved() << std::endl;
  1579 + }
  1580 +}
  1581 +
  1582 +static void do_show_pages(QPDF& pdf, Options& o)
  1583 +{
  1584 + if (o.show_page_images)
  1585 + {
  1586 + pdf.pushInheritedAttributesToPage();
  1587 + }
  1588 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1589 + int pageno = 0;
  1590 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1591 + pages.begin();
  1592 + iter != pages.end(); ++iter)
  1593 + {
  1594 + QPDFObjectHandle& page = *iter;
  1595 + ++pageno;
  1596 +
  1597 + std::cout << "page " << pageno << ": "
  1598 + << page.getObjectID() << " "
  1599 + << page.getGeneration() << " R" << std::endl;
  1600 + if (o.show_page_images)
  1601 + {
  1602 + std::map<std::string, QPDFObjectHandle> images =
  1603 + page.getPageImages();
  1604 + if (! images.empty())
  1605 + {
  1606 + std::cout << " images:" << std::endl;
  1607 + for (std::map<std::string,
  1608 + QPDFObjectHandle>::iterator
  1609 + iter = images.begin();
  1610 + iter != images.end(); ++iter)
  1611 + {
  1612 + std::string const& name = (*iter).first;
  1613 + QPDFObjectHandle image = (*iter).second;
  1614 + QPDFObjectHandle dict = image.getDict();
  1615 + int width =
  1616 + dict.getKey("/Width").getIntValue();
  1617 + int height =
  1618 + dict.getKey("/Height").getIntValue();
  1619 + std::cout << " " << name << ": "
  1620 + << image.unparse()
  1621 + << ", " << width << " x " << height
  1622 + << std::endl;
  1623 + }
  1624 + }
  1625 + }
  1626 +
  1627 + std::cout << " content:" << std::endl;
  1628 + std::vector<QPDFObjectHandle> content =
  1629 + page.getPageContents();
  1630 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1631 + content.begin();
  1632 + iter != content.end(); ++iter)
  1633 + {
  1634 + std::cout << " " << (*iter).unparse() << std::endl;
  1635 + }
  1636 + }
  1637 +}
  1638 +
  1639 +static void do_inspection(QPDF& pdf, Options& o)
  1640 +{
  1641 + int exit_code = 0;
  1642 + if (o.check)
  1643 + {
  1644 + do_check(pdf, o, exit_code);
  1645 + }
  1646 + if (o.show_npages)
  1647 + {
  1648 + QTC::TC("qpdf", "qpdf npages");
  1649 + std::cout << pdf.getRoot().getKey("/Pages").
  1650 + getKey("/Count").getIntValue() << std::endl;
  1651 + }
  1652 + if (o.show_encryption)
  1653 + {
  1654 + show_encryption(pdf);
  1655 + }
  1656 + if (o.check_linearization)
  1657 + {
  1658 + if (pdf.checkLinearization())
  1659 + {
  1660 + std::cout << o.infilename << ": no linearization errors"
  1661 + << std::endl;
  1662 + }
  1663 + else
  1664 + {
  1665 + exit_code = EXIT_ERROR;
  1666 + }
  1667 + }
  1668 + if (o.show_linearization)
  1669 + {
  1670 + if (pdf.isLinearized())
  1671 + {
  1672 + pdf.showLinearizationData();
  1673 + }
  1674 + else
  1675 + {
  1676 + std::cout << o.infilename << " is not linearized"
  1677 + << std::endl;
  1678 + }
  1679 + }
  1680 + if (o.show_xref)
  1681 + {
  1682 + pdf.showXRefTable();
  1683 + }
  1684 + if (o.show_obj > 0)
  1685 + {
  1686 + do_show_obj(pdf, o, exit_code);
  1687 + }
  1688 + if (o.show_pages)
  1689 + {
  1690 + do_show_pages(pdf, o);
  1691 + }
  1692 + if (exit_code)
  1693 + {
  1694 + exit(exit_code);
  1695 + }
  1696 +}
  1697 +
  1698 +static void handle_page_specs(QPDF& pdf, Options& o,
  1699 + std::vector<PointerHolder<QPDF> >& page_heap)
  1700 +{
  1701 + // Parse all page specifications and translate them into lists of
  1702 + // actual pages.
  1703 +
  1704 + // Create a QPDF object for each file that we may take pages from.
  1705 + std::map<std::string, QPDF*> page_spec_qpdfs;
  1706 + page_spec_qpdfs[o.infilename] = &pdf;
  1707 + std::vector<QPDFPageData> parsed_specs;
  1708 + for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
  1709 + iter != o.page_specs.end(); ++iter)
  1710 + {
  1711 + PageSpec& page_spec = *iter;
  1712 + if (page_spec_qpdfs.count(page_spec.filename) == 0)
  1713 + {
  1714 + // Open the PDF file and store the QPDF object. Throw a
  1715 + // PointerHolder to the qpdf into a heap so that it
  1716 + // survives through writing the output but gets cleaned up
  1717 + // automatically at the end. Do not canonicalize the file
  1718 + // name. Using two different paths to refer to the same
  1719 + // file is a document workaround for duplicating a page.
  1720 + // If you are using this an example of how to do this with
  1721 + // the API, you can just create two different QPDF objects
  1722 + // to the same underlying file with the same path to
  1723 + // achieve the same affect.
  1724 + PointerHolder<QPDF> qpdf_ph = new QPDF();
  1725 + page_heap.push_back(qpdf_ph);
  1726 + QPDF* qpdf = qpdf_ph.getPointer();
  1727 + char const* password = page_spec.password;
  1728 + if (o.encryption_file && (password == 0) &&
  1729 + (page_spec.filename == o.encryption_file))
  1730 + {
  1731 + QTC::TC("qpdf", "qpdf pages encryption password");
  1732 + password = o.encryption_file_password;
  1733 + }
  1734 + qpdf->processFile(
  1735 + page_spec.filename.c_str(), password);
  1736 + page_spec_qpdfs[page_spec.filename] = qpdf;
  1737 + }
  1738 +
  1739 + // Read original pages from the PDF, and parse the page range
  1740 + // associated with this occurrence of the file.
  1741 + parsed_specs.push_back(
  1742 + QPDFPageData(page_spec_qpdfs[page_spec.filename],
  1743 + page_spec.range));
  1744 + }
  1745 +
  1746 + // Clear all pages out of the primary QPDF's pages tree but leave
  1747 + // the objects in place in the file so they can be re-added
  1748 + // without changing their object numbers. This enables other
  1749 + // things in the original file, such as outlines, to continue to
  1750 + // work.
  1751 + std::vector<QPDFObjectHandle> orig_pages = pdf.getAllPages();
  1752 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1753 + orig_pages.begin();
  1754 + iter != orig_pages.end(); ++iter)
  1755 + {
  1756 + pdf.removePage(*iter);
  1757 + }
  1758 +
  1759 + // Add all the pages from all the files in the order specified.
  1760 + // Keep track of any pages from the original file that we are
  1761 + // selecting.
  1762 + std::set<int> selected_from_orig;
  1763 + for (std::vector<QPDFPageData>::iterator iter =
  1764 + parsed_specs.begin();
  1765 + iter != parsed_specs.end(); ++iter)
  1766 + {
  1767 + QPDFPageData& page_data = *iter;
  1768 + for (std::vector<int>::iterator pageno_iter =
  1769 + page_data.selected_pages.begin();
  1770 + pageno_iter != page_data.selected_pages.end();
  1771 + ++pageno_iter)
  1772 + {
  1773 + // Pages are specified from 1 but numbered from 0 in the
  1774 + // vector
  1775 + int pageno = *pageno_iter - 1;
  1776 + pdf.addPage(page_data.orig_pages.at(pageno), false);
  1777 + if (page_data.qpdf == &pdf)
  1778 + {
  1779 + // This is a page from the original file. Keep track
  1780 + // of the fact that we are using it.
  1781 + selected_from_orig.insert(pageno);
  1782 + }
  1783 + }
  1784 + }
  1785 +
  1786 + // Delete page objects for unused page in primary. This prevents
  1787 + // those objects from being preserved by being referred to from
  1788 + // other places, such as the outlines dictionary.
  1789 + for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
  1790 + {
  1791 + if (selected_from_orig.count(pageno) == 0)
  1792 + {
  1793 + pdf.replaceObject(orig_pages.at(pageno).getObjGen(),
  1794 + QPDFObjectHandle::newNull());
  1795 + }
  1796 + }
  1797 +}
  1798 +
  1799 +static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
  1800 +{
  1801 + int R = 0;
  1802 + if (o.keylen == 40)
  1803 + {
  1804 + R = 2;
  1805 + }
  1806 + else if (o.keylen == 128)
  1807 + {
  1808 + if (o.force_V4 || o.cleartext_metadata || o.use_aes)
  1809 + {
  1810 + R = 4;
  1811 + }
  1812 + else
  1813 + {
  1814 + R = 3;
  1815 + }
  1816 + }
  1817 + else if (o.keylen == 256)
  1818 + {
  1819 + if (o.force_R5)
  1820 + {
  1821 + R = 5;
  1822 + }
  1823 + else
  1824 + {
  1825 + R = 6;
  1826 + }
  1827 + }
  1828 + else
  1829 + {
  1830 + throw std::logic_error("bad encryption keylen");
  1831 + }
  1832 + if ((R > 3) && (o.r3_accessibility == false))
  1833 + {
  1834 + std::cerr << whoami
  1835 + << ": -accessibility=n is ignored for modern"
  1836 + << " encryption formats" << std::endl;
  1837 + }
  1838 + switch (R)
  1839 + {
  1840 + case 2:
  1841 + w.setR2EncryptionParameters(
  1842 + o.user_password.c_str(), o.owner_password.c_str(),
  1843 + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate);
  1844 + break;
  1845 + case 3:
  1846 + w.setR3EncryptionParameters(
  1847 + o.user_password.c_str(), o.owner_password.c_str(),
  1848 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify);
  1849 + break;
  1850 + case 4:
  1851 + w.setR4EncryptionParameters(
  1852 + o.user_password.c_str(), o.owner_password.c_str(),
  1853 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1854 + !o.cleartext_metadata, o.use_aes);
  1855 + break;
  1856 + case 5:
  1857 + w.setR5EncryptionParameters(
  1858 + o.user_password.c_str(), o.owner_password.c_str(),
  1859 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1860 + !o.cleartext_metadata);
  1861 + break;
  1862 + case 6:
  1863 + w.setR6EncryptionParameters(
  1864 + o.user_password.c_str(), o.owner_password.c_str(),
  1865 + o.r3_accessibility, o.r3_extract, o.r3_print, o.r3_modify,
  1866 + !o.cleartext_metadata);
  1867 + break;
  1868 + default:
  1869 + throw std::logic_error("bad encryption R value");
  1870 + break;
  1871 + }
  1872 +}
  1873 +
  1874 +static void write_outfile(QPDF& pdf, Options& o)
  1875 +{
  1876 + QPDF encryption_pdf;
  1877 + std::vector<PointerHolder<QPDF> > page_heap;
  1878 + if (! o.page_specs.empty())
  1879 + {
  1880 + handle_page_specs(pdf, o, page_heap);
  1881 + }
  1882 +
  1883 + if (strcmp(o.outfilename, "-") == 0)
  1884 + {
  1885 + o.outfilename = 0;
  1886 + }
  1887 + QPDFWriter w(pdf, o.outfilename);
  1888 + if (o.qdf_mode)
  1889 + {
  1890 + w.setQDFMode(true);
  1891 + }
  1892 + if (o.precheck_streams)
  1893 + {
  1894 + w.setPrecheckStreams(true);
  1895 + }
  1896 + if (o.preserve_unreferenced_objects)
  1897 + {
  1898 + w.setPreserveUnreferencedObjects(true);
  1899 + }
  1900 + if (o.newline_before_endstream)
  1901 + {
  1902 + w.setNewlineBeforeEndstream(true);
  1903 + }
  1904 + if (o.normalize_set)
  1905 + {
  1906 + w.setContentNormalization(o.normalize);
  1907 + }
  1908 + if (o.stream_data_set)
  1909 + {
  1910 + w.setStreamDataMode(o.stream_data_mode);
  1911 + }
  1912 + if (o.decrypt)
  1913 + {
  1914 + w.setPreserveEncryption(false);
  1915 + }
  1916 + if (o.deterministic_id)
  1917 + {
  1918 + w.setDeterministicID(true);
  1919 + }
  1920 + if (o.static_id)
  1921 + {
  1922 + w.setStaticID(true);
  1923 + }
  1924 + if (o.static_aes_iv)
  1925 + {
  1926 + w.setStaticAesIV(true);
  1927 + }
  1928 + if (o.suppress_original_object_id)
  1929 + {
  1930 + w.setSuppressOriginalObjectIDs(true);
  1931 + }
  1932 + if (o.copy_encryption)
  1933 + {
  1934 + encryption_pdf.processFile(
  1935 + o.encryption_file, o.encryption_file_password);
  1936 + w.copyEncryptionParameters(encryption_pdf);
  1937 + }
  1938 + if (o.encrypt)
  1939 + {
  1940 + set_encryption_options(pdf, o, w);
  1941 + }
  1942 + if (o.linearize)
  1943 + {
  1944 + w.setLinearization(true);
  1945 + }
  1946 + if (o.object_stream_set)
  1947 + {
  1948 + w.setObjectStreamMode(o.object_stream_mode);
  1949 + }
  1950 + if (! o.min_version.empty())
  1951 + {
  1952 + std::string version;
  1953 + int extension_level = 0;
  1954 + parse_version(o.min_version, version, extension_level);
  1955 + w.setMinimumPDFVersion(version, extension_level);
  1956 + }
  1957 + if (! o.force_version.empty())
  1958 + {
  1959 + std::string version;
  1960 + int extension_level = 0;
  1961 + parse_version(o.force_version, version, extension_level);
  1962 + w.forcePDFVersion(version, extension_level);
  1963 + }
  1964 + w.write();
  1965 +}
  1966 +
  1967 +int main(int argc, char* argv[])
  1968 +{
  1969 + whoami = QUtil::getWhoami(argv[0]);
  1970 + QUtil::setLineBuf(stdout);
  1971 +
  1972 + // For libtool's sake....
  1973 + if (strncmp(whoami, "lt-", 3) == 0)
  1974 + {
  1975 + whoami += 3;
  1976 + }
  1977 +
  1978 + handle_help_verison(argc, argv);
  1979 +
  1980 + // Support reading arguments from files. Create a new argv. Ensure
  1981 + // that argv itself as well as all its contents are automatically
  1982 + // deleted by using PointerHolder objects to back the pointers in
  1983 + // argv.
  1984 + std::vector<PointerHolder<char> > new_argv;
  1985 + new_argv.push_back(PointerHolder<char>(QUtil::copy_string(argv[0]), true));
  1986 + for (int i = 1; i < argc; ++i)
  1987 + {
  1988 + if ((strlen(argv[i]) > 1) && (argv[i][0] == '@'))
  1989 + {
  1990 + read_args_from_file(1+argv[i], new_argv);
  1991 + }
  1992 + else
  1993 + {
  1994 + new_argv.push_back(
  1995 + PointerHolder<char>(QUtil::copy_string(argv[i]), true));
  1996 + }
  1997 + }
  1998 + PointerHolder<char*> argv_ph(new char*[1+new_argv.size()], true);
  1999 + argv = argv_ph.getPointer();
  2000 + for (size_t i = 0; i < new_argv.size(); ++i)
  2001 + {
  2002 + argv[i] = new_argv.at(i).getPointer();
  2003 + }
  2004 + argc = static_cast<int>(new_argv.size());
  2005 + argv[argc] = 0;
  2006 +
  2007 + Options o;
  2008 + parse_options(argc, argv, o);
  2009 +
  2010 + try
  2011 + {
  2012 + QPDF pdf;
  2013 + set_qpdf_options(pdf, o);
  2014 + if (strcmp(o.infilename, "") == 0)
  2015 + {
  2016 + pdf.emptyPDF();
  2017 + }
  2018 + else
  2019 + {
  2020 + pdf.processFile(o.infilename, o.password);
  2021 + }
  2022 +
  2023 + if (o.outfilename == 0)
  2024 + {
  2025 + do_inspection(pdf, o);
  2026 + }
  2027 + else
  2028 + {
  2029 + write_outfile(pdf, o);
1932 } 2030 }
1933 if (! pdf.getWarnings().empty()) 2031 if (! pdf.getWarnings().empty())
1934 { 2032 {