diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 7d43db1..54da62b 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -6,6 +6,5 @@ add_subdirectory(examples) # Build additional OpenBR utilities if(NOT ${BR_EMBEDDED}) - add_subdirectory(br-crawl) add_subdirectory(br-gui) endif() diff --git a/app/br-crawl/CMakeLists.txt b/app/br-crawl/CMakeLists.txt deleted file mode 100644 index f64263b..0000000 --- a/app/br-crawl/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_executable(br-crawl br-crawl.cpp ${BR_RESOURCES}) -target_link_libraries(br-crawl openbr ${BR_THIRDPARTY_LIBS}) -qt5_use_modules(br-crawl ${QT_DEPENDENCIES}) -install(TARGETS br-crawl RUNTIME DESTINATION bin) diff --git a/app/br-crawl/br-crawl.cpp b/app/br-crawl/br-crawl.cpp deleted file mode 100644 index 0a9ec74..0000000 --- a/app/br-crawl/br-crawl.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Copyright 2014 Noblis * - * * - * Licensed under the Apache License, Version 2.0 (the "License"); * - * you may not use this file except in compliance with the License. * - * You may obtain a copy of the License at * - * * - * http://www.apache.org/licenses/LICENSE-2.0 * - * * - * Unless required by applicable law or agreed to in writing, software * - * distributed under the License is distributed on an "AS IS" BASIS, * - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * - * See the License for the specific language governing permissions and * - * limitations under the License. * - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#include - -static void help() -{ - printf("br-crawl [URL] [args]\n" - "=====================\n" - "* __stdin__ - URLs\n" - "* __stdout__ - Image URLs/JSON\n" - "\n" - "_br-crawl_ conducts a recursive descent search for images from a root URL.\n" - "Crawl will read root URLs from _stdin_ if none are provided.\n" - "Crawl writes every discovered image URL in a new line to _stdout_.\n" - "Arguments specifiying the duration of crawl are on a per-root-URL basis.\n" - "\n" - "Crawl identifies image URLs based on known image file extensions like `.png`.\n" - "Crawl is not expected to verify that URLs are images and may produce false positives.\n" - "\n" - "Optional Arguments\n" - "------------------\n" - "* -auto - Crawl chooses its own root URL (must be specified otherwise).\n" - "* -depth - The levels to recursively search (unlimited otherwise).\n" - "* -depthFirst - Depth-first search (breadth-first otherwise).\n" - "* -help - Print usage information.\n" - "* -images - The number of image URLs to obtain (unlimited otherwise).\n" - "* -json - Output JSON instead or URLs.\n" - "* -time - The seconds to spend searching for images (unlimited otherwise).\n"); -} - -static const char *root = NULL; -static bool autoRoot = false; -static int depth = INT_MAX; -static bool depthFirst = false; -static int images = INT_MAX; -static bool json = false; -static int timeLimit = INT_MAX; - -static QTime elapsed; -static int currentImages = 0; - -static void crawl(QFileInfo url, int currentDepth = 0) -{ - if ((currentImages >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit)) - return; - - if (url.filePath().startsWith("file://")) - url = QFileInfo(url.filePath().mid(7)); - - if (url.isDir()) { - const QDir dir(url.absoluteFilePath()); - const QFileInfoList files = dir.entryInfoList(QDir::Files); - const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot); - foreach (const QFileInfo &first, depthFirst ? subdirs : files) - crawl(first, currentDepth + 1); - foreach (const QFileInfo &second, depthFirst ? files : subdirs) - crawl(second, currentDepth + 1); - } else if (url.isFile()) { - const QString suffix = url.suffix(); - if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) { - printf(json ? "{ \"URL\" : \"file://%s\" }\n" : "file://%s\n", qPrintable(url.canonicalFilePath())); - fflush(stdout); - currentImages++; - } - } -} - -int main(int argc, char *argv[]) -{ - for (int i=1; i("URL", ""); + const QString url = t.file.get("URL", t.file.name); if (!url.isEmpty()) { gallery.write(qPrintable(url)); gallery.write("\n"); @@ -459,6 +459,93 @@ BR_REGISTER(Gallery, EmptyGallery) /*! * \ingroup galleries + * \brief Crawl a root location for image files. + * \author Josh Klontz \cite jklontz + */ +class crawlGallery : public Gallery +{ + Q_OBJECT + Q_PROPERTY(bool autoRoot READ get_autoRoot WRITE set_autoRoot RESET reset_autoRoot STORED false) + Q_PROPERTY(int depth READ get_depth WRITE set_depth RESET reset_depth STORED false) + Q_PROPERTY(bool depthFirst READ get_depthFirst WRITE set_depthFirst RESET reset_depthFirst STORED false) + Q_PROPERTY(int images READ get_images WRITE set_images RESET reset_images STORED false) + Q_PROPERTY(bool json READ get_json WRITE set_json RESET reset_json STORED false) + Q_PROPERTY(int timeLimit READ get_timeLimit WRITE set_timeLimit RESET reset_timeLimit STORED false) + BR_PROPERTY(bool, autoRoot, false) + BR_PROPERTY(int, depth, INT_MAX) + BR_PROPERTY(bool, depthFirst, false) + BR_PROPERTY(int, images, INT_MAX) + BR_PROPERTY(bool, json, false) + BR_PROPERTY(int, timeLimit, INT_MAX) + + QTime elapsed; + TemplateList templates; + + void crawl(QFileInfo url, int currentDepth = 0) + { + if ((templates.size() >= images) || (currentDepth >= depth) || (elapsed.elapsed()/1000 >= timeLimit)) + return; + + if (url.filePath().startsWith("file://")) + url = QFileInfo(url.filePath().mid(7)); + + if (url.isDir()) { + const QDir dir(url.absoluteFilePath()); + const QFileInfoList files = dir.entryInfoList(QDir::Files); + const QFileInfoList subdirs = dir.entryInfoList(QDir::Dirs | QDir::NoDotAndDotDot); + foreach (const QFileInfo &first, depthFirst ? subdirs : files) + crawl(first, currentDepth + 1); + foreach (const QFileInfo &second, depthFirst ? files : subdirs) + crawl(second, currentDepth + 1); + } else if (url.isFile()) { + const QString suffix = url.suffix(); + if ((suffix == "bmp") || (suffix == "jpg") || (suffix == "jpeg") || (suffix == "png") || (suffix == "tiff")) { + File f; + if (json) f.set("URL", "file://"+url.canonicalFilePath()); + else f.name = "file://"+url.canonicalFilePath(); + templates.append(f); + } + } + } + + void init() + { + elapsed.start(); + const QString root = file.name.mid(0, file.name.size()-6); // Remove .crawl suffix"; + if (!root.isEmpty()) { + crawl(root); + } else { + if (autoRoot) { + foreach (const QString &path, QStandardPaths::standardLocations(QStandardPaths::HomeLocation)) + crawl(path); + } else { + QFile file; + file.open(stdin, QFile::ReadOnly); + while (!file.atEnd()) { + const QString url = QString::fromLocal8Bit(file.readLine()).simplified(); + if (!url.isEmpty()) + crawl(url); + } + } + } + } + + TemplateList readBlock(bool *done) + { + *done = true; + return templates; + } + + void write(const Template &) + { + qFatal("Not supported"); + } +}; + +BR_REGISTER(Gallery, crawlGallery) + +/*! + * \ingroup galleries * \brief Treats the gallery as a br::Format. * \author Josh Klontz \cite jklontz */