Commit ca0087b06617aa81661e352e1b40814c62d168ac

Authored by Christian Herdtweck
Committed by Philippe Lagadec
1 parent 27c1baca

unittest: create unittests for msodde blacklist feature

tests/msodde_docx/__init__.py 0 → 100644
tests/msodde_docx/test_blacklist.py 0 → 100644
  1 +""" Test the msodde blacklist feature
  2 +
  3 +Take a few examples from the standard (iso29500-1:2016) and see that they match
  4 +"""
  5 +
  6 +import unittest
  7 +from oletools.msodde import field_is_blacklisted
  8 +
# Field instructions for which field_is_blacklisted() is expected to return a
# true value (checked by TestBlacklist.test_matches).
# Entries wrapped in parentheses are ONE example that is spread over several
# adjacent string literals; the pieces are concatenated by the parser.
EXAMPLES_MATCH = (
    # --- DATE, with and without \@ format arguments -------------------------
    r'DATE',
    r'DATE \@ "dddd, MMMM dd, yyyy"',
    r'DATE \@ "dddd, MMMM dd, yyyy" \h',
    r'DATE \@ "M/d/yyyy"',
    r'DATE \@ "dddd, MMMM dd, yyyy"',
    r'DATE \@ "MMMM d, yyyy"',
    r'DATE \@ "M/d/yy"',
    r'DATE \@ "yyyy-MM-dd"',
    r'DATE \@ "d-MMM-yy"',
    r'DATE \@ "M.d.yyyy"',
    r'DATE \@ "MMM. d, yy"',
    r'DATE \@ "d MMMM yyyy"',
    r'DATE \@ "MMMM yy"',
    r'DATE \@ "MMM-yy"',
    r'DATE \@ "M/d/yyyy h:mm am/pm"',
    r'DATE \@ "M/d/yyyy h:mm:ss am/pm"',
    r'DATE \@ "h:mm am/pm"',
    r'DATE \@ "h:mm:ss am/pm"',
    r'DATE \@ "HH:mm"',
    r'DATE \@ "\'Today is \'HH:mm:ss"',

    # --- \* format arguments on USERNAME / DATE / TIME ----------------------
    r'USERNAME "mary smith" \* Caps',
    r'USERNAME "mary smith" \* FirstCap',
    r'USERNAME "Mary Smith" \* Lower',
    r'USERNAME "Mary Smith" \* Upper',
    r'DATE \* CHARFORMAT',
    r'TIME \@ "HH:mm:ss" \* MERGEFORMAT',

    # --- remaining examples, kept in their original order -------------------
    r'ADVANCE \u 6',
    r'ADVANCE \d 12',
    r'ADVANCE \l 20',
    r'ADVANCE \x 150',
    r'AUTHOR',
    r'AUTHOR "Tony Caruso"',
    r'BIBLIOGRAPHY \l 1033',     # note: the original example has "/l 1033"
    r'CITATION Ecma01 \l 1033',  # note: this also. Hope this is just a typo
    r'COMMENTS',
    r'COMMENTS "I came, I saw, I was not impressed."',
    r'CREATEDATE',
    r'CREATEDATE \@ "dddd, MMMM dd, yyyy HH:mm:ss"',
    r'CREATEDATE \@ "dddd, MMMM dd, yyyy HH:mm:ss" \h',
    r'CREATEDATE \@ "dddd, MMMM dd, yyyy HH:mm:ss" \s',
    r'DATE',
    r'DATE \@ "dddd, MMMM dd, yyyy HH:mm:ss"',
    r'DATE \@ "dddd, MMMM dd, yyyy HH:mm:ss" \h',
    r'DATE \@ "dddd, MMMM dd, yyyy HH:mm:ss" \s',
    r'EDITTIME',
    r'EDITTIME \* OrdText',
    r'FILENAME \* Upper',
    r'FILENAME \p',
    r'FILESIZE \# #,##0',
    r'FILESIZE \k',
    r'FILESIZE \m',
    r'FORMCHECKBOX',
    r'FORMDROPDOWN',
    r'FORMTEXT',
    r'INDEX \c "1" \e "tab" \g " to " \h "A" \z "1033"',
    r'KEYWORDS',
    r'KEYWORDS "field, formatting, switch, syntax"',
    r'LASTSAVEDBY \* Upper',
    r'LISTNUM NumberDefault \l 3 \s 1',
    r'LISTNUM',
    r'LISTNUM NumberDefault',
    r'LISTNUM NumberDefault \s 3',
    r'LISTNUM NumberDefault \l 1',
    r'LISTNUM NumberDefault \l 1 \s 1',
    r'LISTNUM LegalDefault \l 1 \s 1',  # note: original example uses '\1'
    r'NOTEREF F10',
    r'NUMCHARS',
    r'NUMCHARS \# #,##0',
    r'NUMPAGES \# #,##0',
    r'NUMPAGES \* OrdText',
    r'NUMWORDS',
    r'NUMWORDS \# #,##0',
    r'PAGE',
    r'PAGE \* ArabicDash',
    r'PAGE \* ALPHABETIC',
    r'PAGE \* roman',
    r'PAGEREF Worldpop1990 \p',
    r'PRINTDATE',
    r'PRINTDATE \@ "dddd, MMMM dd, yyyy HH:mm:ss"',
    r'REVNUM',
    r'SAVEDATE',
    r'SAVEDATE \@ "dddd, MMMM dd, yyyy HH:mm:ss"',
    r'SECTION',
    r'SECTION \* ArabicDash',
    r'SECTION \* ALPHABETIC',
    r'SECTION \* roman',
    r'SECTIONPAGES',
    r'SECTIONPAGES \* ArabicDash',
    r'SECTIONPAGES \* ALPHABETIC',
    r'SECTIONPAGES \* roman',
    r'SEQ Figure',
    r'SEQ Figure \* roman',
    r'SEQ Figure \n',
    r'SEQ Figure \c',
    r'SEQ Figure \h',
    r'SEQ Figure',
    r'SEQ Figure \r 1',
    r'SEQ Figure',
    r'STYLEREF "Heading 3"',
    r'STYLEREF "Last Name"',
    r'STYLEREF "Last Name" \l',
    r'SUBJECT',
    r'SUBJECT "A specification for WordprocessingML Fields"',
    r'SYMBOL 65',
    r'SYMBOL 66 \a',
    r'SYMBOL 67 \u',
    r'SYMBOL 0x20ac \u',
    r'SYMBOL 68',
    r'SYMBOL 68 \f Symbol',
    r'SYMBOL 40 \f Wingdings \s 24',
    r'TA \l "Hotels v. Leisure Time" \c 2',
    # NOTE(review): there is no blank between the closing quote of
    # "Baldwin v. Alberti" and the following \c -- value kept exactly as it
    # was; confirm whether a separating space was intended.
    (r'TA \l "Baldwin v. Alberti, 58 Wn. 2d 243 (1961)" '
     r'\s "Baldwin v. Alberti"\c 1 \b'),
    r'INDEX \e "tab" \c "1" \z "1033"',
    r'TEMPLATE \* Upper',
    r'TEMPLATE \p',
    r'TIME',
    r'TIME \@ "dddd, MMMM dd, yyyy HH:mm:ss"',
    r'TITLE "My Life, the Fantasy" \* Upper',
    r'TITLE',
    (r'TOC \o "3-3" \h \z \t "Heading 1,1,Heading 2,2,Appendix 1,1,'
     r'Appendix 2,2,Unnumbered Heading,1"'),
    r'USERADDRESS',
    r'USERADDRESS "10 Top Secret Lane, Chiswick" \* Upper',
    r'USERINITIALS \* Lower',
    r'USERINITIALS "JaJ"',
    r'USERINITIALS "jaj" \* Upper',
    r'XE "Office Open XML" \b',
    r'XE "syntax" \f "Introduction"',
    r'XE "behavior:implementation-defined" \b',
    r'XE "Office Open XML" \i',
    r'XE "behavior:implementation-defined:documenting" \b',
    r'XE "grammar" \f "Introduction" \b',
    r'XE "Office Open XML"',
    r'XE "item: package-relationship" \t "See package-relationship item"',
    r'XE "XML" \r OOXMLPageRange',
    r'XE "grammar" \f "Introduction"',
    r'XE "production" \f "Introduction"',
)
  149 +
# Field instructions for which field_is_blacklisted() is expected to return a
# false value (checked by TestBlacklist.test_nomatches): not (yet) covered,
# either because they should be handled as bad or because our parser does not
# cover them.
# Entries wrapped in parentheses are ONE example that is spread over several
# adjacent string literals; the pieces are concatenated by the parser.
EXAMPLES_NOMATCH = (
    r'INCLUDETEXT "E:\\ReadMe.txt"',
    r'IF DATE \@ "M-d"<>"1-1" "not " new year\'s day.',

    # --- formula fields -----------------------------------------------------
    r'=X + Y',
    r'=Result * 10',
    r'=((-1 + X^2) * 3 - Y)/2',
    r'=COUNT(BELOW)',
    r'=SUM(LEFT)',
    r'=AVERAGE(ABOVE)',
    r'=4+5 \# 00.00',
    r'=9+6 \# $###',
    r'=111053+111439 \# x##',
    r'=1/8 \# 0.00x',
    r'=3/4 \# .x',
    r'=95.4 \# $###.00',
    r'=2456800 \# $#,###,###',
    r'=80-90 \# -##',
    r'=90-80 \# -##',
    r'=90-80 \# +##',
    r'=33 \# ##%',
    r'=Price*15% \# "##0.00 \'is the sales tax\'"',
    r'=SUM(A1:D4) \# "##0.00 \'is the total of Table\' `table`"',
    r'=Sales95 \# $#,##0.00;-$#,##0.00',
    r'=Sales95 \# $#,##0.00;-$#,##0.00;$0',
    r'1 \* AIUEO',
    r'=54 \* ALPHABETIC',
    r'=52 \* alphabetic',

    # --- remaining examples, kept in their original order -------------------
    r'AUTOTEXT "- PAGE -"',
    r'AUTOTEXT "Yours truly,"',
    r'AUTOTEXT Confidential',
    (r'AUTOTEXTLIST "List of salutations" \s Salutation '
     r'\t "Choose a salutation"'),
    # raw strings: each \n below is a backslash followed by "n", not a newline
    (r'ADDRESSBLOCK \f "<<_TITLE0_ >><<_FIRST0_>><< _LAST0_>><< _SUFFIX0_>>\n'
     r'<<_COMPANY_>>\n<<_STREET1_>>\n'
     r'<<_STREET2_>>\n'
     r'<<_CITY_>><<, _STATE_>><< _POSTAL_>><<_COUNTRY_>>"'),
    r'ASK AskResponse "What is your first name?"',
    r'REF AskResponse',
    # NOTE(review): the next two entries look like the two halves of a single
    # example; they are kept as separate entries -- confirm the intent.
    r'{ IF { = OR ( { COMPARE { MERGEFIELD CustomerNumber } >= 4 },',
    (r'{ COMPARE { MERGEFIELD CustomerRating } <= 9 } ) } = 1 '
     r'"Credit not acceptable" "Credit acceptable"}'),
    r'{ COMPARE "{ MERGEFIELD PostalCode }" = "985*" }',
    # NOTE(review): same here -- the DATABASE example is stored as two entries.
    r'{ DATABASE \d "C:\\Data\\Sales93.mdb" \c "DSN=MS Access Database;',
    (r'DBQ=C:\\Data\\Sales93.mdb; FIL=RedISAM" '
     r'\s "select * from \"Customer List\"" \f "2445" \t "2486" \l "2"'),
    (r' FILLIN "Please enter the appointment time for '
     r'MERGEFIELD PatientName :"'),
    r'GOTOBUTTON MyBookmark Dest',
    r'GOTOBUTTON p3 Page',
    r'GOTOBUTTON "f 2" Footnote',
    r'HYPERLINK http://www.example.com/',
    r'HYPERLINK "E:\\ReadMe.txt"',
    r'{IF order >= 100 "Thanks" "The minimum order is 100 units" }',
    r'INCLUDEPICTURE "file:///g:/photos/Ellen%20in%20Oslo.jpg"',
    r'INCLUDETEXT "file:///C:/Winword/Port Development RFP" Summary',
    (r'INCLUDETEXT "file:///C:/Resume.xml" \n xmlns:a=\"resume-schema\" '
     r'\t "file:///C:/display.xsl" \x a:Resume/a:Name'),
    # NOTE(review): the LINK example is stored as two entries as well.
    r'{ LINK Excel.Sheet.8 "C:\\My Documents\\Profits.xls"',
    r'"Sheet1!R1C1:R4C4" \a \p }',
    r'MERGEFIELD CoutesyTitle \f " "',
    r'MERGEFIELD FirstName \f " "',
    r'MERGEFIELD LastName',
    r'= { PRINTDATE \@ "MMddyyyyHHmm" + MERGEREC }',
    r'MERGEFIELD Name MERGEFIELD Phone',
    r'NEXT MERGEFIELD Name MERGEFIELD Phone',
    r'NEXT MERGEFIELD Name MERGEFIELD Phone',
    # NOTE(review): the QUOTE example is stored as two entries ('"MMMM"' is an
    # entry of its own) -- confirm the intent.
    r' QUOTE IF DATE \@ "M" = 1 "12" "= DATE \@ "M" - 1"/1/2000 \@',
    r'"MMMM"',
    r'RD C:\\Manual\\Chapters\\Chapter1.doc',
    r'REF _Ref116788778 \r \h',
    r'SET EnteredBy "Paul Smith"',
    r'SET UnitCost 25.00',
    r'SET Quantity FILLIN "Enter number of items ordered:"',
    r'SET SalesTax 10%',
    (r'SET TotalCost = (UnitCost * Quantity) + ((UnitCost * Quantity) * '
     r'SalesTax)'),
    r'SKIPIF MERGEFIELD Order < 100',
)
  230 +
class TestBlacklist(unittest.TestCase):
    """ Tests msodde blacklist feature

    Both tests run field_is_blacklisted() over every example before
    asserting, so a single failing run lists all offending field
    instructions instead of stopping at the first one.
    """

    def test_matches(self):
        """ check a long list of examples that should match the blacklist """
        # collect every example that was wrongly reported as not blacklisted
        missed = [example for example in EXAMPLES_MATCH
                  if not field_is_blacklisted(example)]
        self.assertFalse(
            missed,
            msg="Failed to match: {0}".format(
                ', '.join("'{0}'".format(example) for example in missed)))

    def test_nomatches(self):
        """ check a long list of examples that should NOT match the blacklist
        """
        # collect every example that was wrongly reported as blacklisted
        matched = [example for example in EXAMPLES_NOMATCH
                   if field_is_blacklisted(example)]
        self.assertFalse(
            matched,
            msg="Accidentally matched: {0}".format(
                ', '.join("'{0}'".format(example) for example in matched)))
  245 +
  246 +
# allow running this test module directly as a script (instead of through a
# test runner): hand control over to unittest's command-line entry point
if __name__ == "__main__":
    unittest.main()