You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1265 lines
24 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

/*
* Online help index routines for CUPS.
*
* Copyright © 2007-2019 by Apple Inc.
* Copyright © 1997-2007 by Easy Software Products.
*
* Licensed under Apache License v2.0. See the file "LICENSE" for more
* information.
*/
/*
* Include necessary headers...
*/
#include "cgi-private.h"
#include <cups/dir.h>
/*
* List of common English words that should not be indexed...
*/
static char help_common_words[][6] =
{
"about",
"all",
"an",
"and",
"are",
"as",
"at",
"be",
"been",
"but",
"by",
"call",
"can",
"come",
"could",
"day",
"did",
"do",
"down",
"each",
"find",
"first",
"for",
"from",
"go",
"had",
"has",
"have",
"he",
"her",
"him",
"his",
"hot",
"how",
"if",
"in",
"is",
"it",
"know",
"like",
"long",
"look",
"make",
"many",
"may",
"more",
"most",
"my",
"no",
"now",
"of",
"on",
"one",
"or",
"other",
"out",
"over",
"said",
"see",
"she",
"side",
"so",
"some",
"sound",
"than",
"that",
"the",
"their",
"them",
"then",
"there",
"these",
"they",
"thing",
"this",
"time",
"to",
"two",
"up",
"use",
"was",
"water",
"way",
"we",
"were",
"what",
"when",
"which",
"who",
"will",
"with",
"word",
"would",
"write",
"you",
"your"
};
/*
* Local functions...
*/
static help_word_t *help_add_word(help_node_t *n, const char *text);
static void help_delete_node(help_node_t *n);
static void help_delete_word(help_word_t *w);
static int help_load_directory(help_index_t *hi,
const char *directory,
const char *relative);
static int help_load_file(help_index_t *hi,
const char *filename,
const char *relative,
time_t mtime);
static help_node_t *help_new_node(const char *filename, const char *anchor, const char *section, const char *text, time_t mtime, off_t offset, size_t length) _CUPS_NONNULL(1,3,4);
static int help_sort_by_name(help_node_t *p1, help_node_t *p2);
static int help_sort_by_score(help_node_t *p1, help_node_t *p2);
static int help_sort_words(help_word_t *w1, help_word_t *w2);
/*
* 'helpDeleteIndex()' - Delete an index, freeing all memory used.
*/
void
helpDeleteIndex(help_index_t *hi) /* I - Help index */
{
help_node_t *node; /* Current node */
if (!hi)
return;
for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
node;
node = (help_node_t *)cupsArrayNext(hi->nodes))
{
if (!hi->search)
help_delete_node(node);
}
cupsArrayDelete(hi->nodes);
cupsArrayDelete(hi->sorted);
free(hi);
}
/*
* 'helpFindNode()' - Find a node in an index.
*/
help_node_t * /* O - Node pointer or NULL */
helpFindNode(help_index_t *hi, /* I - Index */
const char *filename, /* I - Filename */
const char *anchor) /* I - Anchor */
{
help_node_t key; /* Search key */
/*
* Range check input...
*/
if (!hi || !filename)
return (NULL);
/*
* Initialize the search key...
*/
key.filename = (char *)filename;
key.anchor = (char *)anchor;
/*
* Return any match...
*/
return ((help_node_t *)cupsArrayFind(hi->nodes, &key));
}
/*
* 'helpLoadIndex()' - Load a help index from disk.
*/
help_index_t * /* O - Index pointer or NULL */
helpLoadIndex(const char *hifile, /* I - Index filename */
const char *directory) /* I - Directory that is indexed */
{
help_index_t *hi; /* Help index */
cups_file_t *fp; /* Current file */
char line[2048], /* Line from file */
*ptr, /* Pointer into line */
*filename, /* Filename in line */
*anchor, /* Anchor in line */
*sectptr, /* Section pointer in line */
section[1024], /* Section name */
*text; /* Text in line */
time_t mtime; /* Modification time */
off_t offset; /* Offset into file */
size_t length; /* Length in bytes */
int update; /* Update? */
help_node_t *node; /* Current node */
help_word_t *word; /* Current word */
/*
* Create a new, empty index.
*/
if ((hi = (help_index_t *)calloc(1, sizeof(help_index_t))) == NULL)
return (NULL);
hi->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
hi->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
if (!hi->nodes || !hi->sorted)
{
cupsArrayDelete(hi->nodes);
cupsArrayDelete(hi->sorted);
free(hi);
return (NULL);
}
/*
* Try loading the existing index file...
*/
if ((fp = cupsFileOpen(hifile, "r")) != NULL)
{
/*
* Lock the file and then read the first line...
*/
cupsFileLock(fp, 1);
if (cupsFileGets(fp, line, sizeof(line)) && !strcmp(line, "HELPV2"))
{
/*
* Got a valid header line, now read the data lines...
*/
node = NULL;
while (cupsFileGets(fp, line, sizeof(line)))
{
/*
* Each line looks like one of the following:
*
* filename mtime offset length "section" "text"
* filename#anchor offset length "text"
* SP count word
*/
if (line[0] == ' ')
{
/*
* Read a word in the current node...
*/
if (!node || (ptr = strrchr(line, ' ')) == NULL)
continue;
if ((word = help_add_word(node, ptr + 1)) != NULL)
word->count = atoi(line + 1);
}
else
{
/*
* Add a node...
*/
filename = line;
if ((ptr = strchr(line, ' ')) == NULL)
break;
while (isspace(*ptr & 255))
*ptr++ = '\0';
if ((anchor = strrchr(filename, '#')) != NULL)
{
*anchor++ = '\0';
mtime = 0;
}
else
mtime = strtol(ptr, &ptr, 10);
offset = strtoll(ptr, &ptr, 10);
length = (size_t)strtoll(ptr, &ptr, 10);
while (isspace(*ptr & 255))
ptr ++;
if (!anchor)
{
/*
* Get section...
*/
if (*ptr != '\"')
break;
ptr ++;
sectptr = ptr;
while (*ptr && *ptr != '\"')
ptr ++;
if (*ptr != '\"')
break;
*ptr++ = '\0';
strlcpy(section, sectptr, sizeof(section));
while (isspace(*ptr & 255))
ptr ++;
}
else
section[0] = '\0';
if (*ptr != '\"')
break;
ptr ++;
text = ptr;
while (*ptr && *ptr != '\"')
ptr ++;
if (*ptr != '\"')
break;
*ptr++ = '\0';
if ((node = help_new_node(filename, anchor, section, text,
mtime, offset, length)) == NULL)
break;
node->score = -1;
cupsArrayAdd(hi->nodes, node);
}
}
}
cupsFileClose(fp);
}
/*
* Scan for new/updated files...
*/
update = help_load_directory(hi, directory, NULL);
/*
* Remove any files that are no longer installed...
*/
for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
node;
node = (help_node_t *)cupsArrayNext(hi->nodes))
if (node->score < 0)
{
/*
* Delete this node...
*/
cupsArrayRemove(hi->nodes, node);
help_delete_node(node);
}
/*
* Add nodes to the sorted array...
*/
for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
node;
node = (help_node_t *)cupsArrayNext(hi->nodes))
cupsArrayAdd(hi->sorted, node);
/*
* Save the index if we updated it...
*/
if (update)
helpSaveIndex(hi, hifile);
/*
* Return the index...
*/
return (hi);
}
/*
* 'helpSaveIndex()' - Save a help index to disk.
*/
int /* O - 0 on success, -1 on error */
helpSaveIndex(help_index_t *hi, /* I - Index */
const char *hifile) /* I - Index filename */
{
cups_file_t *fp; /* Index file */
help_node_t *node; /* Current node */
help_word_t *word; /* Current word */
/*
* Try creating a new index file...
*/
if ((fp = cupsFileOpen(hifile, "w9")) == NULL)
return (-1);
/*
* Lock the file while we write it...
*/
cupsFileLock(fp, 1);
cupsFilePuts(fp, "HELPV2\n");
for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
node;
node = (help_node_t *)cupsArrayNext(hi->nodes))
{
/*
* Write the current node with/without the anchor...
*/
if (node->anchor)
{
if (cupsFilePrintf(fp, "%s#%s " CUPS_LLFMT " " CUPS_LLFMT " \"%s\"\n",
node->filename, node->anchor,
CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
node->text) < 0)
break;
}
else
{
if (cupsFilePrintf(fp, "%s %d " CUPS_LLFMT " " CUPS_LLFMT " \"%s\" \"%s\"\n",
node->filename, (int)node->mtime,
CUPS_LLCAST node->offset, CUPS_LLCAST node->length,
node->section ? node->section : "", node->text) < 0)
break;
}
/*
* Then write the words associated with the node...
*/
for (word = (help_word_t *)cupsArrayFirst(node->words);
word;
word = (help_word_t *)cupsArrayNext(node->words))
if (cupsFilePrintf(fp, " %d %s\n", word->count, word->text) < 0)
break;
}
cupsFileFlush(fp);
if (cupsFileClose(fp) < 0)
return (-1);
else if (node)
return (-1);
else
return (0);
}
/*
* 'helpSearchIndex()' - Search an index.
*/
help_index_t * /* O - Search index */
helpSearchIndex(help_index_t *hi, /* I - Index */
const char *query, /* I - Query string */
const char *section, /* I - Limit search to this section */
const char *filename) /* I - Limit search to this file */
{
help_index_t *search; /* Search index */
help_node_t *node; /* Current node */
help_word_t *word; /* Current word */
void *sc; /* Search context */
int matches; /* Number of matches */
/*
* Range check...
*/
if (!hi || !query)
return (NULL);
/*
* Reset the scores of all nodes to 0...
*/
for (node = (help_node_t *)cupsArrayFirst(hi->nodes);
node;
node = (help_node_t *)cupsArrayNext(hi->nodes))
node->score = 0;
/*
* Find the first node to search in...
*/
if (filename)
{
node = helpFindNode(hi, filename, NULL);
if (!node)
return (NULL);
}
else
node = (help_node_t *)cupsArrayFirst(hi->nodes);
/*
* Convert the query into a regular expression...
*/
sc = cgiCompileSearch(query);
if (!sc)
return (NULL);
/*
* Allocate a search index...
*/
search = calloc(1, sizeof(help_index_t));
if (!search)
{
cgiFreeSearch(sc);
return (NULL);
}
search->nodes = cupsArrayNew((cups_array_func_t)help_sort_by_name, NULL);
search->sorted = cupsArrayNew((cups_array_func_t)help_sort_by_score, NULL);
if (!search->nodes || !search->sorted)
{
cupsArrayDelete(search->nodes);
cupsArrayDelete(search->sorted);
free(search);
cgiFreeSearch(sc);
return (NULL);
}
search->search = 1;
/*
* Check each node in the index, adding matching nodes to the
* search index...
*/
for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
if (section && strcmp(node->section, section))
continue;
else if (filename && strcmp(node->filename, filename))
continue;
else
{
matches = cgiDoSearch(sc, node->text);
for (word = (help_word_t *)cupsArrayFirst(node->words);
word;
word = (help_word_t *)cupsArrayNext(node->words))
if (cgiDoSearch(sc, word->text) > 0)
matches += word->count;
if (matches > 0)
{
/*
* Found a match, add the node to the search index...
*/
node->score = matches;
cupsArrayAdd(search->nodes, node);
cupsArrayAdd(search->sorted, node);
}
}
/*
* Free the search context...
*/
cgiFreeSearch(sc);
/*
* Return the results...
*/
return (search);
}
/*
* 'help_add_word()' - Add a word to a node.
*/
static help_word_t * /* O - New word */
help_add_word(help_node_t *n, /* I - Node */
const char *text) /* I - Word text */
{
help_word_t *w, /* New word */
key; /* Search key */
/*
* Create the words array as needed...
*/
if (!n->words)
n->words = cupsArrayNew((cups_array_func_t)help_sort_words, NULL);
/*
* See if the word is already added...
*/
key.text = (char *)text;
if ((w = (help_word_t *)cupsArrayFind(n->words, &key)) == NULL)
{
/*
* Create a new word...
*/
if ((w = calloc(1, sizeof(help_word_t))) == NULL)
return (NULL);
if ((w->text = strdup(text)) == NULL)
{
free(w);
return (NULL);
}
cupsArrayAdd(n->words, w);
}
/*
* Bump the counter for this word and return it...
*/
w->count ++;
return (w);
}
/*
* 'help_delete_node()' - Free all memory used by a node.
*/
static void
help_delete_node(help_node_t *n) /* I - Node */
{
help_word_t *w; /* Current word */
if (!n)
return;
if (n->filename)
free(n->filename);
if (n->anchor)
free(n->anchor);
if (n->section)
free(n->section);
if (n->text)
free(n->text);
for (w = (help_word_t *)cupsArrayFirst(n->words);
w;
w = (help_word_t *)cupsArrayNext(n->words))
help_delete_word(w);
cupsArrayDelete(n->words);
free(n);
}
/*
* 'help_delete_word()' - Free all memory used by a word.
*/
static void
help_delete_word(help_word_t *w) /* I - Word */
{
if (!w)
return;
if (w->text)
free(w->text);
free(w);
}
/*
* 'help_load_directory()' - Load a directory of files into an index.
*/
static int /* O - 0 = success, -1 = error, 1 = updated */
help_load_directory(
help_index_t *hi, /* I - Index */
const char *directory, /* I - Directory */
const char *relative) /* I - Relative path */
{
cups_dir_t *dir; /* Directory file */
cups_dentry_t *dent; /* Directory entry */
char *ext, /* Pointer to extension */
filename[1024], /* Full filename */
relname[1024]; /* Relative filename */
int update; /* Updated? */
help_node_t *node; /* Current node */
/*
* Open the directory and scan it...
*/
if ((dir = cupsDirOpen(directory)) == NULL)
return (0);
update = 0;
while ((dent = cupsDirRead(dir)) != NULL)
{
/*
* Skip "." files...
*/
if (dent->filename[0] == '.')
continue;
/*
* Get absolute and relative filenames...
*/
snprintf(filename, sizeof(filename), "%s/%s", directory, dent->filename);
if (relative)
snprintf(relname, sizeof(relname), "%s/%s", relative, dent->filename);
else
strlcpy(relname, dent->filename, sizeof(relname));
/*
* Check if we have a HTML file...
*/
if ((ext = strstr(dent->filename, ".html")) != NULL &&
(!ext[5] || !strcmp(ext + 5, ".gz")))
{
/*
* HTML file, see if we have already indexed the file...
*/
if ((node = helpFindNode(hi, relname, NULL)) != NULL)
{
/*
* File already indexed - check dates to confirm that the
* index is up-to-date...
*/
if (node->mtime == dent->fileinfo.st_mtime)
{
/*
* Same modification time, so mark all of the nodes
* for this file as up-to-date...
*/
for (; node; node = (help_node_t *)cupsArrayNext(hi->nodes))
if (!strcmp(node->filename, relname))
node->score = 0;
else
break;
continue;
}
}
update = 1;
help_load_file(hi, filename, relname, dent->fileinfo.st_mtime);
}
else if (S_ISDIR(dent->fileinfo.st_mode))
{
/*
* Process sub-directory...
*/
if (help_load_directory(hi, filename, relname) == 1)
update = 1;
}
}
cupsDirClose(dir);
return (update);
}
/*
* 'help_load_file()' - Load a HTML files into an index.
*/
static int /* O - 0 = success, -1 = error */
help_load_file(
help_index_t *hi, /* I - Index */
const char *filename, /* I - Filename */
const char *relative, /* I - Relative path */
time_t mtime) /* I - Modification time */
{
cups_file_t *fp; /* HTML file */
help_node_t *node; /* Current node */
char line[1024], /* Line from file */
temp[1024], /* Temporary word */
section[1024], /* Section */
*ptr, /* Pointer into line */
*anchor, /* Anchor name */
*text; /* Text for anchor */
off_t offset; /* File offset */
char quote; /* Quote character */
help_word_t *word; /* Current word */
int wordlen; /* Length of word */
if ((fp = cupsFileOpen(filename, "r")) == NULL)
return (-1);
node = NULL;
offset = 0;
strlcpy(section, "Other", sizeof(section));
while (cupsFileGets(fp, line, sizeof(line)))
{
/*
* Look for "<TITLE>", "<A NAME", or "<!-- SECTION:" prefix...
*/
if ((ptr = strstr(line, "<!-- SECTION:")) != NULL)
{
/*
* Got section line, copy it!
*/
for (ptr += 13; isspace(*ptr & 255); ptr ++);
strlcpy(section, ptr, sizeof(section));
if ((ptr = strstr(section, "-->")) != NULL)
{
/*
* Strip comment stuff from end of line...
*/
for (*ptr-- = '\0'; ptr > line && isspace(*ptr & 255); *ptr-- = '\0');
if (isspace(*ptr & 255))
*ptr = '\0';
}
continue;
}
for (ptr = line; (ptr = strchr(ptr, '<')) != NULL;)
{
ptr ++;
if (!_cups_strncasecmp(ptr, "TITLE>", 6))
{
/*
* Found the title...
*/
anchor = NULL;
ptr += 6;
}
else
{
char *idptr; /* Pointer to ID */
if (!_cups_strncasecmp(ptr, "A NAME=", 7))
ptr += 7;
else if ((idptr = strstr(ptr, " ID=")) != NULL)
ptr = idptr + 4;
else if ((idptr = strstr(ptr, " id=")) != NULL)
ptr = idptr + 4;
else
continue;
/*
* Found an anchor...
*/
if (*ptr == '\"' || *ptr == '\'')
{
/*
* Get quoted anchor...
*/
quote = *ptr;
anchor = ptr + 1;
if ((ptr = strchr(anchor, quote)) != NULL)
*ptr++ = '\0';
else
break;
}
else
{
/*
* Get unquoted anchor...
*/
anchor = ptr + 1;
for (ptr = anchor; *ptr && *ptr != '>' && !isspace(*ptr & 255); ptr ++);
if (*ptr != '>')
*ptr++ = '\0';
else
break;
}
/*
* Got the anchor, now lets find the end...
*/
while (*ptr && *ptr != '>')
ptr ++;
if (*ptr != '>')
break;
*ptr++ = '\0';
}
/*
* Now collect text for the link...
*/
text = ptr;
while ((ptr = strchr(text, '<')) == NULL)
{
ptr = text + strlen(text);
if (ptr >= (line + sizeof(line) - 2))
break;
*ptr++ = ' ';
if (!cupsFileGets(fp, ptr, sizeof(line) - (size_t)(ptr - line) - 1))
break;
}
*ptr = '\0';
if (node)
node->length = (size_t)(offset - node->offset);
if (!*text)
{
node = NULL;
break;
}
if ((node = helpFindNode(hi, relative, anchor)) != NULL)
{
/*
* Node already in the index, so replace the text and other
* data...
*/
cupsArrayRemove(hi->nodes, node);
if (node->section)
free(node->section);
if (node->text)
free(node->text);
if (node->words)
{
for (word = (help_word_t *)cupsArrayFirst(node->words);
word;
word = (help_word_t *)cupsArrayNext(node->words))
help_delete_word(word);
cupsArrayDelete(node->words);
node->words = NULL;
}
node->section = section[0] ? strdup(section) : NULL;
node->text = strdup(text);
node->mtime = mtime;
node->offset = offset;
node->score = 0;
}
else
{
/*
* New node...
*/
node = help_new_node(relative, anchor, section, text, mtime, offset, 0);
}
/*
* Go through the text value and replace tabs and newlines with
* whitespace and eliminate extra whitespace...
*/
for (ptr = node->text, text = node->text; *ptr;)
if (isspace(*ptr & 255))
{
while (isspace(*ptr & 255))
ptr ++;
*text++ = ' ';
}
else if (text != ptr)
*text++ = *ptr++;
else
{
text ++;
ptr ++;
}
*text = '\0';
/*
* (Re)add the node to the array...
*/
cupsArrayAdd(hi->nodes, node);
if (!anchor)
node = NULL;
break;
}
if (node)
{
/*
* Scan this line for words...
*/
for (ptr = line; *ptr; ptr ++)
{
/*
* Skip HTML stuff...
*/
if (*ptr == '<')
{
if (!strncmp(ptr, "<!--", 4))
{
/*
* Skip HTML comment...
*/
if ((text = strstr(ptr + 4, "-->")) == NULL)
ptr += strlen(ptr) - 1;
else
ptr = text + 2;
}
else
{
/*
* Skip HTML element...
*/
for (ptr ++; *ptr && *ptr != '>'; ptr ++)
{
if (*ptr == '\"' || *ptr == '\'')
{
for (quote = *ptr++; *ptr && *ptr != quote; ptr ++);
if (!*ptr)
ptr --;
}
}
if (!*ptr)
ptr --;
}
continue;
}
else if (*ptr == '&')
{
/*
* Skip HTML entity...
*/
for (ptr ++; *ptr && *ptr != ';'; ptr ++);
if (!*ptr)
ptr --;
continue;
}
else if (!isalnum(*ptr & 255))
continue;
/*
* Found the start of a word, search until we find the end...
*/
for (text = ptr, ptr ++; *ptr && isalnum(*ptr & 255); ptr ++);
wordlen = (int)(ptr - text);
memcpy(temp, text, (size_t)wordlen);
temp[wordlen] = '\0';
ptr --;
if (wordlen > 1 && !bsearch(temp, help_common_words,
(sizeof(help_common_words) /
sizeof(help_common_words[0])),
sizeof(help_common_words[0]),
(int (*)(const void *, const void *))
_cups_strcasecmp))
help_add_word(node, temp);
}
}
/*
* Get the offset of the next line...
*/
offset = cupsFileTell(fp);
}
cupsFileClose(fp);
if (node)
node->length = (size_t)(offset - node->offset);
return (0);
}
/*
* 'help_new_node()' - Create a new node and add it to an index.
*/
static help_node_t * /* O - Node pointer or NULL on error */
help_new_node(const char *filename, /* I - Filename */
const char *anchor, /* I - Anchor */
const char *section, /* I - Section */
const char *text, /* I - Text */
time_t mtime, /* I - Modification time */
off_t offset, /* I - Offset in file */
size_t length) /* I - Length in bytes */
{
help_node_t *n; /* Node */
n = (help_node_t *)calloc(1, sizeof(help_node_t));
if (!n)
return (NULL);
n->filename = strdup(filename);
n->anchor = anchor ? strdup(anchor) : NULL;
n->section = (section && *section) ? strdup(section) : NULL;
n->text = strdup(text);
n->mtime = mtime;
n->offset = offset;
n->length = length;
return (n);
}
/*
* 'help_sort_nodes_by_name()' - Sort nodes by section, filename, and anchor.
*/
static int /* O - Difference */
help_sort_by_name(help_node_t *n1, /* I - First node */
help_node_t *n2) /* I - Second node */
{
int diff; /* Difference */
if ((diff = strcmp(n1->filename, n2->filename)) != 0)
return (diff);
if (!n1->anchor && !n2->anchor)
return (0);
else if (!n1->anchor)
return (-1);
else if (!n2->anchor)
return (1);
else
return (strcmp(n1->anchor, n2->anchor));
}
/*
* 'help_sort_nodes_by_score()' - Sort nodes by score and text.
*/
static int /* O - Difference */
help_sort_by_score(help_node_t *n1, /* I - First node */
help_node_t *n2) /* I - Second node */
{
int diff; /* Difference */
if (n1->score != n2->score)
return (n2->score - n1->score);
if (n1->section && !n2->section)
return (1);
else if (!n1->section && n2->section)
return (-1);
else if (n1->section && n2->section &&
(diff = strcmp(n1->section, n2->section)) != 0)
return (diff);
return (_cups_strcasecmp(n1->text, n2->text));
}
/*
* 'help_sort_words()' - Sort words alphabetically.
*/
static int /* O - Difference */
help_sort_words(help_word_t *w1, /* I - Second word */
help_word_t *w2) /* I - Second word */
{
return (_cups_strcasecmp(w1->text, w2->text));
}