Is the word "text" an operator or reserved word in Oracle Text?

DB: Oracle 10.2.0.3.0
I have created a CONTEXT index on a table which uses a Multi-Column Datastore on two columns (text and keywords)
When I perform this query, I'm getting back every row in the table even though none contain a value of "text" within the TEXT or KEYWORDS columns.
SELECT *
FROM tb_rh_links
WHERE CONTAINS(search_column, 'text') > 0
** UPDATE ** it does the same things when searching for word "keywords." I'm beginning to think it has something to do with the section groups...
SELECT *
FROM tb_rh_links
WHERE CONTAINS(search_column, 'keywords') > 0
Here is how the index and multi-column datastore is setup:
BEGIN
ctx_ddl.create_preference ('rh_lex', 'basic_lexer');
ctx_ddl.set_attribute ('rh_lex', 'index_stems', 'english');
ctx_ddl.create_preference('rh_links_mcds', 'MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('rh_links_mcds', 'columns', 'text, keywords');
END;
BEGIN
ctx_ddl.create_section_group('rh_links_sg','basic_section_group');
ctx_ddl.add_field_section(group_name => 'rh_links_sg',
section_name => 'text',
tag => 'text');
ctx_ddl.add_field_section(group_name => 'rh_links_sg',
section_name => 'keywords',
tag => 'keywords');
END;
CREATE INDEX TB_RH_LINKS_IDX5 ON TB_RH_LINKS (KEYWORDS)
INDEXTYPE IS ctxsys.CONTEXT
PARAMETERS('datastore rh_links_mcds section group rh_links_sg');
Edited by: Dishoom on Apr 20, 2009 11:07 AM

When you create a multi_column_datastore without specifying a delimiter, by default the column names are tokenized and indexed. If you specify the newline delimiter, then the column names are not tokenized. Or, once you have created sections and used the section group in your parameters, then the column names are not tokenized. You may have created your multi_column_datastore, then created your index, tokenized the column names, then added the sections, but the tokenized column names weren't dropped until you recreated the index. Please see the demonstration below.
SCOTT@orcl_11g> CREATE TABLE tb_rh_links
  2    (text            VARCHAR2(10),
  3       keywords       VARCHAR2(10),
  4       search_column  VARCHAR2( 1))
  5  /
Table created.
SCOTT@orcl_11g> INSERT ALL
  2  INTO tb_rh_links VALUES ('word1', 'word2', null)
  3  INTO tb_rh_links VALUES ('word3', 'word4', null)
  4  SELECT * FROM DUAL
  5  /
2 rows created.
SCOTT@orcl_11g> BEGIN
  2    ctx_ddl.create_preference ('rh_lex', 'basic_lexer');
  3    ctx_ddl.set_attribute ('rh_lex', 'index_stems', 'english');
  4    ctx_ddl.create_preference('rh_links_mcds', 'MULTI_COLUMN_DATASTORE');
  5    ctx_ddl.set_attribute('rh_links_mcds', 'columns', 'text, keywords');
  6  END;
  7  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> CREATE INDEX TB_RH_LINKS_IDX5 ON TB_RH_LINKS (search_column)
  2  INDEXTYPE IS ctxsys.CONTEXT
  3  PARAMETERS('datastore rh_links_mcds')
  4  /
Index created.
SCOTT@orcl_11g> -- the column names are tokenized:
SCOTT@orcl_11g> COLUMN token_text FORMAT A10
SCOTT@orcl_11g> SELECT token_text, token_first, token_last, token_count
  2  FROM   dr$tb_rh_links_idx5$i
  3  /
TOKEN_TEXT TOKEN_FIRST TOKEN_LAST TOKEN_COUNT
KEYWORDS             1          2           2
TEXT                 1          2           2
WORD1                1          1           1
WORD2                1          1           1
WORD3                2          2           1
WORD4                2          2           1
6 rows selected.
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'text') > 0
  4  /
TEXT       KEYWORDS   S
word1      word2
word3      word4
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'keywords') > 0
  4  /
TEXT       KEYWORDS   S
word1      word2
word3      word4
SCOTT@orcl_11g> -- if you use the newline delimieter, the column names are not tokenized:
SCOTT@orcl_11g> BEGIN
  2    CTX_DDL.SET_ATTRIBUTE ('rh_links_mcds', 'DELIMITER', 'NEWLINE');
  3  END;
  4  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> DROP INDEX tb_rh_links_idx5
  2  /
Index dropped.
SCOTT@orcl_11g> CREATE INDEX TB_RH_LINKS_IDX5 ON TB_RH_LINKS (search_column)
  2  INDEXTYPE IS ctxsys.CONTEXT
  3  PARAMETERS('datastore rh_links_mcds')
  4  /
Index created.
SCOTT@orcl_11g> SELECT token_text, token_first, token_last, token_count
  2  FROM   dr$tb_rh_links_idx5$i
  3  /
TOKEN_TEXT TOKEN_FIRST TOKEN_LAST TOKEN_COUNT
WORD1                1          1           1
WORD2                1          1           1
WORD3                2          2           1
WORD4                2          2           1
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'text') > 0
  4  /
no rows selected
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'keywords') > 0
  4  /
no rows selected
SCOTT@orcl_11g> -- or, with the section groups, the column names are not tokenized:
SCOTT@orcl_11g> BEGIN
  2    -- return to default:
  3    CTX_DDL.SET_ATTRIBUTE ('rh_links_mcds', 'DELIMITER', 'COLUMN_NAME_TAG');
  4  END;
  5  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> BEGIN
  2    ctx_ddl.create_section_group('rh_links_sg','basic_section_group');
  3    ctx_ddl.add_field_section(group_name => 'rh_links_sg',
  4        section_name => 'text',
  5        tag => 'text');
  6    ctx_ddl.add_field_section(group_name => 'rh_links_sg',
  7        section_name => 'keywords',
  8        tag => 'keywords');
  9  END;
10  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11g> DROP INDEX tb_rh_links_idx5
  2  /
Index dropped.
SCOTT@orcl_11g> CREATE INDEX TB_RH_LINKS_IDX5 ON TB_RH_LINKS (search_column)
  2  INDEXTYPE IS ctxsys.CONTEXT
  3  PARAMETERS('datastore rh_links_mcds section group rh_links_sg')
  4  /
Index created.
SCOTT@orcl_11g> SELECT token_text, token_first, token_last, token_count
  2  FROM   dr$tb_rh_links_idx5$i
  3  /
TOKEN_TEXT TOKEN_FIRST TOKEN_LAST TOKEN_COUNT
WORD1                1          1           1
WORD2                1          1           1
WORD3                2          2           1
WORD4                2          2           1
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'text') > 0
  4  /
no rows selected
SCOTT@orcl_11g> SELECT *
  2  FROM tb_rh_links
  3  WHERE CONTAINS(search_column, 'keywords') > 0
  4  /
no rows selected
SCOTT@orcl_11g>

Similar Messages

  • NEAR operator alternative when not using. oracle Text ?

    hi,
    I'm working on a project where i would need a Oracle Text 'NEAR like' operator ...
    here is my scenario ...
    in db we have Customers ... and every customer has some criterias like different search words( names, towns,cars,etc...) so for every customer i can create an SQL query out of criterias . ....
    now .... we can have a criteria like. ...... WHERE fulltext like 'john%'. or even distance search line NEAR inside CONTAINS. ... but then the Oracle text index is needed .....
    the only tAble on which Text index is created is our storage table that holds more then 4mil records and growing...
    my question is ... is there any way to have a query that would do the same thing as NEAR but without Text index ?
    here is how I start ....
    I get full newspaper article text from our OCR library ......
    then i need to check customer's criterias against this text to see which article is for which customer and then bind the article to the customer
    I could do it without Oracle using RegEx , but criterias can get really complicated ... like customer wants only specific MEDIA, or specific category , type , only articles that are from medias that are from specific country etc ... and many more different criterias ... and all this can be wrapped inside brackets with ANDs, ORs, NOT. ....
    So the only way to do it is to put it in Oracle and execute the correct query and let Oracle decide if the result is true or false .... but due to NEAR operator I need Oracle text ...
    So if I decide to first insert article into our storage table which has Oracle text index to be able to do the correct search .... how fast will this be ????
    will the the search become slower when there are 6mil records ? I know I can use FILTER BY to help Text index to do a better and quicker seach ... and how to optimize index ....but still
    I'm always asking my self..... why insert the article in a table where there are already 6mil articles and execute query when I only need to check data on one single article and. i already know this article ...
    I see two solutions :
    - if there is alternative for NEAR without using Oracle text index then i would insert data into temporary table and execute query on this table..... table would always contain only this one article. maybe one option would be to have one 'temp' table with Oracle text index in which i insert this one article and with help of Oracle text based on this one article do the search , and then maybe on a daily basis clear index ..... or when the article is removed from the table ... but this would mean having two Orcle text indexes, cause we already have Oracle text index on our storage table anyway....
    - another is to use Oracle text index and insert it into our storage table and hope for the best quick results ....
    Maybe I'm exaggerating and query like WHERE id=1234 and CONTAINS(...). will execute faster then I think
    If anyone would have any other suggestion I will be happy to try it ..
    thanks,
    Kris

    Hi,
    this is to my knowledge not possible. It is hard for Oracle to do, think about a table with many rows, every row with that column must be checked. So I think only a single varchar2 is possible. Maybe for you will a function work. It is possible to give a function as second parameter.
    function return_signup
    return varchar2
    is
      l_signup_name signup.signup_name%type;
    begin
      select signup_name
      into l_signup_name
      from signup
      where signup_id = 1
      and rownum = 1
      return l_signup_name;
    exception
      when no_data_found
      then
        l_signup_name := 'abracadabra'; -- hope does not exist
        return l_signup_name;
    end;Now you can use above function in the contains.
    select * from user_history_view users --, signup new_user
    --where new_user.signup_id = 1
    where contains(users.user_name, return_signup)>0;I didn't test the code! Maybe you have to adjust the function for your needs. But it is a idea how this can be done.
    Otherwise you must make the check by normaly check the columns by simple using a join:
    select * from user_history_view users, signup new_user
    where new_user.signup_id = 1
    and users.user_name = new_user.signup_name;Herald ten Dam
    htendam.wordpress.com

  • Problem full-text in blob column index created using Oracle Text

    Hi,
    I'm running Oracle Database 10g 10.2 on solaris
    I configure Oracle text if i look for in a varchar2 column is ok, but with blob column doesn't works the search.
    I have a table with a blob column which contains document. I load document with Oracle UCM (stellent)
    My index scripts is:
    CREATE INDEX ORAUCM.FT_IDCCOLL1 ON ORAUCM.IDCCOLL1
    (DDOCFULLTEXT)
    INDEXTYPE IS CTXSYS.CONTEXT
    PARAMETERS('DATASTORE CTXSYS.DEFAULT_DATASTORE FILTER CTXSYS.AUTO_FILTER FORMAT COLUMN DFULLTEXTFORMAT CHARSET
    COLUMN DFULLTEXTCHARSET LEXER OCS_IDCCOLL1_LEXER SYNC (ON COMMIT)')
    NOPARALLEL;
    And my select retunm 0 rows although it will be many documents:
    SELECT IdcColl2.dID, dDocName, dDocTitle, dDocType, dRevisionID, dSecurityGroup, dDocAuthor,
    dDocAccount, dRevLabel, dFormat, dOriginalName, dExtension, dWebExtension, dInDate, dOutDate,
    dPublishType, dRendition1, dRendition2, VaultFileSize, WebFileSize, URL, dFullTextFormat,
    dFullTextCharset, DocMeta.*
    FROM IdcColl1, DocMeta
    WHERE IdcColl1.dID=DocMeta.dID AND (CONTAINS(dDocFullText,'SUBIR') > 0 )
    ORDER BY dInDate Desc
    Thanks in advance.

    Thank you for your answer.
    I response your question:
    - yes DDOCFULLTEXT is a BLOB column.
    - The document that word, excels, whatever. We load the document with UCM (universal Content Management)
    because i need full-test search form UCM tool.
    - Yes 'subir' containts in the word document.
    - select * from CTX_USER_INDEX_ERRORS ;
    No rows returned.
    - SELECT TOKEN_TEXT FROM DR$FT_IDCCOLL1$I
    No rows returned.
    - I tried create symplifying index and doen't work.
    I tried create table and index context on oracle 10.2.0.3 (test database)and works ok.
    I compared both context (test database and ucm database) and i saw a difference:
    In ucm database there are these preferences "analyze text"
    BEGIN ctx_ddl.create_preference('ORAUCM.', 'WORLD_LEXER'); end;
    BEGIN ctx_ddl.create_preference('ORAUCM.', 'DETAIL_DATASTORE'); end;
    I don't know if is important diference or no.
    Please if you need more information, tell me.
    Thanks for your time.

  • "MS" reserved word in oracle text query?

    Wondering if anyone has run into the string "MS" behaving as a reserved word in oracle text queries. For example, this specification returns all records from Texas:
    '<query>
    <textquery>
    <progression>
    <seq> TX WITHIN CUSTOMER_STATE </seq>
    </progression>
    </textquery>
    </query>'
    But this one does NOT find any results for Mississippi:
    '<query>
    <textquery>
    <progression>
    <seq> MS WITHIN CUSTOMER_STATE </seq>
    </progression>
    </textquery>
    </query>'
    I've confirmed we have data that should match, and I've tried escaping it with the sequences as described in the SQL docs (I've tried single quotes, pairs of single quotes, braces, and combinations of those) . And trying to find info on the web is tough since all web queries that contain 'MS' bring back tons of Microsoft-relevant information.
    Can anyone nudge me in the right direction for a better google-search, or some materials in these forums (my initial searches here didn't turn anything up either).
    Thanks for any feedback!
    jh

    Wondering if anyone has run into the string "MS" behaving as a reserved word in oracle text queries.Maybe because »MS« is in the default english stoplist?:
    English Default Stoplist.

  • Is Oracle Text the right solution for this need of a specific search!

    Hi ,
    We are on Oracle 11.2.0.2 on Solaris 10. We have the need to be able to do search on data that are having diacritical marks and we should be able to do the serach ignoring this diacritical marks. That is the requirement. Now I got to hear that Oracle Text has a preference called BASIC_LEXER which can bypass the diacritical marks and so solely due to this feature I implemented Oracle Text and just for this diacritical search and no other need.
    I mean I set up preference like this:
      ctxsys.ctx_ddl.create_preference ('cust_lexer', 'BASIC_LEXER');
      ctxsys.ctx_ddl.set_attribute ('cust_lexer', 'base_letter', 'YES'); -- removes diacritics
    With this I set up like this:
    CREATE TABLE TEXT_TEST
      NAME  VARCHAR2(255 BYTE)
    --created Oracle Text index
    CREATE INDEX TEXT_TEST_IDX1 ON TEXT_TEST
    (NAME)
    INDEXTYPE IS CTXSYS.CONTEXT
    PARAMETERS('LEXER cust_lexer WORDLIST cust_wl SYNC (ON COMMIT)');
    --sample data to illustrate the problem
    Insert into TEXT_TEST
       (NAME)
    Values
       ('muller');
    Insert into TEXT_TEST
       (NAME)
    Values
       ('müller');
    Insert into TEXT_TEST
       (NAME)
    Values
       ('MULLER');
    Insert into TEXT_TEST
       (NAME)
    Values
       ('MÜLLER');
    Insert into TEXT_TEST
       (NAME)
    Values
       ('PAUL HERNANDEZ');
    Insert into TEXT_TEST
       (NAME)
    Values
       ('CHRISTOPHER Phil');
    COMMIT;
    --Now there is an alternative solution that is there,  instead of thee Oracle Text which is just a plain function given below (and it seems to work neat for my simple need of removing diacritical characters effect in search)
    --I need to evaluate which is better given my specific needs -the function below or Oracle Text.
    CREATE OR REPLACE FUNCTION remove_dia(p_value IN VARCHAR2, p_doUpper IN VARCHAR2 := 'Y')
    RETURN VARCHAR2 DETERMINISTIC
    IS
    OUTPUT_STR VARCHAR2(4000);
    begin
    IF (p_doUpper = 'Y') THEN
       OUTPUT_STR := UPPER(p_value);
    ELSE
       OUTPUT_STR := p_value;
    END IF;
    OUTPUT_STR := TRANSLATE(OUTPUT_STR,'ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ', 'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy');
    RETURN (OUTPUT_STR);
    end;
    --now I query for which name stats with  a P%:
    --Below query gets me unexpected result of one row as I am using Oracle Text where each word is parsed for search using CONTAINS...
    SQL> select * from text_test where contains(name,'P%')>0;
    NAME
    PAUL HERNANDEZ
    CHRISTOPHER Phil
    --Below query gets me the right and expected result of one row...
    SQL> select * from text_test where name like 'P%';
    NAME
    PAUL HERNANDEZ
    --Below query gets me the right and expected result of one row...
    SQL>  select * from text_test where remove_dia(name) like remove_dia('P%');
    NAME
    PAUL HERNANDEZMy entire need was only to be able to do a search that bypasses diacritical characters. To implement Oracle Text for that reason, I am wondering if that was the right choice! More so when I am now finding that the functionality of LIKE is not available in Oracle Text - the Oracle text search are based on tokens or words and they are different from output of the LIKE operator. So may be should I have just used a simple function like below and used that for my purpose instead of using Oracle Text:
    This function (remove_dia) just removes the diacritical characters and may be for my need this is all that is needed. Can someone help to review that given my need I am better of not using Oracle Text? I need to continue using the functionality of Like operator and also need to bypass diacritical characters so the simple function that I have meets my need whereas Oracle Text causes a change in behaviour of search queries.
    Thanks,
    OrauserN

    If all you need is LIKE functionality and you do not need any of the complex search capabilities of Oracle Text, then I would not use Oracle Text. I would create a function-based index on your name column that uses your function that removes the diacritical marks, so that your searches will be faster. Please see the demonstration below.
    SCOTT@orcl_11gR2> CREATE TABLE TEXT_TEST
      2    (NAME  VARCHAR2(255 BYTE))
      3  /
    Table created.
    SCOTT@orcl_11gR2> Insert all
      2  into TEXT_TEST (NAME) Values ('muller')
      3  into TEXT_TEST (NAME) Values ('müller')
      4  into TEXT_TEST (NAME) Values ('MULLER')
      5  into TEXT_TEST (NAME) Values ('MÜLLER')
      6  into TEXT_TEST (NAME) Values ('PAUL HERNANDEZ')
      7  into TEXT_TEST (NAME) Values ('CHRISTOPHER Phil')
      8  select * from dual
      9  /
    6 rows created.
    SCOTT@orcl_11gR2> CREATE OR REPLACE FUNCTION remove_dia
      2    (p_value   IN VARCHAR2,
      3       p_doUpper IN VARCHAR2 := 'Y')
      4    RETURN VARCHAR2 DETERMINISTIC
      5  IS
      6    OUTPUT_STR VARCHAR2(4000);
      7  begin
      8    IF (p_doUpper = 'Y') THEN
      9        OUTPUT_STR := UPPER(p_value);
    10    ELSE
    11        OUTPUT_STR := p_value;
    12    END IF;
    13    RETURN
    14        TRANSLATE
    15          (OUTPUT_STR,
    16           'ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ',
    17           'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy');
    18  end;
    19  /
    Function created.
    SCOTT@orcl_11gR2> show errors
    No errors.
    SCOTT@orcl_11gR2> CREATE INDEX text_test_remove_dia_name
      2  ON text_test (remove_dia (name))
      3  /
    Index created.
    SCOTT@orcl_11gR2> set autotrace on explain
    SCOTT@orcl_11gR2> select * from text_test
      2  where  remove_dia (name) like remove_dia ('mü%')
      3  /
    NAME
    muller
    müller
    MULLER
    MÜLLER
    4 rows selected.
    Execution Plan
    Plan hash value: 3139591283
    | Id  | Operation                   | Name                      | Rows  | Bytes | Cost (%CPU)| Time     |
    |   0 | SELECT STATEMENT            |                           |     1 |  2131 |     2   (0)| 00:00:01 |
    |   1 |  TABLE ACCESS BY INDEX ROWID| TEXT_TEST                 |     1 |  2131 |     2   (0)| 00:00:01 |
    |*  2 |   INDEX RANGE SCAN          | TEXT_TEST_REMOVE_DIA_NAME |     1 |       |     1   (0)| 00:00:01 |
    Predicate Information (identified by operation id):
       2 - access("SCOTT"."REMOVE_DIA"("NAME") LIKE "REMOVE_DIA"('mü%'))
           filter("SCOTT"."REMOVE_DIA"("NAME") LIKE "REMOVE_DIA"('mü%'))
    Note
       - dynamic sampling used for this statement (level=2)
    SCOTT@orcl_11gR2> select * from text_test
      2  where  remove_dia (name) like remove_dia ('P%')
      3  /
    NAME
    PAUL HERNANDEZ
    1 row selected.
    Execution Plan
    Plan hash value: 3139591283
    | Id  | Operation                   | Name                      | Rows  | Bytes | Cost (%CPU)| Time     |
    |   0 | SELECT STATEMENT            |                           |     1 |  2131 |     2   (0)| 00:00:01 |
    |   1 |  TABLE ACCESS BY INDEX ROWID| TEXT_TEST                 |     1 |  2131 |     2   (0)| 00:00:01 |
    |*  2 |   INDEX RANGE SCAN          | TEXT_TEST_REMOVE_DIA_NAME |     1 |       |     1   (0)| 00:00:01 |
    Predicate Information (identified by operation id):
       2 - access("SCOTT"."REMOVE_DIA"("NAME") LIKE "REMOVE_DIA"('P%'))
           filter("SCOTT"."REMOVE_DIA"("NAME") LIKE "REMOVE_DIA"('P%'))
    Note
       - dynamic sampling used for this statement (level=2)
    SCOTT@orcl_11gR2>

  • Equivalence Operator - Oracle Text Search

    Hi,
    I am trying to use Equivalence Operator(=) as following,
    SELECT SCORE (1), CLOB_COL
    FROM TEST_CLOB
    +WHERE CONTAINS (CLOB_COL, '({AIR}={WA@TER})', 1) > 0+
    This seems to work fine for normal words, but when the word has any special characters(WA@TER) the equivalence operator throws the following error.
    ORA-29902: error in executing ODCIIndexStart() routine
    ORA-20000: Oracle Text error:
    DRG-50921: EQUIV operand not a word or another EQUIV expression
    DRG-50921: EQUIV operand not a word or another EQUIV expression
    Please let me know if there is a way to escape the special characters.
    The same query seem to work if i use "OR" operator instead of "Equivalence" but need to know whether it is possible to do it with "Equivalence" operator.
    Thanks.

    865045 wrote:
    Hi,
    I am trying to use Equivalence Operator(=) as following,
    SELECT SCORE (1), CLOB_COL
    FROM TEST_CLOB
    +WHERE CONTAINS (CLOB_COL, '({AIR}={WA@TER})', 1) > 0+
    This seems to work fine for normal words, but when the word has any special characters(WA@TER) the equivalence operator throws the following error.
    ORA-29902: error in executing ODCIIndexStart() routine
    ORA-20000: Oracle Text error:
    DRG-50921: EQUIV operand not a word or another EQUIV expression
    DRG-50921: EQUIV operand not a word or another EQUIV expression
    Please let me know if there is a way to escape the special characters.
    The same query seem to work if i use "OR" operator instead of "Equivalence" but need to know whether it is possible to do it with "Equivalence" operator.
    Thanks.The second result in a simple google query for "oracle text escape special characters" returned this.
    http://download.oracle.com/docs/cd/B28359_01/text.111/b28304/cqspcl.htm
    Please keep google (and the Oracle documentation itself) in mind in the future, it's faster for you and less pollution to the forums for simple document reference questions like this.

  • Using Oracle Text to search through WORD, EXCEL and PDF documents

    Hello again,
    What I would like to know is if I have a WORD or PDF document stored in a table. Is it possible to use Oracle Text to search through the actual WORD or PDF document?
    Thanks
    Doug

    Yes you can do context sensitive searches on both PDF and Word docs. With the PDF you need to make sure they are text and not images. Some scanners will create PDFs that are nothing more than images of document.
    Below is code sample that I made some time back to demonstrate the searching capabilities of Oracle Text. Note that the example makes use of the inso_filter that is no longer shipped with Oracle begging with Patch set 10.1.0.4. See metalink note 298017.1 for the changes. See the following link for more information on developing with Oracle Text.
    http://download-west.oracle.com/docs/cd/B14117_01/text.101/b10729/toc.htm
    begin example.
    -- The following needs to be executed
    -- as sys.
    DROP DIRECTORY docs_dir;
    CREATE OR REPLACE DIRECTORY docs_dir
    AS 'C:\sql\oracle_text\documents';
    GRANT READ ON DIRECTORY docs_dir TO text;
    -- End sys ran SQL
    DROP TABLE db_docs CASCADE CONSTRAINTS PURGE;
    CREATE TABLE db_docs (
    id NUMBER,
    format VARCHAR2(10),
    location VARCHAR2(50),
    document BLOB,
    CONSTRAINT i_db_docs_p PRIMARY KEY(id)
    -- Several notes need to be made about this anonymous block.
    -- First the 'DOCS_DIR' parameter is a directory object name.
    -- This directory object name must be in upper case.
    DECLARE
    f_lob BFILE;
    b_lob BLOB;
    document_name VARCHAR2(50);
    BEGIN
    document_name := 'externaltables.doc';
    INSERT INTO db_docs
    VALUES (1, 'binary', 'C:\sql\oracle_text\documents\externaltables.doc', empty_blob())
    RETURN document INTO b_lob;
    f_lob := BFILENAME('DOCS_DIR', document_name);
    DBMS_LOB.FILEOPEN(f_lob, DBMS_LOB.FILE_READONLY);
    DBMS_LOB.LOADFROMFILE(b_lob, f_lob, DBMS_LOB.GETLENGTH(f_lob));
    DBMS_LOB.FILECLOSE(f_lob);
    COMMIT;
    END;
    -- build the index
    -- Note that this index differs than the file system stored file
    -- in that paramter datastore is ctxsys.defautl_datastore and not
    -- ctxsys.file_datastore. FILE_DATASTORE is for documents that
    -- exist on the file system. DEFAULT_DATASTORE is for documents
    -- that are stored in the column.
    create index db_docs_ctx on db_docs(document)
    indextype is ctxsys.context
    parameters (
    'datastore ctxsys.default_datastore
    filter ctxsys.inso_filter
    format column format');
    --search for something that is known to not be in the document.
    SELECT SCORE(1), id, location
    FROM db_docs
    WHERE CONTAINS(document, 'Jenkinson', 1) > 0;
    --search for something that is known to be in the document.  
    SELECT SCORE(1), id, location
    FROM db_docs
    WHERE CONTAINS(document, 'Albright', 1) > 0;

  • How Badly Do You Abuse Reserved Words in Column Names

    I have a challenge for all you DBA's out there.  Most of us agree that reserved words as column names is a bad practice but how clean is your database and some people are quit outspoken about it.  Run the following query on some of your custom, non-COTS, databases and post your top 5.  You may want to run it in Development to make sure new tables don't violate the best practice.  Often we don't think when creating column names or inherited ugly databases, but the fact is, we all have reserved word column names.
    Marcus Bacon
    SELECT   col.column_name , COUNT(1)
    FROM     all_tab_columns col, sys.v_$reserved_words rwrd
    WHERE    col.column_name = rwrd.keyword
    AND      owner NOT IN
               ( 'SYS',
                'SYSTEM',
                'MDSYS',
                'DBSNMP',
                'WMSYS',
                'XDB',
                'APPQOSSYS',
                'OPSG',
                'ORDDATA',
                'ORDSYS',
                'OUTLN' ,
                'CTXSYS',
                'OE',
                'HR',
                'TOAD')
    GROUP BY col.column_name
    ORDER BY count(1) desc,col.column_name;
    COLUMN_NAME                 
    COUNT(1)
    TO_DATE                           
    32
    NAME                              
    21
    ID                                
    14
    OWNER                              
    9
    CLASS                              
    6

    Hi,
    Interesting exercise!
    I modified your query, showing separate counts for Oracle, COTS and In-House schemas:
    WITH   got_developer  AS
        SELECT  CASE
                    WHEN  a.owner  IN ( 'APPQOSSYS'
                                      , 'CTXSYS'
                                      , 'DBSNMP', 'DMSYS'
                                      , 'HR'
                                      , 'MDSYS'
                                      , 'OE', 'OLAPSYS', 'OPSG', 'ORDDATA', 'ORDSYS', 'OUTLN'
                                      , 'SCOTT', 'SYS', 'SYSTEM'
                                      , 'TOAD', 'TSMSYS'
                                      , 'WKSYS', 'WMSYS'
                                      , 'XDB'
                                      )                THEN  'ORACLE'
                    WHEN  a.owner  IN ( 'FUBAR'
                                      )                THEN  'COTS'
                                                       ELSE  'IN_HOUSE'
                END   AS developer
        ,       r.keyword
        ,       r.reserved
        FROM    all_tab_columns        a
        JOIN    sys.v_$reserved_words  r  ON  r.keyword = a.column_name
    SELECT    keyword
    ,         reserved
    ,         SUM (CASE WHEN developer = 'IN_HOUSE' THEN 1 END)     AS in_house
    ,         SUM (CASE WHEN developer = 'COTS'     THEN 1 END)     AS cots
    ,         SUM (CASE WHEN developer = 'ORACLE'   THEN 1 END)     AS oracle
    ,         COUNT (keyword)                                       AS all_sources
    FROM      got_developer
    GROUP BY  GROUPING SETS ( (keyword, reserved)
                            , (reserved)
    ORDER BY  keyword
    ,         reserved
    Output from one database:
    KEYWORD         R   IN_HOUSE       COTS     ORACLE ALL_SOURCES
    A               N                                1           1
    ACCOUNT         N                     2                      2
    ADMIN           N                                3           3
    ADMINISTRATOR   N          3                                 3
    ADVISE          N                     2                      2
    ALIAS           N                     2                      2
    ALWAYS          N                                3           3
    ATTRIBUTE       N          8                    34          42
    ATTRIBUTES      N                               11          11
    AUTHENTICATION  N                                3           3
    AUTHID          N                                3           3
    BIGFILE         N                                4           4
    VALUE           N          8          3        173         184
    VERSION         N          1          2        124         127
    WAIT            N                                2           2
    WHEN            N                                1           1
    WHERE           Y                                2           2
    WRITE           N                                1           1
    XID             N                               20          20
    XMLSCHEMA       N                               15          15
    YEAR            N         21          1                     22
    ZONE            N          2         30                     32
                    N        479        182       4253        4914
                    Y                                3           3
                             479        182       4256        4917
    241 rows selected.
    Over 85% of the cases (including all 3 of the reserved words (1 ORDER and 2 WHEREs) were in Oracle-supplied schemas.
    The most commonly used keywords, outside of Oracle schemas, were
    ID (used in 206 tables)
    NAME (80)
    PERCENT (38)
    ZONE (32)
    YEAR (22)
    TIMESTAMP (18)
    USAGE (17)
    COST (15)
    CLASS (12)
    STATEMENT_ID (12)
    LOCATION, which was one of your most common examples, only occured 4 times in this database, and OWNER not at all  (outside of Oracle schemas).

  • Does anyone know official page of Oracle Text in the OTN

    http://technet.oracle.com/products/text/ and http://www.oracle.com/technology/products/text/index.html don't work any more.

    Someone decided it would be a good idea to move all the OTN pages and break a million links.
    Oracle Text is now at the not-so-memorable http://www.oracle.com/technetwork/database/enterprise-edition/index-098492.html

  • Oracle Text query: Escaping characters and specifying progression sequences

    How can I combine the escaping of a search string and the specification of progression sequences within an oracle text query
    so that in all cases the correct results are delivered (see example below)?
    The scenario in which to use this is the following:
    + Database: Oracle Database 10g Enterprise Edition Release 10.2.0.2.0
    + Requirement: Hitlist of results ordered by score whereby the different part within
    the result list are specified using progression sequences within oracle text query
    Example:
    create table service_provider (
    id number,
    name_c varchar(100),
    uri_c varchar(255)
    insert into service_provider values (1,'ABB Company Mgmt','http://www.abb-company-mgmt.de');
    insert into service_provider values (2,'Dr. Abbas Ming','http://www.dr-abbas-ming.de');
    insert into service_provider values (3,'SABBATA United','http://www.sabbata-united.de');
    insert into service_provider values (4,'ABB','http://www.abb.de');
    insert into service_provider values (5,'AND Company Mgmt','http://www.and-company-mgmt.de');
    insert into service_provider values (6,'Dr. Andas Ming','http://www.dr-andas-ming.de');
    insert into service_provider values (7,'SANDATA United','http://www.sandata-united.de');
    insert into service_provider values (8,'AND','http://www.and.de');
    Query 1: works correctly in this case
    select * from (
    select /*+ FIRST_ROWS */ score(1), this_.*
    from service_provider this_
    where
    CONTAINS ( this_.NAME_C , '<QUERY><textquery grammar="CONTEXT">' ||
    '<progression>' ||
    '<seq>abb</seq>' ||
    '<seq>abb%</seq>' ||
    '<seq>%abb%</seq>' ||
    '<seq>fuzzy(abb,1,100,WEIGHT)</seq>' ||
    '</progression></textquery></QUERY>', 1 ) > 0
    order by score(1) desc, this_.NAME_C
    ) where rownum < 21
    delivers
    76     4     ABB     http://www.abb.de
    76     1     ABB Company Mgmt     http://www.abb-company-mgmt.de
    51     2     Dr. Abbas Ming     http://www.dr-abbas-ming.de
    26     3     SABBATA United     http://www.sabbata-united.de
    Query 2: procudes error
    select * from (
    select /*+ FIRST_ROWS */ score(1), this_.*
    from service_provider this_
    where
    CONTAINS ( this_.NAME_C , '<QUERY><textquery grammar="CONTEXT">' ||
    '<progression>' ||
    '<seq>and</seq>' ||
    '<seq>and%</seq>' ||
    '<seq>%and%</seq>' ||
    '<seq>fuzzy(and,1,100,WEIGHT)</seq>' ||
    '</progression></textquery></QUERY>', 1 ) > 0
    order by score(1) desc, this_.NAME_C
    ) where rownum < 21
    produces ORA-29902, ORA-20000, DRG-50901 because AND is a reserved word in oracle text
    So we need escaping ...
    Query 3: does not work correctly
    select * from (
    select /*+ FIRST_ROWS */ score(1), this_.*
    from service_provider this_
    where
    CONTAINS ( this_.NAME_C , '<QUERY><textquery grammar="CONTEXT">' ||
    '<progression>' ||
    '<seq>{abb}</seq>' ||
    '<seq>{abb%}</seq>' ||
    '<seq>{%abb%}</seq>' ||
    '<seq>fuzzy({abb},1,100,WEIGHT)</seq>' ||
    '</progression></textquery></QUERY>', 1 ) > 0
    order by score(1) desc, this_.NAME_C
    ) where rownum < 21
    delivers
    76     4     ABB     http://www.abb.de
    76     1     ABB Company Mgmt     http://www.abb-company-mgmt.de
    Query 4: does not produce an error, but also does not work correctly
    select * from (
    select /*+ FIRST_ROWS */ score(1), this_.*
    from service_provider this_
    where
    CONTAINS ( this_.NAME_C , '<QUERY><textquery grammar="CONTEXT">' ||
    '<progression>' ||
    '<seq>{and}</seq>' ||
    '<seq>{and%}</seq>' ||
    '<seq>{%and%}</seq>' ||
    '<seq>fuzzy({and},1,100,WEIGHT)</seq>' ||
    '</progression></textquery></QUERY>', 1 ) > 0
    order by score(1) desc, this_.NAME_C
    ) where rownum < 21
    delivers
    76     8     AND     http://www.and.de
    76     5     AND Company Mgmt     http://www.and-company-mgmt.de

    Anywhere that you just use the word by itself, enclose it in {}, but anywhere that you add % on either side or both don't enclose it in {}. Please see the demonstration below.
    SCOTT@10gXE> SELECT * FROM v$version
      2  /
    BANNER
    Oracle Database 10g Express Edition Release 10.2.0.1.0 - Product
    PL/SQL Release 10.2.0.1.0 - Production
    CORE     10.2.0.1.0     Production
    TNS for 32-bit Windows: Version 10.2.0.1.0 - Production
    NLSRTL Version 10.2.0.1.0 - Production
    SCOTT@10gXE> create table service_provider
      2    (id     number,
      3       name_c     varchar(100),
      4       uri_c     varchar(255))
      5  /
    Table created.
    SCOTT@10gXE> insert all
      2  into service_provider values (1,'ABB Company Mgmt','http://www.abb-company-mgmt.de')
      3  into service_provider values (2,'Dr. Abbas Ming','http://www.dr-abbas-ming.de')
      4  into service_provider values (3,'SABBATA United','http://www.sabbata-united.de')
      5  into service_provider values (4,'ABB','http://www.abb.de')
      6  into service_provider values (5,'AND Company Mgmt','http://www.and-company-mgmt.de')
      7  into service_provider values (6,'Dr. Andas Ming','http://www.dr-andas-ming.de')
      8  into service_provider values (7,'SANDATA United','http://www.sandata-united.de')
      9  into service_provider values (8,'AND','http://www.and.de')
    10  into service_provider values (9,'EBB','fuzzy test')
    11  into service_provider values (10,'OND','fuzzy test')
    12  select * from dual
    13  /
    10 rows created.
    SCOTT@10gXE> CREATE INDEX your_index
      2  ON service_provider (name_c)
      3  INDEXTYPE IS CTXSYS.CONTEXT
      4  PARAMETERS ('STOPLIST CTXSYS.EMPTY_STOPLIST')
      5  /
    Index created.
    SCOTT@10gXE> VARIABLE search_string VARCHAR2 (100)
    SCOTT@10gXE> EXEC :search_string := 'abb'
    PL/SQL procedure successfully completed.
    SCOTT@10gXE> COLUMN name_c FORMAT A20 WORD_WRAPPED
    SCOTT@10gXE> COLUMN uri_c  FORMAT A40
    SCOTT@10gXE> select *
      2  from   (select /*+ FIRST_ROWS */ score(1), this_.*
      3            from   service_provider this_
      4            where  CONTAINS
      5                  (this_.NAME_C ,
      6                   '<QUERY>
      7                   <textquery grammar="CONTEXT">
      8                     <progression>
      9                       <seq>{'         || :search_string || '}</seq>
    10                       <seq>'         || :search_string || '%</seq>
    11                       <seq>%'         || :search_string || '%</seq>
    12                       <seq>fuzzy({' || :search_string || '},1,100,WEIGHT)</seq>
    13                     </progression>
    14                  </textquery>
    15                   </QUERY>', 1 ) > 0
    16            order  by score(1) desc, this_.NAME_C)
    17  where  rownum < 21
    18  /
      SCORE(1)         ID NAME_C               URI_C
            76          4 ABB                  http://www.abb.de
            76          1 ABB Company Mgmt     http://www.abb-company-mgmt.de
            51          2 Dr. Abbas Ming       http://www.dr-abbas-ming.de
            26          3 SABBATA United       http://www.sabbata-united.de
             4          9 EBB                  fuzzy test
    SCOTT@10gXE> EXEC :search_string := 'and'
    PL/SQL procedure successfully completed.
    SCOTT@10gXE> /
      SCORE(1)         ID NAME_C               URI_C
            76          8 AND                  http://www.and.de
            76          5 AND Company Mgmt     http://www.and-company-mgmt.de
            51          6 Dr. Andas Ming       http://www.dr-andas-ming.de
            26          7 SANDATA United       http://www.sandata-united.de
             5         10 OND                  fuzzy test
    SCOTT@10gXE>

  • Index rules in oracle text and query using matches

    Dear All,
    I would like to ask about rules and matches function in oracle text.
    I followed an example in oracle text application developer's guide.
    I have a rule table like this :
    1 oracle
    2 larry or ellison
    3 oracle and text
    4 market share
    then, I create an index to that table. This is needed for calling matches function. Here is the syntax :
    create index queryx on queries(query_string)
    indextype is ctxsys.ctxrule;
    then, I noticed that the result on DR$QUERYX$I table as follows :
    LARRY 0 2 2 1 (BLOB)
    MARKET 0 4 4 1 (BLOB) {MARKET} {SHARE}
    ORACLE 0 1 1 1 (BLOB)
    ORACLE 0 3 3 1 (BLOB) {TEXT}
    ELLISON 0 2 2 1 (BLOB)
    What I want to ask is why doesn't the words 'share' and 'text' appear in the DR$QUERYX$ table?
    When we use matches function, it then search on the index result and consequently it wion't find the 'share' word. so when for example I do query like this :
    select query_id from queries where matches(query_string,' It only share ten percent of all products sold')>0
    it will give 0 result since the no word in ' It only share ten percent of all products sold' was in index table. But actually it could possibly be categorized as the 4 category which rules is 'market share'
    I tried this in a larger set of data and get same result.
    Here is my generated rules from my document collection :
    1 {REQUIREMENTS} & {ELICITATION}
    1 {REQUIREMENTS} ~ {ELICITATION} & {ACTOR}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} & {FURPS}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} & {PROC}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} & {SPEED}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} ~ {SPEED} & {DOCUME}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} ~ {SPEED} ~ {DOCUME} & {PLACED}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} ~ {SPEED} ~ {DOCUME} ~ {PLACED} & {UNNECESSARY}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} ~ {SPEED} ~ {DOCUME} ~ {PLACED} ~ {UNNECESSARY} & {MISUSE}
    1 {INTERPRETATION} ~ {REQUIREMENTS}
    2 {DESIGN} & {REPRESENTATION}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} & {OCTOBER}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} & {PROCEDURAL}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} ~ {PROCEDURAL} & {STRICT}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} ~ {PROCEDURAL} ~ {STRICT} & {GRASP}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} ~ {PROCEDURAL} ~ {STRICT} ~ {GRASP} & {MANY} & {LAYER}
    2 {DESIGN} ~ {REPRESENTATION} ~ {MAY}
    3 {PM} & {TESTING} & {ATTRIBUTI}
    And this is the index table result with ctxrule :
    (only the token_text column shown)
    PM
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    INTERPRETATION
    so when I try to classify a document with the word ouline inside it, it should produce category 1 (based on the rules) but since there are no word 'outline' in index tabel, the matches will return 0 means that the document is not classifiedto any category. I don't understand why it happen. Anybody knows about this? I would really appreciate any help.
    Thank you very much.

    Hm, I see. It do make sense. so nice to know.
    But then in the second example I gift where I used larger table, as shown below :
    Here is my generated rules from my document collection :
    1 {REQUIREMENTS} & {ELICITATION}
    1 {REQUIREMENTS} ~ {ELICITATION} & {ACTOR}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} & {FURPS}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} & {PROC}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} & {SPEED}
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE} ~ {PROC} ~ {SPEED} & {DOCUME}
    1 {INTERPRETATION} ~ {REQUIREMENTS}
    2 {DESIGN} & {REPRESENTATION}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} & {OCTOBER}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} & {PROCEDURAL}
    2 {DESIGN} ~ {REPRESENTATION} & {MAY} & {FOUNDATI} ~ {OCTOBER} ~ {PROCEDURAL} & {STRICT}
    2 {DESIGN} ~ {REPRESENTATION} ~ {MAY}
    3 {PM} & {TESTING} & {ATTRIBUTI}
    As far as I know, the sign ' ~ ' means 'OR' and '&' means 'and' . So based on the 4th line in my table :
    1 {REQUIREMENTS} ~ {ELICITATION} ~ {ACTOR} ~ {FURPS} ~ {OUTLINE}
    it can be concluded that if any of the words stated there been queried, so the category '1' will appear as a result. But then before we can use 'matches' to query it, we need ti create index for the rules table . I did it and the result were :
    (only the token_text column shown)
    PM
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    DESIGN
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    REQUIREMENTS
    INTERPRETATION
    there were no words other than PM, DESIGN< REQUIREMENTS and INTERPRETATION. Why the words REQUIREMENTS, ELICITATION, ACTOR, FURPS, OUTLINE don't appear in the index result?

  • Text mining using oracle text

    i am about to embark on an ambitious project. i want to create a document server that can archive documents such as word, pdfs etc. i would also like to create a feature by which users can compare documents to find how similar they are. how do i go about building such an application. do i need to use data mining algorithms like neural network to compare documents. what mining algorithm should i use. or should i create indexes or something. what are the file types supported by oracle text.
    please help me.
    even if u don't have correct answers, please feel free to add your opinion.

    Take a look at the following Oracle 10gR2 Documentation books (available on OTN):
    - Oracle Data Mining Concepts: Chapter 6
    - Oracle Data Mining Application Developer's Guide: Chapter 5 and 7
    You may also take a look at the Oracle Text documentation:
    - Oracle Text Application Developer's Guide: Chapter 6
    Oracle Text uses Oracle Data Mining behind the scene for some of its capabilities (SVM and K-Means). They have different capabilities as explained in Chapter 6 of the Oracle Data Mining Concepts.
    --Marcos                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

  • Product Search Using Oracle Text or By Any Other Methods using PL/SQL

    Hi All,
    I have requirement for product search using the product table which has around 5 million products. I Need to show top 100 disitnct products searched  in the following order
    1. = ProductDescription
    2. ProductDescription_%
    3. %_ProductDescription_%
    4. %_ProductDescription
    5. ProductDescription%
    6. %ProductDescription
    Where '_' is space.  If first two/three/or any criteria itslef gives me 100 records then i need not search for another patterns
    Table Structure Is as follows
    Create Table Tbl_Product_Lookup
        Barcode_number                Varchar2(9),
        Product_Description Varchar2(200),
        Product_Start_Date Date,
        Product_End_Date Date,
        Product_Price Number(12,4)
    Could you please help me implementing this one ? SLA for the search result is 2 seconds
    Thanks,
    Varun

    You could use an Oracle Text context index with a wordlist to speed up substring searches and return all rows that match any of your criteria, combined with a case statement to provide a ranking that can be ordered by within an inner query, then use rownum to limit the rows in an outer query.  You could also use the first_rows(n) hint to speed up the return of limited rows.  Please see the demonstration below.  If you decide to use Oracle Text, you may want to ask further questions in the Oracle Text sub-forum on this forum or space or whatever they call it now.
    SCOTT@orcl_11gR2> -- table:
    SCOTT@orcl_11gR2> Create Table Tbl_Product_Lookup
      2    (
      3       Barcode_number       Varchar2(9),
      4       Product_Description  Varchar2(200),
      5       Product_Start_Date   Date,
      6       Product_End_Date     Date,
      7       Product_Price          Number(12,4)
      8    )
      9  /
    Table created.
    SCOTT@orcl_11gR2> -- sample data:
    SCOTT@orcl_11gR2> insert all
      2  into tbl_product_lookup (product_description) values ('test product')
      3  into tbl_product_lookup (product_description) values ('test product and more')
      4  into tbl_product_lookup (product_description) values ('another test product and more')
      5  into tbl_product_lookup (product_description) values ('another test product')
      6  into tbl_product_lookup (product_description) values ('test products')
      7  into tbl_product_lookup (product_description) values ('selftest product')
      8  select * from dual
      9  /
    6 rows created.
    SCOTT@orcl_11gR2> insert into tbl_product_lookup (product_description) select object_name from all_objects
      2  /
    75046 rows created.
    SCOTT@orcl_11gR2> -- wordlist:
    SCOTT@orcl_11gR2> begin
      2    ctx_ddl.create_preference('mywordlist', 'BASIC_WORDLIST');
      3    ctx_ddl.set_attribute('mywordlist','PREFIX_INDEX','TRUE');
      4    ctx_ddl.set_attribute('mywordlist','PREFIX_MIN_LENGTH', '3');
      5    ctx_ddl.set_attribute('mywordlist','PREFIX_MAX_LENGTH', '4');
      6    ctx_ddl.set_attribute('mywordlist','SUBSTRING_INDEX', 'YES');
      7    ctx_ddl.set_attribute('mywordlist', 'wildcard_maxterms', 0) ;
      8  end;
      9  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl_11gR2> -- context index that uses wordlist:
    SCOTT@orcl_11gR2> create index prod_desc_text_idx
      2  on tbl_product_lookup (product_description)
      3  indextype is ctxsys.context
      4  parameters ('wordlist mywordlist')
      5  /
    Index created.
    SCOTT@orcl_11gR2> -- gather statistics:
    SCOTT@orcl_11gR2> exec dbms_stats.gather_table_stats (user, 'TBL_PRODUCT_LOOKUP')
    PL/SQL procedure successfully completed.
    SCOTT@orcl_11gR2> -- query:
    SCOTT@orcl_11gR2> variable productdescription varchar2(100)
    SCOTT@orcl_11gR2> exec :productdescription := 'test product'
    PL/SQL procedure successfully completed.
    SCOTT@orcl_11gR2> column product_description format a45
    SCOTT@orcl_11gR2> set autotrace on explain
    SCOTT@orcl_11gR2> set timing on
    SCOTT@orcl_11gR2> select /*+ FIRST_ROWS(100) */ *
      2  from   (select /*+ FIRST_ROWS(100) */ distinct
      3              case when product_description = :productdescription            then 1
      4               when product_description like :productdescription || ' %'       then 2
      5               when product_description like '% ' || :productdescription || ' %' then 3
      6               when product_description like '% ' || :productdescription       then 4
      7               when product_description like :productdescription || '%'       then 5
      8               when product_description like '%' || :productdescription       then 6
      9              end as ranking,
    10              product_description
    11           from   tbl_product_lookup
    12           where  contains (product_description, '%' || :productdescription || '%') > 0
    13           order  by ranking)
    14  where  rownum <= 100
    15  /
       RANKING PRODUCT_DESCRIPTION
             1 test product
             2 test product and more
             3 another test product and more
             4 another test product
             5 test products
             6 selftest product
    6 rows selected.
    Elapsed: 00:00:00.10
    Execution Plan
    Plan hash value: 459057338
    | Id  | Operation                      | Name               | Rows  | Bytes | Cost (%CPU)| Time     |
    |   0 | SELECT STATEMENT               |                    |    38 |  3990 |    13  (16)| 00:00:01 |
    |*  1 |  COUNT STOPKEY                 |                    |       |       |            |          |
    |   2 |   VIEW                         |                    |    38 |  3990 |    13  (16)| 00:00:01 |
    |*  3 |    SORT UNIQUE STOPKEY         |                    |    38 |   988 |    12   (9)| 00:00:01 |
    |   4 |     TABLE ACCESS BY INDEX ROWID| TBL_PRODUCT_LOOKUP |    38 |   988 |    11   (0)| 00:00:01 |
    |*  5 |      DOMAIN INDEX              | PROD_DESC_TEXT_IDX |       |       |     4   (0)| 00:00:01 |
    Predicate Information (identified by operation id):
       1 - filter(ROWNUM<=100)
       3 - filter(ROWNUM<=100)
       5 - access("CTXSYS"."CONTAINS"("PRODUCT_DESCRIPTION",'%'||:PRODUCTDESCRIPTION||'%')>0)
    SCOTT@orcl_11gR2>

  • Performance issues and options to reduce load with Oracle text implementation

    Hi Experts,
    My database on Oracle 11.2.0.2 on Linux. We have Oracle Text implemented for fuzzy search. Our oracle text indexes are defined as sync on commit as we can not afford to have stale data.  Now our application does literally thousands of inserts/updates/deletes to those columns where we have these Oracle text indexes defined. As a result, we are seeing a lot of performance impact due to the oracle text sync routines being called on each commit. We are doing the index optimization every night (full optimization every night at 3 am).  The oracle text index related internal operations are showing up as top sql in our AWR report and there are concerns that it is causing lot of load on the DB.  Since we do the full index optimization only once at night, I am thinking should I change that , and if I do so, will it help us?
    For example here are some data from my one day's AWR report:
    Elapsed Time (s)
    Executions
    Elapsed Time per Exec (s)
    %Total
    %CPU
    %IO
    SQL Id
    SQL Module
    SQL Text
    27,386.25
    305,441
    0.09
    16.50
    15.82
    9.98
    ddr8uck5s5kp3
    begin ctxsys.drvdml.com_sync_i...
    14,618.81
    213,980
    0.07
    8.81
    8.39
    27.79
    02yb6k216ntqf
    begin ctxsys.syncrn(:idxownid,...
    Full Text of above top sql:
    ddr8uck5s5kp3
    begin ctxsys.drvdml.com_sync_index(:idxname, :idxmem, :partname);
    end
    02yb6k216ntqf
    begin ctxsys.syncrn(:idxownid, :idxoname, :idxid, :ixpid, :rtabnm, :flg); end;
    Now if I do the full index optimization more often and not just once at night 3 PM, will that mean, the load on DB due to sync on commit will decrease? If yes how often should I optimized and doesn't the optimization itself lead to some load? Can someone suggest?
    Thanks,
    OrauserN

    You can query the ctx_parameters view to see what your default and maximum memory values are:
    SCOTT@orcl12c> COLUMN bytes    FORMAT 9,999,999,999
    SCOTT@orcl12c> COLUMN megabytes FORMAT 9,999,999,999
    SCOTT@orcl12c> SELECT par_name AS parameter,
      2          TO_NUMBER (par_value) AS bytes,
      3          par_value / 1048576 AS megabytes
      4  FROM   ctx_parameters
      5  WHERE  par_name IN ('DEFAULT_INDEX_MEMORY', 'MAX_INDEX_MEMORY')
      6  ORDER  BY par_name
      7  /
    PARAMETER                               BYTES      MEGABYTES
    DEFAULT_INDEX_MEMORY               67,108,864             64
    MAX_INDEX_MEMORY                1,073,741,824          1,024
    2 rows selected.
    You can set the memory value in your index parameters:
    SCOTT@orcl12c> CREATE INDEX EMPLOYEE_IDX01
      2  ON EMPLOYEES (EMP_NAME)
      3  INDEXTYPE IS CTXSYS.CONTEXT
      4  PARAMETERS ('SYNC (ON COMMIT) MEMORY 1024M')
      5  /
    Index created.
    You can also modify the default and maximum values using CTX_ADM.SET_PARAMETER:
    http://docs.oracle.com/cd/E11882_01/text.112/e24436/cadmpkg.htm#CCREF2096
    The following contains general guidelines for what to set the max_index_memory parameter and others to:
    http://docs.oracle.com/cd/E11882_01/text.112/e24435/aoptim.htm#CCAPP9274

  • Oracle Text - CTX Context Index Soundex Problem

    Hi,
    I'm running into a problem with Oracle Text when searching using the ! (soundex) option. I've created a simple test example to highlight the issue.
    Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bit
    Windows 2008 Server 64-bit
    create table test_tab (test_col  varchar2(200));
    insert all
      into test_tab (test_col) values ('ab-tönes')
      into test_tab (test_col) values ('ab-tones')
      into test_tab (test_col) values ('abtones')
      into test_tab (test_col) values ('ab tones')
      into test_tab (test_col) values ('ab-tanes')
      select * from dual
    select * from test_tab
    begin
          ctx_ddl.create_preference ('test_lex1', 'basic_lexer');
          ctx_ddl.set_attribute ('test_lex1', 'whitespace', '/\|-_+&''');
          ctx_ddl.set_attribute('test_lex1','base_letter','YES');
          -- ctx_ddl.set_attribute('test_lex1','skipjoins','-');
    end;
    create index test_idx on test_tab (test_col)
      indextype is ctxsys.context
        parameters
          ('lexer        test_lex1'     
    select token_text from dr$test_idx$i;
    TOKEN_TEXT
    AB
    ABTONES
    TANES
    TONES
    select * from test_tab where contains (test_col, '!ab tones') > 0;
    TEST_COL
    ab-tönes
    ab-tones
    ab tones
    select * from test_tab where soundex(test_col) = soundex('ab tones');
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    So my question is, can anyone suggest an approach whereby I can get the Oracle Text Context index (or CTXCAT index if it's more appropriate) to return all 5 rows like the simple Soundex is doing?
    I can't really use soundex as this search query will form part of a search screen for a multi-language application. Soundex is limited to English sounding words, so I need the solution to be able to compare strings that may not "sound" English.
    It must be an attribute of the BASIC_LEXER, and I've tried skipjoins, start/end-joins, stop lists, but I just cannot get the Soundex feature of Oracle Text to function like the SOUNDEX() function!
    Looking at how the tokens are stored dr$test_idx$i I need Oracle Text to almost concat 'AB' and 'TONES' to search as a single string.
    Any help greatly appreciated.
    Thanks,

    I am not getting the same problem that you are getting with the umlat, but I don't see what is different.  Please post the result of:
    select ctx_report.create_index_script ('test_idx') from dual;
    Here are the results on my system.  Perhaps you can spot the difference.  I added an empty_stoplist, so that it won't print out a long list of stopwords.
    SCOTT@orcl12c> create table test_tab (test_col    varchar2(200))
      2  /
    Table created.
    SCOTT@orcl12c> insert all
      2    into test_tab (test_col) values ('ab-tönes')
      3    into test_tab (test_col) values ('ab-tones')
      4    into test_tab (test_col) values ('abtones')
      5    into test_tab (test_col) values ('ab tones')
      6    into test_tab (test_col) values ('ab-tanes')
      7  select * from dual
      8  /
    5 rows created.
    SCOTT@orcl12c> select * from test_tab
      2  /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> begin
      2    ctx_ddl.create_preference ('test_lex1', 'basic_lexer');
      3    ctx_ddl.set_attribute('test_lex1','base_letter','YES');
      4  end;
      5  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> create or replace procedure test_proc
      2    (p_rowid in          rowid,
      3      p_clob    in out nocopy clob)
      4  as
      5  begin
      6    select replace (translate (test_col, '/\|-_+&''', '      '), ' ', '')
      7    into   p_clob
      8    from   test_tab
      9    where  rowid = p_rowid;
    10  end test_proc;
    11  /
    Procedure created.
    SCOTT@orcl12c> show errors
    No errors.
    SCOTT@orcl12c> begin
      2    ctx_ddl.create_preference ('test_ds', 'user_datastore');
      3    ctx_ddl.set_attribute ('test_ds', 'procedure', 'test_proc');
      4  end;
      5  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> create index test_idx on test_tab (test_col)
      2    indextype is ctxsys.context
      3    parameters
      4       ('lexer    test_lex1
      5         datastore    test_ds
      6         stoplist    ctxsys.empty_stoplist')
      7  /
    Index created.
    SCOTT@orcl12c> select token_text from dr$test_idx$i
      2  /
    TOKEN_TEXT
    ABTANES
    ABTONES
    2 rows selected.
    SCOTT@orcl12c> variable search_string varchar2(100)
    SCOTT@orcl12c> exec :search_string := 'ab tones'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> select * from test_tab
      2  where  contains
      3            (test_col,
      4             '!' || replace (:search_string, ' ', ' !') ||
      5             ' or !' || replace (:search_string, ' ', '')) > 0
      6  /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> exec :search_string := 'abtones'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> exec :search_string := 'ab tönes'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> select ctx_report.create_index_script ('test_idx') from dual
      2  /
    CTX_REPORT.CREATE_INDEX_SCRIPT('TEST_IDX')
    begin
      ctx_ddl.create_preference('"TEST_IDX_DST"','USER_DATASTORE');
      ctx_ddl.set_attribute('"TEST_IDX_DST"','PROCEDURE','"SCOTT"."TEST_PROC"');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_FIL"','NULL_FILTER');
    end;
    begin
      ctx_ddl.create_section_group('"TEST_IDX_SGP"','NULL_SECTION_GROUP');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_LEX"','BASIC_LEXER');
      ctx_ddl.set_attribute('"TEST_IDX_LEX"','BASE_LETTER','YES');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_WDL"','BASIC_WORDLIST');
      ctx_ddl.set_attribute('"TEST_IDX_WDL"','STEMMER','ENGLISH');
      ctx_ddl.set_attribute('"TEST_IDX_WDL"','FUZZY_MATCH','GENERIC');
    end;
    begin
      ctx_ddl.create_stoplist('"TEST_IDX_SPL"','BASIC_STOPLIST');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_STO"','BASIC_STORAGE');
      ctx_ddl.set_attribute('"TEST_IDX_STO"','R_TABLE_CLAUSE','lob (data) store as (
    cache)');
      ctx_ddl.set_attribute('"TEST_IDX_STO"','I_INDEX_CLAUSE','compress 2');
    end;
    begin
      ctx_output.start_log('TEST_IDX_LOG');
    end;
    create index "SCOTT"."TEST_IDX"
      on "SCOTT"."TEST_TAB"
          ("TEST_COL")
      indextype is ctxsys.context
      parameters('
        datastore       "TEST_IDX_DST"
        filter          "TEST_IDX_FIL"
        section group   "TEST_IDX_SGP"
        lexer           "TEST_IDX_LEX"
        wordlist        "TEST_IDX_WDL"
        stoplist        "TEST_IDX_SPL"
        storage         "TEST_IDX_STO"
    begin
      ctx_output.end_log;
    end;
    1 row selected.

Maybe you are looking for