Oracle Text: CTXCAT

Hi all, I have the following table
CREATE TABLE SEM_TRAJS OF SEM_TRAJECTORY
NESTED TABLE EPISODES STORE AS EPISODES_NESTEDTAB ...
and the following types
CREATE OR REPLACE
type SEM_TRAJECTORY as object
-- Attributes
sem_trajectory_tag varchar2(50),
srid integer,
episodes sem_episode_tab,
o_id integer,
semtraj_id integer,
and CREATE OR REPLACE
type EPISODE as object
-- Attributes
defining_tag varchar2(4),
episode_tag varchar2(50),
activity_tag varchar2(50),
Now i want to create a pattern matching query with Oracle Text so I can search in defining_tag, episode_tag, activity_tag ... How can I create a CTXCAT index and use it?
Thanks in advance.

You won't be able to create a Text index (ctxcat or context) on an object table directly, so I would create a materialized view, then create the index on the materialized view. The following is a fairly simple example, using a materialized view, a multi_column_datastore, and a context index. You could use a user_datastore with a procedure instead of the multi_column_datastore, but that would be a bit more complicated. You will need to deal with refreshing your materialized view and synchronizing your index and you could add sections if you want to search within individual attributes of the episodes. This is just a simple starter example. It looks like your problem is an interesting combination of Text, Objects, and Spatial. The data that I used is just for the simple demonstration and is not valid spatial data.
SCOTT@orcl_11gR2> CREATE OR REPLACE TYPE episode AS OBJECT
  2    (defining_tag  VARCHAR2( 4),
  3       episode_tag   VARCHAR2(50),
  4       activity_tag  VARCHAR2(50));
  5  /
Type created.
SCOTT@orcl_11gR2> CREATE OR REPLACE TYPE sem_episode_tab AS TABLE OF episode
  2  /
Type created.
SCOTT@orcl_11gR2> CREATE OR REPLACE TYPE sem_trajectory AS OBJECT
  2    (sem_trajectory_tag  VARCHAR2(50),
  3       srid              INTEGER,
  4       episodes         sem_episode_tab,
  5       o_id              INTEGER,
  6       semtraj_id         INTEGER)
  7  /
Type created.
SCOTT@orcl_11gR2> SHOW ERRORS
No errors.
SCOTT@orcl_11gR2> CREATE TABLE sem_trajs OF sem_trajectory
  2  NESTED TABLE episodes STORE AS episodes_nestedtab
  3  /
Table created.
SCOTT@orcl_11gR2> INSERT INTO sem_trajs
  2  VALUES
  3    ('semtrajtag1',
  4       1,
  5       sem_episode_tab
  6         (episode ('dt1a', 'etag1a', 'atag1a'),
  7          episode ('dt1b', 'etag1b', 'atag1b')),
  8       1,
  9       1)
10  /
1 row created.
SCOTT@orcl_11gR2> INSERT INTO sem_trajs
  2  VALUES
  3    ('semtrajtag2',
  4       2,
  5       sem_episode_tab
  6         (episode ('dt2a', 'etag2a', 'atag2a'),
  7          episode ('dt2b', 'etag2b', 'atag2b')),
  8       2,
  9       2)
10  /
1 row created.
SCOTT@orcl_11gR2> CREATE MATERIALIZED VIEW episodes_mv
  2  AS
  3  SELECT st.sem_trajectory_tag, st.srid, st.o_id, semtraj_id,
  4           e.defining_tag, e.episode_tag, e.activity_tag,
  5           ' ' AS episodes
  6  FROM   sem_trajs st,
  7           TABLE (st.episodes) e
  8  /
Materialized view created.
SCOTT@orcl_11gR2> COLUMN sem_trajectory_tag FORMAT A18
SCOTT@orcl_11gR2> COLUMN defining_tag         FORMAT A12
SCOTT@orcl_11gR2> COLUMN episode_tag         FORMAT A11
SCOTT@orcl_11gR2> COLUMN activity_tag         FORMAT A12
SCOTT@orcl_11gR2> SELECT * FROM episodes_mv
  2  /
SEM_TRAJECTORY_TAG       SRID       O_ID SEMTRAJ_ID DEFINING_TAG EPISODE_TAG ACTIVITY_TAG E
semtrajtag1                 1          1          1 dt1a         etag1a      atag1a
semtrajtag1                 1          1          1 dt1b         etag1b      atag1b
semtrajtag2                 2          2          2 dt2a         etag2a      atag2a
semtrajtag2                 2          2          2 dt2b         etag2b      atag2b
4 rows selected.
SCOTT@orcl_11gR2> BEGIN
  2    CTX_DDL.CREATE_PREFERENCE ('emv_mcds', 'MULTI_COLUMN_DATASTORE');
  3    CTX_DDL.SET_ATTRIBUTE ('emv_mcds', 'COLUMNS', 'defining_tag, episode_tag, activity_tag');
  4  END;
  5  /
PL/SQL procedure successfully completed.
SCOTT@orcl_11gR2> CREATE INDEX indexname
  2    ON episodes_mv (episodes)
  3    INDEXTYPE IS CTXSYS.CONTEXT
  4    PARAMETERS ('DATASTORE emv_mcds')
  5  /
Index created.
SCOTT@orcl_11gR2> SELECT * FROM episodes_mv
  2  WHERE  CONTAINS (episodes, 'dt1a') > 0
  3  /
SEM_TRAJECTORY_TAG       SRID       O_ID SEMTRAJ_ID DEFINING_TAG EPISODE_TAG ACTIVITY_TAG E
semtrajtag1                 1          1          1 dt1a         etag1a      atag1a
1 row selected.
SCOTT@orcl_11gR2> SELECT * FROM episodes_mv
  2  WHERE  CONTAINS (episodes, 'atag2b') > 0
  3  /
SEM_TRAJECTORY_TAG       SRID       O_ID SEMTRAJ_ID DEFINING_TAG EPISODE_TAG ACTIVITY_TAG E
semtrajtag2                 2          2          2 dt2b         etag2b      atag2b
1 row selected.

Similar Messages

  • ORACLE TEXT INDEX ON VARCHAR2 COLUMN

    Hello All,
    I find a search in our application very slow so i thought of using ORACLE TEXT CTXCAT index based search but i find certain inconsistencies . How can this be avoided....The following query should not return result if i can replace with oracle text but i find few values....why is that...i have also given few sample results below....
    SELECT first_name
    FROM uc_partner_ms
    WHERE
    Upper(first_name) LIKE '%WIE%'
    minus
    SELECT first_name
    FROM uc_partner_ms
    WHERE CATSEARCH (first_name,'*wie*', null) > 0
    RESULTS ....
    Hans-Werner Mrowiec
    Heinz Oesterwiemann GmbH
    Helmut Froitzheim GmbH, Neuwied
    Heribert Schwies
    Hermann Twieling GmbH & Co. KG
    Horst Breitwieser
    Horst-Dieter Swie
    The script used for creating index is
    begin
    ctx_ddl.create_preference('mylex', 'BASIC_LEXER');
    ctx_ddl.set_attribute ( 'mylex', 'index_themes', 'NO');
    ctx_ddl.set_attribute ( 'mylex', 'mixed_case', 'NO');
    end;
    CREATE INDEX partner_index ON uc_partner_ms (first_name)
    INDEXTYPE IS CTXSYS.CTXCAT
    parameters ( 'LEXER mylex' );
    Where am i wrong i could not guess a trend in the results other than all being in lower case.....

    Catsearch does not support leading wildcards. As a workaround, you can use a query template with context grammar. Please see the reproduction and solution below.
    SCOTT@orcl_11g> -- test environment:
    SCOTT@orcl_11g> CREATE TABLE uc_partner_ms
      2    (first_name  VARCHAR2 (60))
      3  /
    Table created.
    SCOTT@orcl_11g> SET DEFINE OFF
    SCOTT@orcl_11g> INSERT ALL
      2  INTO uc_partner_ms VALUES ('Hans-Werner Mrowiec')
      3  INTO uc_partner_ms VALUES ('Heinz Oesterwiemann GmbH')
      4  INTO uc_partner_ms VALUES ('Helmut Froitzheim GmbH, Neuwied')
      5  INTO uc_partner_ms VALUES ('Heribert Schwies')
      6  INTO uc_partner_ms VALUES ('Hermann Twieling GmbH & Co. KG')
      7  INTO uc_partner_ms VALUES ('Horst Breitwieser')
      8  INTO uc_partner_ms VALUES ('Horst-Dieter Swie')
      9  SELECT * FROM DUAL
    10  /
    7 rows created.
    SCOTT@orcl_11g> begin
      2    ctx_ddl.create_preference('mylex', 'BASIC_LEXER');
      3    ctx_ddl.set_attribute ( 'mylex', 'index_themes', 'NO');
      4    ctx_ddl.set_attribute ( 'mylex', 'mixed_case', 'NO');
      5  end;
      6  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl_11g> CREATE INDEX partner_index ON uc_partner_ms (first_name)
      2  INDEXTYPE IS CTXSYS.CTXCAT
      3  parameters ( 'LEXER mylex' )
      4  /
    Index created.
    SCOTT@orcl_11g> -- reproduction:
    SCOTT@orcl_11g> SELECT first_name
      2  FROM uc_partner_ms
      3  WHERE
      4  Upper(first_name) LIKE '%WIE%'
      5  minus
      6  SELECT first_name
      7  FROM uc_partner_ms
      8  WHERE CATSEARCH (first_name,'*wie*', null) > 0
      9  /
    FIRST_NAME
    Hans-Werner Mrowiec
    Heinz Oesterwiemann GmbH
    Helmut Froitzheim GmbH, Neuwied
    Heribert Schwies
    Hermann Twieling GmbH & Co. KG
    Horst Breitwieser
    Horst-Dieter Swie
    7 rows selected.
    SCOTT@orcl_11g> -- solution:
    SCOTT@orcl_11g> SELECT first_name
      2  FROM uc_partner_ms
      3  WHERE
      4  Upper(first_name) LIKE '%WIE%'
      5  minus
      6  SELECT first_name
      7  FROM   uc_partner_ms
      8  WHERE  CATSEARCH
      9             (first_name,
    10              '<query>
    11              <textquery grammar="CONTEXT">
    12                %wie%
    13              </textquery>
    14            </query>',
    15              null) > 0
    16  /
    no rows selected
    SCOTT@orcl_11g>

  • Upgrading Oracle Text - Post upgrade step 10.2 to 11.2

    I already upgraded my 10.2.0.4 database to 11.2.0.1 and have to do the post-upgrade steps. Step 39 of the manual upgrade guideline (837570.1) is not clear to me. If someone could explain it further, it would be appreciated. When i check my source ORACLE_HOME/ctx/admin/ctxf102.txt or ctxf102.sql
    Step 39
    Upgrading Oracle Text
    Copy the following files from the previous Oracle home to the new Oracle home:
    * Stemming user-dictionary files
    * User-modified KOREAN_MORPH_LEXER dictionary files
    * USER_FILTER executables
    To obtain a list of the above files, use:
    $ORACLE_HOME/ctx/admin/ctxf<version>.txt
    $ORACLE_HOME/ctx/admin/ctxf<version>.sql
    where version is 920,101,102
    For instance, if upgrading from 10.2.0
    *1. For dictionary files check*
    *$ORACLE_HOME/ctx/admin/ctxf102.txt*
    *2. Execute the script as database user SYS,SYSTEM, or CTXSYS*
    *$ORACLE_HOME/ctx/admin/ctxf102.sql*
    If your Oracle Text index uses KOREAN_LEXER which was deprecated in Oracle 9i and desupported in Oracle 10g Release 2, see below Note for further information on manual migration from KOREAN_LEXER to KOREAN_MORPH_LEXER.
    Note 300172.1 Obsolescence of KOREAN_LEXER Lexer Type

    Hi Srini
    Thank you very much. now i got it.
    Oracle asked me to identify the CTXCAT indexes with KOREAN_LEXER execute the following query as user CTXSYS: if nothing return then i can skip this step.
    SELECT idx_name
    FROM ctxsys.ctx_indexes
    WHERE idx_type = 'CTXCAT'
    AND idx_name IN
    (SELECT ixo_index_name
    FROM ctxsys.ctx_index_objects
    WHERE ixo_class = 'LEXER'
    AND ixo_object = 'KOREAN_MORPH_LEXER ');
    SELECT isl_index_owner,isl_index_name,isl_language
    FROM CTXSYS.ctx_index_sub_lexers
    WHERE isl_object = 'KOREAN_MORPH_LEXER';

  • Using Oracle Text with Apex

    Can someone point me to some resources on how to integrate Oracle Text and APEX to do searches, highlight results, etc (all the features of Oracle Text)?
    The data to be indexed is in files on the filesystem, so I would like to keep it that way and use the FILE_DATASTORE option for Text.
    Thanks for any pointers.
    Update: Yes, I did see http://www.oracle.com/technology/products/database/application_express/pdf/apex_text_application_v1.6.pdf
    but the search results there just returns the URL/file containing the "hit". It doesn't show the actual text fragment that caused the match, doesn't highlight it, etc. I am looking for a real Google-like search. Hm, having said that, I might as well use Google Desktop! Nah, where's the fun in that?

    This is a very simple application for my own use. It started life in 8i when there were fewer Text options.
    As such, it uses the query string as entered. This returns all of the matches:
    select msgid, msgdate, Box, fromaddr, subject
      from eudora.inbox
    where contains(body, :P703_MailSearch) > 0
    order by msgdate desc
    I display the selected result like this:
    select subject,
      Replace(eudora.mmarkup(:P704_MSGID, :P702_SEARCH), Chr(13), '<BR>') Body
      from eudora.inbox
    where msgid = :P704_MSGID
    In a newer application, I experimented with the CTXCAT grammar.
    That query looks like this:
    select m.ID, m.pdpno, m.shortdesc
      from pdp_mast m
    where contains(m.dphistory, '<query><textquery lang="ENGLISH" grammar="CTXCAT">
                                             ' || :P1_Text || '
                                         </textquery>
                                      <score datatype="INTEGER"/>
                                  </query>') > 0     
        or contains(m.shortdesc, '<query><textquery lang="ENGLISH" grammar="CTXCAT">
                                             ' || :P1_Text || '
                                         </textquery>
                                      <score datatype="INTEGER"/>
                                  </query>') > 0
    As always, once you figure out the syntax, it's easy to make it work in Apex.
    Text indexes are very fast. On my old 600MHz PC, searches in 250MB of text take less than a second.

  • Searching using Oracle Text instead of LIKE '%'

    Hello all,
    I hope you help me in this:
    I have a table looks like this
    create table subscribers (
    id numer(10),
    first_name varchar2(30),
    father_name varchar2(30),
    grandfather_name varchar2(30),
    last_name varchar2(30))
    The application is built using Oracle Forms. Many times, the end users are not so sure of the spelling of the name, therefore they use the "%" wildcard with name fields. This will be reflected to the queries the application will send them to the Oracle Server.
    We have the following queries
    1) select *
    from subscribers
    where last_name like '%family_name%';
    2) select *
    from subscribers
    where last_name like 'family_name%';
    3) select *
    from subscribers
    where last_name like '%family_name%' and first_name like '%first_name%';
    4) select *
    from subscribers
    where last_name like 'family_name%' and first_name like 'first_name%';
    As well as searching on the father_name and grandfather_name fields. But most of the search are on the first_name and the last_name.
    These queries are killing the server since we have millions of records. BTree indexes will not help here because of the LIKE and the "%"
    I am thinking to use Oracle Text here, but I am not sure whether I have to go for a CONTEXT index on each individual column, or I can use the MULTI_COLUMN_DATASTORE indexing.
    Any idea will be appreciated

    The ctxcat index and catsearch operator are generally intended for usage with one text column and one or more columns of structured data. You would have to pick just one of your columns as the text column and the others as structured columns. I would be more inclined to use the multi_column_datastore with a context index and contains operator, so that you can search all of your columns as text columns.

  • Structure of Indexes in oracle Text

    Hi,
    I would like to know which index type Oracle Text uses.
    Is it B-tree indexes? Or which structure does Oracle use to index the documents?
    I know there are some differences between ctxcat and context.
    Can anyone help?
    Regards,
    Arsineh

    context is an inverted index structure. ctxcat is based on a b-tree.

  • Oracle Text and TREC

    Hello,
    I am new to SQL, Oracle, and Oracle Text and need to use Oracle Text to index about 2GB of files (located on the filesystem), each of which contain multiple documents. These documents are all in SGML format with the relevant data I need being inbetween DOCNO and TEXT tags.
    So far I understand I need to create a table similar to the following...
    create table "DocTable" ("Docno" number, "Text" text)
    ...and then either use a CONTEXT or CTXCAT index, but im not sure which.
    In general im not too sure what to do. Any help is appreciated.
    Thanks :)

    it actually depends upon what u r searching for
    A file datastore works as below
    1) a location where all your files are stored - say /mydocs
    so you need to create a preference
    begin
    ctx_ddl.create_preference('COMMON_DIR','FILE_DATASTORE');
    ctx_ddl.set_attribute('COMMON_DIR','PATH','/mydocs');
    end;
    Now create a table where you list all the file names; the doc id is something for your reference. This can be any number you prefer, but it has to be unique, as this is the primary key.
    create table mytable(id number primary key, docs varchar2(2000));
    insert into mytable values(111555,'first.txt');
    insert into mytable values(111556,'second.txt');
    commit;
    Now create the index, which actually fetches the documents from the file location:
    create index myindex on mytable(docs)
    indextype is ctxsys.context
    parameters ('datastore COMMON_DIR');
    now the queries on the table will be using CONTAINS operator - as you have created a CONTEXT index.
    So, you first need to determine what kind of queries you need to make. On the basis of that you can create the index.

  • Deciding between Oracle Text  v/s PL/SQL

    In the Oracle Text technical document it is mentioned that the Standard ( CONTEXT ) and Catalog ( CTXCAT ) types of index are used to build indexes for larger coherent documents and to perform mixed queries, respectively.
    As I read further, I understand that if the requirement is not heavily document centric, then Oracle Text may not be an ideal candidate to use. If most of the data is going to reside primarily in tables, then standard PL/SQL queries and joins are the way to go. But on the other hand, using standard SQL for name matches with the LIKE operator, for example, may not be guaranteed to work or may be complex to implement when trying wildcard or theme matches.
    So the question is do we use Oracle text irrespective of the type of content being indexed i.e table data v/s documents ? How do we make that judgement?

    What type of data do you have and what types of searches do you want to be able to do? If you need features that are only available in Text, then you need Text. For example, if you will be searching documents that are stored in operating system files or in blob columns and you want to do stem searches or fuzzy searches or use a thesaurus, then you will need Oracle Text. If, on the other hand, the data that you have and the searches that you want can be done with or without Text, then you have a choice to make, with the major issue being which is more efficient. When in doubt, a little testing can help you decide. Set up a realistic test environment, test some queries both ways, and see which is fastest. If you are just doing standard searches on varchar2 columns, you may get better performance without Text.

  • Oracle Text performance -- failed attempts

    We are trying to implement a simple search of text data stored in a heavily used table (inserts/updates). There are 3 columns to index --
    Headline (varchar2(255))
    Subheadline (varchar2(255))
    Teaser (varchar2(4000))
    The first attempt to implement Oracle text w/ CATSEARCH
    begin
    ctx_ddl.create_index_set('cms_iset');
    ctx_ddl.add_index('cms_iset','poolid_cp, mediaid_cp'); /* sub-index A */
    end;
    ---- We knew we were going to filter on poolid_cp and mediaid_cp ---
    CREATE INDEX cms_headlineidx ON con_properties (headline)
    INDEXTYPE IS ctxsys.CTXCAT
    PARAMETERS ('index set cms_iset');
    CREATE INDEX cms_subheadlineidx ON con_properties (subheadline)
    INDEXTYPE IS ctxsys.CTXCAT
    PARAMETERS ('index set cms_iset');
    CREATE INDEX cms_teaseridx ON con_properties (teaser)
    INDEXTYPE IS ctxsys.CTXCAT
    PARAMETERS ('index set cms_iset');
    *********THE RESULTS*************
    Our application server would spin up threads that would appear to be hanging. The load on the DB servers (RAC) were higher than normal. This implementation would have saved on having to do resync's manually.
    The next attempt was implementing w/ CONTEXT:
    alter table con_properties add (dummy varchar2(1));
    begin
    ctx_ddl.create_preference('con_propsearch', 'MULTI_COLUMN_DATASTORE');
    ctx_ddl.set_attribute('con_propsearch', 'columns', 'headline,subheadline,teaser');
    end;
    CREATE INDEX con_properties_searchidx
    ON con_properties(dummy)
    INDEXTYPE IS CTXSYS.CONTEXT
    PARAMETERS ('datastore CTXSYS.con_propsearch')
    Records getting put into the ctx_user_pending table a few hundred per hour.
    ********THE RESULTS*************
    Same issue with the application servers spinning off threads that seem to be hung. Spikey load on the DB servers (RAC).
    NOTE: In both implementations, running search querys ran OK. However, dropping the text index in BOTH cases caused the application servers to behave normally.
    Can anyone tell me what's going on internally with Oracle TEXT when a table is heavily inserted and updated? What is going on in the background. Is there some sort of lock that the app servers are waiting on? I know there is "overhead" with inserts on a normal b-tree index. Is it "exponential" with Oracle Text?
    Thank you!

    When documents in the base table are inserted, updated, or deleted, their ROWIDs are held in a DML queue until you synchronize the index. You can view this queue with the CTX_USER_PENDING view. Apparently, you are not synchronizing your context index, so the queue is building infinitely. You need to establish some method of synchronizing your index. You can use parameters('sync(on commit)') in your index creation or create an after insert or update statement level trigger, not row trigger, that uses dbms_job.submit to schedule ctx_ddl.sync_index to synchronize the index upon commit of the dml or you can manually run ctx_ddl.sync_index periodically or schedule it or you can alter and rebuild your index periodically or you can drop and recreate it periodically. Which method you choose depends on how current the information that you query needs to be. If your data needs to be current up to the moment, then you should sync on commit. Otherwise it may be better to do it in periodic batches.

  • Oracle text error when generating random rows

    Hello
    firing
    SELECT *
    FROM
    SELECT NAME,EMAIL,ADDRESS1,ADDRESS2,CONTACT_NAME,MOBILE,TELEPHONE FROM MV_CAT_SEG_REG_PROD
    WHERE CATSEARCH(CAT_TYPES,'security services*' ,NULL)>0 AND
    PLAN_ID=1 AND ACT_STATUS='N'
    ORDER BY DBMS_RANDOM.VALUE
    where rownum < 4;
    returns
    ORA-20000: Oracle Text error:
    DRG-10849: catsearch does not support functional invocation
    DRG-10599: column is not indexed
    20000. 00000 - "%s"
    *Cause:    The stored procedure 'raise_application_error'
    was called which causes this error to be generated.
    *Action:   Correct the problem as described in the error message or contact
    the application administrator or DBA for more information.
    using oracle 10gr2 on windows server 2003
    i have tried 1)dropping the index and creating it again, the index type is "CTXSYS"."CTXCAT"
    2) deleting the stats -checking
    3)recreating the stats- checking
    the table here is a materialized view
    i need to tell you people that
    there are two indexes cat_types_ind and cat_ids_idx on cat_types and cat_ids columns respectively
    the inner query uses cat_types_idx index when executed and seen in sqladvisor
    1)removing the order by clause will make the query work but i really want that order by clause
    2)the inner-query-only works fine
    3) i have seen the forums and they have helped regarding the things i tried above but it does not work
    please tell me if i need to further elaborate on anything
    thanks in advance

    I have same problem, my query is:
    SELECT *
    FROM
    (SELECT
    /*+ FIRST_ROWS(50) */
    NTQ.*,
    ROWNUM RNUM1
    FROM
    (SELECT
    /*+ INDEX(DL_TSD_DEFTR_CI) */
    FROM ima_ol.DL_TSD_SITUATION s
    WHERE (CATSEARCH(DEF_TRANS,'milano ',NULL)>0)
    AND (s.FORECAST = 0)
    AND (s.STATE IN (1,0,4))
    AND (s.ARCH_STATE = 0)
    ORDER BY s.VET_TS DESC
    ) NTQ
    WHERE ROWNUM <=50
    WHERE RNUM1 >=1
    my oracle and system version:
    Oracle Database 11g Enterprise Edition Release 11.1.0.7.0 - 64bit Production
    PL/SQL Release 11.1.0.7.0 - Production
    "CORE     11.1.0.7.0     Production"
    TNS for Solaris: Version 11.1.0.7.0 - Production
    NLSRTL Version 11.1.0.7.0 - Production
    I have suggested that to solve the problem I should alter the statistics of the offending table to force to use this index .. how do I do? thanks in advance

  • Oracle text indexes

    Can anybody know is it possible to create two oracle text indexes on one column, for example, CTXCAT index and CTXRULE index and what will be during the querying of that column? is it a good practise?
    Thanks in advance.

    When in doubt, test and see.  Yes, you can create two different types of Oracle Text indexes on the same column.  If you create a CTXCAT index and a CTXRULE index, then queries using CATSEARCH will use the CTXCAT index and queries using MATCHES will use the CTXRULE index.  When querying with CATSEARCH, it will find all rows where the terms searched for are found within the column value.  When querying with MATCHES, it does the opposite, and finds all rows where the column values are found within the terms searched for.  Please see the demonstration below.  As to whether it is a good practice, it depends on what you need.  If you need both types of searches, then yes.  If not, then no, it would be unnecessary overhead.
    SCOTT@orcl_11gR2> create table test_tab (test_col  varchar2(60))
      2  /
    Table created.
    SCOTT@orcl_11gR2> insert all
      2  into test_tab values ('test')
      3  into test_tab values ('data')
      4  into test_tab values ('test data')
      5  into test_tab values ('other stuff')
      6  select * from dual
      7  /
    4 rows created.
    SCOTT@orcl_11gR2> create index ctxcat_idx on test_tab (test_col)
      2  indextype is ctxsys.ctxcat
      3  /
    Index created.
    SCOTT@orcl_11gR2> create index ctxrule_idx on test_tab (test_col)
      2  indextype is ctxsys.ctxrule
      3  /
    Index created.
    SCOTT@orcl_11gR2> set autotrace on explain
    SCOTT@orcl_11gR2> select * from test_tab
      2  where  catsearch (test_col, 'test data', null) > 0
      3  /
    TEST_COL
    test data
    1 row selected.
    Execution Plan
    Plan hash value: 399706479
    | Id  | Operation                   | Name       | Rows  | Bytes | Cost (%CPU)| Time     |
    |   0 | SELECT STATEMENT            |            |     1 |    44 |     3   (0)| 00:00:01 |
    |   1 |  TABLE ACCESS BY INDEX ROWID| TEST_TAB   |     1 |    44 |     3   (0)| 00:00:01 |
    |*  2 |   DOMAIN INDEX              | CTXCAT_IDX |       |       |            |          |
    Predicate Information (identified by operation id):
       2 - access("CTXSYS"."CATSEARCH"("TEST_COL",'test data',NULL)>0)
    Note
       - dynamic sampling used for this statement (level=2)
    SCOTT@orcl_11gR2> select * from test_tab
      2  where  matches (test_col, 'test data') > 0
      3  /
    TEST_COL
    test
    data
    test data
    3 rows selected.
    Execution Plan
    Plan hash value: 1476734355
    | Id  | Operation                   | Name        | Rows  | Bytes | Cost (%CPU)| Time     |
    |   0 | SELECT STATEMENT            |             |     1 |    44 |     1   (0)| 00:00:01 |
    |   1 |  TABLE ACCESS BY INDEX ROWID| TEST_TAB    |     1 |    44 |     1   (0)| 00:00:01 |
    |*  2 |   DOMAIN INDEX              | CTXRULE_IDX |       |       |     0   (0)| 00:00:01 |
    Predicate Information (identified by operation id):
       2 - access("CTXSYS"."MATCHES"("TEST_COL",'test data')>0)
    Note
       - dynamic sampling used for this statement (level=2)
    SCOTT@orcl_11gR2>
    Message was edited by: BarbaraBoehmer

  • Process for Oracle Text

    am working as a dba and we plan to introduce oracle text for text search. since I am new to this concept, I would like to know step by step implementation of Oracle text. I've searched some web sites but still not clear on the implementation part.
    Please help me out

    Hi,
    Oracle Text is included in both standard and enterprise editions of the data server. When you are creating your database, select Oracle Text during configuration (one of the options). You will then have Oracle Text available on your database. The schema name is CTXSYS. You need to unlock this account just like any other.
    To use Text, either grant permissions on the specific objects you need for the user, or use the CTXAPP role. It is up to you to know the permissions required for the objects (in other words, I can't tell you your requirements), so research this in the reference manual.
    At this point, it is ready to use. Just create your indexes according to the Oracle Text Developer's Guide (you can find this with all of the documentation - look at the Application Developer's tab in Doc Library). Your search syntax depends totally on your requirements, and the type of index you choose to create. For example, the CONTEXT index uses the CONTAINS operator, and the CTXCAT index uses CATSEARCH (unless of course you want to use templates, but let's not go there just yet...).
    There are two references you will want to review: The Oracle Text Developer's Guide and the Oracle Text Reference.
    Hope this helps,
    Ron

  • Does Oracle Text need to be "enabled"?

    We want to start using Oracle Text. Does it need to be "enabled"? Any scripts that need to be run first?
    Oracle version 10.1.0.4

    Hi,
    There is nothing to "enable" in order to use Oracle Text. But you need to create domain indexes(CONTEXT, CTXCAT, CTXRULE etc) depending on which features of Oracle Text you want to use. Also, in order to use some of the Oracle Text procedures and packages, you will need the CTXAPP role assigned to you.
    Enjoy searching!
    Regards,
    VenkatR

  • Should Oracle Text be used here?

    Hi,
    We are developing a search feature for a bank that has thousands of documents. Each document has a set of free-form comments written by multiple bank officials. The comments are in a table in a Oracle 9i database. The comments can be 10 to 10000 characters. The actual documents are not available in the database. Only a document identifier is kept in the table containing the comments.
    The search engine will have single-word as well as phrase searches. Do you think using Oracle Text is the best approach for such a search facility?
    Thanks
    Yash

    Sameer,
    I guess I was right about it being personal. I do feel like we're getting somewhere though! DETAILS!!!
    This isn't about me, you, or Text. The posts are to help those that need assistance, and for some of us (like me) to gain some exposure to problems I have not run into. You'll see a fair amount of research goes into many people's posts here.
    Your earlier write-up telling the person to stay away provided no specifics about your situation. It was alarmist. They might have totally different requirements than you, so it pays to ask follow-up questions to find out if they are going to hit what you did.
    I don't mind 'talking crap' about something so long as it is specific and can be addressed, or people can find situations where it should/should not be used by comparing their system to yours. If I tell you that cars suck because they break down, it isn't terribly useful to anyone. If I tell you that the 1982 Pontiac Bonneville's transmission needed to be replaced 13 times since I bought it...that kind of detail might be of some use to someone with that car. This is what I was soliciting from you.
    What do we have from your last post:
    * 9.2.0.5
    * Millions of users per day
    * Peak time the box was only 5-10% idle
    * The problem was with queries on a CONTEXT index
    (rather than the indexing process itself)
    * You couldn't use CTXCAT because of application requirements
    Two things you said that I will agree with.
    1) CONTAINS queries can be costly. There are some ways to improve the performance if you post more detail about your requirements. This is the first time you mentioned that you were using the CONTEXT index instead of CTXCAT...good information. It would indicate that your 'this totally sucks' gut response from earlier is from the perspective of CONTEXT and doesn't extend to the other index types (or am I mistaken?).
    2) 10g has improved upon some things. Later patchsets on Release 1, and the newly available Release 2 have a different filter as well. 9i made some major changes from prior releases though, so if 9i is the only option, I'd still take it.
    If you are interested in continuing, I'd like to find out how many indexes, the number of documents indexed, the size of the index tables and data tables, and some more about the application. Give us your best shot.
    As for hurting the ego...I'll still sleep well tonight. I would just like to see this remain a constructive place for people to post questions and try out solutions. That can't happen if the replies are a blanket 'stay away' without justification or a matching of requirements to problems.
    Finally, feel free to post to anything I participate in...just remember that I'm not shy (and neither are you it seems), so if a debate happens it will likely be lively.
    -Ron

  • Oracle Text - CTX Context Index Soundex Problem

    Hi,
    I'm running into a problem with Oracle Text when searching using the ! (soundex) option. I've created a simple test example to highlight the issue.
    Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bit
    Windows 2008 Server 64-bit
    create table test_tab (test_col  varchar2(200));
    insert all
      into test_tab (test_col) values ('ab-tönes')
      into test_tab (test_col) values ('ab-tones')
      into test_tab (test_col) values ('abtones')
      into test_tab (test_col) values ('ab tones')
      into test_tab (test_col) values ('ab-tanes')
      select * from dual
    select * from test_tab
    begin
          ctx_ddl.create_preference ('test_lex1', 'basic_lexer');
          ctx_ddl.set_attribute ('test_lex1', 'whitespace', '/\|-_+&''');
          ctx_ddl.set_attribute('test_lex1','base_letter','YES');
          -- ctx_ddl.set_attribute('test_lex1','skipjoins','-');
    end;
    create index test_idx on test_tab (test_col)
      indextype is ctxsys.context
        parameters
          ('lexer        test_lex1'     
    select token_text from dr$test_idx$i;
    TOKEN_TEXT
    AB
    ABTONES
    TANES
    TONES
    select * from test_tab where contains (test_col, '!ab tones') > 0;
    TEST_COL
    ab-tönes
    ab-tones
    ab tones
    select * from test_tab where soundex(test_col) = soundex('ab tones');
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    So my question is, can anyone suggest an approach whereby I can get the Oracle Text Context index (or CTXCAT index if it's more appropriate) to return all 5 rows like the simple Soundex is doing?
    I can't really use soundex as this search query will form part of a search screen for a multi-language application. Soundex is limited to English sounding words, so I need the solution to be able to compare strings that may not "sound" English.
    It must be an attribute of the BASIC_LEXER, and I've tried skipjoins, start/end-joins, stop lists, but I just cannot get the Soundex feature of Oracle Text to function like the SOUNDEX() function!
    Looking at how the tokens are stored in dr$test_idx$i, I need Oracle Text to almost concatenate 'AB' and 'TONES' so that they are searched as a single string.
    Any help greatly appreciated.
    Thanks,

    I am not getting the same problem that you are getting with the umlaut, but I don't see what is different.  Please post the result of:
    select ctx_report.create_index_script ('test_idx') from dual;
    Here are the results on my system.  Perhaps you can spot the difference.  I added an empty_stoplist, so that it won't print out a long list of stopwords.
    SCOTT@orcl12c> create table test_tab (test_col    varchar2(200))
      2  /
    Table created.
    SCOTT@orcl12c> insert all
      2    into test_tab (test_col) values ('ab-tönes')
      3    into test_tab (test_col) values ('ab-tones')
      4    into test_tab (test_col) values ('abtones')
      5    into test_tab (test_col) values ('ab tones')
      6    into test_tab (test_col) values ('ab-tanes')
      7  select * from dual
      8  /
    5 rows created.
    SCOTT@orcl12c> select * from test_tab
      2  /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> begin
      2    ctx_ddl.create_preference ('test_lex1', 'basic_lexer');
      3    ctx_ddl.set_attribute('test_lex1','base_letter','YES');
      4  end;
      5  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> create or replace procedure test_proc
      2    (p_rowid in          rowid,
      3      p_clob    in out nocopy clob)
      4  as
      5  begin
      6    select replace (translate (test_col, '/\|-_+&''', '      '), ' ', '')
      7    into   p_clob
      8    from   test_tab
      9    where  rowid = p_rowid;
    10  end test_proc;
    11  /
    Procedure created.
    SCOTT@orcl12c> show errors
    No errors.
    SCOTT@orcl12c> begin
      2    ctx_ddl.create_preference ('test_ds', 'user_datastore');
      3    ctx_ddl.set_attribute ('test_ds', 'procedure', 'test_proc');
      4  end;
      5  /
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> create index test_idx on test_tab (test_col)
      2    indextype is ctxsys.context
      3    parameters
      4       ('lexer    test_lex1
      5         datastore    test_ds
      6         stoplist    ctxsys.empty_stoplist')
      7  /
    Index created.
    SCOTT@orcl12c> select token_text from dr$test_idx$i
      2  /
    TOKEN_TEXT
    ABTANES
    ABTONES
    2 rows selected.
    SCOTT@orcl12c> variable search_string varchar2(100)
    SCOTT@orcl12c> exec :search_string := 'ab tones'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> select * from test_tab
      2  where  contains
      3            (test_col,
      4             '!' || replace (:search_string, ' ', ' !') ||
      5             ' or !' || replace (:search_string, ' ', '')) > 0
      6  /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> exec :search_string := 'abtones'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> exec :search_string := 'ab tönes'
    PL/SQL procedure successfully completed.
    SCOTT@orcl12c> /
    TEST_COL
    ab-tönes
    ab-tones
    abtones
    ab tones
    ab-tanes
    5 rows selected.
    SCOTT@orcl12c> select ctx_report.create_index_script ('test_idx') from dual
      2  /
    CTX_REPORT.CREATE_INDEX_SCRIPT('TEST_IDX')
    begin
      ctx_ddl.create_preference('"TEST_IDX_DST"','USER_DATASTORE');
      ctx_ddl.set_attribute('"TEST_IDX_DST"','PROCEDURE','"SCOTT"."TEST_PROC"');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_FIL"','NULL_FILTER');
    end;
    begin
      ctx_ddl.create_section_group('"TEST_IDX_SGP"','NULL_SECTION_GROUP');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_LEX"','BASIC_LEXER');
      ctx_ddl.set_attribute('"TEST_IDX_LEX"','BASE_LETTER','YES');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_WDL"','BASIC_WORDLIST');
      ctx_ddl.set_attribute('"TEST_IDX_WDL"','STEMMER','ENGLISH');
      ctx_ddl.set_attribute('"TEST_IDX_WDL"','FUZZY_MATCH','GENERIC');
    end;
    begin
      ctx_ddl.create_stoplist('"TEST_IDX_SPL"','BASIC_STOPLIST');
    end;
    begin
      ctx_ddl.create_preference('"TEST_IDX_STO"','BASIC_STORAGE');
      ctx_ddl.set_attribute('"TEST_IDX_STO"','R_TABLE_CLAUSE','lob (data) store as (
    cache)');
      ctx_ddl.set_attribute('"TEST_IDX_STO"','I_INDEX_CLAUSE','compress 2');
    end;
    begin
      ctx_output.start_log('TEST_IDX_LOG');
    end;
    create index "SCOTT"."TEST_IDX"
      on "SCOTT"."TEST_TAB"
          ("TEST_COL")
      indextype is ctxsys.context
      parameters('
        datastore       "TEST_IDX_DST"
        filter          "TEST_IDX_FIL"
        section group   "TEST_IDX_SGP"
        lexer           "TEST_IDX_LEX"
        wordlist        "TEST_IDX_WDL"
        stoplist        "TEST_IDX_SPL"
        storage         "TEST_IDX_STO"
    begin
      ctx_output.end_log;
    end;
    1 row selected.

Maybe you are looking for